In [1]:
import numpy as np
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Dropout, Activation, Flatten, MaxPooling2D, BatchNormalization
from keras import optimizers
from keras.layers.core import Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras import backend as K
from keras import regularizers
from keras.callbacks import EarlyStopping, TensorBoard

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


## Model

In [15]:
class Vgg:
    """Compact VGG-style CNN for CIFAR-10 plus helpers to train and score it.

    The instance does not own the Keras model: ``build_model`` returns it and
    the other methods receive it as an argument.  The instance only remembers
    the number of classes, the fitted data generator and the test split.

    NOTE: a later cell of this notebook defines another class that is also
    named ``Vgg``; once that cell has run, this definition is shadowed.
    """

    def __init__(self):
        self.num_classes = None  # set by build_model()
        self.x_test = None       # set by train(): standardized test images
        self.y_test = None       # set by train(): one-hot test labels
        self.datagen = None      # set by train(): fitted ImageDataGenerator

    def build_model(self, input_shape=(32, 32, 3), num_classes=10, l2_reg=1e-7):
        """Build (but do not compile) the network.

        Args:
            input_shape: shape of one input image, passed to the first Conv2D.
            num_classes: size of the softmax output; also stored on ``self``.
            l2_reg: L2 penalty factor applied to every Conv2D kernel and to
                the 512-unit Dense kernel.

        Returns:
            An uncompiled ``Sequential`` model.
        """
        self.num_classes = num_classes
        model = Sequential()

        def add_conv(filters, **conv_kwargs):
            # 3x3 "same" convolution -> LeakyReLU -> BatchNorm.
            model.add(Conv2D(filters, (3, 3), padding='same',
                             kernel_regularizer=regularizers.l2(l2_reg),
                             **conv_kwargs))
            model.add(LeakyReLU())
            model.add(BatchNormalization())

        # Block 1:
        add_conv(32, input_shape=input_shape)
        add_conv(32)
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.3))

        # Block 2:
        add_conv(64)
        add_conv(64)
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.3))

        # Block 3 (dropout sits between the two convolutions here):
        add_conv(128)
        model.add(Dropout(0.4))
        add_conv(128)
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # Block 4: classifier head.
        model.add(Flatten())
        model.add(Dense(512, kernel_regularizer=regularizers.l2(l2_reg)))
        model.add(LeakyReLU())
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))

        return model


    def predict(self, x, model, batch_size=128):
        """Return ``model.predict(x, batch_size)``.

        ``x`` is passed through unchanged.  Training batches are standardized
        by the data generator, so callers should standardize ``x`` the same
        way first, e.g. ``self.datagen.standardize(x.astype('float32'))``.
        """
        return model.predict(x, batch_size)


    def train(self, model, batch_size=128, max_epoches=250, lr=0.001, lr_decay=2e-6,
              initial_epoch=1, callbacks=None):
        """Load CIFAR-10, compile ``model`` with Adam and train it in 5-epoch chunks.

        Args:
            model: model returned by ``build_model``; it is (re)compiled here.
            batch_size: mini-batch size; also the size of the validation sample.
            max_epoches: 1-based number of the last epoch that may be run.
            lr: Adam learning rate.
            lr_decay: Adam ``decay`` argument.
            initial_epoch: 1-based number of the first epoch to run (Keras is
                given ``initial_epoch - 1``).
            callbacks: Keras callbacks passed to every ``fit_generator`` call;
                ``None`` means no callbacks.

        Side effects:
            Stores the fitted generator and the standardized test split on
            ``self``; writes ``cifar10_vgg_<epoch>.h5`` between chunks and
            ``cifar10_vgg_weights.h5`` / ``cifar10_vgg_model.h5`` at the end.
        """
        # A fresh list per call instead of a shared mutable default argument.
        callbacks = [] if callbacks is None else list(callbacks)

        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        self.y_test = keras.utils.to_categorical(y_test, self.num_classes)

        datagen = ImageDataGenerator(
            featurewise_center=True,
            featurewise_std_normalization=True,
            rotation_range=15,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)

        # Compute the per-channel mean/std used by the featurewise options.
        datagen.fit(x_train)
        self.datagen = datagen

        # The generator standardizes training batches, so the test images must
        # get the same statistics applied; otherwise get_test_score() would
        # evaluate on raw 0..255 pixels.
        self.x_test = datagen.standardize(x_test.astype('float32'))

        # optimization details
        adam = optimizers.Adam(lr=lr, decay=lr_decay)
        model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

        self._fit_in_chunks(model, datagen, x_train, y_train,
                            batch_size, max_epoches, initial_epoch, callbacks)

        model.save_weights('cifar10_vgg_weights.h5')
        model.save('cifar10_vgg_model.h5')


    def _fit_in_chunks(self, model, datagen, x_train, y_train,
                       batch_size, max_epoches, initial_epoch, callbacks):
        """Run ``fit_generator`` in chunks of 5 epochs, checkpointing between chunks."""
        chunk = 5
        for epoch in range(initial_epoch, max_epoches, chunk):
            if epoch > initial_epoch:
                model.save_weights('cifar10_vgg_' + str(epoch-1) + '.h5')

            # NOTE(review): the validation sample is drawn from the training
            # set, so val_loss is not a held-out estimate.
            val_indices = np.random.permutation(np.arange(x_train.shape[0]))[:batch_size]
            # Standardize the validation images exactly like the generator
            # standardizes the training batches.
            x_val = datagen.standardize(x_train[val_indices].astype('float32'))

            model.fit_generator(
                datagen.flow(x_train, y_train, batch_size=batch_size),
                steps_per_epoch=x_train.shape[0] // batch_size,
                # Never run past max_epoches, even when the range is not a
                # multiple of the chunk size.
                epochs=min(epoch + chunk - 1, max_epoches),
                validation_data=(x_val, y_train[val_indices]),
                initial_epoch=epoch-1, callbacks=callbacks
            )

            # A callback such as EarlyStopping sets model.stop_training; honour
            # it here, otherwise the next chunk would simply resume training.
            if getattr(model, 'stop_training', False):
                break


    def get_test_score(self, model):
        """Evaluate ``model`` on the stored test split, print loss/accuracy, return ``model``."""
        # Score trained model.
        scores = model.evaluate(self.x_test, self.y_test, verbose=1)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        return model

In [47]:
def normalize(X, mean, std):
    """Shift ``X`` by ``mean`` and scale it by ``std``.

    A small constant (1e-7) is added to ``std`` so that a zero standard
    deviation does not cause a division by zero.
    """
    centered = X - mean
    scale = std + 1e-7
    return centered / scale

In [60]:
class Vgg:
    """Reduced VGG-style CNN for CIFAR-10 plus helpers to train and score it.

    The instance does not own the Keras model: ``build_model`` returns it and
    the other methods receive it as an argument.  The instance only remembers
    the number of classes, the normalization statistics and the test split.
    """

    def __init__(self):
        self.num_classes = None  # set by build_model()
        self.x_test = None       # set by train(): normalized test images
        self.y_test = None       # set by train(): one-hot test labels
        self.mean = None         # set by train(): global mean of the training pixels
        self.std = None          # set by train(): global std of the training pixels

    def build_model(self, input_shape=(32, 32, 3), num_classes=10, l2_reg=1e-7):
        """Build (but do not compile) the network.

        Args:
            input_shape: shape of one input image, passed to the first Conv2D.
            num_classes: size of the softmax output; also stored on ``self``.
            l2_reg: L2 penalty factor applied to every Conv2D kernel and to
                the 512-unit Dense kernel.

        Returns:
            An uncompiled ``Sequential`` model.
        """
        self.num_classes = num_classes
        model = Sequential()

        def add_conv(filters, **conv_kwargs):
            # 3x3 "same" convolution -> LeakyReLU -> BatchNorm.
            model.add(Conv2D(filters, (3, 3), padding='same',
                             kernel_regularizer=regularizers.l2(l2_reg),
                             **conv_kwargs))
            model.add(LeakyReLU())
            model.add(BatchNormalization())

        # Block 1:
        add_conv(32, input_shape=input_shape)
        model.add(Dropout(0.25))
        add_conv(32)
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Block 2:
        add_conv(64)
        model.add(Dropout(0.25))
        add_conv(64)
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Block 3 (a 128-filter convolutional block) is intentionally left out
        # of this variant to speed up training.

        # Block 4: classifier head.
        model.add(Flatten())
        model.add(Dense(512, kernel_regularizer=regularizers.l2(l2_reg)))
        model.add(LeakyReLU())
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))

        return model


    def predict(self, x, model, batch_size=128):
        """Return ``model.predict(x, batch_size)``.

        ``x`` is passed through unchanged.  ``train`` feeds the model
        normalized images, so callers should pass
        ``normalize(x, self.mean, self.std)`` rather than raw pixels.
        """
        return model.predict(x, batch_size)


    def train(self, model, batch_size=128, max_epoches=250, lr=0.001, lr_decay=2e-6,
              initial_epoch=1, callbacks=None):
        """Load CIFAR-10, compile ``model`` with Adam and train it in 5-epoch chunks.

        Args:
            model: model returned by ``build_model``; it is (re)compiled here.
            batch_size: mini-batch size; also the size of the validation sample.
            max_epoches: 1-based number of the last epoch that may be run.
            lr: Adam learning rate.
            lr_decay: Adam ``decay`` argument.
            initial_epoch: 1-based number of the first epoch to run (Keras is
                given ``initial_epoch - 1``).
            callbacks: Keras callbacks passed to every ``fit_generator`` call;
                ``None`` means no callbacks.

        Side effects:
            Stores the normalization statistics and the normalized test split
            on ``self``; writes ``cifar10_vgg_<epoch>.h5`` between chunks and
            ``cifar10_vgg_weights.h5`` / ``cifar10_vgg_model.h5`` at the end.
        """
        # A fresh list per call instead of a shared mutable default argument.
        callbacks = [] if callbacks is None else list(callbacks)

        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        self.y_test = keras.utils.to_categorical(y_test, self.num_classes)

        # Normalize both splits with statistics of the training set only, and
        # keep the statistics so that predict() callers can reuse them.
        mean, std = np.mean(x_train), np.std(x_train)
        self.mean, self.std = mean, std
        x_train = normalize(x_train, mean, std)
        self.x_test = normalize(x_test, mean, std)

        # Augmentation only: the featurewise options are off because the data
        # is already normalized above, so the generator needs no fit() call.
        datagen = ImageDataGenerator(
            featurewise_center=False,
            featurewise_std_normalization=False,
            rotation_range=15,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)

        # optimization details
        adam = optimizers.Adam(lr=lr, decay=lr_decay)
        model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

        self._fit_in_chunks(model, datagen, x_train, y_train,
                            batch_size, max_epoches, initial_epoch, callbacks)

        model.save_weights('cifar10_vgg_weights.h5')
        model.save('cifar10_vgg_model.h5')


    def _fit_in_chunks(self, model, datagen, x_train, y_train,
                       batch_size, max_epoches, initial_epoch, callbacks):
        """Run ``fit_generator`` in chunks of 5 epochs, checkpointing between chunks."""
        chunk = 5
        for epoch in range(initial_epoch, max_epoches, chunk):
            if epoch > initial_epoch:
                model.save_weights('cifar10_vgg_' + str(epoch-1) + '.h5')

            # NOTE(review): the validation sample is drawn from the training
            # set, so val_loss is not a held-out estimate.
            val_indices = np.random.permutation(np.arange(x_train.shape[0]))[:batch_size]

            model.fit_generator(
                datagen.flow(x_train, y_train, batch_size=batch_size),
                steps_per_epoch=x_train.shape[0] // batch_size,
                # Never run past max_epoches, even when the range is not a
                # multiple of the chunk size.
                epochs=min(epoch + chunk - 1, max_epoches),
                validation_data=(x_train[val_indices], y_train[val_indices]),
                initial_epoch=epoch-1, callbacks=callbacks
            )

            # A callback such as EarlyStopping sets model.stop_training; honour
            # it here, otherwise the next chunk would simply resume training.
            if getattr(model, 'stop_training', False):
                break


    def get_test_score(self, model):
        """Evaluate ``model`` on the stored test split, print loss/accuracy, return ``model``."""
        # Score trained model.
        scores = model.evaluate(self.x_test, self.y_test, verbose=1)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])
        return model

## Train

In [61]:
# Create the helper object; it keeps num_classes and the test split between calls.
vgg = Vgg()

In [62]:
# Build the network with an L2 penalty of 1e-4 (build_model's default is 1e-7).
model = vgg.build_model(l2_reg=1e-4)

In [63]:
# Stop a fit_generator call when val_loss has not improved by at least 0.05
# for 2 epochs. train() passes these callbacks to every 5-epoch chunk.
callbacks = [
    EarlyStopping(monitor='val_loss', min_delta=0.05, patience=2, verbose=0, mode='auto'),
#     TensorBoard(log_dir='./logs', histogram_freq=1, batch_size=128, write_images=True)
]
# First run: start at epoch 1 with batch size 128 and lr 1e-3.
vgg.train(model, batch_size=128, max_epoches=250, lr=0.001, lr_decay=2e-06, initial_epoch=1, callbacks=callbacks)
# get_test_score() prints loss/accuracy and returns the model, so as the last
# expression of the cell it also echoes the model's repr.
vgg.get_test_score(model)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 55/390 [===>..........................] - ETA: 8:34 - loss: 1.3938 - acc: 0.5473

KeyboardInterrupt: 

In [65]:
# Same early-stopping setup as in the previous run.
callbacks = [
    EarlyStopping(monitor='val_loss', min_delta=0.05, patience=2, verbose=0, mode='auto'),
#     TensorBoard(log_dir='./logs', histogram_freq=1, batch_size=128, write_images=True)
]
# Second run: continue from epoch 5 with a smaller batch (32) and lr 8e-4.
# train() reloads CIFAR-10 and re-compiles the model with a new Adam instance
# on every call; the model object (and its weights) is the one trained above.
vgg.train(model, batch_size=32, max_epoches=250, lr=0.0008, lr_decay=2e-06, initial_epoch=5, callbacks=callbacks)
vgg.get_test_score(model)

Epoch 5/9
Epoch 6/9
  83/1562 [>.............................] - ETA: 10:32 - loss: 1.5101 - acc: 0.5331

KeyboardInterrupt: 

Сеть обучается очень медленно. Попробую уменьшить штраф за веса (L2-регуляризацию) и увеличить lr.

In [66]:
# Snapshot the weights of the interrupted run so they can be loaded into a rebuilt model.
model.save_weights('cifar10_vgg_weights.h5')

In [68]:
# Rebuild the same architecture with a weaker L2 penalty (5e-5 instead of 1e-4).
model = vgg.build_model(l2_reg=5e-5)

In [69]:
# Restore the weights saved from the previous model into the rebuilt one.
model.load_weights('cifar10_vgg_weights.h5')

In [71]:
# Same early-stopping setup as in the previous runs.
callbacks = [
    EarlyStopping(monitor='val_loss', min_delta=0.05, patience=2, verbose=0, mode='auto'),
#     TensorBoard(log_dir='./logs', histogram_freq=1, batch_size=128, write_images=True)
]
# Third run: continue from epoch 6 with batch size 64 and a higher lr (3e-3).
# train() compiles the model again, so Adam starts from a fresh optimizer state.
vgg.train(model, batch_size=64, max_epoches=250, lr=0.003, lr_decay=2e-06, initial_epoch=6, callbacks=callbacks)
vgg.get_test_score(model)

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 11/15
Epoch 12/15

KeyboardInterrupt: 

In [72]:
# Training was interrupted by hand, so train() never reached its final save; store the weights here.
model.save_weights('cifar10_vgg_weights.h5')

In [73]:
# Evaluate on the test split stored by the last train() call; the model it returns is echoed as the cell output.
vgg.get_test_score(model)

Test loss: 1.1491333335876466
Test accuracy: 0.6956


<keras.models.Sequential at 0x1730865c0>

## Отчет
Изначально в качестве модели была выбрана архитектура VGG-16, как сравнительно небольшая из широко известных архитектур свёрточных сетей.

Когда не получилось настроить амазоновский сервер для вычислений на gpu, я понял, что ноутбук не успеет за несколько часов нормально обучить сеть таких размеров. 

После этого я выбросил половину слоёв (каждый «повторяющийся», т.е. conv(64)+conv(64) => conv(64)).

Но этого также оказалось мало. 

В итоге пришлось отказаться от некоторых завершающих свёрточных блоков, которые отвечали за более высокоуровневые признаки изображений, ради небольшого прироста скорости.

```

```

```

```

```

```


# Report

All creative approaches are highly welcome, but at the very least it would be great to mention
* the idea;
* brief history of tweaks and improvements;
* what is the final architecture and why?
* what is the training method and, again, why?
* Any regularizations and other techniques applied and their effects;


There is no need to write strict mathematical proofs (unless you want to).
 * "I tried this, this and this, and the second one turned out to be better. And I just didn't like the name of that one" - OK, but can be better
 * "I have analyzed these and these articles|sources|blog posts, tried that and that to adapt them to my problem and the conclusions are such and such" - the ideal one
 * "I took that code that demo without understanding it, but I'll never confess that and instead I'll make up some pseudoscientific explanation" - __not_ok__

### Hi, my name is `___ ___`, and here's my story

A long time ago in a galaxy far, far away, when it was still more than an hour before the deadline, I got an idea:

##### I'm gonna build a neural network that
* brief text on what was
* the original idea
* and why it was so

How could i be so naive?!

##### One day, with no signs of warning,
This thing has finally converged and
* Some explanation of what the results were,
* what worked and what didn't
* most importantly - what next steps were taken, if any
* and what were their respective outcomes

##### Finally, after __  iterations, __ mugs of [tea/coffee]
* what was the final architecture
* as well as training method and tricks

That, having wasted ____ [minutes, hours or days] of my life training, got

* accuracy on training: __
* accuracy on validation: __
* accuracy on test: __


[an optional afterword and mortal curses on assignment authors]