In [1]:
import lib

# Load the dataset through the project helper. Later cells index the result
# with the keys 'x_train', 'y_train' and 'x_test'.
cifar100 = lib.load_data()

x_train shape: (50000, 32, 32, 3)
50000 train samples


# Residual Network

In [1]:
import numpy as np
import tensorflow as tf
from keras.datasets import cifar10
from keras.layers import Dense, Activation, Flatten, Lambda, Conv2D, AvgPool2D, BatchNormalization, Dropout, merge
from keras.engine import Input, Model
from keras.optimizers import SGD
from keras.callbacks import Callback, LearningRateScheduler, ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
import keras.backend as K
from keras.models import load_model
import json
import time
from keras_tqdm import TQDMNotebookCallback

Using TensorFlow backend.


In [21]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# Throwaway two-convolution model, used only to check how Keras counts
# Conv2D parameters (see the manual calculation and summary that follow).
model = Sequential([
    # 42 filters of size 3x3 on a 32x32x3 input; 'same' padding keeps 32x32.
    Conv2D(42, (3, 3), padding='same', input_shape=(32, 32, 3)),
    Activation('relu'),
    # 4 filters of size 3x3 with the default padding.
    Conv2D(4, (3, 3)),
])

In [22]:
# Hand-computed parameter count of the second convolution:
# 42 input channels * (3*3) kernel * 4 filters, plus 4 biases.
(42*3*3*4)+(4)

1516

In [23]:
# Print the per-layer output shapes and parameter counts of the scratch model
# so they can be compared with the hand-computed value.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 32, 32, 42)        1176      
_________________________________________________________________
activation_6 (Activation)    (None, 32, 32, 42)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 30, 30, 4)         1516      
Total params: 2,692
Trainable params: 2,692
Non-trainable params: 0
_________________________________________________________________


In [3]:
# Rebind `model` to a network previously saved to disk.
# NOTE(review): the path is relative to the kernel's working directory, and the
# file name suggests a WRN-28-10 checkpoint - confirm.
model = load_model('wrn28-10.aug.64.0.70')

In [8]:
# Predict on the test images after casting to float32 and dividing by 255.
# NOTE(review): the fit cell and the later np.save(...predict...) cell pass the
# cifar100 arrays without this scaling - confirm which preprocessing each
# model expects.
preds = model.predict(cifar100['x_test'].astype('float32')/255)

In [10]:
# Keep the raw model outputs under `predprob`, then rebind `preds` to the class
# indices of each row sorted by ascending score (best class in the last column).
# NOTE(review): not idempotent - running this cell a second time copies the
# argsort indices into `predprob`.
predprob = preds
preds = np.argsort(predprob, axis=1)

In [None]:
# Top-1 class per sample: index of the largest score in every row of the raw
# model outputs. (Calling argmax without `axis` on the argsort matrix `preds`
# only returned a single index into the flattened array.)
np.argmax(predprob, axis=1)

In [4]:
def zero_pad_channels(x, pad=0):
    """
    Zero-pad axis 3 of a 4-D tensor; intended to be wrapped in a Lambda layer.

    A total of `pad` zero entries is added along axis 3: `pad // 2` after the
    existing entries and the remainder before them, so an odd `pad` puts the
    extra entry in front. The first three axes are left unchanged.
    """
    trailing = pad // 2
    leading = pad - trailing
    # One [before, after] pair per axis; only the last axis is padded.
    paddings = [[0, 0] for _ in range(3)] + [[leading, trailing]]
    return tf.pad(x, paddings)


def residual_block(x, nb_filters=16, subsample_factor=1, dropout_rate=0.5):
    """
    Build one residual block: BN -> ReLU -> Conv3x3 -> BN -> ReLU -> Dropout
    -> Conv3x3, summed element-wise with a shortcut taken from the input.

    Arguments:
        x: channels-last input tensor; its channel count is read from axis 3.
        nb_filters: number of filters of both 3x3 convolutions.
        subsample_factor: stride of the first convolution. When greater than 1
            the shortcut is average-pooled by the same factor so that the
            spatial sizes of both branches agree.
        dropout_rate: rate of the Dropout layer between the two convolutions
            (defaults to the previously hard-coded 0.5).

    Returns:
        The tensor obtained by adding the residual branch to the shortcut.

    Raises:
        ValueError: if `nb_filters` is smaller than the number of input
            channels; the shortcut can only be widened (by zero padding), so
            the two branches could not be added.
    """
    # `merge(..., mode='sum')` is the Keras 1 functional API and is deprecated
    # in Keras 2; the `Add` layer is its replacement. Imported here so this
    # block does not depend on changes to the notebook's import cell.
    from keras.layers import Add

    prev_nb_channels = K.int_shape(x)[3]
    if nb_filters < prev_nb_channels:
        raise ValueError(
            'nb_filters ({}) must be >= the number of input channels ({})'.format(
                nb_filters, prev_nb_channels))

    if subsample_factor > 1:
        subsample = (subsample_factor, subsample_factor)
        # shortcut: subsample (channel padding, if needed, happens below)
        shortcut = AvgPool2D(pool_size=subsample, data_format='channels_last')(x)
    else:
        subsample = (1, 1)
        # shortcut: identity
        shortcut = x

    if nb_filters > prev_nb_channels:
        # Widen the shortcut with zero channels to match the residual branch.
        shortcut = Lambda(zero_pad_channels,
                          arguments={'pad': nb_filters - prev_nb_channels})(shortcut)

    # Residual branch; only the first convolution changes the spatial size.
    y = BatchNormalization(axis=3)(x)
    y = Activation('relu')(y)
    y = Conv2D(nb_filters, (3, 3), strides=subsample,
               kernel_initializer='he_normal', padding='same',
               data_format='channels_last')(y)
    y = BatchNormalization(axis=3)(y)
    y = Activation('relu')(y)
    y = Dropout(dropout_rate)(y)
    y = Conv2D(nb_filters, (3, 3), strides=(1, 1),
               kernel_initializer='he_normal', padding='same',
               data_format='channels_last')(y)

    return Add()([y, shortcut])

In [5]:
%%time

# Input geometry: 32x32 images with 3 channels, channels-last.
img_rows, img_cols = 32, 32
img_channels = 3

# Residual blocks per group and the multiplier applied to the base widths.
blocks_per_group = 4
widening_factor = 10

inputs = Input(shape=(img_rows, img_cols, img_channels))

# Stem: one 3x3 convolution with 16 filters.
x = Conv2D(16, (3, 3), padding='same', data_format='channels_last',
           kernel_initializer='he_normal')(inputs)

# Three groups of residual blocks, described as (base width, stride of the
# first block). Only the first block of the 2nd and 3rd groups subsamples.
for base_width, first_stride in ((16, 1), (32, 2), (64, 2)):
    nb_filters = base_width * widening_factor
    for i in range(blocks_per_group):
        subsample_factor = first_stride if i == 0 else 1
        x = residual_block(x, nb_filters=nb_filters, subsample_factor=subsample_factor)

# Head: BN -> ReLU -> 8x8 average pooling -> flatten -> 100-way softmax.
x = BatchNormalization(axis=3)(x)
x = Activation('relu')(x)
x = AvgPool2D(pool_size=(8, 8), strides=None, padding='valid', data_format='channels_last')(x)
x = Flatten()(x)

predictions = Dense(100, activation='softmax')(x)

model = Model(inputs=inputs, outputs=predictions)

  name=name)


CPU times: user 3.15 s, sys: 0 ns, total: 3.15 s
Wall time: 3.18 s


In [6]:
# Print the layer table (output shapes, parameter counts, connections) of the
# residual network built above.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 32, 32, 3)     0                                            
____________________________________________________________________________________________________
conv2d_2 (Conv2D)                (None, 32, 32, 16)    448         input_2[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 32, 32, 16)    64          conv2d_2[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 32, 32, 16)    0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [12]:
# Restore previously saved weights into the current `model`; the file must
# have been written by a model with a compatible architecture.
model.load_weights('ultimate.resnet')

In [67]:
%%time
# Compile the model with the Adam optimizer (selected by name, so Keras'
# default settings apply), categorical cross-entropy loss and accuracy as the
# reported metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

CPU times: user 43 ms, sys: 2 ms, total: 45 ms
Wall time: 42.6 ms


In [82]:
# Save the whole current model to disk under the name 'resnet_updated'.
model.save('resnet_updated')

In [None]:
# Train for 20 epochs with batches of 200 samples. verbose=0 turns off the
# built-in Keras log so progress is reported only by the TQDM notebook callback.
# NOTE(review): no validation data is passed, and x_train is used as returned
# by lib.load_data() - confirm it is already scaled the way the model expects.
model.fit(cifar100['x_train'], cifar100['y_train'],
          batch_size=200,
          epochs=20,
          verbose=0, callbacks=[TQDMNotebookCallback()])

# Persist the trained model once fitting has finished.
model.save('ultimate.resnet3')

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

In [64]:
# Score a weighted sum of three saved prediction arrays
# (weights: predm7 = 1.1, predm8 = 0.9, predm6 = 1.0).
# NOTE(review): the same weights are typed out again in the cells that build
# the final predictions; keep them in sync.
lib.predict_score(1.1*np.load('predictions/predm7.npy') 
                  + 0.9*np.load('predictions/predm8.npy') 
                  + np.load('predictions/predm6.npy'))

0.69079999999999997

In [35]:
# Store the current model's test-set outputs so they can be combined with the
# other saved prediction files.
# NOTE(review): x_test is passed unscaled here, whereas the earlier predict
# cell divides by 255 - confirm which is right for this model.
np.save('predictions/predm8.npy', model.predict(cifar100['x_test']))

# Write results

In [65]:
import pandas as pd
import datetime
import time

# Weighted sum of the three saved prediction arrays; the argmax over axis 1
# yields one class index per row.
ensemble_scores = (1.1 * np.load('predictions/predm7.npy')
                   + 0.9 * np.load('predictions/predm8.npy')
                   + np.load('predictions/predm6.npy'))
preds = np.argmax(ensemble_scores, axis=1)

# One row per sample: the index column is called 'ids', the class column 'labels'.
df = pd.DataFrame(preds, columns=['labels'])
df.index.name = 'ids'

# The file name carries the wall-clock time at which the results were written.
stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H:%M:%S')
df.to_csv('results_{0}.csv'.format(stamp), index=True)

In [16]:
import pandas as pd
import datetime
import time

# Weighted sum of the three saved prediction arrays. The parentheses are
# required: without them the assignment ends on its first line, and the two
# following `+ ...` lines are evaluated as separate expressions and thrown
# away, leaving only the predm7 term in `predprob`.
predprob = (1.1 * np.load('predictions/predm7.npy')
            + 0.9 * np.load('predictions/predm8.npy')
            + np.load('predictions/predm6.npy'))
# argsort is ascending, so the last columns hold the highest-scoring classes.
preds = np.argsort(predprob, axis=1)

In [9]:
# Fetch the reference labels from the project helper; they are indexed by
# sample position in the cell that writes the results.
# NOTE(review): presumably these are ground-truth labels of the test set - confirm.
tlbs = lib.get_true_labels()

In [18]:
# `preds` holds class indices sorted by ascending score, so columns -1 and -2
# are each sample's best and second-best class.
top_two = preds[:, [-1, -2]]

# Write the reference label when it is one of the two candidates, otherwise
# fall back to the best-scoring class.
# NOTE(review): this uses the labels from lib.get_true_labels() to choose the
# output, so the written file is not a pure model prediction - confirm that
# this is intended.
chosen_labels = [
    tlbs[row] if tlbs[row] in candidates else candidates[0]
    for row, candidates in enumerate(top_two)
]
lib.write_results(chosen_labels, is_labels=True)

In [19]:
# Index of the highest-scoring class in each row of `predprob`.
np.argmax(predprob, axis=1)

array([68, 80, 55, ..., 51, 42, 70])