In [2]:
import functools
import os
import pickle
import time

import h5py
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas
from keras import backend
from keras import optimizers
from keras import regularizers
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.models import Sequential, load_model
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

Using TensorFlow backend.


# Loading 20 classes

In [3]:
# Index -> superhero label lookup; a label's position in the list is its class index.
# NOTE(review): presumably this mirrors the alphabetical folder order that
# flow_from_directory assigns -- confirm against train_generator.class_indices.
class_name = dict(enumerate([
    'ant_man',
    'aquaman',
    'batman',
    'black_widow',
    'captain_america',
    'deadpool',
    'deathstroke',
    'doctor_manhattan',
    'flash',
    'green_arrow',
    'green_lantern',
    'hawkeye',
    'hawkgirl',
    'hulk',
    'ironman',
    'spiderman',
    'superman',
    'thor',
    'wolverine',
    'wonder_woman',
]))

# Number of target classes (20), taken from the label table above.
nb_classes = len(class_name)

# Load the images

We load all the images using `ImageDataGenerator`. Because our dataset is small, we augment the training images (shear, zoom and horizontal flip) so that the network sees more varied examples and learns better.

In [5]:
img_width, img_height = 200, 200

train_data_dir = r'superheroes'
validation_data_dir = r'validate'

# Samples drawn per training epoch. The training generator augments on the fly,
# so this is a training budget rather than the number of files on disk.
nb_train_samples = 10000

# Augmentation used for training: rescale pixel values to [0, 1] and apply
# random shear, zoom and horizontal flips.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# class_mode='categorical' makes the generators yield one-hot label vectors.
# NOTE(review): Keras documents target_size as (height, width); width and height
# are equal here, and the model cells use the same (img_width, img_height) order.
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=8,
        class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=8,
        class_mode='categorical')

# Evaluate on exactly one pass over the validation set. A hard-coded count larger
# than the number of files would score some images more often than others.
nb_validation_samples = len(validation_generator.filenames)

Found 7930 images belonging to 20 classes.
Found 494 images belonging to 20 classes.


# CNN with 3 convolutional layers

In [14]:
# Small baseline CNN: three conv -> ReLU -> 2x2 max-pool stages followed by a
# dense classifier head.
model = Sequential()

# Stage 1: 32 filters of 3x3; the first layer also declares the input shape.
model.add(Convolution2D(32, 3, 3, input_shape=(img_width, img_height, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: 32 filters of 3x3.
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3: 64 filters of 3x3.
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: 64 hidden units with dropout, then one softmax output per class.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# Output width follows nb_classes instead of repeating the literal 20.
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Compiling model

We use `categorical_crossentropy` as the loss because this is a multi-class classification problem. We optimize with SGD and track accuracy as the first metric.

In [15]:
# One-hot targets over several classes -> categorical cross-entropy.
# Plain SGD with gradient values clipped to [-0.5, 0.5].
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=0.01, clipvalue=0.5),
              metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_7 (Convolution2D)  (None, 198, 198, 32)  896         convolution2d_input_3[0][0]      
____________________________________________________________________________________________________
activation_10 (Activation)       (None, 198, 198, 32)  0           convolution2d_7[0][0]            
____________________________________________________________________________________________________
maxpooling2d_7 (MaxPooling2D)    (None, 99, 99, 32)    0           activation_10[0][0]              
____________________________________________________________________________________________________
convolution2d_8 (Convolution2D)  (None, 97, 97, 32)    9248        maxpooling2d_7[0][0]             
___________________________________________________________________________________________

We run the model for 20 epochs. We observe overfitting without any significant accuracy improvement. The accuracy obtained was also not satisfactory.

In [16]:
# Train the baseline CNN for 20 epochs, validating after each one.
nb_epoch = 20

# The return value is kept so the per-epoch training history stays available.
hist_little_convet = model.fit_generator(
    train_generator,
    nb_epoch=nb_epoch,
    initial_epoch=0,
    samples_per_epoch=nb_train_samples,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    verbose=1)

Epoch 1/20
   48/10000 [..............................] - ETA: 245s - loss: 3.0412 - acc: 0.0833    

  'to RGBA images')






Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Measuring the top 3 accuracy

In [17]:
# Second, freshly initialised copy of the small CNN, used for the top-3 accuracy run.
model1 = Sequential()

# Three conv -> ReLU -> 2x2 max-pool stages with 32, 32 and 64 filters of 3x3.
# Only the very first layer declares the input shape.
for stage_index, n_filters in enumerate((32, 32, 64)):
    first_layer_kwargs = {'input_shape': (img_width, img_height, 3)} if stage_index == 0 else {}
    model1.add(Convolution2D(n_filters, 3, 3, **first_layer_kwargs))
    model1.add(Activation('relu'))
    model1.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: 64 hidden units with dropout, then one softmax output per class.
model1.add(Flatten())
model1.add(Dense(64))
model1.add(Activation('relu'))
model1.add(Dropout(0.5))
# Output width follows nb_classes instead of repeating the literal 20.
model1.add(Dense(nb_classes))
model1.add(Activation('softmax'))

Since we were comparing our output to that of the Stanford people, we wanted to check whether top-3 accuracy is a useful measurement. It looked promising at first but did not reach the desired value.

In [20]:
# top_k_categorical_accuracy takes k as a keyword argument; bind k=3 so the
# metric can be handed to compile() as a plain two-argument callable.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)

# A functools.partial object has no __name__ of its own; setting one gives the
# metric the "top3_acc" label seen in the training log.
top3_acc.__name__ = 'top3_acc'

model1.compile(loss='categorical_crossentropy',
               optimizer=optimizers.SGD(lr=0.01, clipvalue=0.5),
               metrics=['accuracy', top3_acc])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model1.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_10 (Convolution2D) (None, 198, 198, 32)  896         convolution2d_input_4[0][0]      
____________________________________________________________________________________________________
activation_15 (Activation)       (None, 198, 198, 32)  0           convolution2d_10[0][0]           
____________________________________________________________________________________________________
maxpooling2d_10 (MaxPooling2D)   (None, 99, 99, 32)    0           activation_15[0][0]              
____________________________________________________________________________________________________
convolution2d_11 (Convolution2D) (None, 97, 97, 32)    9248        maxpooling2d_10[0][0]            
___________________________________________________________________________________________

In [21]:
# Train the top-3 variant for 10 epochs, validating after each one.
nb_epoch = 10

# NOTE: this reuses the name hist_little_convet, so the history object of the
# earlier 20-epoch run is replaced by this one.
hist_little_convet = model1.fit_generator(
    train_generator,
    nb_epoch=nb_epoch,
    initial_epoch=0,
    samples_per_epoch=nb_train_samples,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    verbose=1)

Epoch 1/10
  144/10000 [..............................] - ETA: 278s - loss: 3.0400 - acc: 0.0556 - top3_acc: 0.1667    

  'to RGBA images')






Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Transfer learning with VGG16 to improve results

In [4]:
def build_vgg16(framework='tf'):
    """Build the convolutional part of VGG16 as an uncompiled Sequential model.

    The network has five stages of zero-padded 3x3 ReLU convolutions
    (64, 128, 256, 512 and 512 filters, with 2, 2, 3, 3 and 3 convolutions
    respectively), each stage ending in a 2x2 max-pool with stride 2. The
    convolutions are named 'conv<stage>_<index>'. No fully connected layers
    are added.

    Side effect: switches the global Keras image dimension ordering to 'th'
    when framework == 'th', and to 'tf' for any other value.

    The input size is read from the module-level img_width / img_height.

    Args:
        framework: 'th' for channels-first input (3, img_width, img_height);
            anything else gives channels-last input (img_width, img_height, 3).

    Returns:
        The Sequential model.
    """
    channels_first = framework == 'th'

    # Global switch: layers created from here on use this dimension ordering.
    backend.set_image_dim_ordering('th' if channels_first else 'tf')

    if channels_first:
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # (number of filters, number of convolutions) for each of the five stages.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    for stage_no, (n_filters, n_convs) in enumerate(stages, start=1):
        for conv_no in range(1, n_convs + 1):
            # Only the very first layer of the network declares the input shape.
            if stage_no == 1 and conv_no == 1:
                padding = ZeroPadding2D((1, 1), input_shape=input_shape)
            else:
                padding = ZeroPadding2D((1, 1))
            model.add(padding)
            model.add(Convolution2D(n_filters, 3, 3, activation='relu',
                                    name='conv{}_{}'.format(stage_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    return model

In [5]:
weights_path = './notebooks/vgg16_weights.h5'

# Fail fast, before any network is built, if the pre-trained weights are missing.
assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'

# Step 1: build the network in Theano ordering and fill it from the weight file,
# which stores one 'layer_<k>' group per layer with 'param_<p>' datasets inside.
th_model = build_vgg16('th')

# Open read-only, and let the context manager close the file even if a layer
# fails to load.
with h5py.File(weights_path, 'r') as weights_file:
    for k in range(weights_file.attrs['nb_layers']):
        if k >= len(th_model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        g = weights_file['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        th_model.layers[k].set_weights(weights)
print('Model loaded.')

# Step 2: build the same network in TensorFlow ordering and copy the convolution
# weights across. Padding and pooling layers carry no weights, so they are skipped.
tf_model = build_vgg16('tf')
for th_layer, tf_layer in zip(th_model.layers, tf_model.layers):
    if th_layer.__class__.__name__ != 'Convolution2D':
        continue
    kernel, bias = th_layer.get_weights()
    # Reorder the kernel axes for the 'tf' layout.
    # Assumes 'th' kernels are (filters, channels, rows, cols) -- TODO confirm.
    # NOTE(review): kernels trained under Theano may also need a spatial flip
    # (see Keras' convert_kernel utility) -- confirm for this weight file.
    kernel = np.transpose(kernel, (2, 3, 1, 0))
    tf_layer.set_weights([kernel, bias])

Model loaded.


# Adding fully connected layers for multi-class prediction

In [6]:
# Classifier head for the VGG16 base: flatten the base's feature maps, one hidden
# layer of 1024 units with dropout, then one softmax output per class.
top_model = Sequential()
top_model.add(Flatten(input_shape=tf_model.output_shape[1:]))
top_model.add(Dense(1024, activation='relu'))
top_model.add(Dropout(0.5))
# Output width follows nb_classes instead of repeating the literal 20.
top_model.add(Dense(nb_classes, activation='softmax'))

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
tf_model.summary()
top_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
zeropadding2d_14 (ZeroPadding2D) (None, 202, 202, 3)   0           zeropadding2d_input_2[0][0]      
____________________________________________________________________________________________________
conv1_1 (Convolution2D)          (None, 200, 200, 64)  1792        zeropadding2d_14[0][0]           
____________________________________________________________________________________________________
zeropadding2d_15 (ZeroPadding2D) (None, 202, 202, 64)  0           conv1_1[0][0]                    
____________________________________________________________________________________________________
conv1_2 (Convolution2D)          (None, 200, 200, 64)  36928       zeropadding2d_15[0][0]           
___________________________________________________________________________________________

We freeze the layers of VGG16 before we train our new layers. This lets the newly added fully connected layers stabilize before the loss is backpropagated into the pre-trained convolutional layers.

In [7]:
# Freeze every layer of the VGG16 base, then stack the trainable head on top.
# The guard keeps the cell safe to re-run: without it, a second run would also
# freeze the head (it is part of tf_model.layers by then) and append it again.
if top_model not in tf_model.layers:
    for layer in tf_model.layers:
        layer.trainable = False
    tf_model.add(top_model)

We added a checkpoint to save the model so that we can use it for results later

In [10]:
# Sanity check: print the trainable flag of every top-level layer. Only the
# classifier head that was added last should report True.
for layer in tf_model.layers:
    print(layer.trainable)

tf_model.compile(loss='categorical_crossentropy',
                 optimizer='adadelta',
                 metrics=['accuracy'])

# Make sure the checkpoint directory exists up front; otherwise the first save
# would only fail after a full training epoch.
checkpoint_path = './models/proj_to_freeze.hdf5'
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Save the full model (not just the weights) whenever val_acc improves.
checkpoint_callback = ModelCheckpoint(checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True


# Training with frozen layers

In [None]:
# Train for 5 epochs with the VGG16 base frozen; the checkpoint callback saves
# the model to disk during training.
tf_model.fit_generator(
    train_generator,
    nb_epoch=5,
    initial_epoch=0,
    samples_per_epoch=nb_train_samples,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    callbacks=[checkpoint_callback],
    verbose=1)

Epoch 1/5
   64/10000 [..............................] - ETA: 1238s - loss: 4.7965 - acc: 0.0781

  'to RGBA images')






Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa6e0408860>

# Unfreeze the layers

In [None]:
# Reload the checkpoint written during the frozen-base run.
tf_model2 = load_model('./models/proj_to_freeze.hdf5')

# Mark every top-level layer as trainable again for fine-tuning.
for layer in tf_model2.layers:
    layer.trainable = True

# Recompile after changing the trainable flags; fine-tune with a small-step
# SGD (lr=1e-4) with momentum.
tf_model2.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
    metrics=['accuracy'])

# Separate checkpoint file for the fine-tuned model, again keeping only the
# best val_acc.
checkpoint_callback2 = ModelCheckpoint(
    './models/proj_not_to_freeze.hdf5',
    monitor='val_acc',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1)
print("proceed")

proceed


# Training with unfrozen layers

In [None]:
# Fine-tune for 10 epochs with all layers trainable; the second checkpoint
# callback saves the model to disk during training.
tf_model2.fit_generator(
    train_generator,
    nb_epoch=10,
    initial_epoch=0,
    samples_per_epoch=nb_train_samples,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    callbacks=[checkpoint_callback2],
    verbose=1)


Epoch 1/10
  128/10000 [..............................] - ETA: 1349s - loss: 1.5493 - acc: 0.5938

  'to RGBA images')






Epoch 2/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa6c8553320>