## Train a simple CNN from scratch
It is not expected to have good results, as we are not using transfer learning here.

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from os import listdir
from os.path import join

from keras.preprocessing import image
from keras.utils import to_categorical, plot_model

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.regularizers import l2

Using TensorFlow backend.


In [2]:
def read_img(img_id, train_or_test, size=None):
    img = image.load_img(join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
    img = image.img_to_array(img)
    return img

In [3]:
NUM_CLASSES = 120
SEED = 1993
np.random.seed(seed=SEED)
data_dir = '../data'

labels = pd.read_csv(join(data_dir, 'labels.csv'))
print('Number of all train images: {}'.format(len(labels)))
print("Train data has {} classes.".format(len(labels.groupby('breed').count())))
assert len(labels.groupby('breed').count()) == NUM_CLASSES, 'Number of classes in training set is not 120!'

sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))
print('Number of all test images: {}'.format(len(sample_submission)))

# Split to train and validation sets
l_val = labels.groupby('breed').apply(pd.DataFrame.sample, frac=0.2).reset_index(drop=True)
l_tr = labels.loc[~labels['id'].isin(l_val['id'])]
l_tr_index = {label:i for i,label in enumerate(np.unique(l_tr.breed))}
l_tr_temp = [l_tr_index[label] for label in l_tr.breed]
l_val_temp = [l_tr_index[label] for label in l_val.breed]
y_tr = to_categorical(l_tr_temp ,num_classes=120)
y_val = to_categorical(l_val_temp ,num_classes=120)
print('y_tr shape: {}'.format(y_tr.shape))
print('y_val shape: {}'.format(y_val.shape))

Number of all train images: 10222
Train data has 120 classes.
Number of all test images: 10357
y_tr shape: (8185, 120)
y_val shape: (2037, 120)


In [4]:
# Load train images
INPUT_SIZE = 120
x_tr = np.zeros((len(l_tr), INPUT_SIZE, INPUT_SIZE, 3))
for i, img_id in enumerate(l_tr.id):
    x_tr[i] = read_img(img_id, 'train', size=(INPUT_SIZE, INPUT_SIZE))
print("x_tr shape {}".format(x_tr.shape))

x_val = np.zeros((len(l_val), INPUT_SIZE, INPUT_SIZE, 3))
for i, img_id in enumerate(l_val.id):
    x_val[i] = read_img(img_id, 'train', size=(INPUT_SIZE, INPUT_SIZE))
print("x_val shape {}".format(x_val.shape))

x_tr shape (8185, 120, 120, 3)
x_val shape (2037, 120, 120, 3)


In [5]:
# Setup model
model = Sequential()
model.add(Conv2D(22, (3, 3), input_shape=(INPUT_SIZE, INPUT_SIZE, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(22, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(22, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(120, kernel_regularizer=l2(0.00001)))
model.add(Activation('relu'))
model.add(Dropout(0.6))
model.add(Dense(120, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 118, 118, 22)      616       
_________________________________________________________________
activation_1 (Activation)    (None, 118, 118, 22)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 59, 59, 22)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 57, 57, 22)        4378      
_________________________________________________________________
activation_2 (Activation)    (None, 57, 57, 22)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 22)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 26, 26, 22)        4378      
__________

In [6]:
# Train
model.fit(x_tr, y_tr,
          validation_data=(x_val, y_val),
          batch_size=100,
          epochs=100,
          verbose=1,
          shuffle=True)

Train on 8185 samples, validate on 2037 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

KeyboardInterrupt: 

In [8]:
loss_tr, acc_tr = model.evaluate(x_tr, y_tr, batch_size=100)
print('Final model train loss: {} | accuracy {}'.format(loss_tr, acc_tr))

loss_val, acc_val = model.evaluate(x_val, y_val, batch_size=100)
print('Final model val loss: {} | accuracy {}'.format(loss_val, acc_val))

Final model train loss: 1.8943831761144034 | accuracy 0.6544899210816268
Final model val loss: 5.754765331013623 | accuracy 0.02405498255452047


Sorry for the impatient interrupt. This is clearly not going anywhere. How surprising?!
It is fun though, but it obviously cannot compare to dozen of conv layers and tons of train images of imagenet models.
Also, I had to scale the image size down quite a bit to fit it into my laptop GPU.
Next time, transfer learning!