In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from os import listdir
from os.path import join

from keras.preprocessing import image
from keras.utils import to_categorical, plot_model

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

Using TensorFlow backend.


In [2]:
def read_img(img_id, train_or_test, size=None):
    """Load one image from disk and convert it to a numpy array.

    The file is looked up as ``<data_dir>/<train_or_test>/<img_id>.jpg``.
    ``data_dir`` is a module-level variable and must be defined before
    this function is first called.

    # Arguments
        img_id: string, file name of the image without the ``.jpg`` suffix.
        train_or_test: string 'train' or 'test'; the sub-directory of
            ``data_dir`` that holds the image.
        size: forwarded to ``image.load_img`` as ``target_size``
            (``None`` is forwarded unchanged).
    # Returns
        Image as numpy array.
    """
    file_name = '%s.jpg' % img_id
    img_path = join(data_dir, train_or_test, file_name)
    pil_img = image.load_img(img_path, target_size=size)
    return image.img_to_array(pil_img)

In [3]:
# --- Configuration ---------------------------------------------------------
INPUT_SIZE = 150    # images are resized to INPUT_SIZE x INPUT_SIZE pixels
NUM_CLASSES = 120   # number of distinct breeds expected in labels.csv
SEED = 1993
# Seeds NumPy's global RNG only.
# NOTE(review): the Keras backend keeps its own random state, so weight
# initialisation / dropout are presumably not covered by this seed - confirm.
np.random.seed(seed=SEED)
data_dir = '../data'

# --- Load metadata ---------------------------------------------------------
labels = pd.read_csv(join(data_dir, 'labels.csv'))
sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))

# Sanity check: number of files on disk vs. number of rows in each CSV.
print(len(listdir(join(data_dir, 'train'))), len(labels))
print(len(listdir(join(data_dir, 'test'))), len(sample_submission))

print("We should have {} clasess and the train data has {} classes."
      .format(NUM_CLASSES, labels.breed.nunique()))

# --- Encode labels ---------------------------------------------------------
# Map every breed name to an integer index (np.unique returns the names
# sorted), then one-hot encode. Row i of `labels_onehot_encoded` belongs to
# row i of `labels`.
labels_index = {label: i for i, label in enumerate(np.unique(labels.breed))}
labels_temp = [labels_index[label] for label in labels.breed]
# Use the NUM_CLASSES constant rather than a second hard-coded 120 so the
# encoding width cannot drift away from the configured class count.
labels_onehot_encoded = to_categorical(labels_temp, num_classes=NUM_CLASSES)


10222 10222
10357 10357
We should have 120 clasess and the train data has 120 classes.


In [5]:
# Load every training image into a single pre-allocated array.
# Appending each image to a Python list and converting with np.asarray
# afterwards keeps two full copies of the data alive at the peak; writing
# straight into the final array avoids that.
# float32 is Keras' default float type, which is what img_to_array returns
# unless the backend floatx has been reconfigured.
# NOTE(review): pixel values are stored exactly as read_img returns them
# (no rescaling is applied here).
train_ids = list(labels.id)
train_x = np.empty((len(train_ids), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
for i, img_id in enumerate(train_ids):
    train_x[i] = read_img(img_id, 'train', size=(INPUT_SIZE, INPUT_SIZE))

# The one-hot rows are already in the same order as labels.id, so one copy
# replaces the previous row-by-row rebuild.
train_y = np.array(labels_onehot_encoded)

print("train_x shape {}".format(train_x.shape))
print("train_y shape {}".format(train_y.shape))

train_x shape (10222, 150, 150, 3)
train_y shape (10222, 120)


In [16]:
# Setup model
# Three identical conv blocks (3x3 convolution with 22 filters -> ReLU ->
# 2x2 max-pooling) followed by a small fully connected classifier.
model = Sequential()
model.add(Conv2D(22, (3, 3), input_shape=(INPUT_SIZE, INPUT_SIZE, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(22, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(22, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(140))
model.add(Activation('relu'))
# NOTE(review): a rate of 0.8 drops 80% of the units during training, which
# is unusually aggressive - confirm this is intended.
model.add(Dropout(0.8))
# One output unit per class; tied to NUM_CLASSES so the layer width always
# matches the width of the one-hot encoded labels.
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

plot_model(model)

In [18]:
# Train
# Hyper-parameters are named so they are easy to find and tune.
BATCH_SIZE = 100
EPOCHS = 100

# Keep the object returned by fit(): it records the per-epoch loss and
# accuracy, which would otherwise be lost once the cell output is cleared.
history = model.fit(train_x, train_y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100

KeyboardInterrupt: 