In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import keras
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

import os

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from tqdm import tqdm # helpful for loops
import cv2 # OpenCV


Using TensorFlow backend.


In [2]:
df_train = pd.read_csv('labels.csv')
df_test = pd.read_csv('sample_submission.csv')

In [3]:
df_train.tail()

Unnamed: 0,id,breed
10217,ffd25009d635cfd16e793503ac5edef0,borzoi
10218,ffd3f636f7f379c51ba3648a9ff8254f,dandie_dinmont
10219,ffe2ca6c940cddfee68fa3cc6c63213f,airedale
10220,ffe5f6d8e2bff356e9482a80a6e29aac,miniature_pinscher
10221,fff43b07992508bc822f33d8ffd902ae,chesapeake_bay_retriever


In [4]:
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)

In [5]:
one_hot_labels = np.asarray(one_hot)

Resize to 90x90

In [6]:
im_size = 90

In [7]:
x_train = []
y_train = []
x_test = []

Create one hot encoded array of breed names for training set and resize images

In [8]:
i = 0
for f, breed in tqdm(df_train.values):
    img = cv2.imread('train/{}.jpg'.format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(label)
    i += 1

100%|███████████████████████████████████████████████████████████████████████████| 10222/10222 [01:37<00:00, 104.81it/s]


Do same for test set

In [9]:
for f in tqdm(df_test['id'].values):
    img = cv2.imread('test/{}.jpg'.format(f))
    x_test.append(cv2.resize(img, (im_size, im_size)))

100%|████████████████████████████████████████████████████████████████████████████| 10357/10357 [01:50<00:00, 93.69it/s]


In [10]:
y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32) / 255.
x_test  = np.array(x_test, np.float32) / 255.

Check shape

In [14]:
print(x_train_raw.shape)
print(y_train_raw.shape)
print(x_test.shape)

(10222, 90, 90, 3)
(10222, 120)
(10357, 90, 90, 3)


In [15]:
num_class = y_train_raw.shape[1]
num_class

120

120 dog breeds in total

## Create test and training sets

In [16]:
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, 
                                    test_size=0.3, random_state=100)

Using vgg19

In [17]:
# Create the base pre-trained model
# Can't download weights in the kernel
base_model = VGG19(#weights='imagenet',
    weights = None, include_top=False, input_shape=(im_size, im_size, 3))

# Add a new top layer
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# First: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', 
              optimizer='SGD', 
              metrics=['accuracy'])

callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0         
__________

In [18]:
model.fit(X_train, Y_train, epochs=1, validation_data=(X_valid, Y_valid), 
          verbose=1, batch_size = 10)


Train on 7155 samples, validate on 3067 samples
Epoch 1/1






<keras.callbacks.History at 0x1a51da85dd8>

In [19]:
preds = model.predict(x_test, verbose=1)



array([ 0.00835556,  0.00853121,  0.00836687,  0.00851014,  0.00827073,
        0.0082788 ,  0.00837732,  0.00847739,  0.0083684 ,  0.00847653,
        0.00832976,  0.008464  ,  0.00826276,  0.00841523,  0.00835403,
        0.00834495,  0.00827578,  0.00837536,  0.0082967 ,  0.00836602,
        0.00839202,  0.00830929,  0.00823732,  0.00820819,  0.00821757,
        0.00830929,  0.00842428,  0.00823661,  0.00834666,  0.00824574,
        0.00839891,  0.00833957,  0.00828837,  0.0084256 ,  0.00823529,
        0.00836075,  0.00830028,  0.00833076,  0.00825199,  0.0083774 ,
        0.00835908,  0.0083459 ,  0.00842251,  0.00827303,  0.00825865,
        0.00831328,  0.00824012,  0.00823021,  0.00826993,  0.0082363 ,
        0.00830675,  0.00827994,  0.00848503,  0.00827832,  0.00823001,
        0.00834625,  0.00832453,  0.00832008,  0.00833294,  0.00841315,
        0.0083438 ,  0.00842397,  0.00827949,  0.00832033,  0.00832474,
        0.00821096,  0.00825325,  0.00834115,  0.00839081,  0.00

In [None]:
sub = pd.DataFrame(preds)
# Set column names to those generated by the one-hot encoding earlier
col_names = one_hot.columns.values
sub.columns = col_names
# Insert the column id from the sample_submission at the start of the data frame
sub.insert(0, 'id', df_test['id'])
sub.head(5)