In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
%matplotlib inline
import pandas as pd

### Reading the data 

In [2]:
# Directory holding the training images (machine-specific absolute path).
DATA_DIR = '/home/ubuntu/Downloads/train/'
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# image order -- and the positional train/val/test split taken from it later --
# identical across runs and machines.
# Match on '.jpg' (including the dot) so only files with that extension are used.
images_path_list = sorted(
    file_name for file_name in os.listdir(DATA_DIR) if file_name.endswith('.jpg')
)

In [3]:
# Load every image listed in images_path_list, convert it from OpenCV's BGR
# channel order to RGB and resize it to 64x64. im_list keeps the same order as
# images_path_list, which the label lookup relies on.
im_list = []
for im_ind, im_path in enumerate(images_path_list):
    if im_ind % 1000 ==0:
        print ('at image {} out of {}'.format(im_ind, len(images_path_list)))
    full_path = os.path.join(DATA_DIR, im_path)
    im = cv2.imread(full_path)
    if im is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising. Fail here with the file name; silently skipping
        # the image would misalign im_list with the labels built from
        # images_path_list.
        raise IOError('could not read image: {}'.format(full_path))
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (64, 64))
    im_list.append(im)


at image 0 out of 10222
at image 1000 out of 10222
at image 2000 out of 10222
at image 3000 out of 10222
at image 4000 out of 10222
at image 5000 out of 10222
at image 6000 out of 10222
at image 7000 out of 10222
at image 8000 out of 10222
at image 9000 out of 10222
at image 10000 out of 10222


In [4]:
labels_df = pd.read_csv('/home/ubuntu/Downloads/labels.csv')

# Build the id -> breed mapping once instead of scanning the whole DataFrame
# with a boolean mask for every image. Keeping the first row per id matches the
# previous `.values[0]` behaviour should an id ever appear more than once.
unique_labels_df = labels_df.drop_duplicates(subset='id', keep='first')
breed_by_id = dict(zip(unique_labels_df['id'], unique_labels_df['breed']))

# The image id is the file name without its extension. A file whose id is
# missing from the CSV raises KeyError naming that id.
labels_list = [
    str(breed_by_id[os.path.splitext(im_path)[0]])
    for im_path in images_path_list
]


In [5]:
# Turn the Python lists of images and breed names into NumPy arrays.
X, Y = np.asarray(im_list), np.asarray(labels_list)

In [6]:
from keras.utils import np_utils

# Map every breed name to an integer class index. The unique names are sorted
# because iterating a set of strings can yield a different order in each
# interpreter session, which would silently change which output unit of a saved
# model corresponds to which breed.
labels_dict = {
    breed_name: class_ind
    for class_ind, breed_name in enumerate(sorted(set(labels_list)))
}

# Encode from labels_list rather than from Y: Y is overwritten here, so reading
# it back would make the cell fail when it is run a second time.
Y = np_utils.to_categorical([labels_dict[breed_name] for breed_name in labels_list])

Using TensorFlow backend.


### Dividing into train, validation and test

In [7]:
# Fraction of the dataset assigned to each split.
train_percent, val_percent, test_percent = 0.7, 0.2, 0.1

total_samples_num = len(Y)

# Per-split sample counts; int() truncates towards zero.
train_sample_num, val_sample_num, test_sample_num = (
    int(total_samples_num * fraction)
    for fraction in (train_percent, val_percent, test_percent)
)

# Positional boundaries: train is [0, last_train_index), validation is
# [last_train_index, last_val_index), and test starts at first_test_index.
last_train_index = train_sample_num
first_test_index = last_val_index = last_train_index + val_sample_num

In [8]:
# Cut the image array into contiguous train / validation / test segments.
train_X, val_X, test_X = (
    X[:last_train_index],
    X[last_train_index:last_val_index],
    X[first_test_index:],
)

In [9]:
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# scaler.fit(train_X)
# scaler.transform(val_X)
# scaler.transform(test_X)

In [10]:
# Cut the one-hot labels at the same boundaries used for the images.
train_Y, val_Y, test_Y = (
    Y[:last_train_index],
    Y[last_train_index:last_val_index],
    Y[first_test_index:],
)

In [11]:
# Convert each split to float32 and divide by 255.
# The splits are taken straight from X (same boundaries as the split above)
# rather than from train_X/val_X/test_X themselves, so re-running this cell
# cannot divide already-scaled data by 255 a second time.
train_X = X[:last_train_index].astype(np.float32) / 255.0
val_X = X[last_train_index:last_val_index].astype(np.float32) / 255.0
test_X = X[first_test_index:].astype(np.float32) / 255.0

In [12]:
from keras.models import Sequential, load_model
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Activation, Flatten, Dense, Dropout
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping , ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

In [13]:
# Augmentation applied to training batches: random horizontal flips and small
# random shifts along both axes.
data_gen = ImageDataGenerator(horizontal_flip=True, height_shift_range=0.05, width_shift_range=0.05)
# Stop training once val_loss has gone 4 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1)

# Checkpoint file; only the best model seen so far (by val_loss) is kept.
fBestModel = 'dogs_best_model.h5'
best_model = ModelCheckpoint(fBestModel, monitor='val_loss', verbose=0, save_best_only=True)

# Size the output layer from the one-hot labels instead of hard-coding the
# number of breeds, so it always agrees with the label encoding.
num_classes = train_Y.shape[1]

# Three conv blocks (96 3x3 filters each, 2x2 max-pooling) and a softmax classifier.
# NOTE(review): the first block applies BatchNormalization before ReLU while
# the other two apply ReLU before BatchNormalization, and a further ReLU
# follows Flatten. Kept as-is to leave the architecture unchanged -- confirm
# whether the ordering is intentional.
model = Sequential()
model.add(Convolution2D(filters=96, kernel_size=3, padding='valid',
                        input_shape=(64, 64, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))

model.add(Convolution2D(filters=96, kernel_size=3, padding='valid'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2, strides=2))

model.add(Convolution2D(filters=96, kernel_size=3, padding='valid'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Flatten())
model.add(Activation('relu'))

model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Train on augmented batches; validation uses the un-augmented validation split.
model.fit_generator(data_gen.flow(train_X, train_Y), validation_data=(val_X, val_Y),
                    epochs=100, callbacks=[early_stop, best_model])
# model.fit( train_X, train_Y , validation_data=(val_X, val_Y), callbacks=[early_stop], epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 00012: early stopping


<keras.callbacks.History at 0x7f0ac1faf438>

In [14]:
# Reload the checkpoint written during training. Reusing fBestModel keeps the
# path in one place so the save and load locations cannot drift apart.
best_model = load_model(fBestModel)

In [15]:
# Score the reloaded checkpoint on the validation split; the model was compiled
# with an accuracy metric, so the result is displayed as [loss, accuracy].
best_model.evaluate(x=val_X, y=val_Y)



[4.2850019087296873, 0.083659491237478942]

In [16]:
# Score the reloaded checkpoint on the held-out test split; the result is
# displayed as [loss, accuracy].
best_model.evaluate(x=test_X, y=test_Y)



[4.3888850580329191, 0.070381231613289699]