In [46]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
import cv2

from keras.applications import inception_v3
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor


from keras.applications.vgg16 import VGG16

from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

from sklearn.model_selection import train_test_split

from tqdm import tqdm

from os import makedirs
from os.path import expanduser, exists, join



In [84]:
# Show which pretrained weight files are available in the local ./keras-pretrained-models/ folder
!ls ./keras-pretrained-models/

# Locations of the per-user Keras cache (~/.keras) and its models sub-folder;
# the shell commands in this cell copy the pretrained weight files into it.
cache_dir = expanduser(join('~', '.keras'))
models_dir = join(cache_dir, 'models')

# makedirs creates missing parents (so cache_dir is created too), and
# exist_ok=True makes the call a no-op when the folders already exist.
# This replaces the racy "check exists(), then create" pairs.
makedirs(models_dir, exist_ok=True)
    
# Copy the "notop" weight files, the ImageNet class index and the ResNet50 weights
# into ~/.keras/models/ (presumably so Keras picks them up from its cache instead
# of downloading them -- confirm for the Keras version in use).
!cp ./keras-pretrained-models/*notop* ~/.keras/models/
!cp ./keras-pretrained-models/imagenet_class_index.json ~/.keras/models/
!cp ./keras-pretrained-models/resnet50* ~/.keras/models/

Kuszma.JPG
imagenet_class_index.json
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
inception_v3_weights_tf_dim_ordering_tf_kernels.h5
inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
resnet50_weights_tf_dim_ordering_tf_kernels.h5
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
xception_weights_tf_dim_ordering_tf_kernels.h5
xception_weights_tf_dim_ordering_tf_kernels_notop.h5


## Prepare data

In [3]:
# Read the label table (one row per training image: id and breed) into a DataFrame
labels_csv_path = './dog-breed-identification/labels.csv'
data_frame = pd.read_csv(labels_csv_path)

In [4]:
# Keep the breed column both as a pandas Series and as a plain NumPy array
labels_df = data_frame.loc[:, 'breed']
labels = labels_df.values

# Sanity check: one label per row of the label table
print(labels.shape)

(10222,)


In [5]:
# One-hot encode the full label array (one indicator column per distinct breed),
# stored sparsely, then convert the result to a NumPy array.
# NOTE(review): one_hot_labels is not referenced by any later cell visible here;
# the train/validation labels are encoded again after the split -- confirm whether
# this cell is still needed.
one_hot = pd.get_dummies(labels, sparse = True)
one_hot_labels = np.asarray(one_hot)

In [6]:
# Add the on-disk path of each training picture, built from its id.
# Vectorised string concatenation replaces the row-wise apply(axis=1): it is
# considerably faster and also behaves correctly on an empty frame, where
# apply(axis=1) would hand back a DataFrame instead of a column.
data_frame['image_path'] = './dog-breed-identification/train/' + data_frame['id'] + '.jpg'
data_frame.head()

Unnamed: 0,id,breed,image_path
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,./dog-breed-identification/train/000bec180eb18...
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,./dog-breed-identification/train/001513dfcb2ff...
2,001cdf01b096e06d78e9e5112d419397,pekinese,./dog-breed-identification/train/001cdf01b096e...
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,./dog-breed-identification/train/00214f311d5d2...
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,./dog-breed-identification/train/0021f9ceb3235...


In [7]:
# Convert the images to arrays which are used for the model. Inception uses image sizes of 299 x 299.
# The float32 buffer is allocated once and filled image by image. The previous
# list -> np.array -> astype('float32') chain kept several full copies of the
# data set alive at the same time (each copy is ~11 GB for 10222 images).
image_paths = data_frame['image_path'].values.tolist()
train_data = np.empty((len(image_paths), 299, 299, 3), dtype='float32')
for image_index, image_path in enumerate(image_paths):
    # load_img resizes to 299 x 299; assigning into the buffer performs the
    # same float32 cast the former astype('float32') call did.
    train_data[image_index] = img_to_array(load_img(image_path, target_size=(299, 299)))

In [8]:
# Hold out 20% of the images for validation. Stratifying on the labels ensures
# the train and validation sets keep the same class proportions as the input
# data set; the fixed random_state makes the split repeatable.
x_train, x_validation, y_train, y_validation = train_test_split(
    train_data,
    labels,
    test_size=0.2,
    stratify=np.array(labels),
    random_state=100,
)

In [9]:
# Report how many rows ended up in each side of the split; these counts are
# needed to work out steps_per_epoch and validation_steps for model.fit_generator.
for split_name, split_array in (('x_train', x_train), ('x_validation', x_validation)):
    print(split_name + ' shape = ', split_array.shape)


x_train shape =  (8177, 299, 299, 3)
x_validation shape =  (2045, 299, 299, 3)


In [13]:
# Convert the train and validation labels into one-hot encoded format.
def one_hot_encode(values, classes):
    """Return a (len(values), len(classes)) float32 one-hot matrix.

    values  -- 1-D array of class labels.
    classes -- ordered sequence of every possible label; fixes the column order
               and guarantees a column even for a label absent from `values`.

    If `values` is not 1-D (i.e. this cell was already run and the labels are
    already encoded) it is returned unchanged, so re-running the cell is safe.
    """
    if np.ndim(values) != 1:
        return values
    encoded = pd.get_dummies(pd.Categorical(values, categories=classes))
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    # The explicit cast keeps the dtype numeric regardless of whether the
    # installed pandas emits uint8 or bool indicator columns.
    return encoded.values.astype('float32')


# Use one shared, sorted class list for both splits so that column i means the
# same breed in y_train and y_validation (encoding each split on its own only
# lines up if every breed happens to occur in both).
breed_classes = np.unique(labels)
y_train = one_hot_encode(y_train, breed_classes)
y_validation = one_hot_encode(y_validation, breed_classes)

In [14]:
# Create train generator: pixel values scaled to [0, 1] plus random rotations,
# shifts and horizontal flips as augmentation.
# NOTE(review): inception_v3_preprocessor is imported at the top of the notebook
# but never used; InceptionV3 ImageNet weights are normally fed inputs prepared
# by it. rescale=1./255 is kept here so train and validation scaling stay
# identical -- if it is changed, change both generators together.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   # real boolean: the former string 'true' only
                                   # worked because any non-empty string is truthy
                                   horizontal_flip=True)
# shuffle=True so the training batches are re-ordered on every pass over the
# data; seed keeps the order reproducible.
train_generator = train_datagen.flow(x_train, y_train, shuffle=True, batch_size=10, seed=10)

In [15]:
# Create validation generator: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale = 1./255)
# Build the stream from val_datagen. It was previously built from train_datagen,
# which applied the random training augmentations to the validation images and
# left val_datagen unused.
val_generator = val_datagen.flow(x_validation, y_validation, shuffle=False, batch_size=10, seed=10)

## Model definition

In [85]:
# Transfer-learning backbone: InceptionV3 with ImageNet weights and without its
# classification head, taking 299 x 299 RGB inputs
base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(299, 299, 3),
)

In [86]:
# Collapse the backbone's spatial feature maps with global average pooling
x = GlobalAveragePooling2D()(base_model.output)

In [87]:
# Classification head: a 512-unit fully-connected ReLU layer followed by a
# softmax layer with 120 outputs (one per class)
hidden_layer = Dense(512, activation='relu')
x = hidden_layer(x)
output_layer = Dense(120, activation='softmax')
predictions = output_layer(x)

In [88]:
# Full trainable model: backbone input through to the new softmax predictions
model = Model(
    inputs=base_model.input,
    outputs=predictions,
)

In [89]:
# First stage: freeze every InceptionV3 layer so that only the newly added top
# layers are trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [90]:
# Compile with the Adam optimiser (learning rate 1e-4), categorical
# cross-entropy loss and accuracy as the reported metric
model.compile(
    optimizer=Adam(lr=.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [91]:
# Train the model.
# Step counts are derived from the generators (number of samples / batch size,
# rounded up) so one epoch covers every training image once and validation is
# always scored on the complete validation set. The former hard-coded 175 / 44
# did not match the batch size of 10 and covered only about a fifth of the data
# per epoch.
steps_per_epoch = int(np.ceil(train_generator.n / float(train_generator.batch_size)))
validation_steps = int(np.ceil(val_generator.n / float(val_generator.batch_size)))

train_history = model.fit_generator(train_generator,
                                    steps_per_epoch=steps_per_epoch,
                                    validation_data=val_generator,
                                    validation_steps=validation_steps,
                                    epochs=10)

Epoch 1/10

KeyboardInterrupt: 