In [1]:
import numpy as np 
import pandas as pd

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
import cv2
from create_model_trang import create_cnn_model_trang
from keras.callbacks import ModelCheckpoint  

from keras.applications import inception_v3
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor

from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.models import Model

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

from sklearn.model_selection import train_test_split

from tqdm import tqdm

from os import makedirs
from os.path import expanduser, exists, join

Using TensorFlow backend.


In [2]:
# Use keras pretrained models
# Source: https://www.kaggle.com/gaborfodor/resnet50-example

!ls ../keras-pretrained-models/

cache_dir = expanduser(join('~', '.keras'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)
    
!cp ../keras-pretrained-models/*notop* ~/.keras/models/
!cp ../keras-pretrained-models/imagenet_class_index.json ~/.keras/models/
!cp ../keras-pretrained-models/resnet50* ~/.keras/models/

imagenet_class_index.json
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5
inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
inception_v3_weights_tf_dim_ordering_tf_kernels.h5
inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Kuszma.JPG
resnet50_weights_tf_dim_ordering_tf_kernels.h5
resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
xception_weights_tf_dim_ordering_tf_kernels.h5
xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [3]:
# Set train and test folders
train_folder = '../train/'
test_folder = '../test/'

In [4]:
# Add image paths to the training data
training_data = pd.read_csv('../labels.csv')
training_data['image'] = training_data.apply( lambda x: (train_folder + x["id"] + ".jpg" ), axis=1)
training_data.head(10)

Unnamed: 0,id,breed,image
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,../train/000bec180eb18c7604dcecc8fe0dba07.jpg
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,../train/001513dfcb2ffafc82cccf4d8bbaba97.jpg
2,001cdf01b096e06d78e9e5112d419397,pekinese,../train/001cdf01b096e06d78e9e5112d419397.jpg
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,../train/00214f311d5d2247d5dfe4fe24b2303d.jpg
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,../train/0021f9ceb3235effd7fcde7f7538ed62.jpg
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier,../train/002211c81b498ef88e1b40b9abf84e1d.jpg
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier,../train/00290d3e1fdd27226ba27a8ce248ce85.jpg
7,002a283a315af96eaea0e28e7163b21b,borzoi,../train/002a283a315af96eaea0e28e7163b21b.jpg
8,003df8b8a8b05244b1d920bb6cf451f9,basenji,../train/003df8b8a8b05244b1d920bb6cf451f9.jpg
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound,../train/0042188c895a2f14ef64a918ed9c7b64.jpg


In [5]:
# Group image paths into an array to train on
train_data = np.array([img_to_array(load_img(img, target_size=(224, 224))) for img in training_data['image'].values.tolist()]).astype('float32')

In [6]:
# Split the data into train and validation. Since we only have train and validation folders, need to divide train into training and validation sets. 
# Save validation folder for later testing
x_train, x_validation, y_train, y_validation = train_test_split(train_data, 
                                                                training_data["breed"], 
                                                                test_size=0.2, 
                                                                stratify=np.array(training_data["breed"]), 
                                                                random_state=1234)

In [7]:
# Need to convert the train and validation labels into one hot encoded format
y_train = pd.get_dummies(y_train.reset_index(drop=True)).as_matrix()
y_validation = pd.get_dummies(y_validation.reset_index(drop=True)).as_matrix()


  
  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# Create train generator.
train_datagen = ImageDataGenerator(rescale=1./255, 
                                   width_shift_range=0.2,
                                   height_shift_range=0.2, 
                                   horizontal_flip = True)
train_generator = train_datagen.flow(x_train, y_train, shuffle=False, batch_size=20, seed=100)

In [9]:
# Create validation generator
val_datagen = ImageDataGenerator(rescale = 1./255)
val_generator = train_datagen.flow(x_validation, y_validation, shuffle=False, batch_size=20, seed=100)

In [10]:
# CNN from scratch
#model = create_cnn_model_trang()
#model.compile(loss='categorical_crossentropy',
#              optimizer='adam',
#              metrics=['accuracy'])

#checkpointer = ModelCheckpoint(filepath='out_trang/weights.newbestaugmented.from_scratch.hdf5', 
#                               verbose=1, save_best_only=True)
#batch_size = 20
#model.fit_generator(
#        train_generator,
#        steps_per_epoch=2000// batch_size,
#        epochs=10,
#        validation_data=val_generator,
#        validation_steps=800 // batch_size,
#        callbacks=[checkpointer])

In [None]:
# Get the InceptionV3 model as the base model for training
base_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape=(224, 224, 3))
# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# Add a fully-connected layer and a logistic layer 
x = Dense(512, activation='relu')(x)
predictions = Dense(120, activation='softmax')(x)

model = Model(inputs = base_model.input, outputs = predictions)

for layer in base_model.layers:
    layer.trainable = False
    
checkpointer = ModelCheckpoint(filepath='out_trang/weights.pretrained.hdf5', 
                               save_best_only=True)
# Compile with Adam
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
model.fit_generator(train_generator,
                      steps_per_epoch = 409,
                      validation_data = val_generator,
                      validation_steps = 102,
                      epochs = 20,
                      verbose = 2, callbacks=[checkpointer])

Epoch 1/20


In [12]:
# Get the best model
model.load_weights('out_trang/weights.pretrained.hdf5')

In [13]:
# Use the sample submission file to set up the test data - x_test
test_data = pd.read_csv('../sample_submission.csv')
# Create the x_test
x_test = []
for i in test_data['id'].values:
    img = cv2.imread('../test/{}.jpg'.format(i))
    x_test.append(cv2.resize(img, (224, 224)))
# Turn into an array
x_test = np.array(x_test, np.float32) / 255.

In [16]:
x_test.shape


(10357, 224, 224, 3)

In [19]:
# Predict x_test
predictions = model.predict(x_test, verbose=2)
print('Done predicting')
# Set column names to those generated by the one-hot encoding 
one_hot_ys = pd.get_dummies(training_data["breed"], sparse = True)
col_names = one_hot_ys.columns.values
results = pd.DataFrame(predictions, columns = col_names)

Done predicting


In [52]:
results.shape
results.head(10)
#results.values.shape

Unnamed: 0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,1.855034e-06,0.0003899625,7.63638e-09,2.74897e-08,1.192248e-07,6.799458e-08,3.537692e-07,1.051061e-08,2.096706e-08,9.141769e-08,...,2.850548e-07,1.472437e-07,6.527374e-08,1.205259e-08,1.41284e-08,6.074931e-07,6.972702e-08,4.3717e-07,9.714668e-07,2.783599e-07
1,5.844348e-07,3.415014e-07,8.626504e-07,1.213516e-06,2.020889e-07,1.881012e-07,9.035094e-07,1.902623e-06,1.685907e-06,2.849708e-08,...,1.947458e-06,5.593035e-07,5.469791e-07,3.996846e-07,1.772072e-06,6.270842e-07,0.0001590641,1.449932e-05,2.461447e-06,1.861065e-07
2,1.093235e-06,0.0302701,1.541072e-06,1.470617e-05,1.992116e-06,1.855988e-06,2.051482e-07,2.392317e-08,1.164372e-06,7.504899e-06,...,2.934667e-06,8.39945e-07,1.703894e-05,3.373718e-06,1.524356e-06,0.0001257327,2.353436e-08,0.000424992,0.0008052345,1.177973e-07
3,0.001410973,0.03222752,2.776302e-07,5.451619e-06,0.0001078088,0.0007538999,2.028666e-06,4.33703e-05,3.081246e-06,3.752119e-05,...,6.505972e-05,4.498047e-06,2.984066e-06,3.330762e-07,3.017795e-07,5.640957e-05,4.153397e-06,0.008322868,1.072133e-06,3.802258e-05
4,0.001975308,1.282468e-05,1.181579e-07,1.924403e-08,1.957245e-08,8.614145e-08,9.912486e-06,7.850632e-08,2.753534e-07,4.001824e-07,...,8.210422e-06,8.934e-07,3.057538e-07,2.986014e-06,1.305031e-07,6.09351e-07,5.442112e-05,2.013133e-06,1.370903e-06,2.069501e-05
5,0.0002371156,0.0005845125,2.135876e-05,0.0001529689,3.591492e-06,1.696026e-07,1.58971e-05,4.61848e-07,1.031561e-05,1.862298e-05,...,0.007036803,1.254236e-06,1.008142e-06,1.217722e-06,5.846926e-05,1.943937e-06,5.150525e-06,3.405158e-05,0.001785121,3.214426e-06
6,1.994508e-06,5.28633e-06,4.511207e-07,5.231932e-06,6.751151e-08,7.520045e-07,0.9902549,4.014525e-07,4.430587e-08,7.801223e-08,...,3.989815e-07,5.794698e-07,4.155638e-07,7.209046e-09,5.660057e-08,1.750003e-06,1.452318e-05,2.408398e-06,4.293667e-06,0.001478313
7,6.392227e-08,2.495452e-08,6.009812e-08,5.618078e-08,1.329634e-08,4.311753e-09,5.97257e-08,9.413895e-08,4.749965e-08,1.063564e-09,...,3.316008e-07,6.991888e-09,1.207572e-08,7.381018e-09,5.005636e-08,4.84071e-09,0.000255039,1.15121e-06,1.149855e-07,2.920213e-08
8,1.419716e-07,4.647815e-06,2.63697e-08,8.029388e-06,3.384043e-07,1.129762e-08,7.001245e-08,1.42455e-09,3.4902e-08,1.913898e-07,...,5.659959e-08,1.419156e-08,9.37078e-07,1.247406e-06,4.56633e-07,3.733763e-06,1.528261e-08,0.0005914461,7.002678e-05,2.366444e-08
9,3.502096e-06,0.0003592719,1.783013e-07,2.772834e-06,1.082248e-05,2.023088e-06,4.791134e-07,2.017318e-08,3.956331e-07,7.273132e-06,...,7.794126e-06,1.0618e-07,4.877309e-06,2.476562e-07,1.541909e-06,0.0005754413,5.674919e-06,4.22429e-06,1.445336e-05,2.514354e-06


In [42]:
dog_breed_predictions = results.idxmax(axis=1)
dog_breed_predictions.head(5)


0    japanese_spaniel
1             samoyed
2      english_setter
3                 pug
4               lhasa
dtype: object

In [51]:
test_data.head(10)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
5,00225dcd3e4d2410dd53239f95c0352f,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
6,002c2a3117c2193b4d26400ce431eebd,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
7,002c58d413a521ae8d1a5daeb35fc803,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
8,002f80396f1e3db687c5932d7978b196,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
9,0036c6bcec6031be9e62a257b1c3c442,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
