In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
import cv2

from tensorflow.keras.applications import inception_v3
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor


from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

from sklearn.model_selection import train_test_split

from tqdm import tqdm

from os import makedirs
from os.path import expanduser, exists, join



In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16725568843623660130
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1456137830
locality {
  bus_id: 1
  links {
  }
}
incarnation: 1868229668028620689
physical_device_desc: "device: 0, name: GeForce GTX 770, pci bus id: 0000:01:00.0, compute capability: 3.0"
]


In [3]:
!ls ./keras-pretrained-models/

cache_dir = expanduser(join('~', '.keras'))
if not exists(cache_dir):
    makedirs(cache_dir)
models_dir = join(cache_dir, 'models')
if not exists(models_dir):
    makedirs(models_dir)
    
!cp ./keras-pretrained-models/*notop* ~/.keras/models/
!cp ./keras-pretrained-models/imagenet_class_index.json ~/.keras/models/
!cp ./keras-pretrained-models/resnet50* ~/.keras/models/

'ls' n’est pas reconnu en tant que commande interne
ou externe, un programme exécutable ou un fichier de commandes.
'cp' n’est pas reconnu en tant que commande interne
ou externe, un programme exécutable ou un fichier de commandes.
'cp' n’est pas reconnu en tant que commande interne
ou externe, un programme exécutable ou un fichier de commandes.
'cp' n’est pas reconnu en tant que commande interne
ou externe, un programme exécutable ou un fichier de commandes.


## Prepare data

In [4]:
# load train dataframe
data_frame = pd.read_csv('./dog-breed-identification/labels.csv')

In [5]:
# Get the top 20 breeds which is what we use in this notebook
top_breeds = sorted(list(data_frame['breed'].value_counts().head(20).index))
data_frame = data_frame[data_frame['breed'].isin(top_breeds)]

In [6]:
print(top_breeds)
data_frame.shape

['afghan_hound', 'airedale', 'australian_terrier', 'basenji', 'beagle', 'bernese_mountain_dog', 'blenheim_spaniel', 'cairn', 'entlebucher', 'great_pyrenees', 'irish_wolfhound', 'japanese_spaniel', 'leonberg', 'maltese_dog', 'miniature_pinscher', 'pomeranian', 'samoyed', 'scottish_deerhound', 'shih-tzu', 'tibetan_terrier']


(2184, 2)

In [7]:
# Get the labels of the top 20
target_labels = data_frame['breed']

In [8]:
# One hot code the labels - need this for the model
one_hot = pd.get_dummies(target_labels, sparse = True)
one_hot_labels = np.asarray(one_hot)

In [9]:
# add the actual path name of the pics to the data set
data_frame['image_path'] = data_frame.apply( lambda x: ('./dog-breed-identification/train/' + x["id"] + ".jpg" ), axis=1)
data_frame.head(21)

Unnamed: 0,id,breed,image_path
8,003df8b8a8b05244b1d920bb6cf451f9,basenji,./dog-breed-identification/train/003df8b8a8b05...
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound,./dog-breed-identification/train/0042188c895a2...
12,00693b8bc2470375cc744a6391d397ec,maltese_dog,./dog-breed-identification/train/00693b8bc2470...
29,00bee065dcec471f26394855c5c2f3de,cairn,./dog-breed-identification/train/00bee065dcec4...
48,013f8fdf6d638c7bb042f5f17e8a9fdc,tibetan_terrier,./dog-breed-identification/train/013f8fdf6d638...
57,0162b3e0144fb1d1ab82fbff3ace9938,shih-tzu,./dog-breed-identification/train/0162b3e0144fb...
63,0192d7174233ebdb3d720000413829ac,australian_terrier,./dog-breed-identification/train/0192d7174233e...
70,01b36cb1b80ab8c3a7d2b7128ad21bdc,samoyed,./dog-breed-identification/train/01b36cb1b80ab...
79,01e787576c003930f96c966f9c3e1d44,scottish_deerhound,./dog-breed-identification/train/01e787576c003...
80,01ee3c7ff9bcaba9874183135877670e,entlebucher,./dog-breed-identification/train/01ee3c7ff9bca...


In [10]:
# Convert the images to arrays which is used for the model. Inception uses image sizes of 299 x 299
train_data = np.array([img_to_array(load_img(img, target_size=(299, 299))) for img in data_frame['image_path'].values.tolist()]).astype('float32')

In [11]:
train_data

array([[[[137., 146.,  79.],
         [127., 136.,  69.],
         [124., 135.,  67.],
         ...,
         [110., 120.,  57.],
         [108., 120.,  58.],
         [ 89., 105.,  43.]],

        [[140., 146.,  82.],
         [124., 130.,  66.],
         [135., 144.,  79.],
         ...,
         [113., 123.,  60.],
         [ 89.,  99.,  38.],
         [ 89., 103.,  42.]],

        [[143., 144.,  86.],
         [151., 155.,  96.],
         [145., 151.,  89.],
         ...,
         [131., 137.,  77.],
         [ 94., 102.,  42.],
         [ 92., 103.,  43.]],

        ...,

        [[144., 166.,  94.],
         [ 75.,  96.,  29.],
         [167., 187., 124.],
         ...,
         [234., 225., 246.],
         [209., 198., 212.],
         [255., 246., 238.]],

        [[ 50.,  74.,   0.],
         [245., 255., 218.],
         [226., 241., 202.],
         ...,
         [154., 133., 112.],
         [ 42.,  25.,   7.],
         [106.,  99.,  91.]],

        [[ 84., 108.,  30.],
       

In [12]:
# Split the data into train and validation. The stratify parm will insure  train and validation  
# will have the same proportions of class labels as the input dataset.
x_train, x_validation, y_train, y_validation = train_test_split(train_data, target_labels, test_size=0.2, stratify=np.array(target_labels), random_state=100)

In [13]:
# Need to know how many rows in each of the train/test split so we can 
# calculate steps_per_epoch and validatoin_steps for the model.fit_generator
print ('x_train shape = ', x_train.shape)
print ('x_validation shape = ', x_validation.shape)


x_train shape =  (1747, 299, 299, 3)
x_validation shape =  (437, 299, 299, 3)


In [14]:
# Need to convert the train and validation labels into one hot encoded format
y_train = pd.get_dummies(y_train).as_matrix()
y_validation = pd.get_dummies(y_validation).as_matrix()

In [15]:
# Create train generator.
train_datagen = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=30, 
                                   # zoom_range = 0.3, 
                                   width_shift_range=0.2,
                                   height_shift_range=0.2, 
                                   horizontal_flip = 'true')
train_generator = train_datagen.flow(x_train, y_train, shuffle=False, batch_size=10, seed=10)

In [16]:
# Create validation generator
val_datagen = ImageDataGenerator(rescale = 1./255)
val_generator = train_datagen.flow(x_validation, y_validation, shuffle=False, batch_size=10, seed=10)

## Model definition

In [17]:
# Get the InceptionV3 model so we can do transfer learning
base_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape=(299, 299, 3))

In [18]:
# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

In [19]:
# Add a fully-connected layer and a logistic layer with 20 classes 
#(there will be 120 classes for the final submission)
x = Dense(512, activation='relu')(x)
predictions = Dense(20, activation='softmax')(x)

In [20]:
# The model we will train
model = Model(inputs = base_model.input, outputs = predictions)

In [21]:
# first: train only the top layers i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

In [22]:
# Compile with Adam
model.compile(Adam(lr=.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model
train_history = model.fit_generator(train_generator,
                      steps_per_epoch = 175,
                      validation_data = val_generator,
                      validation_steps = 44,
                      epochs = 10,
                      verbose = 2)

Epoch 1/10
175/175 - 133s - loss: 1.9399 - accuracy: 0.5587 - val_loss: 0.5691 - val_accuracy: 0.8879
Epoch 2/10
