# CNN - Training - Dog Breed Detection

## Librairies

In [1]:
import os
import re
import h5py
import PIL
import cv2
import numpy as np   
import pandas as pd
from tqdm import tqdm
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Input, Dense, Activation, Flatten, Dropout, Lambda, BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.models import Model,load_model,Sequential
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler,LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
normalize=0
#num_classes = 10
img_height = 224
img_width = 224

## Monitoring the training

In [3]:
def generate_unique_logpath(logdir, raw_run_name):
    i = 0
    while(True):
        run_name = raw_run_name + "-" + str(i)
        log_path = os.path.join(logdir, run_name)
        if not os.path.isdir(log_path):
            return log_path
        i = i + 1

## Pre-processing

In [4]:
def path_to_tensor(img_path):
    gray = cv2.imread(img_path, 0)
    resized_gray = cv2.resize(gray,(img_height,img_width))
    x = np.array(resized_gray)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [5]:
train_dir = '../dogFlap/dog-breed-identification/train/'
test_dir = '../dogFlap/dog-breed-identification/test/'
list_train = [train_dir+f for f in os.listdir(train_dir) if re.search('jpg|JPG', f)]
list_test = [test_dir+f for f in os.listdir(test_dir) if re.search('jpg|JPG', f)]
print(list_train[0:4])
print()
print(list_test[0:4])

['../dogFlap/dog-breed-identification/train/000bec180eb18c7604dcecc8fe0dba07.jpg', '../dogFlap/dog-breed-identification/train/001513dfcb2ffafc82cccf4d8bbaba97.jpg', '../dogFlap/dog-breed-identification/train/001cdf01b096e06d78e9e5112d419397.jpg', '../dogFlap/dog-breed-identification/train/00214f311d5d2247d5dfe4fe24b2303d.jpg']

['../dogFlap/dog-breed-identification/test/000621fb3cbb32d8935728e48679680e.jpg', '../dogFlap/dog-breed-identification/test/00102ee9d8eb90812350685311fe5890.jpg', '../dogFlap/dog-breed-identification/test/0012a730dfa437f5f3613fb75efcd4ce.jpg', '../dogFlap/dog-breed-identification/test/001510bc8570bbeee98c8d80c8a95ec1.jpg']


In [6]:
data = pd.read_csv('../dogFlap/dog-breed-identification/labels.csv')
data.head(5)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [7]:
train_labels = data.iloc[:,1].values
train_labels.shape[0]

10222

In [8]:
dog_names = data.groupby("breed").count()
dog_names = dog_names.rename(columns = {"id" : "count"})
dog_names = dog_names.sort_values("count", ascending=False)
dog_names.head()

Unnamed: 0_level_0,count
breed,Unnamed: 1_level_1
scottish_deerhound,126
maltese_dog,117
afghan_hound,116
entlebucher,115
bernese_mountain_dog,114


In [9]:
print(len(list_train))
print(len(list_test))
print(len(dog_names))
print('train_labels.shape',train_labels.shape)

10222
10357
120
train_labels.shape (10222,)


In [10]:
train_tensors = paths_to_tensor(list_train)

100%|███████████████████████████████████████████████████████████████████████████| 10222/10222 [00:20<00:00, 495.86it/s]


In [11]:
X_train, X_test, y_train, y_test = train_test_split(train_tensors,train_labels,test_size=0.30,random_state=42)

In [13]:
num_train  = X_train.shape[0]
num_test   = X_test.shape[0]

img_height = X_train.shape[1]
img_width  = X_train.shape[2]
X_train = X_train.reshape(num_train, img_width , img_height,1)
X_test  = X_test.reshape(num_test, img_width , img_height,1)

y_train = np.array(y_train)
label_encoder_train = LabelEncoder()
y_train = label_encoder_train.fit_transform(y_train)
y_train = to_categorical(y_train)
print(y_train.shape[0])
y_test = np.array(y_test)
label_encoder_test = LabelEncoder()
y_test = label_encoder_test.fit_transform(y_test)
y_test = to_categorical(y_test)
print(y_test.shape[0])

7155
3067


## Normalization

In [14]:
if normalize :
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train=scaler.transform(X_train)
    X_test=scaler.transform(X_test)

## Construction of the network

In [15]:
#--input layer
xi = Input(shape=(224,224,1)) 

#normalization layer
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-5
x = Lambda(lambda image, mu, std: (image - mu) / std,arguments={'mu': mean, 'std': std})(xi)

#--hidden layer1
x = Conv2D(filters=16,
           kernel_size=5,
           strides=1,#shift of the filter
           padding='same')(x)
x = Activation('relu')(x)    
x = MaxPooling2D(pool_size=2,strides=2)(x)

#hidden layer 2
x = Conv2D(filters=32,
          kernel_size=5,
           strides=1,#shift of the filter
           padding='same')(x)
x = Activation('relu')(x)    
x = MaxPooling2D(pool_size=2,strides=2)(x)

#hidden layer 3
x = Conv2D(filters=32,
          kernel_size=5,
           strides=1,#shift of the filter
           padding='same')(x)
x = Activation('relu')(x)    
x = MaxPooling2D(pool_size=2,strides=2)(x)

#hidden layer 4
x = Dense(128)(x)
x = Activation('relu')(x)

#hidden layer 5
x = Dense(64)(x)
x = Activation('relu')(x)

#--output layer
x = Flatten()(x)
xo = Dense(len(dog_names))(x)
yo = Activation('softmax')(xo)

model = Model(inputs=[xi], outputs=[yo])

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 1)]     0         
_________________________________________________________________
lambda (Lambda)              (None, 224, 224, 1)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 224, 224, 16)      416       
_________________________________________________________________
activation (Activation)      (None, 224, 224, 16)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 32)      12832     
_________________________________________________________________
activation_1 (Activation)    (None, 112, 112, 32)      0     

In [16]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [17]:
run_name = "linear"
logpath = generate_unique_logpath(".\logs_linear", run_name)
print(logpath)
tbcb = TensorBoard(log_dir=logpath)
checkpoint_filepath = os.path.join(logpath,  "best_model.h5")
checkpoint_cb = ModelCheckpoint(checkpoint_filepath, save_best_only=True)

.\logs_linear\linear-2


In [18]:
model.fit(X_train, y_train,
          batch_size=128,
          epochs=20,
          verbose=1,
          validation_split=0.1,
          callbacks=[tbcb,checkpoint_cb])

Train on 6439 samples, validate on 716 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x25c7e449948>

In [19]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 11.110258739858253
Test accuracy: 0.03488751
