### Dog breed classifier

In [None]:
import numpy as np 
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import os
import cv2

In [None]:
# Locations of the competition data: the two image folders (train / test)
# and the CSV with the training labels.
train_img_Path, test_img_Path = (
    '../input/dog-breed-identification/train',
    '../input/dog-breed-identification/test',
)

# Label table for the training images; later cells read its 'id' and 'breed' columns.
labels = pd.read_csv(filepath_or_buffer='../input/dog-breed-identification/labels.csv')

In [None]:
# Preview the first rows of the label table loaded from labels.csv.
labels.head()

* This dataset contains 120 dog breeds, but the given task only requires predictions for 10 of them.

In [None]:
# Pull the label rows for each of the ten breeds in scope, one frame per breed.
def _rows_for_breed(breed_name):
    """Return the rows of `labels` whose 'breed' column equals `breed_name`."""
    return labels.loc[labels['breed'].eq(breed_name)]


beagle = _rows_for_breed('beagle')
chihuahua = _rows_for_breed('chihuahua')
doberman = _rows_for_breed('doberman')
french_bulldog = _rows_for_breed('french_bulldog')
golden_retriever = _rows_for_breed('golden_retriever')
malamute = _rows_for_breed('malamute')
pug = _rows_for_breed('pug')
saint_bernard = _rows_for_breed('saint_bernard')
scottish_deerhound = _rows_for_breed('scottish_deerhound')
tibetan_mastiff = _rows_for_breed('tibetan_mastiff')

In [None]:
# Stack the per-breed frames into a single training table; ignore_index
# gives the result a fresh 0..n-1 row index.
selected_breed_frames = [
    beagle,
    chihuahua,
    doberman,
    french_bulldog,
    golden_retriever,
    malamute,
    pug,
    saint_bernard,
    scottish_deerhound,
    tibetan_mastiff,
]
target = pd.concat(selected_breed_frames, ignore_index=True)
target

In [None]:
# Summary of the filtered training set: row count, number of distinct
# breeds, and the breed names themselves.
print(f'Number of pictures in the training dataset: {target.shape[0]}\n')
print(f'Number of different labels: {len(target.breed.unique())}\n')
print(f'Labels: {target.breed.unique()}')

In [None]:
# Number of training images available for each selected breed.
target['breed'].value_counts()

In [None]:
# Later cells join these ids onto the image directory, so they must carry
# the '.jpg' extension. Any '.jpg' already present is stripped first, which
# keeps this cell idempotent: re-running it no longer produces names such as
# 'abc.jpg.jpg' that would make every image load fail.
target['id'] = target['id'].str.replace(r'\.jpg$', '', regex=True) + '.jpg'

In [None]:
# Show the first six training pictures, each titled with its breed.
plt.figure(figsize=(20, 40))
preview_rows = target.head(6)
for slot, (file_name, breed_name) in enumerate(
        zip(preview_rows['id'], preview_rows['breed']), start=1):
    # The image is read with OpenCV and converted from BGR to RGB before plotting.
    bgr_pixels = cv2.imread(os.path.join(train_img_Path, file_name))
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    ax = plt.subplot(6, 2, slot)
    ax.imshow(rgb_pixels)
    ax.set_title(breed_name)

In [None]:
# Distinct breed names in sorted order, and how many of them there are.
dog_breeds = list(target['breed'].unique())
dog_breeds.sort()
n_classes = len(dog_breeds)
print(n_classes)

In [None]:
# Lookup table from breed name to its integer class index
# (the breed's position in the sorted dog_breeds list).
class_to_num = {
    breed: index for breed, index in zip(dog_breeds, range(n_classes))
}

In [None]:
#Function to load and convert images to array
from keras.preprocessing.image import load_img
from keras.utils import to_categorical

def images_to_array(data_dir, df, image_size, seed=None):
    """Load the images listed in `df` into a shuffled array with one-hot labels.

    Parameters
    ----------
    data_dir : str
        Directory that contains the image files.
    df : pandas.DataFrame
        Must have an 'id' column with file names relative to `data_dir`
        (extension included) and a 'breed' column whose values are keys of
        the module-level `class_to_num` mapping. Any row index is accepted.
    image_size : tuple
        (height, width, channels) of the output images.
    seed : int, optional
        Seed for the shuffle. With the default None the global NumPy random
        state is used, which is the original behaviour.

    Returns
    -------
    X : numpy.ndarray
        uint8 array of shape (len(df), height, width, channels).
    y : numpy.ndarray
        One-hot labels of shape (len(df), len(class_to_num)), shuffled with
        the same permutation as X.
    """
    # Work on positional arrays: `series[i]` is label based and raises
    # KeyError (or picks the wrong row) when df lacks a 0..n-1 index.
    image_names = df['id'].to_numpy()
    image_labels = df['breed'].to_numpy()
    data_size = len(image_names)

    X = np.zeros([data_size, image_size[0], image_size[1], image_size[2]], dtype=np.uint8)
    y = np.zeros([data_size, 1], dtype=np.uint8)

    # load_img expects target_size as (height, width), without the channel count.
    target_hw = (image_size[0], image_size[1])
    for i in range(data_size):
        img_path = os.path.join(data_dir, image_names[i])
        X[i] = load_img(img_path, target_size=target_hw)
        y[i] = class_to_num[image_labels[i]]

    # Pin the one-hot width to the full class count; inferring it from the
    # data would shrink it whenever the highest-numbered class is absent.
    y = to_categorical(y, num_classes=len(class_to_num))

    # Shuffle images and labels with one shared permutation.
    if seed is None:
        ind = np.random.permutation(data_size)
    else:
        ind = np.random.RandomState(seed).permutation(data_size)
    X = X[ind]
    y = y[ind]
    print('Ouptut Data Size: ', X.shape)
    print('Ouptut Label Size: ', y.shape)
    return X, y

In [None]:
# Input resolution (height, width, channels) used for every image that is
# fed to the pretrained network.
img_size = (299, 299, 3)
X, y = images_to_array(data_dir=train_img_Path, df=target, image_size=img_size)

In [None]:
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D,Lambda, Dropout, InputLayer, Input

def get_features(model_name, data_preprocessor, input_size, data,
                 weights='../input/d/aeryss/keras-pretrained-models/ResNet50_NoTop_ImageNet.h5',
                 batch_size=32):
    """Run `data` through a headless pretrained backbone and return pooled features.

    Parameters
    ----------
    model_name : callable
        Model constructor (e.g. ResNet50) that accepts the keyword arguments
        `weights`, `include_top` and `input_shape`.
    data_preprocessor : callable
        Preprocessing function matching the backbone; it is wrapped in a
        Lambda layer so it runs inside the model.
    input_size : tuple
        Shape of one input image, passed to both Input and the backbone.
    data : array-like
        Batch of images to extract features from.
    weights : str, optional
        Weights argument forwarded to `model_name`. The default is the
        ResNet50 no-top weight file this notebook was written for; pass a
        different value when using another backbone.
    batch_size : int, optional
        Batch size used for prediction (default 32).

    Returns
    -------
    The array returned by `predict` on the pooled backbone output; its shape
    is printed before returning.
    """
    # Pipeline: raw pixels -> preprocessing -> backbone -> global average pooling.
    input_layer = Input(input_size)
    preprocessed = Lambda(data_preprocessor)(input_layer)

    backbone_output = model_name(weights=weights,
                                 include_top=False,
                                 input_shape=input_size)(preprocessed)

    pooled = GlobalAveragePooling2D()(backbone_output)
    feature_extractor = Model(inputs=input_layer, outputs=pooled)

    # Extract the features.
    feature_maps = feature_extractor.predict(data, batch_size=batch_size, verbose=1)
    print('Feature maps shape: ', feature_maps.shape)
    return feature_maps


In [None]:
# Feature extraction for the training images with a pretrained ResNet50.
from keras.applications.resnet import ResNet50, preprocess_input

# Kept under its own name because the test-set extraction reuses it.
ResNet50_preprocessor = preprocess_input
resnet50_features = get_features(
    model_name=ResNet50,
    data_preprocessor=ResNet50_preprocessor,
    input_size=img_size,
    data=X,
)

In [None]:
# Training callbacks: stop once val_loss has not improved for 10 epochs and
# restore the weights from the best epoch.
from keras.callbacks import EarlyStopping

EarlyStop_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
my_callback = [EarlyStop_callback]

In [None]:
# Classification head trained on the pooled ResNet50 features: dropout
# followed by a single softmax layer with one unit per breed.
from keras.models import Sequential
model = Sequential()
model.add(InputLayer(resnet50_features.shape[1:]))
model.add(Dropout(0.7))
# Size the output layer from n_classes instead of a hard-coded 10 so it
# stays in sync with the breed list built earlier in the notebook.
model.add(Dense(n_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Fit the head on the extracted features (this is not a CNN; the convolutional
# backbone is only used for feature extraction). validation_split holds out
# 10% of the samples, and their val_loss drives the early-stopping callback.
history = model.fit(resnet50_features, y, validation_split=0.1, callbacks=my_callback, epochs=50, batch_size=32)

In [None]:
#Converting test images to array
def images_to_array2(data_dir, df, img_size):
    """Load the images listed in df['id'] into one uint8 array (no labels).

    Parameters
    ----------
    data_dir : str
        Directory that contains the image files.
    df : pandas.DataFrame
        Must have an 'id' column of image ids. '.jpg' is appended to an id
        unless it already ends with it. Any row index is accepted.
    img_size : tuple
        Output size; only the first two entries (height, width) are used,
        and the channel count is fixed at 3.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (len(df), height, width, 3), in the row order
        of `df`.
    """
    # Positional array: `series[i]` is label based and breaks when df does
    # not have a 0..n-1 index.
    image_ids = df['id'].to_numpy()
    data_size = len(image_ids)
    X = np.zeros([data_size, img_size[0], img_size[1], 3], dtype=np.uint8)

    # load_img expects target_size as (height, width), without the channel count.
    target_hw = (img_size[0], img_size[1])
    for i in range(data_size):
        image_name = image_ids[i]
        # Accept ids with or without the extension so a frame whose ids were
        # already suffixed does not turn into '<id>.jpg.jpg'.
        if not image_name.endswith('.jpg'):
            image_name = image_name + '.jpg'
        img_path = os.path.join(data_dir, image_name)
        X[i] = load_img(img_path, target_size=target_hw)

    print('Ouptut Data Size: ', X.shape)
    return X

In [None]:
# Load the sample submission here: it supplies the test image ids, and the
# notebook previously used `sample_submission` without ever defining it
# (NameError on a fresh kernel).
# NOTE(review): assumes sample_submission.csv sits next to labels.csv in the
# competition input folder; adjust the path if the dataset layout differs.
sample_submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')

# Placeholder submission with one probability column per breed. The 0.3333
# values are dummies; a later cell overwrites them with model predictions.
submission = pd.DataFrame({'id': sample_submission['id'],
                           'beagle': 0.3333,
                           'chihuahua': 0.3333,
                           'doberman': 0.3333,
                           'french_bulldog': 0.3333,
                           'golden_retriever': 0.3333,
                           'malamute': 0.3333,
                           'pug': 0.3333,
                           'saint_bernard': 0.3333,
                           'scottish_deerhound': 0.3333,
                           'tibetan_mastiff': 0.3333})
submission

In [None]:
# Load every test image listed in the submission frame into one array.
test_data = images_to_array2(
    data_dir=test_img_Path,
    df=submission,
    img_size=img_size,
)

In [None]:
# Push the test images through the same ResNet50 feature extractor that was
# used for the training images.
resnet_features = get_features(
    model_name=ResNet50,
    data_preprocessor=ResNet50_preprocessor,
    input_size=img_size,
    data=test_data,
)

In [None]:
# Class probabilities for the test set, one column per class index.
y_pred = model.predict(
    x=resnet_features,
    batch_size=32,
)

In [None]:
# Copy each breed's predicted probabilities into its submission column,
# write the table to pred.csv without the row index, then display it.
for breed, column in class_to_num.items():
    submission[breed] = y_pred[:, column]
submission.to_csv('pred.csv', index=None)
submission

In [None]:
# Save the trained classification head to 'dogs_breed.h5' for later reuse.
# Only `model` (the layers fitted on the extracted features) is written here;
# the ResNet50 feature extractor is built inside get_features and is not saved.
model.save('dogs_breed.h5')