# Downloading data from Kaggle

In [None]:
# Upload the Kaggle API token (kaggle.json) from the local machine.
# The result is bound to a name instead of being left as the cell's last
# expression, so the notebook does not echo the uploaded file's contents
# (which include the secret API key) into the saved cell output.
from google.colab import files
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Installing the kaggle API
!pip install -q kaggle

In [None]:
# Create the ~/.kaggle folder and copy the uploaded token into it.
# The kaggle API expects kaggle.json to be in the ~/.kaggle folder.

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the token to owner read/write only; this avoids the warning
# that would otherwise be shown when the kaggle command runs.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!mkdir dog_dataset
%cd dog_dataset

/content/dog_dataset


In [None]:
# Search Kaggle datasets for "dogbreedidfromcomp" to find the dataset reference
!kaggle datasets list -s dogbreedidfromcomp

ref                                title                    size  lastUpdated          downloadCount  voteCount  usabilityRating  
---------------------------------  ----------------------  -----  -------------------  -------------  ---------  ---------------  
catherinehorng/dogbreedidfromcomp  dog-breed-id-from-comp  691MB  2020-06-26 03:09:05           1882          6  0.1764706        


In [None]:
# Download the dataset archive into the current working directory,
# then step back up to the parent directory.
!kaggle datasets download catherinehorng/dogbreedidfromcomp
%cd ..

Downloading dogbreedidfromcomp.zip to /content/dog_dataset
 99% 687M/691M [00:07<00:00, 93.5MB/s]
100% 691M/691M [00:07<00:00, 94.7MB/s]
/content


In [None]:
# Unzip the file and remove unwanted data
!unzip /content/dog_dataset/dogbreedidfromcomp.zip -d dog_dataset

In [None]:
!rm /content/dog_dataset/dogbreedidfromcomp.zip 
!rm /content/dog_dataset/sample_submission.csv

# Importing libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow

import tqdm as tqdm
from keras.preprocessing import image
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam

In [None]:
# Read the label table (image id and breed) and take a first look at it
labels_path = '/content/dog_dataset/labels.csv'
labels_all = pd.read_csv(labels_path)

n_rows, n_cols = labels_all.shape
print((n_rows, n_cols))
labels_all.head()

(10222, 2)


Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [None]:
# Count the distinct values in the breed column
breeds_all = labels_all['breed']
n_unique_breeds = breeds_all.nunique(dropna=False)  # dropna=False matches len(unique())
print('Total number of unique breeds:')
print(n_unique_breeds)

Total number of unique breeds:
120


In [None]:
# Five most frequent breeds (value_counts sorts by count, descending)
print(breeds_all.value_counts().iloc[:5])

scottish_deerhound      126
maltese_dog             117
afghan_hound            116
entlebucher             115
bernese_mountain_dog    114
Name: breed, dtype: int64


In [None]:
# Five least frequent breeds (the tail of the descending count table)
print(breeds_all.value_counts().iloc[-5:])

golden_retriever     67
brabancon_griffon    67
komondor             67
eskimo_dog           66
briard               66
Name: breed, dtype: int64


In [None]:
# Select the three most frequent breeds as the target classes
top_breed_counts = breeds_all.value_counts().head(3)
class_names = top_breed_counts.index.tolist()
print(class_names)

['scottish_deerhound', 'maltese_dog', 'afghan_hound']


In [None]:
# Keep only the rows whose breed is one of the selected classes.
# .copy() makes `labels` an independent frame, so modifying it later does not
# operate on (or get flagged as operating on) a slice of labels_all.
labels = labels_all[labels_all['breed'].isin(class_names)].copy()

In [None]:
# Preview the filtered rows (the index still carries the positions from labels_all)
labels.head()

Unnamed: 0,id,breed
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound
12,00693b8bc2470375cc744a6391d397ec,maltese_dog
79,01e787576c003930f96c966f9c3e1d44,scottish_deerhound
90,022b34fd8734b39995a9f38a4f3e7b6b,maltese_dog
146,0379145880ad3978f9b80f0dc2c03fba,afghan_hound


In [None]:
# Renumber the rows 0..n-1, discarding the old index.
# Reassigning (instead of inplace=True) avoids mutating a filtered frame in
# place and keeps the cell safe to re-run.
labels = labels.reset_index(drop=True)

In [None]:
# Preview again to confirm the index now starts at 0
labels.head()

Unnamed: 0,id,breed
0,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound
1,00693b8bc2470375cc744a6391d397ec,maltese_dog
2,01e787576c003930f96c966f9c3e1d44,scottish_deerhound
3,022b34fd8734b39995a9f38a4f3e7b6b,maltese_dog
4,0379145880ad3978f9b80f0dc2c03fba,afghan_hound


In [None]:
# Report how many labelled images remain after restricting to the chosen classes
n_screened = len(labels)
print('Training data after screening: {}'.format(n_screened))

Training data after screening: 359


# Image preprocessing

In [None]:
# Size every image is resized to, plus the number of colour channels
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 224, 224, 3

# Pre-allocate the image tensor: one (height, width, channels) float32 slot per label row
X_data = np.zeros((len(labels), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype='float32')

# One-hot encode the breed column; the column order follows class_names
Y_data = label_binarize(labels['breed'], classes=class_names)

# Load each image at the target size, convert it to an array and divide by 255.
# Iterating over the id values (instead of indexing labels['id'][i] by label)
# keeps the loop correct even when the frame's index is not a clean 0..n-1 range.
for i, image_id in enumerate(tqdm.tqdm(labels['id'])):
  img = image.load_img('/content/dog_dataset/train/%s.jpg' % image_id, target_size=(IMG_HEIGHT, IMG_WIDTH))
  X_data[i] = image.img_to_array(img) / 255.0


100%|██████████| 359/359 [00:01<00:00, 238.51it/s]


In [None]:
# Sanity check: expected (number of images, 224, 224, 3)
print(X_data.shape)

(359, 224, 224, 3)


In [None]:
# Sanity check: expected (number of images, number of classes)
print(Y_data.shape)

(359, 3)


# Model building

In [None]:
# Build a small CNN classifier.
# The input shape and the width of the output layer are derived from the
# prepared data, so the network stays consistent if the image size or the
# number of selected breeds changes.
input_shape = X_data.shape[1:]  # (height, width, channels) of one image
num_classes = Y_data.shape[1]   # one softmax unit per one-hot label column

model = Sequential([
  # Four convolution + max-pooling stages; all but the first use L2 weight regularisation
  Conv2D(filters=64, kernel_size=(5, 5), activation='relu', input_shape=input_shape),
  MaxPool2D(pool_size=(2, 2)),

  Conv2D(filters=32, kernel_size=(3, 3), activation='relu', kernel_regularizer='l2'),
  MaxPool2D(pool_size=(2, 2)),

  Conv2D(filters=16, kernel_size=(7, 7), activation='relu', kernel_regularizer='l2'),
  MaxPool2D(pool_size=(2, 2)),

  Conv2D(filters=8, kernel_size=(5, 5), activation='relu', kernel_regularizer='l2'),
  MaxPool2D(pool_size=(2, 2)),

  # Classifier head: flatten the feature maps, two hidden layers, softmax output
  Flatten(),
  Dense(128, activation='relu', kernel_regularizer='l2'),
  Dense(64, activation='relu', kernel_regularizer='l2'),
  Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels; Adam's first positional
# argument is the learning rate.
model.compile(loss='categorical_crossentropy', optimizer=Adam(0.0001), metrics=['accuracy'])

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 220, 220, 64)      4864      
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 110, 110, 64)     0         
 2D)                                                             
                                                                 
 conv2d_11 (Conv2D)          (None, 108, 108, 32)      18464     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 54, 54, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_12 (Conv2D)          (None, 48, 48, 16)        25104     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 24, 24, 16)      

In [34]:
# Fixed seed so the train/validation/test membership is reproducible across runs
SEED = 42

# Hold out 10% of the data as the test set. Stratifying on the class index
# (argmax of the one-hot row) keeps the breed proportions equal in both parts,
# which matters with only a few hundred images.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
  X_data, Y_data, test_size=0.1, random_state=SEED, stratify=Y_data.argmax(axis=1)
)

# Split the remaining data again: 20% of it becomes the validation set
Xtrain, Xval, Ytrain, Yval = train_test_split(
  Xtrain, Ytrain, test_size=0.2, random_state=SEED, stratify=Ytrain.argmax(axis=1)
)

In [None]:
# Fit the network on the training split, evaluating on the validation split each epoch
epochs = 100
batch_size = 128

history = model.fit(
  x=Xtrain,
  y=Ytrain,
  validation_data=(Xval, Yval),
  epochs=epochs,
  batch_size=batch_size,
)

Epoch 1/100


In [None]:
# Training vs. validation accuracy per epoch, drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(history.history['accuracy'], color='r')
ax.plot(history.history['val_accuracy'], color='b')
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epochs')
ax.legend(['train', 'val'])

plt.show()

In [None]:
# Predict on the held-out test images and score the model on the same split
Y_pred = model.predict(Xtest)
score = model.evaluate(Xtest, Ytest)

# score is [loss, accuracy] because the model was compiled with metrics=['accuracy']
test_accuracy_pct = round(score[1] * 100, 2)
print('Accuracy over the test set: \n ', test_accuracy_pct, '%')

In [None]:
# Show one test image next to its true and predicted breed
sample_idx = 1
plt.imshow(Xtest[sample_idx])
plt.show()

# The argmax of a one-hot / softmax row is a class index. It maps to a breed
# name through class_names (the order used for the one-hot encoding), not
# through a row of the labels table.
print("Originally : ", class_names[np.argmax(Ytest[sample_idx])])
print("Predicted : ", class_names[np.argmax(Y_pred[sample_idx])])

In [None]:
# Save the trained model to 'dog_breed.h5' in the current working directory
model.save('dog_breed.h5')