<a href="https://colab.research.google.com/github/SaumilShah-7/Dog-Breed-Identification-Kaggle/blob/master/Dog_Breed_Identification_Kaggle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Ask the notebook runtime for TensorFlow 2.x via the `%tensorflow_version`
# line magic. The magic is attempted on a best-effort basis: any exception it
# raises is deliberately ignored so the cell still proceeds to the import.
# NOTE(review): presumably the magic only exists on Google Colab -- confirm.
try:
  %tensorflow_version 2.x
except Exception:
  pass

# Print the TensorFlow version that actually got imported.
import tensorflow as tf
print(tf.__version__)

import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from matplotlib import pyplot
from zipfile import ZipFile
import cv2

In [0]:
# Open the Colab file-upload prompt. The shell steps below expect the uploaded
# file to be named `kaggle.json` and to sit in the current working directory.
from google.colab import files
files.upload()

# Install the Kaggle command-line client (quiet mode), then copy the uploaded
# `kaggle.json` into ~/.kaggle/ (presumably where the client looks for its API
# credentials -- confirm against the Kaggle CLI docs).
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Mode 600: only the owning user may read or write the credentials file.
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# Fetch the files of the `dog-breed-identification` competition with the
# Kaggle command-line client installed in the previous cell.
!kaggle competitions download -c dog-breed-identification

In [0]:
# Unpack each downloaded archive into the current working directory.
# Every archive is listed exactly once (the original cell extracted
# 'labels.csv.zip' twice), and a single loop replaces the copy-pasted
# `with` blocks.
for archive_name in ('train.zip', 'test.zip', 'labels.csv.zip', 'sample_submission.csv.zip'):
  with ZipFile(archive_name, 'r') as zipobj:
    zipobj.extractall()

In [0]:
# Label table; the code below uses its 'id' and 'breed' columns.
y_df = pd.read_csv('labels.csv')

# Uncomment to inspect how many images each breed has:
# print(y_df['breed'].value_counts())

# One-hot encode the breed names into a dense 2-D array `y`
# (one row per image, one column per distinct breed).
y_breed = np.asarray(y_df['breed']).reshape(-1, 1)
try:
  # Newer scikit-learn releases spell the dense-output switch `sparse_output`.
  ohe = OneHotEncoder(sparse_output=False)
except TypeError:
  # Older scikit-learn releases only accept the original `sparse` keyword.
  ohe = OneHotEncoder(sparse=False)
y = ohe.fit_transform(y_breed)

# Maps a one-hot column index back to its breed name.
labels_mapping = {np.argmax(y[i]): y_df.loc[i,'breed'] for i in range(len(y))}

image_height = 128
image_width = 128

# Read every training image listed in the label table and resize it to a
# common size so the images can be stacked into a single array.
x = []
for i in y_df['id']:
  image_path = '/content/train/'+i+'.jpg'
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with the path rather than inside cv2.resize.
    raise FileNotFoundError('Could not read training image: ' + image_path)
  # NOTE: cv2.resize takes its target size as (width, height). The argument
  # order is kept as in the original so the resulting array shape stays
  # (image_width, image_height, 3), which is what the model's input_shape
  # is written against. With both values equal to 128 the order has no effect.
  x.append(cv2.resize(image, (image_height, image_width)))
x = np.asarray(x)

In [0]:
from google.colab.patches import cv2_imshow

# Visual sanity check: render one loaded image, then report its array shape
# next to the breed label stored at the same position.
sample_idx = 1
cv2_imshow(x[sample_idx])
print(x[sample_idx].shape, y_breed[sample_idx])

In [0]:
# Statistics are taken along axis 0 of `x`, i.e. separately for every
# position in the remaining axes.
mean = np.mean(x, axis=0)
std_dev = np.std(x, axis=0)
# A position that holds the same value in every sample has zero deviation;
# dividing by it would produce NaN/inf. Use 1 there so those entries simply
# become 0 after centering.
std_dev[std_dev == 0] = 1.0

# float32 halves the memory of the converted array compared with float64.
# NOTE(review): Keras is expected to train in float32 by default, so no
# precision should be lost downstream -- confirm if the floatx setting differs.
x = x.astype(np.float32)
x-=mean
x/=std_dev

In [0]:
# Convolutional classifier: three Conv2D + MaxPooling2D stages with growing
# filter counts (32 -> 64 -> 128), followed by a dense head.
model = Sequential()

# Stage 1. The input shape is written as (image_width, image_height, 3);
# with both dimensions equal the order makes no difference.
model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(image_width, image_height, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2, with dropout after pooling.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))

# Stage 3, then flatten the feature maps for the dense layers.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

model.add(Dense(units=500, activation='relu'))
model.add(Dropout(0.3))

# Softmax output layer. The 120 units are presumably one per breed class;
# this must equal the number of columns in the one-hot label array -- confirm.
model.add(Dense(units=120, activation='softmax'))

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [0]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Two callbacks watching different validation metrics:
#  - es stops training once val_loss has not improved for 100 epochs;
#  - mc writes 'best_model.h5' whenever val_accuracy reaches a new maximum.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# validation_split=0.2 holds out 20% of the supplied samples for validation.
history = model.fit(x, y, validation_split=0.2, epochs=300, batch_size=1500, callbacks=[es, mc])
# Reload the checkpoint written by mc (best val_accuracy), which may differ
# from the weights `model` holds after the final epoch.
saved_model = load_model('best_model.h5')

# Loss curves per epoch. The second curve comes from the validation split,
# not from a separate test set, so it is labelled accordingly.
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='validation')
pyplot.legend()
pyplot.show()