In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Convolution2D, MaxPooling2D , Flatten , Dropout 
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy, categorical_crossentropy
from sklearn.metrics import classification_report,confusion_matrix
import tensorflow as tf
import cv2
import os
from os.path import join, exists, expanduser
from os import listdir, makedirs
import numpy as np
from tqdm import tqdm

# Use Keras CNN

## 1. Loading the data

In [None]:
# Read the training label table. The name keeps its original spelling
# (`lables`) because later cells refer to it by that name.
lables = pd.read_csv('../input/dog-breed-identification/labels.csv')
print(lables.head(5))

# Count images per breed, then show the first five counts and the shape of
# the count series (one entry per distinct breed).
breed_count = lables['breed'].value_counts()
print(breed_count.head(), breed_count.shape, sep='\n')

In [None]:
# One-hot encode the breed column. `one_hot` keeps the breed names as its
# column labels (read again when the submission frame is built), while
# `one_hot_labels` is the same table as a plain NumPy array so that rows can
# be looked up by position.
targets = pd.Series(data=lables['breed'])
one_hot = pd.get_dummies(data=targets, sparse=True)
one_hot_labels = np.asarray(one_hot)


In [None]:
# Side length (in pixels) every image is resized to before entering the network.
img_rows = img_cols = 128
# Images in this notebook are read with cv2.imread flag 0 (grayscale),
# so there is a single channel, not three colour channels.
num_channel = 1

In [None]:
# Load one sample training image as a single-channel grayscale array
# (flag 0 == cv2.IMREAD_GRAYSCALE) to sanity-check the input data.
sample_path = '../input/dog-breed-identification/train/000bec180eb18c7604dcecc8fe0dba07.jpg'
img_1 = cv2.imread(sample_path, 0)

# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of later inside matplotlib / cv2.resize.
if img_1 is None:
    raise FileNotFoundError('Could not read image: {}'.format(sample_path))

plt.title('Original Image')
# A 2-D array is drawn with matplotlib's default colormap unless told
# otherwise; request the gray colormap so the picture is not shown in false colour.
plt.imshow(img_1, cmap='gray');

In [None]:
# Resize the sample image to the network input size.
# cv2.resize takes dsize as (width, height), i.e. (columns, rows) -- the
# reverse of NumPy's (rows, columns) shape order.
img_1_resize = cv2.resize(img_1, (img_cols, img_rows))
print(img_1_resize.shape)

plt.title('Resized Image')
# Gray colormap so the single-channel image is not rendered in false colour.
plt.imshow(img_1_resize, cmap='gray');


In [None]:
# Build the training set: one resized grayscale image (x_feature) and the
# matching one-hot label row (y_feature) for every row of `lables`.
x_feature = []
y_feature = []

# Each row of `lables.values` unpacks into two fields: the image id (used to
# build the file name) and a second column, presumably the breed name, which is
# not needed here because the one-hot row is looked up by row position.
for row_idx, (image_id, _breed) in enumerate(tqdm(lables.values)):
    image_path = '../input/dog-breed-identification/train/{}.jpg'.format(image_id)
    train_img = cv2.imread(image_path, 0)  # flag 0 -> grayscale
    # cv2.imread returns None on a missing/unreadable file; surface that with
    # the offending path instead of an opaque cv2.resize assertion.
    if train_img is None:
        raise FileNotFoundError('Could not read training image: {}'.format(image_path))
    # dsize is (width, height) == (columns, rows).
    x_feature.append(cv2.resize(train_img, (img_cols, img_rows)))
    y_feature.append(one_hot_labels[row_idx])



In [None]:
# Stack the resized training images into one float32 array and divide by 255
# to normalise the pixel values.
x_train_data = np.array(x_feature, dtype=np.float32)
x_train_data /= 255.
print(x_train_data.shape)

# Insert a channel axis at position 3 so each sample matches the
# (img_rows, img_cols, num_channel) input shape declared for the first conv layer.
x_train_data = np.expand_dims(x_train_data, axis=3)
print(x_train_data.shape)

In [None]:
# Stack the collected one-hot label rows into a single uint8 array,
# one row per training image.
y_train_data = np.array(y_feature, dtype=np.uint8)
print(y_train_data.shape)

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split the same from run to run.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_data,
    y_train_data,
    test_size=0.2,
    random_state=2,
)
print(x_train.shape, x_val.shape, sep='\n')

In [None]:
# The sample submission file supplies the ids of the test images; those ids
# are used both to locate the image files and as the first column of the
# final submission.
submission = pd.read_csv('../input/dog-breed-identification/sample_submission.csv')
test_img = submission['id']
print(test_img.head())


In [None]:
# Build the test set: one resized grayscale image per id in `test_img`,
# in the same order as the ids.
x_test_feature = []

for image_id in tqdm(test_img.values):
    image_path = '../input/dog-breed-identification/test/{}.jpg'.format(image_id)
    img = cv2.imread(image_path, 0)  # flag 0 -> grayscale
    # cv2.imread returns None on a missing/unreadable file; report the path
    # instead of letting cv2.resize fail with an opaque assertion.
    if img is None:
        raise FileNotFoundError('Could not read test image: {}'.format(image_path))
    # dsize is (width, height) == (columns, rows).
    x_test_feature.append(cv2.resize(img, (img_cols, img_rows)))

In [None]:
# Stack the resized test images into one float32 array and divide by 255,
# mirroring the preprocessing applied to the training images.
x_test_data = np.array(x_test_feature, dtype=np.float32)
x_test_data /= 255.
print(x_test_data.shape)

# Insert a channel axis at position 3 to match the model's input shape.
x_test_data = np.expand_dims(x_test_data, axis=3)
print(x_test_data.shape)


In [None]:
# Start from an empty Sequential container; layers are appended to it with
# model.add(...) in the next cell. Re-run this cell before re-running that one,
# otherwise the layers are added a second time to the same model.
model = Sequential()

In [None]:
# Width of the softmax output, taken from the one-hot label matrix instead of
# being hard-coded, so the network always matches the number of label columns.
num_classes = y_train.shape[1]

# Two convolution + max-pooling stages. The ReLU (rectifier) activation
# provides the non-linearity; 'same' padding keeps the spatial size through
# each convolution so only the pooling layers halve it.
model.add(Convolution2D(filters=64, kernel_size=(4, 4), padding='same',
                        activation='relu',
                        input_shape=(img_rows, img_cols, num_channel)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(filters=64, kernel_size=(4, 4), padding='same',
                        activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps into a vector for the dense layers.
model.add(Flatten())
# Fully connected hidden layer (120 units is a hyper-parameter here, not the class count).
model.add(Dense(units=120, activation='relu'))
# Output layer: one softmax probability per class.
model.add(Dense(units=num_classes, activation='softmax'))

In [None]:
# Configure training with the Adam optimiser, categorical cross-entropy loss
# (labels are one-hot rows) and accuracy as the reported metric, then print
# the layer-by-layer summary.
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Training hyper-parameters.
batch_size = 128
nb_epochs = 2

# Fit on the training split and evaluate on the held-out split after each
# epoch. `history.history` holds the per-epoch loss/metric values that are
# plotted further down.
history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=nb_epochs,
    # Documented verbosity values are 0 (silent), 1 (progress bar) and
    # 2 (one line per epoch); the previous value of 10 is not one of them.
    verbose=2,
    validation_data=(x_val, y_val),
    initial_epoch=0,
)

In [None]:
# Learning curves: loss on the top panel, accuracy on the bottom panel.
fig, ax = plt.subplots(2, 1, sharex=True)

ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
ax[0].set_ylabel('Loss')
legend = ax[0].legend(loc='best', shadow=True)

# The model is compiled with metrics=["accuracy"]; the history key is
# 'accuracy' in current Keras releases and was 'acc' in older ones, so accept either.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
ax[1].plot(history.history[acc_key], color='b', label="Training accuracy")
ax[1].plot(history.history['val_' + acc_key], color='r', label="Validation accuracy")
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
ax[1].legend(loc='best', shadow=True)

fig.tight_layout();

In [None]:
# Run the trained network on the test images; each output row is the softmax
# output for one image.
results = model.predict(x_test_data)

# Name the output columns after the one-hot encoded breeds so they line up
# with the label encoding used for training.
col_names = one_hot.columns.values
prediction = pd.DataFrame(results, columns=col_names)

# Put the image ids from the sample submission in front of the class columns.
prediction.insert(0, 'id', submission['id'])

# Write the submission file without the DataFrame index.
submission = prediction
submission.to_csv('new_submission.csv', index=False)
results