In [0]:
#Understand the basic Image Classification pipeline and the data-driven
#approach (train/predict stages)
#● Data fetching and understand the train/val/test splits.
#● Implement and apply an optimal k-Nearest Neighbor (kNN) classifier (7.5
#points)
#● Print the classification metric report (2.5 points)
#● Implement and apply a deep neural network classifier including (feedforward neural network, RELU activations) (5 points)
#● Understand and be able to implement (vectorized) backpropagation (cost stochastic gradient descent, cross entropy loss, cost functions) (2.5 points)
#● Implement batch normalization for training the neural network (2.5 points)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import h5py
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Reshape
from keras.layers import Convolution2D, MaxPooling2D, BatchNormalization
from keras.optimizers import SGD
from keras.backend import clear_session
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [2]:
# Attach Google Drive to the Colab runtime; later cells read the dataset from a path under /gdrive
from google.colab import drive
drive.mount(mountpoint='/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [0]:
# The SVHN file is a binary HDF5 container, so pd.read_csv cannot parse it.
# Open it read-only with h5py instead and keep a small summary table (name, shape,
# dtype) of what it holds; the `with` block closes the handle once the table is built.
# Assumes every top-level key is a dataset (later cells index them directly) - TODO confirm.
with h5py.File('/gdrive/My Drive/Data Science/SVHN_single_grey1.h5', 'r') as svhn_file:
    SVHN_single_grey1 = pd.DataFrame(
        [
            {
                'dataset': name,
                'shape': svhn_file[name].shape,
                'dtype': str(svhn_file[name].dtype),
            }
            for name in svhn_file.keys()
        ]
    )
SVHN_single_grey1

In [0]:
# Open the HDF5 dataset read-only; the handle stays open because later cells pull
# the individual splits from it (it is closed in the last cell of the notebook).
images_of_numbers_file = h5py.File('/gdrive/My Drive/Data Science/SVHN_single_grey1.h5', 'r')
# Test images, materialised as a NumPy array
X_test = np.array(images_of_numbers_file['X_test'])
# Names of the top-level entries stored in the file
list_of_keys = list(images_of_numbers_file.keys())
list_of_keys

In [0]:
# Pull every split out of the open HDF5 file.
# These are h5py dataset handles, so they are only readable while the file is open.
X_train = images_of_numbers_file['X_train']
# X_test is extracted here as well: the normalisation cell consumes it, but this
# cell previously skipped it.
X_test = images_of_numbers_file['X_test']
X_val = images_of_numbers_file['X_val']
y_train = images_of_numbers_file['y_train']
y_test = images_of_numbers_file['y_test']
y_val = images_of_numbers_file['y_val']

In [0]:
# Converting the image splits into arrays and normalizing image data from 0-255 to 0-1.
# Each split is read straight from the open HDF5 file instead of rescaling X_* in
# place, which keeps this cell idempotent: re-running it can never divide data that
# was already normalised by 255 a second time.
X_train = np.array(images_of_numbers_file['X_train']) / 255
X_test = np.array(images_of_numbers_file['X_test']) / 255
X_val = np.array(images_of_numbers_file['X_val']) / 255

In [0]:
# Data flattening for the KNN classifier: one feature vector per image.
# reshape(len(...), -1) derives the sizes from the arrays themselves instead of
# hard-coding the sample counts and the 32*32 pixel count.
X_train_reshaped = np.reshape(X_train, (len(X_train), -1))
X_test_reshaped = np.reshape(X_test, (len(X_test), -1))
X_val_reshaped = np.reshape(X_val, (len(X_val), -1))

# Trying to find the right value of k (1..50).
# Candidates are scored on the validation split so the test split stays untouched
# until the final evaluation; choosing k on the test data would leak it into the
# model selection and make the reported test accuracy optimistic.
list_opt = np.arange(1, 51)
accuracies = []
for Ks in list_opt:
    model_knn = KNeighborsClassifier(n_neighbors=Ks, algorithm='brute')
    model_knn = model_knn.fit(X_train_reshaped, y_train)

    # Evaluate the model on the validation split and update the accuracies list
    y_pred = model_knn.predict(X_val_reshaped)
    acc_score = accuracy_score(y_val, y_pred)
    print(acc_score)
    accuracies.append(acc_score)

In [0]:
# Position of the best score in the search results, then the k and accuracy stored there
i = int(np.argmax(accuracies))
best_k_found, best_accuracy_found = list_opt[i], accuracies[i]
print("k=", best_k_found, "achieved highest accuracy of", best_accuracy_found * 100, "on validation data")

In [0]:
# Our final KNN model, refit with the k that scored best in the search above.
# The value is taken from the search results rather than typed in by hand, so the
# model can no longer drift out of sync with the search (the old comment said
# k = 1 while the code used 47).
best_k = int(list_opt[int(np.argmax(accuracies))])
model_knn_2 = KNeighborsClassifier(n_neighbors=best_k, algorithm='brute')
model_knn_2 = model_knn_2.fit(X_train_reshaped, y_train)
# Predictions on the held-out test images, used by the report cells below
y_pred_2 = model_knn_2.predict(X_test_reshaped)


In [0]:
# Classification report of the final KNN model on the test split
class_report = classification_report(y_true=y_test, y_pred=y_pred_2)
print(class_report)

In [0]:
# Overall accuracy and confusion matrix of the final KNN model on the test split
accuracy = accuracy_score(y_test, y_pred_2)
c_matrix = confusion_matrix(y_test, y_pred_2)
# Display them here: both names are reassigned later for the neural network, so
# without this print the KNN figures would be computed and then silently lost.
print("The accuracy is", round(accuracy*100, 2), "% and the confusion matrix is\n", c_matrix)

In [0]:
# Converting labels to one hot vectors.
# num_classes is pinned to 10 (the width of the networks' softmax layer) so every
# split gets the same number of columns even if one of them lacks the highest class.
# The integer labels are re-read from the open HDF5 file, which makes the cell safe
# to re-run: encoding the already one-hot y_* arrays again would corrupt their shape.
y_train = to_categorical(np.array(images_of_numbers_file['y_train']), num_classes=10)
y_test = to_categorical(np.array(images_of_numbers_file['y_test']), num_classes=10)
y_val = to_categorical(np.array(images_of_numbers_file['y_val']), num_classes=10)


In [0]:
# Deep neural network (baseline): flatten -> four ReLU hidden layers with one
# dropout layer -> 10-way softmax. Layers are added in forward order and the model
# is compiled only once the stack is complete; previously compile() ran in the
# middle, the input Reshape came after the Dense layers, and Dropout was the last layer.
model_dnn = Sequential()
# Reshape data from 2D to 1D -> 32x32 to 1024 (first layer: it declares the input shape)
model_dnn.add(Reshape((1024,), input_shape=(32, 32,)))
# 1st hidden layer
model_dnn.add(Dense(200, activation='relu'))
# 2nd hidden layer
model_dnn.add(Dense(100, activation='relu'))
# Dropout layer
model_dnn.add(Dropout(0.25))
# 3rd hidden layer
model_dnn.add(Dense(60, activation='relu'))
# 4th hidden layer
model_dnn.add(Dense(30, activation='relu'))
# Output layer
model_dnn.add(Dense(10, activation='softmax'))
# Loss and Optimizer
model_dnn.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])

In [0]:
# Training the model.
# `epochs` is the Keras 2 name of the argument; the Keras 1 spelling `nb_epoch` is
# deprecated and rejected by later releases.
# Per-epoch validation uses the dedicated validation split, keeping the test split
# unseen until the final evaluation.
model_dnn.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [0]:
# Print the layer-by-layer summary of the first network (model_dnn)
model_dnn.summary()

In [0]:
# Reset the Keras/TensorFlow session state before building the next model
clear_session()

# Second deep neural network, declared in one go by handing the layers to Sequential
model_dnn_2 = Sequential([
    # Reshape data from 2D to 1D -> 32x32 to 1024
    Reshape((1024,), input_shape=(32, 32,)),
    # 1st and 2nd hidden layers
    Dense(200, activation='relu'),
    Dense(100, activation='relu'),
    # Dropout layer between the second and third hidden layers
    Dropout(0.25),
    # 3rd and 4th hidden layers
    Dense(60, activation='relu'),
    Dense(30, activation='relu'),
    # Output layer: 10-way softmax
    Dense(10, activation='softmax'),
])

# Cross-entropy loss, optimised by SGD with an explicit learning rate of 0.05
sgd_optimizer = SGD(lr=0.05)
model_dnn_2.compile(
    optimizer=sgd_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [0]:
# Training the model.
# `epochs` is the Keras 2 name of the argument; the Keras 1 spelling `nb_epoch` is
# deprecated and rejected by later releases.
# Per-epoch validation uses the dedicated validation split, keeping the test split
# unseen until the final evaluation.
model_dnn_2.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [0]:
# Print the layer-by-layer summary of the second network (model_dnn_2)
model_dnn_2.summary()

In [0]:
# Define deep neural network Model with batch normalization on the flattened input
# and after every hidden layer
model_dnn_3 = Sequential()
# Reshape data from 2D to 1D -> 32x32 to 1024
model_dnn_3.add(Reshape((1024,), input_shape=(32, 32,)))
# Normalize the data
model_dnn_3.add(BatchNormalization())
# 1st hidden layer
model_dnn_3.add(Dense(200, activation='relu'))
model_dnn_3.add(BatchNormalization())
# 2nd hidden layer
model_dnn_3.add(Dense(100, activation='relu'))
model_dnn_3.add(BatchNormalization())
# Dropout layer
model_dnn_3.add(Dropout(0.25))
# 3rd hidden layer
model_dnn_3.add(Dense(60, activation='relu'))
model_dnn_3.add(BatchNormalization())
# 4th hidden layer
model_dnn_3.add(Dense(30, activation='relu'))
model_dnn_3.add(BatchNormalization())
# Output layer
model_dnn_3.add(Dense(10, activation='softmax'))
# Loss and Optimizer
model_dnn_3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Training the model.
# `epochs` is the Keras 2 name of the argument; the Keras 1 spelling `nb_epoch` is
# deprecated and rejected by later releases.
# Per-epoch validation uses the dedicated validation split, keeping the test split
# unseen until the final evaluation.
model_dnn_3.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

In [0]:
# Softmax outputs of the batch-normalised network for every test image
y_pred_3 = model_dnn_3.predict(x=X_test)

In [0]:
# The classification report for the batch-normalised network is as follows.
# The one-hot labels and the softmax outputs are collapsed back to class indices
# with argmax before being compared.
class_report = classification_report(y_test.argmax(axis=1), y_pred_3.argmax(axis=1))
# Print it, as the KNN report cell does; previously the report was computed but never shown
print(class_report)

In [0]:
# Accuracy and confusion matrix of the network, computed on class indices that are
# recovered once from the one-hot labels and the softmax outputs
true_labels = y_test.argmax(axis=1)
predicted_labels = y_pred_3.argmax(axis=1)
accuracy = accuracy_score(true_labels, predicted_labels)
c_matrix = confusion_matrix(true_labels, predicted_labels)
print("The accuracy is", round(accuracy*100, 2), "% and the confusion matrix is\n", c_matrix)

In [0]:
#The accuracy of the KNN model comes out to 51.33%, whereas the accuracy of the deep neural network model comes out to
#84.2%.
#The accuracy of the KNN model is very poor compared to the accuracy of the deep neural network model.
#The advantage of the KNN model is that it needs very little hyperparameter tuning, whereas a NN needs a lot of hyperparameter tuning.
#KNN does not scale well to large data, whereas a NN needs large training data to achieve an efficient accuracy.
#In this case, the KNN model incurs a large computation cost at prediction time.
#From the above classification metrics, we can see that KNN is definitely not a good choice for this image classification model and the use of
#the DNN model is truly justified.


In [0]:
# Release the HDF5 file handle; h5py dataset objects obtained from it cannot be
# read after this point
images_of_numbers_file.close()