In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import keras
import tensorflow as tf
from keras.datasets import cifar10, mnist
from keras.models import Sequential
from keras.callbacks import Callback
from keras.layers import Reshape, Dense, Activation, Dropout, Flatten, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPool2D
from keras.utils import np_utils

Using TensorFlow backend.


In [0]:
h5f = h5py.File('./drive/My Drive/SVHN_single_grey1.h5', 'r')

In [0]:
BATCH_SIZE = 32
INPUT_SIZE = 32
EPOCHS = 20

input_shape = (INPUT_SIZE,INPUT_SIZE,)

In [5]:
X_train = h5f['X_train'][:]
y_train = h5f['y_train'][:]
X_test = h5f['X_test'][:]
y_test = h5f['y_test'][:]
X_val = h5f['X_val'][:]
y_val = h5f['y_val'][:]

# Close this file
h5f.close()

print('Training set', X_train.shape, y_train.shape)
print('Validation set', X_val.shape, y_val.shape)
print('Test set', X_test.shape, y_test.shape)

Training set (42000, 32, 32) (42000,)
Validation set (60000, 32, 32) (60000,)
Test set (18000, 32, 32) (18000,)


In [6]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# creating odd list of K for KNN
neighbors = list(range(1,11))


# empty list that will hold cv scores
cv_scores = []
# perform 10-fold cross validation
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train[:,:,0], y_train, cv=10, scoring='accuracy')
    cv_scores.append(scores.mean())
    print(f'Score calculated for neigbour {k} = {cv_scores[k-1]}')

Score calculated for neigbour 1 = 0.1082390300614829
Score calculated for neigbour 2 = 0.1147146740993373
Score calculated for neigbour 3 = 0.11611878981198565
Score calculated for neigbour 4 = 0.11666705537254904
Score calculated for neigbour 5 = 0.11673906873978383
Score calculated for neigbour 6 = 0.11521430645624042
Score calculated for neigbour 7 = 0.11623904672046885
Score calculated for neigbour 8 = 0.11723930705436562
Score calculated for neigbour 9 = 0.11866624065703316
Score calculated for neigbour 10 = 0.1179754808713894


In [7]:
# changing to misclassification error
MSE = [1 - x for x in cv_scores]

# determining best k
optimal_k = neighbors[MSE.index(min(MSE))]
print("The optimal number of neighbors is %d" % optimal_k)

The optimal number of neighbors is 9


In [8]:
from sklearn.metrics import accuracy_score

model = KNeighborsClassifier(n_neighbors= optimal_k)
model.fit(X_train[:,:,0], y_train)
y_pred = model.predict(X_test[:,:,0])

# Calculate Accuracy Rate by using accuracy_score()
print("Accuracy Rate, of k={%i} neighbours which is calculated by accuracy_score() is: %f" 
      % (optimal_k,accuracy_score(y_test, y_pred)))

Accuracy Rate, of k={9} neighbours which is calculated by accuracy_score() is: 0.115500


In [9]:
from sklearn.metrics import classification_report

map_characters = {0:'Zero',1:'One',2:'Two',3:'Three',4:'Four',5:'Five',6:'Six',7:'Seven', 8:'Eight',9:'Nine'}
                  
#print('\n',classification_report(np.where(y_test > 0)[1], np.argmax(y_pred, axis=1), target_names=list(map_characters.values())), sep='')    
print('\n',classification_report(y_test, y_pred, target_names=list(map_characters.values())), sep='')


              precision    recall  f1-score   support

        Zero       0.14      0.23      0.17      1814
         One       0.14      0.27      0.18      1828
         Two       0.11      0.14      0.12      1803
       Three       0.10      0.12      0.11      1719
        Four       0.10      0.09      0.09      1812
        Five       0.10      0.08      0.09      1768
         Six       0.10      0.07      0.08      1832
       Seven       0.11      0.06      0.08      1808
       Eight       0.11      0.05      0.07      1812
        Nine       0.09      0.04      0.05      1804

    accuracy                           0.12     18000
   macro avg       0.11      0.12      0.11     18000
weighted avg       0.11      0.12      0.11     18000



In [10]:
# Define Model
model = Sequential()

#Reshape data from 2D to 1D -> 32x32 to 1024
model.add(Reshape((1024,),input_shape=input_shape))

#Normalize the data
model.add(BatchNormalization())

#Add 1st hidden layer
model.add(Dense(200, activation='relu'))


#Add 2nd hidden layer
model.add(Dense(100, activation='relu'))

#Add 3rd hidden layer
model.add(Dense(60, activation='relu'))

#Add 4th hidden layer
model.add(Dense(30, activation='relu'))

#Add OUTPUT layer
model.add(Dense(10, activation='softmax'))


#Compile the model
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

W0818 20:45:15.751008 139712056141696 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0818 20:45:15.768057 139712056141696 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0818 20:45:15.838627 139712056141696 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0818 20:45:15.857213 139712056141696 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0818 20:45:15.913149 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 1024)              0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
dense_1 (Dense)              (None, 200)               205000    
_________________________________________________________________
dense_2 (Dense)              (None, 100)               20100     
_________________________________________________________________
dense_3 (Dense)              (None, 60)                6060      
_________________________________________________________________
dense_4 (Dense)              (None, 30)                1830      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                310       
Total para

In [11]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=10)

# Train the model
model.fit(X_train, y_train, validation_data=(X_val, y_val),epochs=EPOCHS, batch_size=BATCH_SIZE)

W0818 20:45:16.037294 139712056141696 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 42000 samples, validate on 60000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f111bdb06a0>

In [12]:
from sklearn.metrics import accuracy_score

y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Predicting the Test set results
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)

print(f'Training Accuracy score of the model = {accuracy_score(y_pred, y_test)}')

Training Accuracy score of the model = 0.7997777777777778


In [0]:
from sklearn.metrics import confusion_matrix


# Calculate the confusion matrix
Y_pred_classes = np.argmax(y_pred,axis = 1) 
Y_true = np.argmax(y_test,axis = 1)
cm = confusion_matrix(y_true=Y_true, y_pred=Y_pred_classes)

In [14]:
print(cm)

[[1695   20    5   15    6    2    8   11    7   45]
 [ 213 1496   22   17   28    1    5   34    3    9]
 [ 198   12 1438   27   13    7    2   47   11   48]
 [ 201   12   26 1341    8   70    7   23    9   22]
 [ 161   32   13   15 1546    3   13    9    1   19]
 [ 201   11    8   67    7 1380   53    7   13   21]
 [ 271   16    8   17   17   50 1404    9   30   10]
 [ 147   49   18   13    9    5    0 1557    3    7]
 [ 306   22   16   40    7   21   58    6 1286   50]
 [ 283   19   10   27   15   21    3   13   15 1398]]
