__Conclusion__
* The accuracy achieved was __0.9351__, which is pretty good when improvements were added. 
* When comparing to the model from Homework 2 assignment, the accuracy there was 0.87, so thats an increase of 0.065!

__Summary of Improvements__
* Since we are dealing with image data, I would think it would be best to use a few _convolutional_ and _pooling_ layers for this case
* Added a _Dropout layer_: To prevent overfitting
* Added _Learning Rate Scheduling_: Piecewise Constant Scheduling 
* Added _Batch Normalization_: To standardizes the inputs and stabilize the learning process
* Used activation function (_Elu_) and initialization of weights (_lecun_normal_)
* Added more layers and neurons
* Used a different type of optimizer (_Adam_)
* Added a _gradient clipping_: To prevent exploding gradients

__Imports__

In [5]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer

__Dataset__

In [6]:
digit = pd.read_csv("data/train.csv")
digit_test = pd.read_csv("data/test.csv")

In [7]:
digit.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
digit_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
x = digit.drop('label', axis = 1).values
y = digit.label.values

In [10]:
x.shape

(42000, 784)

In [11]:
y.shape

(42000,)

__Scaling__

In [12]:
x = x/255.0
test_df = digit_test.values/255.0

__Reshaping__

In [14]:
x_train = x.reshape(x.shape[0], 28, 28,1)
test_df = test_df.reshape(digit_test.shape[0], 28, 28,1)
y_train = y.reshape(-1,1)

__splitting data__

In [15]:
x_train, x_valid, y_train, y_valid = train_test_split(x_train,y_train,test_size = 0.20, 
                                                      random_state = 141)

In [16]:
y_train

array([[4],
       [2],
       [2],
       ...,
       [1],
       [9],
       [5]])

In [17]:
y_valid

array([[8],
       [6],
       [5],
       ...,
       [8],
       [2],
       [0]])

__Clear the Backend__

In [18]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

__Piecewise Constant Scheduling__
* Added an additional learning constant
* A dynamic learning process

In [19]:
def piecewise_constant_fn(epoch):
    if epoch < 5:
        return 0.01
    elif epoch < 10:
        return 0.005
    elif epoch < 15:
        return 0.003
    elif epoch < 20:
        return 0.001
    else:
        return 0.0001

In [20]:
def piecewise_constant(boundaries, values):
    boundaries = np.array([0] + boundaries)
    values = np.array(values)
    def piecewise_constant_fn(epoch):
        return values[np.argmax(boundaries > epoch) - 1]
    return piecewise_constant_fn
piecewise_constant_fn = piecewise_constant([5,10,15,20,25], [0.01, 0.005,0.003,0.001,0.0001])

In [21]:
lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)

__Model Creation__

In [36]:
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(filters=64, kernel_size=(5, 5),padding="valid" ,activation='elu',kernel_initializer="lecun_normal", input_shape=(28,28,1)))
model.add(keras.layers.AveragePooling2D())
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(filters=32, kernel_size=(5, 5),padding="valid", activation='elu',kernel_initializer="lecun_normal"))
model.add(keras.layers.AveragePooling2D())
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.GaussianDropout(0.25))
model.add(keras.layers.Flatten())
model.add(keras.layers.BatchNormalization())
#model.add(keras.layers.Dense(units=300, activation='elu',kernel_initializer="lecun_normal"))
#model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(units=128, activation='selu',kernel_initializer="lecun_normal"))
model.add(keras.layers.BatchNormalization())
#model.add(keras.layers.Dense(units=64, activation='elu',kernel_initializer="lecun_normal"))
model.add(keras.layers.Dense(units=64, activation='selu',kernel_initializer="lecun_normal"))
model.add(keras.layers.Dense(units=10, activation = 'softmax'))

In [37]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 24, 24, 64)        1664      
                                                                 
 average_pooling2d_4 (Averag  (None, 12, 12, 64)       0         
 ePooling2D)                                                     
                                                                 
 batch_normalization_14 (Bat  (None, 12, 12, 64)       256       
 chNormalization)                                                
                                                                 
 conv2d_7 (Conv2D)           (None, 8, 8, 32)          51232     
                                                                 
 average_pooling2d_5 (Averag  (None, 4, 4, 32)         0         
 ePooling2D)                                                     
                                                      

__Model Compile__

In [38]:
model.compile(loss="sparse_categorical_crossentropy",optimizer=keras.optimizers.Adam(clipvalue=1.0,clipnorm=1.0),metrics=["accuracy"])

__Model Fitting__

In [39]:
history = model.fit(x_train,y_train,epochs=25,validation_data=(x_valid,y_valid),callbacks=[lr_scheduler])

Epoch 1/25


2022-02-26 15:25:47.913170: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-02-26 15:26:21.398391: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


__Save the Model__

In [41]:
model.save("Digit_Recog_Mod_2.h5")

__Testing Model Against Test Data__

In [56]:
pred = model.predict(test_df)

In [57]:
pred.shape

(28000, 10)

__Double check this__ 

In [75]:
np.argmax(pred[0])

2

In [76]:
prediction = [np.argmax(i) for i in pred]
prediction

[2,
 0,
 9,
 0,
 3,
 7,
 0,
 3,
 0,
 3,
 5,
 7,
 4,
 0,
 4,
 3,
 3,
 1,
 9,
 0,
 9,
 1,
 1,
 5,
 7,
 4,
 2,
 7,
 4,
 7,
 7,
 5,
 4,
 2,
 6,
 2,
 5,
 5,
 1,
 6,
 7,
 7,
 4,
 9,
 8,
 7,
 8,
 2,
 6,
 7,
 6,
 8,
 8,
 3,
 8,
 2,
 1,
 2,
 2,
 0,
 4,
 1,
 7,
 0,
 0,
 0,
 1,
 9,
 0,
 1,
 6,
 5,
 8,
 8,
 2,
 8,
 9,
 9,
 2,
 3,
 5,
 4,
 1,
 0,
 9,
 2,
 4,
 3,
 6,
 7,
 2,
 0,
 6,
 6,
 1,
 4,
 3,
 9,
 7,
 4,
 0,
 9,
 2,
 0,
 7,
 3,
 0,
 5,
 0,
 8,
 0,
 0,
 4,
 7,
 1,
 7,
 1,
 1,
 3,
 3,
 3,
 7,
 2,
 8,
 6,
 3,
 8,
 7,
 7,
 4,
 3,
 5,
 6,
 0,
 0,
 0,
 3,
 1,
 3,
 6,
 4,
 3,
 4,
 5,
 5,
 8,
 7,
 7,
 2,
 8,
 4,
 3,
 5,
 6,
 5,
 3,
 7,
 5,
 7,
 8,
 3,
 0,
 4,
 5,
 1,
 2,
 7,
 6,
 3,
 0,
 2,
 7,
 8,
 6,
 1,
 3,
 7,
 4,
 1,
 2,
 4,
 8,
 5,
 2,
 4,
 9,
 2,
 1,
 6,
 0,
 6,
 1,
 4,
 9,
 6,
 0,
 9,
 7,
 6,
 9,
 1,
 9,
 0,
 9,
 9,
 0,
 8,
 4,
 6,
 2,
 0,
 9,
 3,
 6,
 3,
 2,
 1,
 6,
 3,
 4,
 2,
 3,
 1,
 2,
 2,
 0,
 4,
 6,
 1,
 0,
 0,
 4,
 9,
 1,
 7,
 3,
 2,
 3,
 8,
 6,
 8,
 6,
 2,
 8,
 5,
 5,
 4,
 8,
 3,
 5,


In [77]:
prediction = np.array(pred)

In [78]:
prediction

array([[3.42769563e-10, 1.09316254e-11, 9.99999762e-01, ...,
        6.06666140e-09, 1.79541413e-08, 2.02484962e-09],
       [9.99993324e-01, 5.10267526e-08, 4.17336992e-07, ...,
        2.68391227e-06, 5.14435383e-09, 5.90104570e-08],
       [2.25050627e-08, 7.59584617e-10, 2.03870676e-09, ...,
        6.62550725e-09, 7.00107591e-07, 9.99994159e-01],
       ...,
       [1.33922717e-11, 7.41779277e-11, 1.21062795e-11, ...,
        1.01922061e-10, 5.25073596e-10, 2.30983691e-10],
       [9.97655320e-07, 3.71412767e-08, 6.36304236e-08, ...,
        2.03642159e-07, 1.08511095e-07, 9.99983191e-01],
       [2.53379154e-12, 2.99128154e-13, 1.00000000e+00, ...,
        2.46397226e-11, 2.72287348e-09, 4.39626634e-11]], dtype=float32)

In [67]:
img_id = list(range(1,28001))

In [68]:
img_id = np.array(img_id)

In [69]:
img_id.shape

(28000,)

In [70]:
df = pd.DataFrame({"ImageId":img_id,"Label":prediction})

ValueError: Per-column arrays must each be 1-dimensional

In [None]:
df.head()

In [154]:
df.tail()

Unnamed: 0,ImageId,Label
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
27999,28000,2


In [155]:
df.to_csv("Michael_Woo_Predictions_Digit_Recog_2.csv",index=False)

In [156]:
df_1 = pd.read_csv("Michael_Woo_Predictions_Digit_Recog.csv")
df_2 = pd.read_csv("Michael_Woo_Predictions_Digit_Recog_2.csv")

In [157]:
pd.concat([df_1,df_2]).drop_duplicates(keep=False)

Unnamed: 0,ImageId,Label
