# Final Scoring in the Competition and Ranking!

__Submission__

![title](submission_score.png)

__Leaderboard as of 03/12/2022__

![title](leaderboard_1.png)

__Imports__

In [94]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer

__Dataset__

In [95]:
# Load the training and test CSV files from the local `data/` directory into DataFrames.
digit = pd.read_csv("data/train.csv")
digit_test = pd.read_csv("data/test.csv")

__Training Dataset View__

In [96]:
digit.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


__Test Dataset View__

In [97]:
digit_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


__Separate Features and Labels as NumPy Arrays__

In [98]:
# Pull the label column out as the target vector and keep every other
# column as the feature matrix, both as plain NumPy arrays.
y = digit["label"].to_numpy()
x = digit.drop(columns="label").to_numpy()

In [99]:
x.shape

(42000, 784)

In [100]:
y.shape

(42000,)

__Re-scaling__

In [101]:
# Divide both feature sets by the same constant so train and test inputs
# share one scale (assumes 8-bit pixel intensities, i.e. 0..255 -> 0..1).
PIXEL_SCALE = 255.0
x = x / PIXEL_SCALE
test_df = digit_test.to_numpy() / PIXEL_SCALE

__Reshaping__

In [102]:
# Turn each flat 784-value row into a 28x28 single-channel image, and make
# the labels a column vector with one row per sample.
x_train = x.reshape((len(x), 28, 28, 1))
test_df = test_df.reshape((len(digit_test), 28, 28, 1))
y_train = y.reshape((-1, 1))

__Splitting Data__

In [103]:
# Hold out 20% of the labelled images for validation; the fixed
# random_state keeps the split the same from run to run.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=141,
)

__Clear the Keras Session and Set Random Seeds__

In [104]:
# Reset Keras' global state, then seed NumPy and TensorFlow with the same
# value so repeated runs start from the same random state.
keras.backend.clear_session()
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

__Piecewise Constant Scheduling__
* Added an additional learning-rate constant to the schedule
* The learning rate is lowered in fixed steps as training progresses, instead of staying constant

In [105]:
def piecewise_constant_fn(epoch):
    """Return the learning rate for `epoch` from a fixed step schedule.

    The rate is 0.01 below epoch 5, 0.005 below epoch 10, 0.003 below
    epoch 15, 0.001 below epoch 20, and 0.0001 from epoch 20 onwards.
    """
    # (exclusive upper epoch bound, learning rate) pairs in ascending order.
    steps = ((5, 0.01), (10, 0.005), (15, 0.003), (20, 0.001))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    # Past every bound: use the smallest rate.
    return 0.0001

In [106]:
def piecewise_constant(boundaries, values):
    """Build a step-wise learning-rate schedule.

    Args:
        boundaries: Epoch indices, sorted ascending, at which the rate
            changes. Any 1-D sequence (list, tuple, NumPy array) works.
        values: Learning rates. ``values[0]`` applies before
            ``boundaries[0]``, ``values[i]`` applies from
            ``boundaries[i - 1]`` up to (not including) ``boundaries[i]``,
            and ``values[-1]`` applies from the last boundary onwards.

    Returns:
        A function mapping an epoch index to its learning rate, returned as
        a plain Python ``float``.

    Raises:
        ValueError: If either argument is not 1-D, ``boundaries`` is not
            sorted ascending, or ``values`` is empty or shorter than
            ``boundaries``.
    """
    # np.asarray accepts lists, tuples and arrays alike; list concatenation
    # would broadcast-add when handed an array instead of prepending.
    boundaries = np.asarray(boundaries)
    values = np.asarray(values, dtype=float)

    if boundaries.ndim != 1 or values.ndim != 1:
        raise ValueError("boundaries and values must be 1-D sequences")
    if np.any(np.diff(boundaries) < 0):
        raise ValueError("boundaries must be sorted in ascending order")
    if values.size < max(boundaries.size, 1):
        # Fail here rather than with an IndexError in the middle of training.
        raise ValueError("values needs at least as many entries as boundaries")

    def schedule(epoch):
        # Number of boundaries that are <= epoch, i.e. the segment index.
        segment = int(np.searchsorted(boundaries, epoch, side="right"))
        if segment >= boundaries.size:
            # At or past the last boundary: hold the final rate.
            return float(values[-1])
        return float(values[segment])

    return schedule


# Rebinds `piecewise_constant_fn`, replacing the hand-written function of the
# same name; this generated schedule is the one handed to the scheduler.
piecewise_constant_fn = piecewise_constant(
    [5, 10, 15, 20, 25],
    [0.01, 0.005, 0.003, 0.001, 0.0001],
)

In [107]:
# Wrap the schedule function in a Keras callback so it can be passed to `model.fit` through `callbacks`.
lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)

__Model Creation__

In [108]:
# Two conv -> average-pool -> batch-norm stages, followed by a dense
# classifier head that ends in a 10-way softmax.
model = keras.models.Sequential([
    # Stage 1: 64 3x3 filters on the 28x28x1 input.
    keras.layers.Conv2D(
        filters=64,
        kernel_size=(3, 3),
        padding="valid",
        activation='elu',
        kernel_initializer="lecun_normal",
        input_shape=(28, 28, 1),
    ),
    keras.layers.AveragePooling2D(),
    keras.layers.BatchNormalization(),
    # Stage 2: 32 3x3 filters.
    keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding="valid",
        activation='elu',
        kernel_initializer="lecun_normal",
    ),
    keras.layers.AveragePooling2D(),
    keras.layers.BatchNormalization(),
    keras.layers.GaussianDropout(0.25),
    # Classifier head.
    keras.layers.Flatten(),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(units=128, activation='selu', kernel_initializer="lecun_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(units=64, activation='selu', kernel_initializer="lecun_normal"),
    keras.layers.Dense(units=10, activation='softmax'),
])

In [109]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 6)         60        
                                                                 
 average_pooling2d (AverageP  (None, 13, 13, 6)        0         
 ooling2D)                                                       
                                                                 
 batch_normalization (BatchN  (None, 13, 13, 6)        24        
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 16)        880       
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 5, 16)         0         
 ePooling2D)                                                     
                                                        

__Model Compile__

In [110]:
# Sparse categorical cross-entropy takes the integer labels directly.
# Only one clipping argument is passed to Adam: current Keras optimizers
# raise a ValueError when `clipvalue` and `clipnorm` are set together, and a
# gradient tensor whose norm is at most 1.0 already has every element
# within [-1.0, 1.0].
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(clipnorm=1.0),
    metrics=["accuracy"],
)

__Model Fitting__

In [111]:
# Train for 30 epochs, scoring the held-out split after each one; the
# scheduler callback supplies the learning rate for every epoch.
history = model.fit(
    x_train,
    y_train,
    epochs=30,
    validation_data=(x_valid, y_valid),
    callbacks=[lr_scheduler],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


__Save the Model__
* This model has already been saved both locally and to the cloud, so the save/load cells below are commented out

In [112]:
#model.save("Digit_Recog_Mod_2.h5")

In [113]:
#ml = keras.models.load_model("Digit_Recog_Mod_2.h5")

__Predicting on the Test Data__

In [114]:
pred = model.predict(test_df)

__Get the predicted digit: the index of the highest class probability in each prediction row__

In [115]:
# Each row of `pred` holds the 10 softmax outputs for one image; take the
# arg-max along the class axis in a single vectorised call instead of a
# Python-level loop over every row.
prediction = np.argmax(pred, axis=1)

In [116]:
# 1-based image ids, one per prediction row. The count comes from `pred`
# rather than a hard-coded 28000, so it stays correct if the test set changes.
img_id = np.arange(1, len(pred) + 1)

In [117]:
img_id.shape

(28000,)

__Final Dataframe of Predictions__
* Has to be in this format for the competition

In [118]:
# Assemble the two-column submission table: image id first, then the
# predicted label.
df = pd.DataFrame({"ImageId": img_id}).assign(Label=prediction)
df.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [119]:
df.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [120]:
df.tail()

Unnamed: 0,ImageId,Label
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
27999,28000,2


__Save Predictions to CSV__

In [121]:
df.to_csv("Michael_Woo_Predictions_Digit_Recog_3.csv",index=False)

In [122]:
df_1 = pd.read_csv("Michael_Woo_Predictions_Digit_Recog_2.csv")
df_2 = pd.read_csv("Michael_Woo_Predictions_Digit_Recog_3.csv")

In [123]:
# Stack both prediction files and drop every row that occurs more than once
# (keep=False removes all copies), leaving only the (ImageId, Label) rows
# that the two files do not share.
pd.concat([df_1,df_2]).drop_duplicates(keep=False)

Unnamed: 0,ImageId,Label
511,512,6
844,845,4
904,905,9
1070,1071,0
1235,1236,1
...,...,...
27716,27717,8
27724,27725,8
27799,27800,7
27937,27938,4


In [124]:
# NOTE(review): 300 is hard-coded; presumably the number of images whose
# label differs between the two prediction files, out of the 28000 test
# images. Confirm against the comparison output.
300/28000

0.010714285714285714