### **Require Libraries and FrameWorks**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline

import tensorflow as tf
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

In [2]:
drive.mount('/content/Drive', force_remount=True)

Mounted at /content/Drive


### **Data preprocessing**

In [3]:
train = pd.read_csv('/content/Drive/MyDrive/data/digit reco kaggle/train.csv')
test = pd.read_csv('/content/Drive/MyDrive/data/digit reco kaggle/test.csv')

In [4]:
X = train.iloc[:,1:]
y = train.iloc[:,:1]

y_cate = to_categorical(y)

X = X/255.
test = test / 255.

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X,y, test_size=0.2)

# for categerical
X_trains, X_vals, y_trains, y_vals = train_test_split(X,y_cate, test_size=0.2)

In [6]:
X_train = np.array(X_train).reshape((-1,28,28,1))
X_val = np.array(X_val).reshape((-1,28,28,1))

# for categerical

X_trains = np.array(X_trains).reshape((-1,28,28,1))
X_vals = np.array(X_vals).reshape((-1,28,28,1))

# test data
test_data = np.array(test).reshape((-1,28,28,1))

In [7]:
train['label'].value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

**Data Augmentation**

In [8]:
datagen = ImageDataGenerator(rotation_range=10,zoom_range=0.1,width_shift_range=0.1,height_shift_range=0.1)

In [9]:
train_gen = datagen.flow(X_trains,y_trains,batch_size=128)
val_gen = datagen.flow(X_vals,y_vals,batch_size=128)

### **Model Building**

In [10]:
def lenet5(input_size=(28,28,1)):
  inputs = tf.keras.Input(input_size)

  X = tf.keras.layers.Conv2D(6,(5,5),strides=1, activation='relu')(inputs)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Conv2D(16,(5,5),activation='relu')(X)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Conv2D(32,(3,3),activation='relu')(X)

  X = tf.keras.layers.Flatten()(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(84, activation='relu')(X)

  output = tf.keras.layers.Dense(10, activation='softmax')(X)

  Model = tf.keras.Model(inputs = inputs, outputs=output)

  return Model



In [11]:
lenet = lenet5()

lenet.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 6)        0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 4, 4, 16)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 2, 2, 32)          4640  

In [12]:
lenet.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [13]:
lenet.fit(X_train,y_train,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7110f3d400>

In [14]:
val_pred = lenet.predict(X_val)



In [15]:
y_pred = lenet.predict(test_data)



cate

In [16]:
lenet.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [17]:
lenet.fit(X_trains,y_trains,epochs=20,verbose=2)

Epoch 1/20
1050/1050 - 6s - loss: 0.0314 - accuracy: 0.9905 - 6s/epoch - 6ms/step
Epoch 2/20
1050/1050 - 4s - loss: 0.0229 - accuracy: 0.9925 - 4s/epoch - 4ms/step
Epoch 3/20
1050/1050 - 4s - loss: 0.0196 - accuracy: 0.9937 - 4s/epoch - 4ms/step
Epoch 4/20
1050/1050 - 5s - loss: 0.0205 - accuracy: 0.9930 - 5s/epoch - 4ms/step
Epoch 5/20
1050/1050 - 5s - loss: 0.0169 - accuracy: 0.9939 - 5s/epoch - 4ms/step
Epoch 6/20
1050/1050 - 4s - loss: 0.0136 - accuracy: 0.9953 - 4s/epoch - 4ms/step
Epoch 7/20
1050/1050 - 4s - loss: 0.0143 - accuracy: 0.9951 - 4s/epoch - 4ms/step
Epoch 8/20
1050/1050 - 4s - loss: 0.0127 - accuracy: 0.9959 - 4s/epoch - 4ms/step
Epoch 9/20
1050/1050 - 4s - loss: 0.0135 - accuracy: 0.9952 - 4s/epoch - 4ms/step
Epoch 10/20
1050/1050 - 4s - loss: 0.0110 - accuracy: 0.9965 - 4s/epoch - 4ms/step
Epoch 11/20
1050/1050 - 4s - loss: 0.0133 - accuracy: 0.9958 - 4s/epoch - 4ms/step
Epoch 12/20
1050/1050 - 4s - loss: 0.0084 - accuracy: 0.9974 - 4s/epoch - 4ms/step
Epoch 13/20
1

<keras.callbacks.History at 0x7f711196b040>

In [18]:
lenet.evaluate(X_vals,y_vals)



[0.07281652092933655, 0.9876190423965454]

In [19]:
le_pred = lenet.predict(test_data)



In [20]:
np.argmax(le_pred,axis=1)

array([7, 2, 7, ..., 7, 2, 1])

model 3

In [25]:
def model3(input_size=(28,28,1)):
  inputs = tf.keras.Input(input_size)

  X = tf.keras.layers.Conv2D(6,(5,5),strides=1, activation='relu')(inputs)

  X = tf.keras.layers.Dropout(0.5)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Conv2D(16,(5,5),activation='relu')(X)

  X = tf.keras.layers.Dropout(0.5)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Conv2D(32,(3,3),activation='relu')(X)

  X = tf.keras.layers.Dropout(0.5)

  X = tf.keras.layers.Flatten()(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(64, activation='relu')(X)

  X = tf.keras.layers.Dense(64, activation='relu')(X)

  output = tf.keras.layers.Dense(10, activation='softmax')(X)

  Model = tf.keras.Model(inputs = inputs, outputs=output)

  return Model



In [26]:
Model3 = model()

Model3.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_5 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 12, 12, 6)        0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 4, 4, 16)         0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 256)               0   

In [27]:
Model3.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [28]:
Model3.fit(X_trains,y_trains,epochs=10,verbose=2)

Epoch 1/10
1050/1050 - 4s - loss: 0.2815 - accuracy: 0.9133 - 4s/epoch - 4ms/step
Epoch 2/10
1050/1050 - 3s - loss: 0.0929 - accuracy: 0.9711 - 3s/epoch - 3ms/step
Epoch 3/10
1050/1050 - 4s - loss: 0.0654 - accuracy: 0.9797 - 4s/epoch - 4ms/step
Epoch 4/10
1050/1050 - 4s - loss: 0.0514 - accuracy: 0.9830 - 4s/epoch - 4ms/step
Epoch 5/10
1050/1050 - 3s - loss: 0.0422 - accuracy: 0.9858 - 3s/epoch - 3ms/step
Epoch 6/10
1050/1050 - 3s - loss: 0.0343 - accuracy: 0.9882 - 3s/epoch - 3ms/step
Epoch 7/10
1050/1050 - 4s - loss: 0.0304 - accuracy: 0.9901 - 4s/epoch - 4ms/step
Epoch 8/10
1050/1050 - 3s - loss: 0.0239 - accuracy: 0.9920 - 3s/epoch - 3ms/step
Epoch 9/10
1050/1050 - 3s - loss: 0.0229 - accuracy: 0.9930 - 3s/epoch - 3ms/step
Epoch 10/10
1050/1050 - 3s - loss: 0.0184 - accuracy: 0.9945 - 3s/epoch - 3ms/step


<keras.callbacks.History at 0x7f70ec5bc820>

In [29]:
val_pred = Model3.predict(X_vals)



In [30]:
test_pred = Model3.predict(test_data)



model


In [31]:
def model4(input_size=(28,28,1)):
  inputs = tf.keras.Input(input_size)

  X = tf.keras.layers.Conv2D(6,(5,5),strides=1, activation='relu')(inputs)

  X = tf.keras.layers.Dropout(0.5)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Conv2D(16,(5,5),activation='relu')(X)

  X = tf.keras.layers.Dropout(0.5)

  X = tf.keras.layers.Flatten()(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(256, activation='relu')(X)

  X = tf.keras.layers.Dense(256, activation='relu')(X)

  X = tf.keras.layers.Dense(64, activation='relu')(X)

  output = tf.keras.layers.Dense(10, activation='softmax')(X)

  Model = tf.keras.Model(inputs = inputs, outputs=output)

  return Model



In [32]:
Model4 = model()

Model4.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_7 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 12, 12, 6)        0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 4, 4, 16)         0         
 2D)                                                             
                                                                 
 flatten_3 (Flatten)         (None, 256)               0   

In [33]:
Model4.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [34]:
Model3.fit(X_trains,y_trains,epochs=10,verbose=2)

Epoch 1/10
1050/1050 - 3s - loss: 0.0167 - accuracy: 0.9940 - 3s/epoch - 3ms/step
Epoch 2/10
1050/1050 - 4s - loss: 0.0144 - accuracy: 0.9953 - 4s/epoch - 4ms/step
Epoch 3/10
1050/1050 - 3s - loss: 0.0119 - accuracy: 0.9958 - 3s/epoch - 3ms/step
Epoch 4/10
1050/1050 - 3s - loss: 0.0124 - accuracy: 0.9956 - 3s/epoch - 3ms/step
Epoch 5/10
1050/1050 - 3s - loss: 0.0116 - accuracy: 0.9959 - 3s/epoch - 3ms/step
Epoch 6/10
1050/1050 - 4s - loss: 0.0093 - accuracy: 0.9970 - 4s/epoch - 4ms/step
Epoch 7/10
1050/1050 - 3s - loss: 0.0088 - accuracy: 0.9968 - 3s/epoch - 3ms/step
Epoch 8/10
1050/1050 - 3s - loss: 0.0089 - accuracy: 0.9969 - 3s/epoch - 3ms/step
Epoch 9/10
1050/1050 - 3s - loss: 0.0101 - accuracy: 0.9964 - 3s/epoch - 3ms/step
Epoch 10/10
1050/1050 - 4s - loss: 0.0069 - accuracy: 0.9977 - 4s/epoch - 4ms/step


<keras.callbacks.History at 0x7f711117d700>

In [35]:
test_pred = Model3.predict(test_data)



model

In [36]:
def model5(input_size=(28,28,1)):
  inputs = tf.keras.Input(input_size)

  X = tf.keras.layers.Conv2D(6,(5,5),strides=1, activation='relu')(inputs)

  X = tf.keras.layers.MaxPool2D((2,2),strides=1)(X)

  X = tf.keras.layers.Dropout(0.5)(X)

  X = tf.keras.layers.Conv2D(24,(5,5),activation='relu')(X)

  X = tf.keras.layers.MaxPool2D((2,2),strides=1)(X)

  X = tf.keras.layers.Dropout(0.5)(X)

  X = tf.keras.layers.Conv2D(48,(5,5),activation='relu')(X)

  X = tf.keras.layers.Conv2D(64,(3,3),activation='relu')(X)

  X = tf.keras.layers.MaxPool2D((2,2),strides=2)(X)

  X = tf.keras.layers.Flatten()(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(128, activation='relu')(X)

  X = tf.keras.layers.Dense(64, activation='relu')(X)

  output = tf.keras.layers.Dense(10, activation='softmax')(X)

  Model = tf.keras.Model(inputs = inputs, outputs=output)

  return Model



In [37]:
Model5 = model5()
Model5.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_9 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 23, 23, 6)        0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 23, 23, 6)         0         
                                                                 
 conv2d_10 (Conv2D)          (None, 19, 19, 24)        3624      
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 18, 18, 24)       0         
 2D)                                                       

In [38]:
Model5.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [39]:
history = Model5.fit_generator(train_gen,
                    epochs = 20,
                    validation_data = val_gen)

Epoch 1/20


  history = Model5.fit_generator(train_gen,


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [40]:
mod5_pred = Model5.predict(test_data)



In [41]:
np.argmax(mod5_pred,axis=1)

array([9, 7, 7, ..., 9, 2, 9])

# **Got ~92% Accuracy on Model5**

**model save**

In [42]:
'''Label = np.argmax(y_pred, axis=1)
Imageid = pd.DataFrame(list(range(1,len(Label)+1)), columns=['ImageId'])
subm = pd.DataFrame(Label,columns=['Label']) 
subm = pd.concat([Imageid,subm],axis=1)
subm.to_csv('/content/Drive/MyDrive/data/digit reco kaggle/sub1.csv',index=False)'''

"Label = np.argmax(y_pred, axis=1)\nImageid = pd.DataFrame(list(range(1,len(Label)+1)), columns=['ImageId'])\nsubm = pd.DataFrame(Label,columns=['Label']) \nsubm = pd.concat([Imageid,subm],axis=1)\nsubm.to_csv('/content/Drive/MyDrive/data/digit reco kaggle/sub1.csv',index=False)"