In [1]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [3]:
train_df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test_df = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")
train_df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
print(train_df.shape)
print(test_df.shape)

(42000, 785)
(28000, 784)


In [5]:
train_x = train_df.drop(["label"],axis=1).astype("int32")
train_y = train_df["label"].astype("float32")
test_x = test_df.astype("float32")
train_x.shape, train_y.shape, test_x.shape

((42000, 784), (42000,), (28000, 784))

In [6]:
train_x.describe()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
count,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,...,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.219286,0.117095,0.059024,0.02019,0.017238,0.002857,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.31289,4.633819,3.274488,1.75987,1.894498,0.414264,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [7]:
train_x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 784 entries, pixel0 to pixel783
dtypes: int32(784)
memory usage: 125.6 MB


In [8]:
train_x = train_x.values.reshape(-1,28,28,1)
train_x = train_x/255.0
test_x = test_x.values.reshape(-1,28,28,1)
test_x = test_x/255.0
train_x.shape, test_x.shape

((42000, 28, 28, 1), (28000, 28, 28, 1))

In [9]:
# One hot encoding
train_y = tf.keras.utils.to_categorical(train_y,10)
train_y.shape

(42000, 10)

In [10]:
train_y

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [11]:
train_df["label"].head()

0    1
1    0
2    1
3    4
4    0
Name: label, dtype: int64

In [12]:
print(train_y[0:5,:])

[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [13]:
# Defining the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32,(3,3),activation = "relu", input_shape=(28,28,1)),
    tf.keras.layers.Conv2D(32,(3,3),activation="relu"),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64,(3,3),activation="relu",padding = "Same"),
    tf.keras.layers.Conv2D(64,(3,3),activation ="relu",padding = "Same"),
    tf.keras.layers.MaxPooling2D(pool_size =(2,2), strides=(2,2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(64,(3,3),activation ="relu",padding = "Same"),
    tf.keras.layers.Conv2D(64,(3,3),activation ="relu",padding = "Same"),
    tf.keras.layers.MaxPooling2D(pool_size =(2,2), strides=(2,2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(10, activation="softmax"),
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if(logs.get('accuracy') > 0.999):
            print("\nReached 99.9% accuracy, So here training Hold!!!")
            self.model.stop_training = True

callbacks = myCallback()

In [15]:
Optimizer = tf.keras.optimizers.Adam(
    learning_rate = 0.0005,
    beta_1 = 0.9,
    beta_2 = 0.999,
    epsilon = 1e-07,
    name='Adam'
)
model.compile(optimizer = Optimizer, loss = "categorical_crossentropy",metrics=["accuracy"])
model.fit(train_x,train_y,batch_size =50, epochs = 20, callbacks= [callbacks])

Epoch 1/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 78ms/step - accuracy: 0.7228 - loss: 0.7894
Epoch 2/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 78ms/step - accuracy: 0.9729 - loss: 0.0925
Epoch 3/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 78ms/step - accuracy: 0.9823 - loss: 0.0621
Epoch 4/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 78ms/step - accuracy: 0.9860 - loss: 0.0504
Epoch 5/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 76ms/step - accuracy: 0.9880 - loss: 0.0398
Epoch 6/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 76ms/step - accuracy: 0.9890 - loss: 0.0358
Epoch 7/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 75ms/step - accuracy: 0.9912 - loss: 0.0271
Epoch 8/20
[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 76ms/step - accuracy: 0.9927 - loss: 0.0268
Epoch 9/20
[1m840/840[

<keras.src.callbacks.history.History at 0x78035848e350>

In [16]:
results = model.predict(test_x)
results = np.argmax(results,axis=1)
results = pd.Series(results,name="Label")

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step


In [17]:
sub_df = pd.read_csv("/kaggle/input/digit-recognizer/sample_submission.csv")
sub_df.head()

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [18]:
x_index  = sub_df["ImageId"]

In [19]:
submission = pd.DataFrame({
    'ImageId': x_index,
    'Label': results
})
submission.to_csv("Digit_Submission.csv",index=False)

In [20]:
submission.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
