In [12]:
#Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Read the training and test CSV files (paths are relative to the working directory)
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv")
)

In [3]:
# Preview the first five rows of the training frame
train_data.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Separate the target column from the features:
#   y -> 1-D array of the 'label' column
#   X -> 2-D array of every remaining column
y = train_data['label'].values
X = train_data.drop(columns='label').values

In [5]:
# Scale both feature sets by the same constant so training and inference inputs match.
# Assumes raw pixel intensities lie in 0-255, giving values in [0, 1] — confirm against the data.
# NOTE: this cell reassigns X and test_data, so run it exactly once per kernel session.
MAX_PIXEL_VALUE = 255.0
X = X / MAX_PIXEL_VALUE
test_data = test_data / MAX_PIXEL_VALUE

In [9]:
# Reshape each flat row of pixels into a (height, width, channels) = (28, 28, 1) image,
# producing 4-D arrays of shape (n_samples, 28, 28, 1) as expected by the Conv2D input.
IMAGE_SHAPE = (28, 28, 1)
X = X.reshape(-1, *IMAGE_SHAPE)
# np.asarray accepts the DataFrame as well as an already-converted ndarray, so this cell
# can be re-run safely; `test_data.values` raised AttributeError on the second run
# because test_data had already been replaced by a NumPy array.
test_data = np.asarray(test_data).reshape(-1, *IMAGE_SHAPE)

In [10]:
# Hold out 20% of the labelled samples as a validation set.
# The fixed random_state keeps the split identical across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Build the CNN classifier:
#   - three Conv2D (3x3, ReLU) -> MaxPooling2D (2x2) -> Dropout(0.25) stages with
#     32, 64 and 128 filters respectively
#   - Flatten -> Dense(128, ReLU) -> Dropout(0.5)
#   - Dense(10, softmax) output, one probability per class
# The input shape is declared with an explicit Input object instead of passing
# `input_shape=` to the first layer, which Keras 3 discourages (it emits a UserWarning).
cnn = Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

In [14]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (the labels are passed as integer class ids, not one-hot vectors), and
# accuracy reported as the tracked metric.
cnn.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Train for 15 epochs, scoring the validation split at the end of every epoch.
# The returned History object is left as the cell's displayed value.
cnn.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=15,
)

Epoch 1/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 19ms/step - accuracy: 0.6505 - loss: 1.0153 - val_accuracy: 0.9601 - val_loss: 0.1282
Epoch 2/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9400 - loss: 0.2084 - val_accuracy: 0.9725 - val_loss: 0.0900
Epoch 3/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9572 - loss: 0.1543 - val_accuracy: 0.9805 - val_loss: 0.0641
Epoch 4/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9646 - loss: 0.1219 - val_accuracy: 0.9810 - val_loss: 0.0635
Epoch 5/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9678 - loss: 0.1107 - val_accuracy: 0.9836 - val_loss: 0.0550
Epoch 6/15
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.9719 - loss: 0.0925 - val_accuracy: 0.9852 - val_loss: 0.0511
Epoc

<keras.src.callbacks.history.History at 0x263db047c20>

In [16]:
# Score the trained model on the validation split; the result unpacks into the
# loss followed by the single compiled metric (accuracy).
(loss_value, accuracy) = cnn.evaluate(X_val, y_val)

[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9855 - loss: 0.0586


In [17]:
# Report the validation metrics, one caption/value pair per line
for caption, metric_value in (
    ("Validation Loss: ", loss_value),
    ("Validation accuracy: ", accuracy),
):
    print(caption, metric_value)

Validation Loss:  0.05196232721209526
Validation accuracy:  0.9860714077949524


In [20]:
# Run the model on the test images; each output row holds the softmax
# probabilities for the 10 classes.
predictions = cnn.predict(x=test_data)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step


In [21]:
# Collapse each row of class probabilities to the index of its largest entry,
# i.e. the predicted class label for that image.
# NOTE: this overwrites `predictions`, so the cell must not be re-run without
# re-running the predict cell first.
predictions = predictions.argmax(axis=1)

In [22]:
# Display the predicted class labels (NumPy abbreviates the repr of long arrays)
predictions

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [23]:
# Assemble the submission table: ImageId is a 1-based running index over the
# predictions, Label is the predicted class. Written without the DataFrame index.
image_ids = range(1, len(predictions) + 1)
submission = pd.DataFrame({'ImageId': image_ids, 'Label': predictions})
submission.to_csv("submission.csv", index=False)