In [1]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical, set_random_seed

In [2]:
# Read the labelled training table and the unlabelled test table from the working directory.
train_df, test_df = pd.read_csv('train.csv'), pd.read_csv('test.csv')

In [3]:
# Sanity check: table dimensions on the first line, then the first five rows.
print(train_df.shape, train_df.head(), sep='\n')

(42000, 785)
   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0      

Split features and labels

In [4]:
# Separate the target column from the pixel columns of the training table.
y = train_df['label']
X = train_df.drop(columns='label')

In [5]:
# Divide every pixel value by 255 (assumes the raw columns hold 0-255 intensities — TODO confirm).
# NOTE: X and test_df are rebound to their scaled versions, so running this cell a
# second time would scale the data again; re-run from the load cell instead.
X, test_df = X / 255.0, test_df / 255.0

Reshape the Data for CNN

In [6]:
# Turn each flat row of 784 pixel values into a 28x28 single-channel image, which is
# the layout the model's Input(shape=(28, 28, 1)) layer expects.
# np.asarray accepts a DataFrame as well as an ndarray, so this cell can be re-run
# after X has already been converted (X.values would raise AttributeError then).
X = np.asarray(X).reshape(-1, 28, 28, 1)
test_images = np.asarray(test_df).reshape(-1, 28, 28, 1)

Create Training and Validation Sets

In [7]:
# Hold out 10% of the labelled images for validation; the fixed random_state keeps
# the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

Image Augmentation (optional, helps improve generalization)

In [8]:
# Random augmentation applied on the fly to each training batch: rotations of up to
# 10 degrees, zoom of up to 10%, and horizontal/vertical shifts of up to 10% of the
# image size.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
# No datagen.fit(X_train) call: fitting only computes the statistics needed by
# featurewise_center, featurewise_std_normalization and zca_whitening, none of which
# are enabled here, so it would just copy the training array for nothing.

Build the CNN Model

In [9]:
# Seed the Python, NumPy and TensorFlow random generators before any weights are
# created, so that weight initialisation and the later random steps (dropout,
# augmentation) repeat on a fresh "Restart & Run All" as far as the backend allows.
set_random_seed(42)

# Two convolution + max-pooling stages extract features; a small dense head with
# dropout maps them to the ten digit classes.
model = Sequential([
    Input(shape=(28, 28, 1)),   # explicit input layer, used instead of input_shape on Conv2D
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),               # drops half of the dense activations during training
    Dense(10, activation='softmax')   #10 output classes for 0-9
])

Compile model

In [10]:
# The labels are plain integers (not one-hot vectors), which is what the sparse
# categorical cross-entropy loss expects.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Set Early Stopping

In [11]:
# Stop training once val_loss has gone 3 epochs without improving, then restore the
# weights from the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

Train Model

In [12]:
# Train for at most 20 epochs on augmented batches of 64 images; validation uses the
# un-augmented hold-out split, and early stopping may end training sooner.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    validation_data=(X_val, y_val),
    epochs=20,
    callbacks=[early_stop],
)

  self._warn_if_super_not_called()


Epoch 1/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 88ms/step - accuracy: 0.6031 - loss: 1.1701 - val_accuracy: 0.9664 - val_loss: 0.1122
Epoch 2/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 91ms/step - accuracy: 0.8825 - loss: 0.3779 - val_accuracy: 0.9779 - val_loss: 0.0675
Epoch 3/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 89ms/step - accuracy: 0.9134 - loss: 0.2822 - val_accuracy: 0.9736 - val_loss: 0.0828
Epoch 4/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 90ms/step - accuracy: 0.9303 - loss: 0.2340 - val_accuracy: 0.9838 - val_loss: 0.0484
Epoch 5/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 89ms/step - accuracy: 0.9403 - loss: 0.1973 - val_accuracy: 0.9845 - val_loss: 0.0438
Epoch 6/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 92ms/step - accuracy: 0.9460 - loss: 0.1768 - val_accuracy: 0.9864 - val_loss: 0.0386
Epoch 7/20
[1m5

Evaluate the Model

In [13]:
# Score the trained model on the hold-out split; evaluate returns the loss followed
# by the metric requested at compile time (accuracy).
val_loss, val_acc = model.evaluate(x=X_val, y=y_val)
print(f'Validation Accuracy: {val_acc:.4f}')

[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9898 - loss: 0.0249
Validation Accuracy: 0.9910


Predict on test data

In [14]:
# One row of 10 class scores per test image; the index of the largest score is the
# predicted digit.
predictions = model.predict(test_images)
predicted_labels = predictions.argmax(axis=1)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step


Save Predictions

In [16]:
# Two-column table: a 1-based ImageId in test-set order and the predicted digit,
# written to submission.csv without the DataFrame index.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(predicted_labels) + 1),
    "Label": predicted_labels,
})
submission.to_csv("submission.csv", index=False)

Visualize some predictions

In [None]:
# Show five randomly chosen test images, each titled with the label already computed
# for it in predicted_labels. Reusing those labels (instead of calling model.predict
# once per image) guarantees the titles match the saved submission and avoids five
# extra forward passes and their progress-bar output.
for _ in range(5):
    index = random.randint(0, len(test_images) - 1)   # randint is inclusive at both ends
    plt.imshow(test_images[index].reshape(28, 28), cmap='gray')
    pred = predicted_labels[index]
    plt.title(f'Predicted: {pred}')
    plt.axis('off')
    plt.show()

Accuracy plot

In [None]:
# Per-epoch training and validation accuracy recorded by model.fit, drawn on one axes.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Model Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()