In [2]:
import pandas as pd
import torch
from torch.utils.data import random_split
import torch.optim as optim
import torch.nn as nn

In [3]:
df_train=pd.read_csv('datasets/digit-recognizer/train.csv')
df_test=pd.read_csv('datasets/digit-recognizer/test.csv')

In [4]:
df_train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
class Custom_train_dataset(torch.utils.data.Dataset):
    def __init__(self, df_train):
        self.df_train = df_train
        self.images=df_train.iloc[:,1:].values
        self.labels=df_train.iloc[:,0].values
        
    def __getitem__(self,index):
        image=self.images[index]
        label=self.labels[index]
        
        image_tensor=torch.tensor(image,dtype=torch.float32).unsqueeze(0)
        label_tensor=torch.tensor(label,dtype=torch.long)

        return image_tensor,label_tensor
    
    def __len__(self):
        return len(self.df_train)

#created a custom dataset using our Custom_train_dataset class
custom_train_dataset = Custom_train_dataset(df_train)

# dataloader = torch.utils.data.DataLoader(custom_train_dataset,batch_size = 32, shuffle= True) #this is the dataloader with the entire training set


#creating train val split from custom training dataset
train_size = int(0.8 * len(custom_train_dataset))  # 80% for training
val_size = len(custom_train_dataset) - train_size  # 20% for validation


#created training and validation datasets from the entire train dataset
train_dataset, val_dataset = random_split(custom_train_dataset, [train_size, val_size])


# Dataloaders
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)


In [6]:
class Custom_test_dataset(torch.utils.data.Dataset):
    def __init__(self, df_test):
        self.df_test = df_test
        self.images=df_test.values
        
    def __getitem__(self,index):
        image=self.images[index]
        image_tensor=torch.tensor(image,dtype=torch.float32).unsqueeze(0)
        return image_tensor
    
    def __len__(self):
        return len(self.df_test)
        
    
        
custom_test_dataset = Custom_test_dataset(df_test)
test_dataloader = torch.utils.data.DataLoader(custom_test_dataset,batch_size = 32, shuffle= False)


In [8]:
model_CNN=torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3,padding=1),
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),

    torch.nn.MaxPool2d(2,2),# 28x28 -> 14x14

    torch.nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,padding=1),
    torch.nn.BatchNorm2d(64),
    torch.nn.ReLU(),

    torch.nn.MaxPool2d(2,2),# 14x14 -> 7x7

    torch.nn.Dropout2d(0.25),

    # Third Conv block (extra layer)
    nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.Dropout2d(0.25),
    
    # Forth Conv block (extra layer)
    nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU(),
    nn.Dropout2d(0.2),

    # Flatten and Fully Connected layers
    nn.Flatten(),
    nn.Linear(256*7*7, 256),
    nn.ReLU(),
    nn.Dropout(0.3),             # Dropout after first Linear layer
    nn.Linear(256, 10)           # Output layer for 10 classes

)

CNN_model= model_CNN

In [9]:
class OptimizedCNN(nn.Module):
    def __init__(self):
        super(OptimizedCNN, self).__init__()
        
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 28x28 -> 14x14
            nn.Dropout2d(0.2),
            
            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 14x14 -> 7x7
            nn.Dropout2d(0.3),
            
            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout2d(0.3)
        )
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(256, 10)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = OptimizedCNN()

In [10]:
print("Training model")

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(CNN_model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(15):
    running_loss = 0.0
    CNN_model.train()  # Training mode
    
    for i, (x_actual, y_actual) in enumerate(train_dataloader):
        x_actual = x_actual.view(x_actual.size(0), 1, 28, 28)/255.0 # reshaping images here
        
        optimizer.zero_grad()
        y_pred = CNN_model(x_actual)
        loss = criterion(y_pred, y_actual)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Loss: {loss.item():.4f}")
    
print('Finished Training')

Training model
[Epoch 1, Batch 100] Loss: 0.4866
[Epoch 1, Batch 200] Loss: 0.5658
[Epoch 1, Batch 300] Loss: 0.2522
[Epoch 1, Batch 400] Loss: 0.1951
[Epoch 1, Batch 500] Loss: 0.1075
[Epoch 2, Batch 100] Loss: 0.1959
[Epoch 2, Batch 200] Loss: 0.1095
[Epoch 2, Batch 300] Loss: 0.1173
[Epoch 2, Batch 400] Loss: 0.0588
[Epoch 2, Batch 500] Loss: 0.0690
[Epoch 3, Batch 100] Loss: 0.0329
[Epoch 3, Batch 200] Loss: 0.0456
[Epoch 3, Batch 300] Loss: 0.1044
[Epoch 3, Batch 400] Loss: 0.0263
[Epoch 3, Batch 500] Loss: 0.0273
[Epoch 4, Batch 100] Loss: 0.0354
[Epoch 4, Batch 200] Loss: 0.0885
[Epoch 4, Batch 300] Loss: 0.1293
[Epoch 4, Batch 400] Loss: 0.0380
[Epoch 4, Batch 500] Loss: 0.0250
[Epoch 5, Batch 100] Loss: 0.0550
[Epoch 5, Batch 200] Loss: 0.0446
[Epoch 5, Batch 300] Loss: 0.0467
[Epoch 5, Batch 400] Loss: 0.0817
[Epoch 5, Batch 500] Loss: 0.0131
[Epoch 6, Batch 100] Loss: 0.0098
[Epoch 6, Batch 200] Loss: 0.0370
[Epoch 6, Batch 300] Loss: 0.0633
[Epoch 6, Batch 400] Loss: 0.0200

In [11]:
print("Training Optimized model")

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(15):
    running_loss = 0.0
    model.train()  # Training mode
    
    for i, (x_actual, y_actual) in enumerate(train_dataloader):
        x_actual = x_actual.view(x_actual.size(0), 1, 28, 28)/255.0 # reshaping images here
        
        optimizer.zero_grad()
        y_pred = model(x_actual)
        loss = criterion(y_pred, y_actual)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Loss: {loss.item():.4f}")
    
print('Finished Training')

Training Optimized model
[Epoch 1, Batch 100] Loss: 0.8718
[Epoch 1, Batch 200] Loss: 0.4929
[Epoch 1, Batch 300] Loss: 0.2279
[Epoch 1, Batch 400] Loss: 0.3138
[Epoch 1, Batch 500] Loss: 0.2470
[Epoch 2, Batch 100] Loss: 0.2551
[Epoch 2, Batch 200] Loss: 0.1228
[Epoch 2, Batch 300] Loss: 0.1163
[Epoch 2, Batch 400] Loss: 0.1859
[Epoch 2, Batch 500] Loss: 0.1486
[Epoch 3, Batch 100] Loss: 0.0931
[Epoch 3, Batch 200] Loss: 0.0747
[Epoch 3, Batch 300] Loss: 0.0589
[Epoch 3, Batch 400] Loss: 0.0843
[Epoch 3, Batch 500] Loss: 0.0565
[Epoch 4, Batch 100] Loss: 0.2004
[Epoch 4, Batch 200] Loss: 0.0425
[Epoch 4, Batch 300] Loss: 0.1517
[Epoch 4, Batch 400] Loss: 0.1485
[Epoch 4, Batch 500] Loss: 0.1154
[Epoch 5, Batch 100] Loss: 0.0742
[Epoch 5, Batch 200] Loss: 0.0771
[Epoch 5, Batch 300] Loss: 0.0462
[Epoch 5, Batch 400] Loss: 0.0237
[Epoch 5, Batch 500] Loss: 0.1212
[Epoch 6, Batch 100] Loss: 0.0184
[Epoch 6, Batch 200] Loss: 0.0527
[Epoch 6, Batch 300] Loss: 0.0449
[Epoch 6, Batch 400] Lo

In [12]:
# Validation accuracy after each epoch
CNN_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for x_val, y_val in val_dataloader:
        x_val = x_val.view(x_val.size(0), 1, 28, 28)/255.0
    
        outputs = CNN_model(x_val)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == y_val).sum().item()
        total += y_val.size(0)

val_accuracy = 100 * correct / total
print(f"Model= model_CNN: Loss = {running_loss/len(train_dataloader):.4f}, Val Accuracy = {val_accuracy:.2f}%")


Model= model_CNN: Loss = 0.0398, Val Accuracy = 99.23%


In [13]:
# Validation accuracy after each epoch
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for x_val, y_val in val_dataloader:
        x_val = x_val.view(x_val.size(0), 1, 28, 28)/255.0
    
        outputs = model(x_val)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == y_val).sum().item()
        total += y_val.size(0)

val_accuracy = 100 * correct / total
print(f"Model = OptimizedCNN : Loss = {running_loss/len(train_dataloader):.4f}, Val Accuracy = {val_accuracy:.2f}%")


Model = OptimizedCNN : Loss = 0.0398, Val Accuracy = 99.40%


In [14]:
model.eval()
all_preds = []

with torch.no_grad():
    for x_test in test_dataloader:
        x_test = x_test.view(x_test.size(0), 1, 28, 28)/255.0
        outputs = model(x_test)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.tolist())

submission = pd.DataFrame({"ImageId": range(1, len(all_preds)+1), "Label": all_preds})
submission.to_csv("submission.csv", index=False)

In [15]:
df_submissions=pd.read_csv('submission.csv')

In [16]:
df_submissions.shape

(28000, 2)