In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [2]:
class DigitCSVTrain(Dataset):
    """Labelled digit images read from a CSV file.

    The first CSV column is used as the label. All remaining columns are
    treated as pixel values, reshaped to ``(N, 1, 28, 28)``, cast to float32
    and divided by 255.
    """

    def __init__(self, path):
        frame = pd.read_csv(path)
        label_column = frame.iloc[:, 0]
        pixel_columns = frame.iloc[:, 1:]

        self.labels = label_column.values
        raw_pixels = pixel_columns.values.reshape(-1, 1, 28, 28)
        self.images = raw_pixels.astype("float32") / 255.0

    def __len__(self):
        # There is exactly one label per sample.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx`` as tensors (label as int64)."""
        image = torch.tensor(self.images[idx])
        label = torch.tensor(self.labels[idx]).long()
        return image, label

In [3]:
class DigitCSVTest(Dataset):
    """Unlabelled digit images read from a CSV file.

    Every CSV column is treated as a pixel value. Rows are reshaped to
    ``(N, 1, 28, 28)``, cast to float32 and divided by 255.
    """

    def __init__(self, path):
        frame = pd.read_csv(path)
        flat_pixels = frame.values
        images = flat_pixels.reshape(-1, 1, 28, 28).astype("float32")
        self.images = images / 255.0

    def __len__(self):
        # The leading axis of the image array is the sample count.
        return self.images.shape[0]

    def __getitem__(self, idx):
        """Return the image at ``idx`` as a tensor (there is no label)."""
        return torch.tensor(self.images[idx])

In [4]:
class ConvNeXtBlock(nn.Module):
    """Residual block: 7x7 depthwise conv -> BatchNorm -> GELU -> 1x1 conv.

    Channel count and spatial size are preserved, so the branch output can be
    added directly to the block input.
    """

    def __init__(self, dim):
        super().__init__()
        # groups=dim makes the 7x7 convolution depthwise (one filter per
        # channel); padding=3 keeps the spatial size unchanged.
        self.dw = nn.Conv2d(
            in_channels=dim, out_channels=dim, kernel_size=7, padding=3, groups=dim
        )
        self.bn = nn.BatchNorm2d(num_features=dim)
        self.act = nn.GELU()
        # Pointwise (1x1) convolution mixes information across channels.
        self.pw = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=1)

    def forward(self, x):
        """Apply the conv branch and add the skip connection."""
        branch = self.pw(self.act(self.bn(self.dw(x))))
        return x + branch

In [5]:
class ConvNeXtTiny(nn.Module):
    """Small ConvNeXt-style classifier built from ``ConvNeXtBlock`` stages.

    Layout: a stride-4 patchify stem (96 channels), stage 1 at 96 channels, a
    stride-2 downsample to 192 channels, stage 2, a stride-2 downsample to 384
    channels, then stages 3 and 4 back to back at 384 channels, global average
    pooling and a linear classification head. There is no downsampling between
    stage 3 and stage 4. For a 28x28 input the feature map shrinks
    28 -> 7 -> 3 -> 1.

    Args:
        in_ch: Number of input image channels.
        num_classes: Number of output logits.
        depths: Number of blocks in stages 1-4. The default ``(3, 3, 9, 3)``
            reproduces the original fixed architecture; a depth of 0 makes
            that stage an empty ``nn.Sequential`` (identity).

    Raises:
        ValueError: If ``depths`` does not hold exactly four non-negative
            block counts.
    """

    def __init__(self, in_ch=1, num_classes=10, depths=(3, 3, 9, 3)):
        super().__init__()

        depths = tuple(depths)
        if len(depths) != 4 or any(d < 0 for d in depths):
            raise ValueError(
                f"depths must be four non-negative block counts, got {depths!r}"
            )
        depth1, depth2, depth3, depth4 = depths

        # Modules are created in the same order as the original fixed layout so
        # that default construction consumes the RNG identically.
        self.stem = nn.Sequential(
            nn.Conv2d(in_ch, 96, kernel_size=4, stride=4),
            nn.BatchNorm2d(96),
            nn.GELU()
        )

        self.stage1 = self._make_stage(96, depth1)

        self.down1 = nn.Conv2d(96, 192, kernel_size=2, stride=2)

        self.stage2 = self._make_stage(192, depth2)

        self.down2 = nn.Conv2d(192, 384, kernel_size=2, stride=2)

        self.stage3 = self._make_stage(384, depth3)

        # Stage 4 keeps the 384-channel width; no downsample precedes it.
        self.stage4 = self._make_stage(384, depth4)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(384, num_classes)

    @staticmethod
    def _make_stage(dim, depth):
        """Stack ``depth`` ConvNeXtBlock modules of width ``dim``."""
        return nn.Sequential(*[ConvNeXtBlock(dim) for _ in range(depth)])

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(batch, num_classes)``."""
        x = self.stem(x)
        x = self.stage1(x)
        x = self.down1(x)

        x = self.stage2(x)
        x = self.down2(x)

        x = self.stage3(x)

        x = self.stage4(x)

        # Global average pool to (batch, 384, 1, 1), then flatten to (batch, 384).
        x = self.pool(x).flatten(1)
        return self.fc(x)

In [6]:
# Absolute locations of the digit-recognizer CSV files on the author's machine.
train_path = r"C:\Users\Public\Documents\NITJ\Research MTech\digit-recognizer\train.csv"
test_path = r"C:\Users\Public\Documents\NITJ\Research MTech\digit-recognizer\test.csv"

# Both datasets read their whole CSV into memory when constructed.
train_dataset = DigitCSVTrain(path=train_path)
test_dataset = DigitCSVTest(path=test_path)

# Training batches are reshuffled every epoch; test batches keep file order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)

In [11]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

model = ConvNeXtTiny(in_ch=1, num_classes=10).to(device)
opt = torch.optim.Adam(params=model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

epochs = 5

for e in range(epochs):
    model.train()
    # Per-epoch running totals: samples seen, correct predictions, summed loss.
    total, correct, loss_sum = 0, 0, 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)

        # Standard optimisation step: clear grads, forward, backward, update.
        opt.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        opt.step()

        # The loss is a batch mean, so weight it by the batch size to obtain a
        # per-sample average at the end of the epoch.
        total += x.size(0)
        correct += out.argmax(dim=1).eq(y).sum().item()
        loss_sum += x.size(0) * loss.item()

    print(f"Epoch {e+1}: loss={loss_sum/total:.4f}, acc={correct/total:.4f}")

Epoch 1: loss=0.2535, acc=0.9301
Epoch 2: loss=0.1004, acc=0.9698
Epoch 3: loss=0.0812, acc=0.9763
Epoch 4: loss=0.0784, acc=0.9779
Epoch 5: loss=0.0598, acc=0.9827


In [12]:
import joblib
# Persist the entire trained model object (module structure + weights) with joblib.
# NOTE(review): this pickles the module itself, so loading requires the same
# class definitions to be available; saving model.state_dict() with torch.save
# is the more portable option — if adopted, change the save and load cells together.
joblib.dump(model, 'CNN_digit_recognizer.pkl')
print("Model saved!")

Model saved!


In [7]:
import joblib
# Load the pickled model object from disk.
# Note: the class definitions (ConvNeXtTiny, ConvNeXtBlock) must already be
# defined in the kernel before running this, otherwise unpickling fails.
# NOTE(review): joblib.load unpickles arbitrary objects — only load files you trust.
loaded_model = joblib.load('CNN_digit_recognizer.pkl')

# Switch to evaluation mode so BatchNorm layers use their running statistics
# instead of per-batch statistics.
loaded_model.eval()

print("Model loaded successfully via Joblib")

Model loaded successfully via Joblib


In [11]:
# Checking accuracy with different hyperparameters
def run_experiment(
    lr,
    batch_size,
    epochs,
    blocks_stage3,
    train_path=r"C:\Users\Public\Documents\NITJ\Research MTech\digit-recognizer\train.csv",
):
    """Train a fresh ConvNeXtTiny on the training CSV and print per-epoch metrics.

    A new model is built on every call, its ``stage3`` is replaced by
    ``blocks_stage3`` ConvNeXtBlock(384) modules, and it is trained with Adam
    and cross-entropy loss. The reported loss and accuracy are measured on the
    training batches themselves (while the model is in train mode); no
    held-out data is evaluated.

    Args:
        lr: Learning rate passed to Adam.
        batch_size: Batch size for the training DataLoader.
        epochs: Number of passes over the training set.
        blocks_stage3: Number of ConvNeXtBlock(384) modules to use in stage 3.
        train_path: Location of the labelled training CSV. Defaults to the
            path originally hard-coded in this function, so existing calls
            behave as before.

    Returns:
        None. Progress is reported via ``print``.
    """
    print("\n==== Running Experiment ====")
    print(f"Learning Rate: {lr}")
    print(f"Batch Size: {batch_size}")
    print(f"Epochs: {epochs}")
    print(f"Stage3 Blocks: {blocks_stage3}")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # The CSV is re-read on every call so each experiment is self-contained.
    train_dataset = DigitCSVTrain(train_path)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # ---- Create model normally ----
    model = ConvNeXtTiny(in_ch=1, num_classes=10)

    # ---- Modify stage3 dynamically based on user input ----
    # The replacement happens before the optimizer is created, so only the new
    # stage3 parameters (not the discarded default ones) are optimised.
    model.stage3 = nn.Sequential(*[ConvNeXtBlock(384) for _ in range(blocks_stage3)])

    model = model.to(device)

    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for e in range(epochs):
        model.train()
        total = 0
        correct = 0
        loss_sum = 0

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            opt.zero_grad()
            out = model(x)
            loss = loss_fn(out, y)
            loss.backward()
            opt.step()

            # loss is a batch mean; weight by batch size so the epoch figure
            # is a true per-sample average even when the last batch is short.
            n_batch = x.size(0)
            loss_sum += loss.item() * n_batch
            correct += (out.argmax(1) == y).sum().item()
            total += n_batch

        print(f"Epoch {e+1}/{epochs} — Loss: {loss_sum/total:.4f}, Acc: {correct/total:.4f}")

    print("Training complete!\n")

In [14]:
# Experiment: half the baseline learning rate (1e-3), batch size 256, 3 epochs,
# and 3 stage3 blocks instead of the 9 that ConvNeXtTiny builds by default.
run_experiment(
    lr=0.0005,
    batch_size=256,
    epochs=3,
    blocks_stage3=3
)


==== Running Experiment ====
Learning Rate: 0.0005
Batch Size: 256
Epochs: 3
Stage3 Blocks: 3
Epoch 1/3 — Loss: 0.2001, Acc: 0.9371
Epoch 2/3 — Loss: 0.0754, Acc: 0.9761
Epoch 3/3 — Loss: 0.0379, Acc: 0.9879
Training complete!



In [9]:
# Experiment: baseline learning rate (1e-3), batch size 384, 2 epochs,
# and a single stage3 block.
run_experiment(
    lr=0.001,
    batch_size=384,
    epochs=2,
    blocks_stage3=1
)


==== Running Experiment ====
Learning Rate: 0.001
Batch Size: 384
Epochs: 2
Stage3 Blocks: 1
Epoch 1/2 — Loss: 0.2004, Acc: 0.9374
Epoch 2/2 — Loss: 0.0593, Acc: 0.9810
Training complete!



In [9]:
# Experiment: double the baseline learning rate (1e-3), batch size 512,
# 2 epochs, and 2 stage3 blocks.
run_experiment(
    lr=0.002,
    batch_size=512,
    epochs=2,
    blocks_stage3=2
)


==== Running Experiment ====
Learning Rate: 0.002
Batch Size: 512
Epochs: 2
Stage3 Blocks: 2
Epoch 1/2 — Loss: 0.2681, Acc: 0.9202
Epoch 2/2 — Loss: 0.0875, Acc: 0.9741
Training complete!

