In [1]:
from torch.utils.data import Dataset


import pickle
import os
import pandas as pd
import numpy as np

# You are not allowed to use DataLoader, torch.nn, torch.functional, torchvision
# Backpropagation and data preprocessing must be implemented from scratch
# Check https://github.com/Tensor-Reloaded/AI-Learning-Hub/blob/main/how_to/use_kaggle.md for learning how to use kaggle and submit to this competition


In [2]:
class ExtendedMNISTDataset(Dataset):
    def __init__(self, root: str = "/kaggle/input/fii-atnn-2025-competition-1", train: bool = True):
        file = "extended_mnist_test.pkl"
        if train:
            file = "extended_mnist_train.pkl"
        file = os.path.join(root, file)
        with open(file, "rb") as fp:
            self.data = pickle.load(fp)

    def __len__(self, ) -> int:
        return len(self.data)

    def __getitem__(self, i : int):
        return self.data[i]


In [3]:
train_data = []
train_labels = []
for image, label in ExtendedMNISTDataset(train=True):
    train_data.append(image)
    train_labels.append(label)


In [4]:
test_data = []
for image, label in ExtendedMNISTDataset(train=False):
    test_data.append(image)


In [5]:
import torch
import torch.nn.functional as F
import time

device = "cuda" if torch.cuda.is_available() else "cpu"

x_train = torch.tensor(train_data, dtype=torch.float32, device=device)
y_train = torch.tensor(train_labels, dtype=torch.long, device=device)
x_test  = torch.tensor(test_data, dtype=torch.float32, device=device)

x_train = x_train.view(x_train.size(0), -1)
x_test  = x_test.view(x_test.size(0), -1)

x_train /= 255.0
x_test  /= 255.0

#torch.manual_seed(0)
#x_val_list = []
#y_val_list = []
#x_train_list = []
#y_train_list = []

#for label in range(10):
    #idx = (y_train == label).nonzero(as_tuple=True)[0]
    #idx = idx[torch.randperm(len(idx))]
    #split = int(0.1 * len(idx))
    #x_val_list.append(x_train[idx[:split]])
    #y_val_list.append(y_train[idx[:split]])
    #x_train_list.append(x_train[idx[split:]])
    #y_train_list.append(y_train[idx[split:]])

#x_val = torch.cat(x_val_list, dim=0)
#y_val = torch.cat(y_val_list, dim=0)
#x_train = torch.cat(x_train_list, dim=0)
#y_train = torch.cat(y_train_list, dim=0)

val_split = int(0.1 * x_train.size(0))
x_val, y_val = x_train[:val_split], y_train[:val_split]
x_train, y_train = x_train[val_split:], y_train[val_split:]

torch.manual_seed(0)
w1 = torch.randn(784, 100, device=device) * 0.01
b1 = torch.zeros(1, 100, device=device)
w2 = torch.randn(100, 10, device=device) * 0.01
b2 = torch.zeros(1, 10, device=device)

lr = 0.1
epochs = 100
batch_size = 32

patience = 10
factor = 0.5
best_val_loss = float('inf')
no_improve_count = 0

start_time = time.time()

for epoch in range(epochs):
    perm = torch.randperm(x_train.size(0))
    x_train = x_train[perm]
    y_train = y_train[perm]

    total_loss = 0
    correct = 0

    for i in range(0, x_train.size(0), batch_size):
        xb = x_train[i:i+batch_size]
        yb = y_train[i:i+batch_size]

        d1 = xb @ w1 + b1
        a1 = torch.relu(d1)
        d2 = a1 @ w2 + b2
        loss = F.cross_entropy(d2, yb)
        total_loss += loss.item() * len(xb)

        probs = torch.softmax(d2, dim=1)
        probs[range(len(yb)), yb] -= 1
        probs /= len(yb)
        dw2 = a1.T @ probs
        db2 = probs.sum(0, keepdim=True)
        da1 = probs @ w2.T
        dd1 = da1.clone()
        dd1[d1 <= 0] = 0
        dw1 = xb.T @ dd1
        db1 = dd1.sum(0, keepdim=True)

        w1 -= lr * dw1
        b1 -= lr * db1
        w2 -= lr * dw2
        b2 -= lr * db2

        preds = torch.argmax(d2, dim=1)
        correct += (preds == yb).sum().item()

    train_acc = correct / x_train.size(0)
    train_loss = total_loss / x_train.size(0)

    #with torch.no_grad():
    d1v = x_val @ w1 + b1
    a1v = torch.relu(d1v)
    d2v = a1v @ w2 + b2
    val_loss = F.cross_entropy(d2v, y_val).item()
    val_preds = torch.argmax(d2v, dim=1)
    val_acc = (val_preds == y_val).float().mean().item()

    if val_loss < best_val_loss - 1e-4:
        best_val_loss = val_loss
        no_improve_count = 0
    else:
        no_improve_count += 1
        if no_improve_count >= patience:
            lr *= factor
            no_improve_count = 0
            print(f"   Learning rate reduced to {lr:.5f}")

    print(f"Epoch {epoch+1:02d}: "
          f"train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
          f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}, lr={lr:.4f}")

print(f"Training done in {time.time() - start_time:.2f}s")


  x_train = torch.tensor(train_data, dtype=torch.float32, device=device)


Epoch 01: train_loss=0.4305, train_acc=0.8704, val_loss=0.2258, val_acc=0.9315, lr=0.1000
Epoch 02: train_loss=0.1770, train_acc=0.9483, val_loss=0.1529, val_acc=0.9550, lr=0.1000
Epoch 03: train_loss=0.1255, train_acc=0.9642, val_loss=0.1278, val_acc=0.9602, lr=0.1000
Epoch 04: train_loss=0.0986, train_acc=0.9711, val_loss=0.1134, val_acc=0.9663, lr=0.1000
Epoch 05: train_loss=0.0811, train_acc=0.9760, val_loss=0.1073, val_acc=0.9670, lr=0.1000
Epoch 06: train_loss=0.0683, train_acc=0.9800, val_loss=0.0879, val_acc=0.9753, lr=0.1000
Epoch 07: train_loss=0.0587, train_acc=0.9835, val_loss=0.0916, val_acc=0.9725, lr=0.1000
Epoch 08: train_loss=0.0509, train_acc=0.9852, val_loss=0.0839, val_acc=0.9743, lr=0.1000
Epoch 09: train_loss=0.0449, train_acc=0.9867, val_loss=0.0856, val_acc=0.9740, lr=0.1000
Epoch 10: train_loss=0.0392, train_acc=0.9882, val_loss=0.0828, val_acc=0.9733, lr=0.1000
Epoch 11: train_loss=0.0337, train_acc=0.9907, val_loss=0.0828, val_acc=0.9757, lr=0.1000
Epoch 12: 

In [6]:


#with torch.no_grad():
d1_test = x_test @ w1 + b1
a1_test = torch.relu(d1_test)
d2_test = a1_test @ w2 + b2
predictions = torch.argmax(d2_test, dim=1).cpu().numpy()

predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": predictions.tolist(),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)
print("Predictions saved to submission.csv")


Predictions saved to submission.csv
