In [None]:
import torch
import torch.nn as nn

In [None]:
class Perceptron(nn.Module):
  """Single neuron: a weighted sum of the inputs plus a bias, passed through an activation.

  Args:
    input_size: Length of the weight vector, i.e. the number of input features.
    activation: Callable applied to the pre-activation tensor (for example ``torch.sigmoid``).
  """

  def __init__(self, input_size, activation):
    super().__init__()
    # Both learnable parameters start from standard-normal samples (torch.randn).
    self.w = nn.Parameter(torch.randn(input_size))
    self.b = nn.Parameter(torch.randn(1))
    self.activation = activation

  def forward(self, x):
    """Return ``activation(x @ w + b)`` for the input tensor ``x``."""
    pre_activation = torch.matmul(x, self.w) + self.b
    return self.activation(pre_activation)

In [None]:
# Three-input perceptron with a sigmoid activation.
p1 = Perceptron(input_size=3, activation=torch.sigmoid)

In [None]:
# Feed one three-feature sample through the perceptron; the bare last expression is displayed.
inputs = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
p1(inputs)

tensor([0.0204], grad_fn=<SigmoidBackward0>)

In [None]:
# Show the first registered parameter (the weight vector `w`); use the builtin
# next() rather than calling the iterator's __next__ dunder directly.
next(p1.parameters())

Parameter containing:
tensor([ 0.5300, -1.9257,  0.7713], requires_grad=True)

In [None]:
class DenseLayer(nn.Module):
  """Fully connected layer assembled from independent ``Perceptron`` units.

  Args:
    input_size: Number of features handed to every perceptron.
    output_size: Number of perceptrons, i.e. the size of the last output axis.
    activation: Callable passed to every perceptron as its activation.
  """

  def __init__(self, input_size, output_size, activation):
    super().__init__()
    # nn.ModuleList registers each perceptron as a submodule of this layer.
    self.layer = nn.ModuleList(
        [Perceptron(input_size, activation) for _ in range(output_size)]
    )

  def forward(self, x):
    """Evaluate every perceptron on ``x`` and join the results along a new last axis.

    A 1-D ``x`` yields shape ``(1, output_size)`` and a ``(batch, input_size)``
    ``x`` yields ``(batch, output_size)``, exactly as stacking on ``dim=1`` did.
    Stacking on the last axis additionally keeps the unit axis last when ``x``
    has further leading dimensions.
    """
    return torch.stack([unit(x) for unit in self.layer], dim=-1)

In [None]:
# One unbatched three-feature sample.
x = torch.tensor([3.0, 4.0, 5.0], dtype=torch.float32)

# Two ReLU perceptrons side by side; print the layer's response to `x`.
layer1 = DenseLayer(input_size=3, output_size=2, activation=torch.relu)
print(layer1(x))

tensor([[ 0.0000, 13.9383]], grad_fn=<StackBackward0>)


In [None]:
class Model(nn.Module):
  """Feed-forward network: two Linear -> BatchNorm1d -> GELU stages, then a linear output layer.

  Args:
    input_size: Number of input features expected by the first linear layer.
    output_size: Number of values produced by the last linear layer.
  """

  def __init__(self, input_size, output_size):
    super().__init__()
    hidden_wide, hidden_narrow = 1024, 512
    stages = [
        nn.Linear(input_size, hidden_wide),
        nn.BatchNorm1d(hidden_wide),
        nn.GELU(),
        nn.Linear(hidden_wide, hidden_narrow),
        nn.BatchNorm1d(hidden_narrow),
        nn.GELU(),
        nn.Linear(hidden_narrow, output_size),
    ]
    # Unpacking keeps the submodules registered under indices 0..6 of `net`.
    self.net = nn.Sequential(*stages)

  def forward(self, x):
    """Run ``x`` through the sequential stack and return the final linear layer's output."""
    return self.net(x)

In [None]:
# Disabled example: the triple-quoted string below is only an expression, so the
# Model(3, 10) call inside it never executes; the cell just echoes the string.
# NOTE(review): presumably disabled because `x` is a 1-D tensor and nn.BatchNorm1d
# rejects unbatched input — confirm, then delete this cell or pass x.unsqueeze(0).
'''model1 = Model(3, 10)
model1(x)'''

'model1 = Model(3, 10)\nmodel1(x)'

In [None]:
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms

from torch.utils.data import random_split

# Preprocessing applied to every image: tensor conversion followed by
# normalisation with mean 0.1307 and std 0.3081.
transform_modified = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# train=True portion of MNIST, stored under data/ (download=True).
dataset = MNIST(root='data/', train=True, download=True, transform=transform_modified)

# Fixed-seed 50,000 / 10,000 split; the smaller part feeds `test_loader`,
# which the evaluation cell reports as validation accuracy.
split_rng = torch.Generator().manual_seed(10)
train_dataset, test_dataset = random_split(dataset, [50000, 10000], generator=split_rng)

batch_size = 128
# Only the training loader reshuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# 28 * 28 = 784 input features per sample (the later cells flatten each image
# to one row before calling the model) and 10 outputs.
mnist_model = Model(input_size=28 * 28, output_size=10)

In [None]:
# Cross-entropy between the model's raw outputs and the integer class labels.
loss_fn = nn.CrossEntropyLoss()

# AdamW over every parameter of the MNIST model, learning rate 1e-3.
optimiser = torch.optim.AdamW(params=mnist_model.parameters(), lr=0.001)

In [None]:
from tqdm import tqdm

# Train for 20 epochs; after each epoch print the mean of the per-batch losses.
for epoch in range(20):
    mnist_model.train()
    loss_sum = 0

    for images, labels in tqdm(train_loader):
        # Collapse every image into a single feature row: (batch, ...) -> (batch, features).
        flat_images = images.flatten(start_dim=1)

        # Clear stale gradients, then forward, backward, and update.
        optimiser.zero_grad()
        loss = loss_fn(mnist_model(flat_images), labels)
        loss.backward()
        optimiser.step()

        loss_sum += loss.item()

    print(loss_sum / len(train_loader))


100%|██████████| 391/391 [00:21<00:00, 18.26it/s]


0.17441733080007688


100%|██████████| 391/391 [00:20<00:00, 18.98it/s]


0.07327943459353255


100%|██████████| 391/391 [00:21<00:00, 18.32it/s]


0.04766256045883574


100%|██████████| 391/391 [00:20<00:00, 19.14it/s]


0.034277327287265714


100%|██████████| 391/391 [00:21<00:00, 18.09it/s]


0.026768004170397434


100%|██████████| 391/391 [00:20<00:00, 19.15it/s]


0.02131284086171852


100%|██████████| 391/391 [00:21<00:00, 18.16it/s]


0.020413851937996056


100%|██████████| 391/391 [00:20<00:00, 19.29it/s]


0.016247809072524724


100%|██████████| 391/391 [00:21<00:00, 18.18it/s]


0.014636225268796153


100%|██████████| 391/391 [00:20<00:00, 19.31it/s]


0.01122287311326966


100%|██████████| 391/391 [00:21<00:00, 18.07it/s]


0.012528994000036582


100%|██████████| 391/391 [00:20<00:00, 19.13it/s]


0.01430303204985981


100%|██████████| 391/391 [00:21<00:00, 18.08it/s]


0.011107750054214523


100%|██████████| 391/391 [00:20<00:00, 19.13it/s]


0.008947011114515261


100%|██████████| 391/391 [00:21<00:00, 17.93it/s]


0.009070080457660315


100%|██████████| 391/391 [00:20<00:00, 19.11it/s]


0.008120166680068755


100%|██████████| 391/391 [00:21<00:00, 18.09it/s]


0.006435210481347085


100%|██████████| 391/391 [00:20<00:00, 18.89it/s]


0.0038687916622223073


100%|██████████| 391/391 [00:21<00:00, 18.22it/s]


0.01425689705068429


100%|██████████| 391/391 [00:20<00:00, 18.80it/s]

0.008122342527559523





In [None]:
# Measure accuracy on the held-out split served by `test_loader`.
mnist_model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        # Flatten each image to one row, then take the highest-scoring class.
        logits = mnist_model(images.view(images.size(0), -1))
        hits = logits.argmax(dim=1).eq(labels)

        correct += int(hits.sum())
        total += len(labels)

val_accuracy = correct / total
print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.9822


In [None]:
import pandas as pd
def generate_kaggle_submission(model, output_csv_path="submission.csv",
    batch_size=128, device=None):
    """Predict the MNIST ``train=False`` split with ``model`` and write the labels to a CSV.

    Args:
        model: Module that maps flattened image batches to per-class scores.
            It is switched to eval mode and left in that mode.
        output_csv_path: Destination of the CSV file.
        batch_size: Number of images per inference batch.
        device: Device to run inference on. ``None`` (the default) uses the
            device of the model's first parameter, or the CPU if the model
            has no parameters. When given explicitly, the model is moved to it.

    Returns:
        None. Writes a CSV with the columns ``ImageId`` (1-based row number)
        and ``Label`` (predicted class), then prints the output path.
    """
    if device is None:
        # Follow the model so inputs and weights always share a device.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    else:
        device = torch.device(device)
        model.to(device)

    model.eval()

    # Same transform as the training data, so inputs are normalised identically.
    test_dataset = MNIST(
        root="./data",
        train=False,
        download=True,
        transform=transform_modified
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False  # keep dataset order so ImageId matches the row position
    )

    all_predictions = []

    with torch.no_grad():
        for images, _ in test_loader:
            # Flatten the images to match the model's expected input size
            images = torch.flatten(images, start_dim=1).to(device)

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            # Bring predictions back to the CPU; .numpy() below requires it.
            all_predictions.append(preds.cpu())

    all_predictions = torch.cat(all_predictions).numpy()

    submission_df = pd.DataFrame({
        "ImageId": range(1, len(all_predictions) + 1),
        "Label": all_predictions
    })

    submission_df.to_csv(output_csv_path, index=False)
    print(f"Submission file saved to {output_csv_path}")

# Write submission.csv (the default output path) from the trained MNIST model.
generate_kaggle_submission(model=mnist_model)

Submission file saved to submission.csv
