In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [None]:
# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Preprocessing: convert each image to a tensor, then normalize its single
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST train/test splits stored under ./data (download requested for both).
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Only the training batches are shuffled; the test loader uses larger batches
# and keeps the DataLoader default ordering.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000)

In [None]:
# Fully connected classifier: inputs are flattened to 784 features, then passed
# through 784 -> 128 -> 64 -> 10 linear layers with a ReLU after each hidden
# layer. The last layer has no activation, so the outputs are raw scores.
SequentialNet = nn.Sequential(*[
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
])

In [None]:
# Training loop
def train(model, loader, optimizer, loss_fn, epochs=5):
    """Train ``model`` in place for ``epochs`` passes over ``loader``.

    Args:
        model: Module to optimize. It is switched to training mode and left
            in that mode when the function returns.
        loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar
            tensor that supports ``backward()``.
        epochs: Number of full passes over ``loader``.

    Returns:
        None. After every epoch one line is printed with the *sum* of the
        per-batch losses for that epoch (not the mean).
    """
    # Move each batch to wherever the model's parameters live instead of
    # relying on a notebook-global ``device``; the function therefore works
    # regardless of which cells have been executed. A model without
    # parameters gives no device hint, so batches are then left untouched.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for x, y in loader:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            # Gradients accumulate across backward() calls, so clear them
            # before computing this batch's gradients.
            optimizer.zero_grad()
            logits = model(x)
            loss = loss_fn(logits, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Testing loop
def test(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            pred = logits.argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    print(f"Accuracy: {100 * correct / total:.2f}%")


In [None]:
print("\nTraining SequentialNet")
# Module.to() moves the module in place and returns it, so sequential_model
# refers to the same object as SequentialNet.
loss_fn = nn.CrossEntropyLoss()
sequential_model = SequentialNet.to(device)
optimizer_seq = optim.Adam(params=sequential_model.parameters(), lr=1e-3)

# Fit with the default number of epochs, then report test-set accuracy.
train(model=sequential_model, loader=train_loader, optimizer=optimizer_seq, loss_fn=loss_fn)
test(model=sequential_model, loader=test_loader)

In [None]:
# Custom model
class ManualNet(nn.Module):
    """Three-layer fully connected classifier built from raw ``nn.Parameter``s.

    Computes 784 -> 128 -> 64 -> 10 with a ReLU after each of the two hidden
    layers, using explicit matrix multiplications instead of ``nn.Linear``.
    The output is the raw (un-normalized) score for each of the 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Weights are stored as (out_features, in_features) and drawn from a
        # standard normal scaled by 0.01; biases start at zero.
        self.w1 = nn.Parameter(torch.randn(128, 784) * 0.01)  # input -> hidden 1
        self.w2 = nn.Parameter(torch.randn(64, 128) * 0.01)  # hidden 1 -> hidden 2
        self.w3 = nn.Parameter(torch.randn(10, 64) * 0.01)  # hidden 2 -> 10 outputs
        self.b1 = nn.Parameter(torch.zeros(128))
        self.b2 = nn.Parameter(torch.zeros(64))
        self.b3 = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        """Return class scores of shape ``(N, 10)`` for ``x``.

        ``x`` may have any shape whose elements regroup into rows of 784
        values (e.g. ``(N, 1, 28, 28)``).
        """
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as transposed or sliced image batches; it copies only when needed.
        x = x.reshape(-1, 784)
        # torch.relu maps -inf to 0, whereas a mask-multiply (out > 0) * out
        # evaluates 0 * -inf = NaN and would poison the rest of the pass.
        hidden1 = torch.relu(x @ self.w1.T + self.b1)
        hidden2 = torch.relu(hidden1 @ self.w2.T + self.b2)
        return hidden2 @ self.w3.T + self.b3

In [None]:
print("\nTraining ManualNet")
# Build the hand-written network, place it on the selected device, then train
# and evaluate it with the same optimizer settings and loss as SequentialNet.
loss_fn = nn.CrossEntropyLoss()
manual_model = ManualNet()
manual_model = manual_model.to(device)
optimizer_manual = optim.Adam(params=manual_model.parameters(), lr=1e-3)
train(model=manual_model, loader=train_loader, optimizer=optimizer_manual, loss_fn=loss_fn)
test(model=manual_model, loader=test_loader)

In [None]:
def visualize(model, loader, n=5):
    """Plot the first images of one batch with true and predicted labels.

    Args:
        model: Module producing per-class scores of shape ``(batch, classes)``.
            It is switched to eval mode and left in that mode.
        loader: Iterable yielding ``(images, labels)`` tensor pairs; only the
            first batch is used.
        n: Maximum number of images to show. If the batch holds fewer than
            ``n`` samples, only the available ones are drawn.

    Each panel is titled ``T:<true label> P:<predicted label>``.
    """
    # Follow the model's own device rather than a notebook-global ``device``.
    # A model without parameters gives no device hint; the batch stays as-is.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.eval()
    x, y = next(iter(loader))
    if target_device is not None:
        x, y = x.to(target_device), y.to(target_device)
    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        preds = model(x).argmax(dim=1)

    # Never index past the end of the batch.
    n_shown = min(n, x.size(0))

    plt.figure(figsize=(10, 2))
    for i in range(n_shown):
        plt.subplot(1, n_shown, i + 1)
        # Move to CPU and drop size-1 dimensions so imshow gets a 2-D image.
        plt.imshow(x[i].cpu().squeeze(), cmap='gray')
        plt.title(f"T:{y[i].item()} P:{preds[i].item()}")
        plt.axis('off')
    plt.show()

# To inspect the hand-written network instead, call:
#   visualize(manual_model, test_loader)
visualize(model=sequential_model, loader=test_loader)

## Assignment
* Load and preprocess CIFAR100 dataset (not CIFAR10)
* Build a feedforward network for it. You can experiment with the number of layers, the number of neurons in each layer, and different activation functions
* You are allowed to use nn.functional. (convolutions _might_ make your accuracy better)