# Deep Learning Fundamentals — Minimal Notebook
Minimal lab version. Tasks: tensors, feed-forward network (FFNN), gradient check, CNN, learning-rate (LR) sweep, and convolution output-size arithmetic.
Author: student


In [ ]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np

# Run on the GPU when CUDA reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(1337)
print('Device:', device)

## Load dataset (MNIST)

In [ ]:
# ToTensor is the only preprocessing step applied to the images.
transform = T.Compose([T.ToTensor()])

# Training and test splits of MNIST, downloaded into ./data when missing.
mnist = torchvision.datasets.MNIST
train_set = mnist('./data', train=True, transform=transform, download=True)
test_set = mnist('./data', train=False, transform=transform, download=True)

# Shuffled mini-batches of 128 for training; ordered batches of 256 for testing.
make_loader = torch.utils.data.DataLoader
train_loader = make_loader(train_set, batch_size=128, shuffle=True)
test_loader = make_loader(test_set, batch_size=256, shuffle=False)

# Preview: first image (channel 0) of the first training batch, with its label.
first_batch = next(iter(train_loader))
imgs, labels = first_batch
plt.imshow(imgs[0, 0], cmap='gray')
plt.title(f'label: {labels[0].item()}')
plt.show()

## FFNN model + training

In [ ]:
class FFNN(nn.Module):
    """Two-layer fully connected classifier: flatten -> Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28*28``).
        hidden: Width of the hidden layer (default 128).
        num_classes: Number of output logits (default 10).

    The defaults reproduce the original fixed 784 -> 128 -> 10 network, so
    ``FFNN()`` behaves exactly as before.
    """

    def __init__(self, in_features=28 * 28, hidden=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden)
        self.fc2 = nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(batch, num_classes)``.

        Every dimension of ``x`` after the first is flattened into one
        feature axis before the linear layers.
        """
        # reshape (not view) so non-contiguous inputs, e.g. transposed
        # images, are accepted instead of raising a RuntimeError.
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Train the feed-forward network with plain SGD (lr=0.05) for three epochs
# and print the mean per-batch cross-entropy loss after each epoch.
model = FFNN().to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.05)
criterion = nn.CrossEntropyLoss()

for epoch in range(3):
    model.train()
    batch_losses = []
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        opt.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        opt.step()

        batch_losses.append(batch_loss.item())

    total_loss = sum(batch_losses)
    print(f'Epoch {epoch+1}, loss={total_loss/len(train_loader):.4f}')

## Gradient check (tiny network example)

In [ ]:
# Gradient check: compare autograd's dL/dW with a central finite-difference
# estimate for a tiny linear classifier (3 inputs -> 2 classes, 5 samples).
#
# Everything is kept in float64. In float32 the round-off in each loss value
# is amplified by 1 / (2 * eps) in the difference quotient, which inflates the
# reported error even when the analytic gradient is correct.
eps = 1e-4
x = torch.randn(5, 3, device=device, dtype=torch.float64)
W = nn.Parameter(torch.randn(2, 3, device=device, dtype=torch.float64) * 0.1)
b = nn.Parameter(torch.zeros(2, device=device, dtype=torch.float64))
y = torch.randint(0, 2, (5,), device=device)


def forward_fc(x):
    """Return the logits ``x @ W.T + b`` of the tiny linear layer."""
    return x @ W.T + b


# Analytic gradient from autograd.
out = forward_fc(x)
loss = F.cross_entropy(out, y)
loss.backward()
grad_W = W.grad.detach().clone().flatten()

# Numerical gradient: perturb one weight at a time by +/- eps and take the
# central difference of the loss. No graph is needed for these evaluations.
num_grad = []
with torch.no_grad():
    flat_W = W.detach().view(-1)  # shares storage with W, so writes reach W
    for i in range(flat_W.numel()):
        old = flat_W[i].item()
        flat_W[i] = old + eps
        l1 = F.cross_entropy(forward_fc(x), y).item()
        flat_W[i] = old - eps
        l2 = F.cross_entropy(forward_fc(x), y).item()
        flat_W[i] = old  # restore the weight before moving on
        num_grad.append((l1 - l2) / (2 * eps))

# Relative error ||a - n|| / (||a|| + ||n||); the clamp guards against a zero
# denominator when both gradients vanish.
num_grad_t = torch.tensor(num_grad, device=device, dtype=grad_W.dtype)
denom = (torch.norm(grad_W) + torch.norm(num_grad_t)).clamp_min(1e-12)
rel_err = torch.norm(grad_W - num_grad_t) / denom
print('Relative error:', rel_err.item())

## CNN model + quick training

In [ ]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier producing 10 logits per sample.

    Layout: conv(1->8, 5x5, pad 2) -> ReLU -> 2x2 max-pool ->
    conv(8->16, 3x3, pad 1) -> ReLU -> flatten -> Linear(16*14*14 -> 64)
    -> ReLU -> Linear(64 -> 10). Both convolutions preserve spatial size and
    pooling happens once, so fc1's input size corresponds to 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=16 * 14 * 14, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images."""
        # First conv stage, followed by the only pooling step.
        feats = F.relu(self.conv1(x))
        feats = self.pool(feats)
        # Second conv stage (no pooling afterwards).
        feats = F.relu(self.conv2(feats))
        # Flatten channel and spatial dims for the fully connected head.
        flat = feats.view(feats.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Train the CNN with plain SGD (lr=0.01) for three epochs and print the mean
# per-batch cross-entropy loss after each epoch.
cnn = SimpleCNN().to(device)
opt = torch.optim.SGD(cnn.parameters(), lr=0.01)
crit = nn.CrossEntropyLoss()

for epoch in range(3):
    cnn.train()
    batch_losses = []
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        opt.zero_grad()
        batch_loss = crit(cnn(inputs), targets)
        batch_loss.backward()
        opt.step()

        batch_losses.append(batch_loss.item())

    total_loss = sum(batch_losses)
    print(f'[CNN] Epoch {epoch+1}, loss={total_loss/len(train_loader):.4f}')

## LR sweep

In [ ]:
# Learning-rate sweep: for each rate, train a fresh FFNN for three epochs with
# SGD and record its accuracy on test_loader, keyed by the learning rate.
lrs = [0.005, 0.05, 0.5]
results = {}

for lr in lrs:
    # Fresh model and optimizer so runs do not share weights.
    model = FFNN().to(device)
    opt = torch.optim.SGD(model.parameters(), lr=lr)
    crit = nn.CrossEntropyLoss()

    for epoch in range(3):
        model.train()
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            opt.zero_grad()
            batch_loss = crit(model(inputs), targets)
            batch_loss.backward()
            opt.step()

    # Evaluation pass: no gradients needed, count exact label matches.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            predicted = model(inputs).argmax(dim=1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)

    results[lr] = correct / total

print('LR sweep results:', results)

## Conv arithmetic check

In [ ]:
def conv_out(h, k, s=1, p=0, d=1):
    """Return the output length of a convolution along one spatial axis.

    Computes ``floor((h + 2*p - d*(k - 1) - 1) / s) + 1`` using integer floor
    division, so the result is exact for arbitrarily large integer sizes and
    rounds down (rather than toward zero) when the kernel does not fit.

    Args:
        h: Input size along the axis.
        k: Kernel size.
        s: Stride (default 1).
        p: Padding added to each side (default 0).
        d: Dilation (default 1).

    Returns:
        The output size as an ``int``. A result below 1 means the dilated
        kernel is larger than the padded input.

    Raises:
        ZeroDivisionError: If ``s`` is 0.
    """
    # Span covered by the dilated kernel is d*(k-1) + 1 input positions.
    return int((h + 2 * p - d * (k - 1) - 1) // s + 1)

# Compare the closed-form output size with what nn.Conv2d actually produces
# for several (input size, kernel, stride, padding, dilation) settings.
configs = [
    (28, 5, 1, 2, 1),
    (28, 3, 2, 1, 1),
    (32, 3, 1, 0, 1),
    (64, 7, 2, 3, 1),
    (64, 3, 1, 1, 2),
]
for cfg in configs:
    h, k, s, p, d = cfg
    # Square single-channel dummy input; only the output shape matters.
    x = torch.zeros(1, 1, h, h)
    conv = nn.Conv2d(1, 1, kernel_size=k, stride=s, padding=p, dilation=d)
    y = conv(x)
    print(cfg, 'formula:', conv_out(h, k, s, p, d), 'torch:', y.shape[-1])