Imports + dataset + loaders

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Data-pipeline configuration, named here so the values below are not magic numbers.
DATA_ROOT = '.'          # directory MNIST is downloaded to / read from
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 1000
SEED = 0                 # fixes the training shuffle order so runs are repeatable

transform = transforms.Compose([
    transforms.ToTensor(),                # Gives [0,1]
    transforms.Lambda(lambda x: x.view(-1))  # Flatten 1×28×28 → 784
])
train_dataset = datasets.MNIST(DATA_ROOT, train=True,  download=True, transform=transform)
test_dataset  = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=transform)

# A dedicated, seeded generator drives the training shuffle. This keeps the
# batch order reproducible after a kernel restart without touching the global
# torch RNG state.
shuffle_rng = torch.Generator().manual_seed(SEED)

train_loader  = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True,
                           generator=shuffle_rng)
# Evaluation order does not affect accuracy, so the test loader stays unshuffled.
test_loader   = DataLoader(test_dataset,  batch_size=TEST_BATCH_SIZE, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.2MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 504kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.48MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.56MB/s]


Perceptron Model

In [2]:
class PerceptronOVR(nn.Module):
    """One-vs-rest linear scorer: one weight row and one bias per class.

    Both parameters are initialised to zero. The attribute names ``weights``
    and ``biases`` are the keys under which they appear in ``state_dict()``.
    """

    def __init__(self, input_dim=784, num_classes=10):
        """Create a (num_classes, input_dim) weight matrix and a (num_classes,) bias."""
        super().__init__()
        weight_shape = (num_classes, input_dim)
        self.weights = nn.Parameter(torch.zeros(weight_shape))
        self.biases = nn.Parameter(torch.zeros(num_classes))

    def forward(self, x):
        """Return per-class scores for ``x`` whose last dimension is ``input_dim``."""
        # [B, input_dim] @ [input_dim, num_classes] -> [B, num_classes]
        projected = torch.matmul(x, self.weights.t())
        return projected + self.biases

Loss function + model instance + update rule (SGD optimizer)

In [3]:
def perceptron_loss(scores, labels):
    """One-vs-rest perceptron criterion, averaged over every (sample, class) entry.

    Args:
        scores: 2-D tensor of per-class scores, shape [B, C].
        labels: tensor of B class indices used to pick the true column per row.

    Returns:
        0-dim tensor: mean over all B*C entries of max(0, -y * s), where y is
        +1 in the true-class column and -1 everywhere else.
    """
    # Unpacking two values also enforces that `scores` is exactly 2-D.
    batch_size, _ = scores.size()
    row_index = torch.arange(batch_size)

    # Sign targets: start from all -1, then flip the true-class entry to +1.
    signs = torch.full_like(scores, -1.0)
    signs[row_index, labels] = 1.0

    # Zero-margin hinge (the perceptron criterion): only wrongly-signed scores
    # contribute. NOTE(review): keep torch.clamp here rather than swapping in
    # relu; with all-zero initial parameters every entry sits exactly on the
    # clamp boundary, and training presumably relies on gradient flowing
    # there. Confirm before changing.
    violations = torch.clamp(-signs * scores, min=0.0)
    return violations.mean()


# Training setup: choose the device, place the model on it, and bind the loss
# and optimiser objects that the training loop receives.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = PerceptronOVR()
model = model.to(device)

loss_function = perceptron_loss
update_rule = optim.SGD(params=model.parameters(), lr=1e-2)

Training + predicting + output

In [4]:
def _infer_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    if first_param is None:
        return torch.device('cpu')
    return first_param.device


def _train_one_epoch(loader, model, loss_function, update_rule, device):
    """Do one optimisation pass over `loader`; return the per-sample mean loss."""
    model.train()
    total_loss, seen = 0.0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        update_rule.zero_grad()
        loss = loss_function(model(x), y)
        loss.backward()
        update_rule.step()
        # `loss` is treated as a per-batch mean: weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        total_loss += loss.item() * x.size(0)
        seen += x.size(0)
    # Divide by the samples actually processed (not the dataset length) so the
    # average stays right when the loader drops or subsamples data.
    return total_loss / seen if seen else float('nan')


def _evaluate(loader, model, device):
    """Return top-1 accuracy (in percent) of `model` over `loader`."""
    model.eval()
    correct, seen = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)
            correct += (preds == y).sum().item()
            seen += y.size(0)
    return 100 * correct / seen if seen else float('nan')


def run(train_loader, test_loader, model, loss_function, update_rule, max_epochs=10):
    """Train `model` for `max_epochs` epochs, printing loss and test accuracy each epoch.

    Args:
        train_loader: iterable of (inputs, labels) batches used for training.
        test_loader: iterable of (inputs, labels) batches used for evaluation.
        model: the nn.Module to optimise; it is updated in place.
        loss_function: callable (scores, labels) -> scalar loss tensor.
        update_rule: optimiser exposing zero_grad() and step().
        max_epochs: number of passes over `train_loader` (default 10).

    Returns:
        None. Progress is reported through print, one line per epoch.
    """
    # Use the device the model already lives on instead of a notebook-level
    # global, so the function works regardless of which cells ran before it.
    device = _infer_device(model)
    for epoch in range(1, max_epochs + 1):
        avg_train_loss = _train_one_epoch(train_loader, model, loss_function, update_rule, device)
        test_acc = _evaluate(test_loader, model, device)
        print(f"Epoch {epoch}/{max_epochs} — Train Loss: {avg_train_loss:.4f} — Test Acc: {test_acc:.2f}%")

In [5]:
# Launch training with the loaders, model, loss and optimiser built in the cells above.
run(
    train_loader=train_loader,
    test_loader=test_loader,
    model=model,
    loss_function=loss_function,
    update_rule=update_rule,
)


Epoch 1/10 — Train Loss: 0.0001 — Test Acc: 76.39%
Epoch 2/10 — Train Loss: 0.0000 — Test Acc: 85.86%
Epoch 3/10 — Train Loss: 0.0000 — Test Acc: 77.00%
Epoch 4/10 — Train Loss: 0.0000 — Test Acc: 85.27%
Epoch 5/10 — Train Loss: 0.0000 — Test Acc: 85.96%
Epoch 6/10 — Train Loss: 0.0000 — Test Acc: 89.24%
Epoch 7/10 — Train Loss: 0.0000 — Test Acc: 86.23%
Epoch 8/10 — Train Loss: 0.0000 — Test Acc: 76.48%
Epoch 9/10 — Train Loss: 0.0000 — Test Acc: 82.20%
Epoch 10/10 — Train Loss: 0.0000 — Test Acc: 88.73%


In [6]:
# Persist the model's state_dict (its parameters) rather than the module object itself.
torch.save(obj=model.state_dict(), f='perceptron_oneVall.pth')