In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) #mean, std
])

In [3]:
train_dataset = torchvision.datasets.MNIST(root = './data', train=True, download=True, transform = transform)
test_dataset = torchvision.datasets.MNIST(root = './data', train=False, download=True, transform = transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 12.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 345kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.15MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.61MB/s]


In [4]:
train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [12]:
class MNISTClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten() #1 channel ->grayscale #flatten Linear Layer from [1,28,25]->[784]
        self.layers = nn.Sequential(
            nn.Linear(784, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x= self.flatten(x)
        x= self.layers(x)
        return x

In [13]:
#Check for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device}')

Using cuda


# Initialize Model and move to device

In [16]:
model = MNISTClassifier().to(device)
print(model)

MNISTClassifier(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


# Loss Function and Optimizer

In [21]:
loss_function = nn.CrossEntropyLoss()
print(loss_function)
optimizer = torch.optim.Adam(model.parameters(), lr =0.001)
print(optimizer)

CrossEntropyLoss()
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)


# Training

In [28]:
def train_epoch(model, train_loader, loss_function, optimizer, device, print_every=50):
    model.train()
    running_loss = 0.0
    running_correct = 0
    running_total = 0

    num_batches = len(train_loader)

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        # accumulate stats
        running_loss += loss.item()
        _, predicted = output.max(1)
        running_total += target.size(0)
        running_correct += (predicted == target).sum().item()

        # print every N batches or at the end of the epoch
        if ((batch_idx + 1) % print_every == 0) or ((batch_idx + 1) == num_batches):
            avg_loss = running_loss / ((batch_idx % print_every) + 1)
            accuracy = 100.0 * running_correct / running_total
            processed = (batch_idx + 1) * data.size(0)
            total = len(train_loader.dataset)
            print(f'[{processed}/{total}] Loss: {avg_loss:.3f} | Acc: {accuracy:.1f}%', flush=True)

            # reset window stats
            running_loss = 0.0
            running_correct = 0
            running_total = 0

# Evaluation

In [29]:
def evaluate(model, test_loader, device):
    model.eval()
    correct =0
    total = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total +=targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return 100. *correct/total

# Training Loop

In [30]:
num_epochs = 10 
for epoch in range(num_epochs):
    print(f'\nEpoch:{epoch+1}')
    train_epoch(model, train_loader, loss_function, optimizer, device)
    accuracy = evaluate(model, test_loader, device)
    print(f'Test Accuracy:{accuracy:.2f}%')


Epoch:1
[3200/60000] Loss: 0.014 | Acc: 99.6%
[6400/60000] Loss: 0.013 | Acc: 99.7%
[9600/60000] Loss: 0.013 | Acc: 99.5%
[12800/60000] Loss: 0.012 | Acc: 99.6%
[16000/60000] Loss: 0.015 | Acc: 99.4%
[19200/60000] Loss: 0.014 | Acc: 99.5%
[22400/60000] Loss: 0.009 | Acc: 99.7%
[25600/60000] Loss: 0.011 | Acc: 99.6%
[28800/60000] Loss: 0.011 | Acc: 99.6%
[32000/60000] Loss: 0.013 | Acc: 99.5%
[35200/60000] Loss: 0.016 | Acc: 99.4%
[38400/60000] Loss: 0.014 | Acc: 99.5%
[41600/60000] Loss: 0.022 | Acc: 99.1%
[44800/60000] Loss: 0.023 | Acc: 99.1%
[48000/60000] Loss: 0.023 | Acc: 99.2%
[51200/60000] Loss: 0.031 | Acc: 99.1%
[54400/60000] Loss: 0.025 | Acc: 99.1%
[57600/60000] Loss: 0.020 | Acc: 99.4%
[30016/60000] Loss: 0.018 | Acc: 99.3%
Test Accuracy:97.71%

Epoch:2
[3200/60000] Loss: 0.008 | Acc: 99.8%
[6400/60000] Loss: 0.009 | Acc: 99.7%
[9600/60000] Loss: 0.009 | Acc: 99.8%
[12800/60000] Loss: 0.006 | Acc: 99.9%
[16000/60000] Loss: 0.006 | Acc: 99.8%
[19200/60000] Loss: 0.008 | Acc