In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time

In [35]:
# Seed PyTorch's global RNG so that weight initialisation and the shuffled
# training-batch order are repeatable after Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED)

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# MNIST train/test splits, stored under the "data" directory (download=True
# asks torchvision to fetch them). Each image goes through ToTensor().
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
batch_size = 2
epochs = 3

# Create data loaders.
# The training loader reshuffles the samples every epoch so SGD does not see
# the same batch sequence each time; the test loader keeps dataset order.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Sanity check: print the shape/dtype of the first test batch only.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

# Layer widths of the fully connected network (784 = 28 * 28 input pixels).
LAYER_ONE_SIZE = 784
LAYER_TWO_SIZE = 200
LAYER_THREE_SIZE = 80
LAYER_FOUR_SIZE = 10



Using cpu device
Shape of X [N, C, H, W]: torch.Size([2, 1, 28, 28])
Shape of y: torch.Size([2]) torch.int64


In [36]:
# Define model
class NeuralNetwork1(nn.Module):
    """Three-layer fully connected classifier with sigmoid activations.

    The input is flattened and passed through three bias-free linear layers
    (28*28 -> LAYER_TWO_SIZE -> LAYER_THREE_SIZE -> LAYER_FOUR_SIZE) with a
    sigmoid after each of the first two. The forward pass returns the output
    of the last linear layer unchanged (raw logits).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Layers are created in network order so parameter initialisation
        # consumes the RNG in the same sequence as the layers appear.
        first_hidden = nn.Linear(28*28, LAYER_TWO_SIZE, bias=False)
        second_hidden = nn.Linear(LAYER_TWO_SIZE, LAYER_THREE_SIZE, bias=False)
        output_layer = nn.Linear(LAYER_THREE_SIZE, LAYER_FOUR_SIZE, bias=False)

        # NOTE: the attribute is called "linear_relu_stack" although the
        # activations are sigmoids; the name is kept because it appears in
        # the module repr and in the parameter names.
        self.linear_relu_stack = nn.Sequential(
            first_hidden,
            nn.Sigmoid(),
            second_hidden,
            nn.Sigmoid(),
            output_layer,
        )

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the layer stack."""
        return self.linear_relu_stack(self.flatten(x))
        
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``.dataset``.
        model: module to optimise; switched to training mode first.
        loss_fn: callable taking ``(predictions, targets)`` and returning a loss.
        optimizer: optimizer bound to ``model``'s parameters.

    Batches are moved to the module-level ``device``. The loss and the number
    of samples processed so far are printed every 100 batches.
    """
    total_samples = len(dataloader.dataset)
    model.train()
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass, parameter update, then clear gradients so they do
        # not accumulate into the next batch.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Only report on every 100th batch.
        if batch_idx % 100 != 0:
            continue
        loss_value = batch_loss.item()
        seen = (batch_idx + 1) * len(inputs)
        print(f"loss: {loss_value:>7f}  [{seen:>5d}/{total_samples:>5d}]")

def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and return the accuracy in percent.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``.dataset``.
        model: module to evaluate; switched to eval mode first.
        loss_fn: callable taking ``(predictions, targets)`` and returning a loss.

    Returns:
        Percentage of samples whose arg-max prediction equals the label.

    Prints the accuracy and the loss averaged over the number of batches.
    Batches are moved to the module-level ``device``.
    """
    sample_count = len(dataloader.dataset)
    batch_count = len(dataloader)
    model.eval()

    loss_sum = 0
    hits = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()
            matches = logits.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    mean_loss = loss_sum / batch_count
    accuracy = hits / sample_count
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {mean_loss:>8f} \n")
    return (100*accuracy)

# Shared loss: cross-entropy between the model's logits and the class labels.
loss_fn = nn.CrossEntropyLoss()

# Model 1 on the selected device, optimised with SGD (learning rate 0.01).
model1 = NeuralNetwork1().to(device)
optimizer1 = torch.optim.SGD(model1.parameters(), lr=0.01)
print(model1)

# A second model/optimizer pair is currently disabled:
# model2 = NeuralNetwork2().to(device)
# optimizer2 = torch.optim.SGD(model2.parameters(), lr=1e-3)

NeuralNetwork1(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=200, bias=False)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=80, bias=False)
    (3): Sigmoid()
    (4): Linear(in_features=80, out_features=10, bias=False)
  )
)


In [38]:
# Train model 1 for `epochs` passes over the training set, evaluating on the
# test set after each pass, and report the total elapsed time in seconds.
epochs = 2
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; unlike time.time() it is not affected by system clock changes.
start1 = time.perf_counter()
for t in range(epochs):
    print(f"Model 1 Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model1, loss_fn, optimizer1)
    test(test_dataloader, model1, loss_fn)
end1 = time.perf_counter()
print("Done!", end1-start1)

Model 1 Epoch 1
-------------------------------
loss: 0.190320  [    2/60000]
loss: 0.011296  [  202/60000]
loss: 0.013661  [  402/60000]
loss: 0.098771  [  602/60000]
loss: 0.482596  [  802/60000]
loss: 0.019149  [ 1002/60000]
loss: 0.801377  [ 1202/60000]
loss: 0.039846  [ 1402/60000]
loss: 0.127609  [ 1602/60000]
loss: 0.005249  [ 1802/60000]
loss: 0.806561  [ 2002/60000]
loss: 0.029430  [ 2202/60000]
loss: 0.006587  [ 2402/60000]
loss: 0.562397  [ 2602/60000]
loss: 0.424573  [ 2802/60000]
loss: 0.088213  [ 3002/60000]
loss: 0.031906  [ 3202/60000]
loss: 0.015859  [ 3402/60000]
loss: 0.117567  [ 3602/60000]
loss: 0.016858  [ 3802/60000]
loss: 0.008090  [ 4002/60000]
loss: 0.148585  [ 4202/60000]
loss: 0.006214  [ 4402/60000]
loss: 0.345707  [ 4602/60000]
loss: 1.357677  [ 4802/60000]
loss: 0.143221  [ 5002/60000]
loss: 0.182180  [ 5202/60000]
loss: 0.008501  [ 5402/60000]
loss: 0.183328  [ 5602/60000]
loss: 1.003575  [ 5802/60000]
loss: 0.001594  [ 6002/60000]
loss: 0.022815  [ 6202