In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break


Using cpu device
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [5]:
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 4)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    correct = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    correct /= size
    print(f"Training Error: \n Accuracy: {(100*correct):>0.1f}%")

    return correct
def test(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct

In [46]:
# thank you Claude!!

def DecimalToBinary(num):
    if num == 0:
        return torch.tensor([0, 0, 0, 0])
    elif num == 1:
        return torch.tensor([0, 0, 0, 1])
    elif num == 2:
        return torch.tensor([0, 0, 1, 0])
    elif num == 3:
        return torch.tensor([0, 0, 1, 1])
    elif num == 4:
        return torch.tensor([0, 1, 0, 0])
    elif num == 5:
        return torch.tensor([0, 1, 0, 1])
    elif num == 6:
        return torch.tensor([0, 1, 1, 0])
    elif num == 7:
        return torch.tensor([0, 1, 1, 1])
    elif num == 8:
        return torch.tensor([1, 0, 0, 0])
    elif num == 9:
        return torch.tensor([1, 0, 0, 1])

In [14]:
model(X).shape

torch.Size([64, 10])

In [52]:
DecimalToBinary(9)

tensor([1, 0, 0, 1])

In [15]:
y

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3])

In [6]:
torch.manual_seed(0)
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=10, bias=True)
  )
)


In [7]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [8]:
epochs=10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

Epoch 1
-------------------------------
loss: 2.372502  [   64/60000]
loss: 2.240199  [ 6464/60000]
loss: 2.173671  [12864/60000]
loss: 1.986791  [19264/60000]
loss: 1.956882  [25664/60000]
loss: 1.902747  [32064/60000]
loss: 1.775721  [38464/60000]
loss: 1.827866  [44864/60000]
loss: 1.688143  [51264/60000]
loss: 1.618635  [57664/60000]
Training Error: 
 Accuracy: 54.8%
Test Error: 
 Accuracy: 75.4%, Avg loss: 1.591632 

Epoch 2
-------------------------------
loss: 1.629079  [   64/60000]
loss: 1.506394  [ 6464/60000]
loss: 1.563687  [12864/60000]
loss: 1.365137  [19264/60000]
loss: 1.391124  [25664/60000]
loss: 1.379962  [32064/60000]
loss: 1.282315  [38464/60000]
loss: 1.434766  [44864/60000]
loss: 1.293364  [51264/60000]
loss: 1.246456  [57664/60000]
Training Error: 
 Accuracy: 77.0%
Test Error: 
 Accuracy: 80.1%, Avg loss: 1.216154 

Epoch 3
-------------------------------
loss: 1.267438  [   64/60000]
loss: 1.142514  [ 6464/60000]
loss: 1.227036  [12864/60000]
loss: 1.076298  [1

In [9]:
import seaborn as sns
sns.lineplot((1-train_acc, 1-test_acc))


NameError: name 'train_acc' is not defined