In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms as T

In [2]:
device = torch.device("mps" if torch.mps.is_available() else "cpu")
print(device)

mps


In [3]:
transform = T.Compose(
    [
        T.ToTensor(),
        T.RandomHorizontalFlip(),
        T.Normalize((0.5,), (0.5,))
    ]
)

In [4]:
trainset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
testset = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

In [5]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True, pin_memory=False if device == "mps" else True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=8, shuffle=True, pin_memory=False if device == "mps" else True, num_workers=4)

In [6]:
class NNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 2, 1, 1)
        self.conv2 = nn.Conv2d(32, 64, 2, 1, 1)

        self.pool = nn.AvgPool2d(2)

        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
model = NNet()
model.to(device) 

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

In [None]:
epochs = 4

for i in range(epochs):
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(inputs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"EPOCH: {i} -- LOSS: {running_loss/len(trainloader)}")



libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x10e1ebc40>
Traceback (most recent call last):
  File "/Users/jacob/Dev/Sandbox/ML/.MLsbx/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/Users/jacob/Dev/Sandbox/ML/.MLsbx/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1628, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/homebrew/Cellar/python@3.13/3.13.7/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/process.py", line 149, in join
    res =

KeyboardInterrupt: 

In [12]:
# Eval
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        out = model(inputs)

        _, pred = torch.max(out, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()

print(f"ACC: {100 * correct / total}%")

ACC: 97.68%


In [13]:
print(model.state_dict)

<bound method Module.state_dict of NNet(
  (conv1): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)>
