In [1]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch import nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

training_data = datasets.FashionMNIST(
        root="data",
        train=True,
        download=True,
        transform=ToTensor()
        )

test_data = datasets.FashionMNIST(
        root="data",
        train=False,
        download=True,
        transform=ToTensor()
        )
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

100%|██████████| 26.4M/26.4M [00:01<00:00, 16.3MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 1.87MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 12.8MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 18.2MB/s]


In [4]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using mps device


In [6]:
t1 = torch.tensor([1.0, 2.0, 3.0]).to(device)
t2 = torch.tensor([4.0, 5.0, 6.0])
t1 + t2

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, mps:0 and cpu!

In [7]:
class NeuralNetwork(nn.Module):
    def __init__(self, nr_neurons=128):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
                nn.Linear(28 * 28, nr_neurons),
                nn.ReLU(),
                nn.Linear(nr_neurons, nr_neurons),
                nn.SELU(),
                nn.Linear(nr_neurons, 10)
                )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork(nr_neurons=64).to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): SELU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [8]:
X = torch.rand(1, 28, 28, device=device)
print(X.shape)
logits = model(X)
print(logits)
pred_probab = nn.Softmax(dim=1)(logits)
print(pred_probab)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

torch.Size([1, 28, 28])
tensor([[ 0.1647,  0.0572, -0.0734,  0.1695, -0.1053,  0.1051, -0.0323, -0.0561,
          0.2587,  0.0950]], device='mps:0', grad_fn=<LinearBackward0>)
tensor([[0.1105, 0.0992, 0.0871, 0.1110, 0.0843, 0.1041, 0.0907, 0.0886, 0.1214,
         0.1030]], device='mps:0', grad_fn=<SoftmaxBackward0>)
Predicted class: tensor([8], device='mps:0')


In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

In [59]:
X = train_dataloader.dataset[0][0].to(device)
print(X.shape)
y = train_dataloader.dataset[0][1]
print(f"Class: {y}")
y = torch.tensor(y).to(device)
y = y.reshape(1, )
print(y.shape)
model.train()
pred = model(X)
loss = loss_fn(pred, torch.tensor(y).to(device))
print(f"Predicted class: {pred.argmax(1)}")
print(f"Loss: {loss.item()}")
# Backpropagation
loss.backward()
optimizer.step()
optimizer.zero_grad()

torch.Size([1, 28, 28])
Class: 9
torch.Size([1])
Predicted class: tensor([9], device='mps:0')
Loss: 0.01793699711561203


  loss = loss_fn(pred, torch.tensor(y).to(device))


In [60]:
dataloader = train_dataloader
size = len(dataloader.dataset)
# Set the model to training mode - important for batch normalization and dropout layers
# Unnecessary in this situation but added for best practices
model.train()
for batch, (X, y) in enumerate(dataloader):
    # Compute prediction and loss
    pred = model(X.to(device))
    loss = loss_fn(pred, torch.tensor(y).to(device))

    # Backpropagation
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if batch % 100 == 0:
        loss, current = loss.item(), batch * dataloader.batch_size + len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

  loss = loss_fn(pred, torch.tensor(y).to(device))


loss: 3.440949  [   64/60000]
loss: 1.795173  [ 6464/60000]
loss: 1.436284  [12864/60000]
loss: 1.217893  [19264/60000]
loss: 0.978774  [25664/60000]
loss: 1.016475  [32064/60000]
loss: 0.847417  [38464/60000]
loss: 0.769276  [44864/60000]
loss: 0.913644  [51264/60000]
loss: 0.618301  [57664/60000]
