# PyTorch main structure

## Imports and definitions

### Imports

In [1]:
from typing import Tuple

import torch
import torchvision
from torch import nn

### Classes

In [2]:
class MyModel(nn.Module):
    """Fully connected classifier: flatten, then three linear layers with ReLU in between.

    With the default arguments the network is the 784 -> 512 -> 512 -> 10
    stack, so ``MyModel()`` keeps producing the same architecture and the
    same ``state_dict`` keys as before.

    Args:
        in_features: Number of values per sample after flattening
            (defaults to ``28 * 28``).
        hidden_features: Width of both hidden layers.
        num_classes: Number of output scores (logits) per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10) -> None:
        super().__init__()
        # The order of the modules is kept fixed: the index of each layer
        # inside the Sequential is part of the state_dict key names.
        self.linear_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes)
        )

    def forward(self, x):
        """Return the raw class scores for the batch ``x``.

        No softmax is applied here; the output of the last linear layer is
        returned as is.
        """
        results = self.linear_stack(x)
        return results

### Functions

In [3]:
def train(model: nn.Module, data_loader: torch.utils.data.DataLoader, loss_fn: nn.Module, optimizer: torch.optim.Optimizer, device:str='cuda') -> None:
    """Run one pass over ``data_loader`` and update ``model`` in place.

    Args:
        model: Network to optimize; it is switched to training mode.
        data_loader: Iterable with a length that yields ``(images, labels)``
            batches.
        loss_fn: Callable taking ``(prediction, labels)`` and returning a
            scalar loss tensor.
        optimizer: Optimizer that owns the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.
            The model itself is not moved here.

    The loss of the first batch and of every 500th batch after it is printed.
    """
    number_of_batches = len(data_loader)
    model.train()

    # Gradients accumulate across backward() calls. Clear anything that is
    # still attached to the parameters (e.g. from an interrupted earlier run)
    # so the first update of this pass only uses the first batch.
    optimizer.zero_grad()

    for i, (images, labels) in enumerate(data_loader):
        images = images.to(device)
        labels = labels.to(device)

        prediction = model(images)
        loss = loss_fn(prediction, labels)

        loss.backward()
        optimizer.step()
        # Reset right after the step so no gradients are kept between batches
        # or after the function returns.
        optimizer.zero_grad()

        if i % 500 == 0:
            print(f'{i+1}/{number_of_batches}, loss = {loss.item():>4f}')

In [4]:
def test(model: nn.Module, data_loader: torch.utils.data.DataLoader, loss_fn: nn.Module, device:str='cuda') -> (torch.float, torch.float):
    data_size = len(data_loader.dataset)
    number_of_batches = len(data_loader)

    model.eval()

    loss = 0
    correct = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            prediction = model(images)
            loss += loss_fn(prediction, labels).item()
            correct += (prediction.argmax(1) ==
                        labels).type(torch.float).sum().item()

        loss /= number_of_batches
        correct /= data_size

    return correct, loss

## Run

### Define the device that we are going to use

In [5]:
# Pick the compute device: CUDA first, then Apple's MPS backend, and the CPU
# when neither is available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print(f'device: {device}')

device: cuda


### Load a dataset

In [6]:
# MNIST training split, stored under ./data (downloaded when requested via
# download=True); every image is converted to a tensor.
train_data = torchvision.datasets.MNIST(
    root='data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Same settings for the test split (train=False).
test_data = torchvision.datasets.MNIST(
    root='data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

In [7]:
batch_size = 64

# Reshuffle the training samples at the start of every epoch so that SGD does
# not see the exact same batches in the exact same order each time.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True)

# The test set is only used for evaluation, so it is read sequentially.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size)

### Create a model

In [8]:
# Build the network and move its parameters to the selected device;
# nn.Module.to returns the module itself, so the calls can be chained.
my_model = MyModel().to(device)

### Define loss function and optimizer

In [9]:
# Cross-entropy on the raw model outputs, optimized with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=my_model.parameters(), lr=0.001)

### Train the model and print test results

In [10]:
# Five epochs: one training pass followed by an evaluation on the test set.
for epoch in range(5):
    print(f'in epoch: {epoch}')
    train(
        model=my_model,
        data_loader=train_data_loader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
    )
    accuracy, loss = test(
        model=my_model,
        data_loader=test_data_loader,
        loss_fn=loss_fn,
        device=device,
    )
    print(f'accuracy: {accuracy:.2f}, loss: {loss:.2f}')

in epoch: 0
1/938, loss = 2.309347
501/938, loss = 2.277762
accuracy: 0.37, loss: 2.26
in epoch: 1
1/938, loss = 2.258945
501/938, loss = 2.226407
accuracy: 0.57, loss: 2.19
in epoch: 2
1/938, loss = 2.188202
501/938, loss = 2.144634
accuracy: 0.66, loss: 2.08
in epoch: 3
1/938, loss = 2.071887
501/938, loss = 2.002120
accuracy: 0.71, loss: 1.89
in epoch: 4
1/938, loss = 1.880979
501/938, loss = 1.761243
accuracy: 0.73, loss: 1.60


### Save the model

In [11]:
# Persist only the parameters (state_dict), not the pickled module object.
torch.save(obj=my_model.state_dict(), f="my_model.pth")

### Load the model

In [12]:
# Create a fresh model with the same architecture, then copy the saved
# parameters into it.
loaded_model = MyModel().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a CUDA machine can still be loaded where CUDA is
# not available.
loaded_model.load_state_dict(torch.load('my_model.pth', map_location=device))

<All keys matched successfully>