In [29]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import numpy as np
from torchvision.transforms import ToTensor, Lambda
from torch.optim import SGD

# Choose the compute backend name in priority order: CUDA, then MPS, then CPU.
# NOTE(review): `device` is not referenced by any other visible cell, so the
# model and batches currently stay wherever PyTorch creates them by default.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [30]:
class MyNN(nn.Module):
    """Fully connected classifier with layer widths 784 -> 512 -> 512 -> 120 -> 10.

    The input is flattened with ``nn.Flatten`` and passed through four linear
    layers, with a ReLU after every layer except the last. The output is the
    raw 10-way score vector; no softmax is applied inside the module.

    NOTE(review): 784 input features presumably correspond to flattened
    28x28 single-channel images -- confirm against the dataset in use.
    """

    def __init__(self) -> None:
        super().__init__()
        self.flatten = nn.Flatten()

        # Consecutive pairs of widths define the (in_features, out_features)
        # of each linear layer, created in input-to-output order.
        widths = (784, 512, 512, 120, 10)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # the output layer is not followed by an activation
        self.stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the scores produced by the layer stack."""
        flat = self.flatten(x)
        return self.stack(flat)


# Module-level instance used by the optimizer and the training loop below.
model = MyNN()

In [31]:

# Fashion-MNIST train and test splits, stored under ./data and downloaded on
# first use. ToTensor() converts each image to a tensor. No target_transform is
# applied: nn.CrossEntropyLoss accepts integer class indices as targets, so a
# one-hot encoding of the labels is not needed.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
# Evaluation gains nothing from shuffling. A fixed order also keeps the
# reported loss reproducible: test() averages per-batch mean losses and the
# final batch is smaller, so batch composition affects the printed value.
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

In [32]:
# Training configuration.
epochs = 10           # number of train + evaluate passes run by the loop below
learning_rate = 0.01  # step size handed to SGD
# NOTE(review): batch_size is not read by any visible cell; the DataLoaders
# are constructed with a literal 64.
batch_size = 64

# Objective and optimizer over all parameters of the module-level `model`.
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=learning_rate)

In [33]:
def train(dl: DataLoader, model: nn.Module, loss, opt: torch.optim.Optimizer) -> float:
    """Run one optimisation pass over every batch yielded by ``dl``.

    Args:
        dl: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to optimise; it is switched to training mode.
        loss: Callable ``loss(predictions, targets)`` returning a scalar tensor.
        opt: Optimizer holding ``model``'s parameters.

    Returns:
        The mean of the per-batch loss values, or ``nan`` when ``dl`` yields
        no batches.
    """
    model.train()

    # Feed batches on whatever device the model's parameters live on, so the
    # function keeps working after the model is moved to an accelerator.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    total_loss = 0.0
    n_batches = 0
    for x, y in dl:
        if model_device is not None:
            x, y = x.to(model_device), y.to(model_device)

        batch_loss = loss(model(x), y)

        batch_loss.backward()
        opt.step()
        # Clear gradients after the step so the next batch starts clean.
        opt.zero_grad()

        total_loss += batch_loss.item()
        n_batches += 1

    return total_loss / n_batches if n_batches else float("nan")

In [34]:
def test(dl: DataLoader, model: MyNN, loss):
    model.eval()
    sz = len(dl.dataset)
    b = len(dl)
    # print(b)
    c, tl = 0, 0
    
    with torch.no_grad():
        for x, y in dl:
            p = model(x)
            tl += loss(p, y).item()
            # print(p.argmax(1))
            c += (p.argmax(1) == y).type(torch.float).sum().item()
            
    tl /= b
    c /= sz
    
    print(f"Loss: {tl}")
    print(f"Acc: {c}")
            

In [35]:
# One epoch = a full training pass followed by an evaluation pass on the
# test loader; test() prints the loss and accuracy for that epoch.
for epoch in range(epochs):
    print(f"Epoch {epoch}.")
    train(dl=train_dataloader, model=model, loss=loss_fn, opt=optimizer)
    test(dl=test_dataloader, model=model, loss=loss_fn)

# Persist the trained network to first.pt.
# NOTE(review): this pickles the whole module object, not just its weights, so
# loading it later needs the MyNN class to be available; saving
# model.state_dict() is the usual recommendation -- confirm before changing,
# since that alters the checkpoint format.
torch.save(model, 'first.pt')

Epoch 0.
Loss: 0.9932247635665213
Acc: 0.596
Epoch 1.
Loss: 0.7822378406858748
Acc: 0.7322
Epoch 2.
Loss: 0.6506427399292114
Acc: 0.7546
Epoch 3.
Loss: 0.5669387795363262
Acc: 0.8035
Epoch 4.
Loss: 0.5303962393930763
Acc: 0.8111
Epoch 5.
Loss: 0.49942008837772783
Acc: 0.8252
Epoch 6.
Loss: 0.4863054866244079
Acc: 0.8268
Epoch 7.
Loss: 0.47773106348742345
Acc: 0.8278
Epoch 8.
Loss: 0.4931648350824976
Acc: 0.8225
Epoch 9.
Loss: 0.4685398661976407
Acc: 0.8324
