In [1]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [2]:
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=None
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=None
)

In [3]:
class_names = train_data.classes
class_names

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [4]:
train_data

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [5]:
from torch.utils.data import DataLoader

BATCH_SIZE=32

train_dataloader=DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True
)

test_dataloader=DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False
)
len(train_dataloader) , len(test_dataloader)

(1875, 313)

In [6]:
train_features_batch , train_labels_batch = next(iter(train_dataloader))

In [7]:
class FashionMNISTModelV0(nn.Module):
    def __init__(self,input_shape:int,hidden_units:int,output_shape:int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape,out_features=hidden_units),
            nn.Linear(in_features=hidden_units,out_features=output_shape)
        )

    def forward(self,x):
        return self.layer_stack(x)

In [8]:
torch.manual_seed(67)
model_0 = FashionMNISTModelV0(input_shape=784,hidden_units=10,output_shape=len(class_names)).to("cpu")

model_0

FashionMNISTModelV0(
  (layer_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
  )
)

In [9]:
dummy = torch.rand([1,1,28,28])
model_0(dummy)

tensor([[-0.3895, -0.2405, -0.0236, -0.0275, -0.0554, -0.5227,  0.4288,  0.3914,
         -0.1494,  0.2538]], grad_fn=<AddmmBackward0>)

In [10]:
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(),lr=0.1)

In [18]:
epochs = 2

for epoch in range(epochs):
    train_loss = 0
    for batch , (X,y) in enumerate(train_dataloader):
        model_0.train()
        y_pred = model_0(X)
        loss = loss_fn(y_pred,y)
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= len(train_dataloader)

    test_loss , test_acc = 0 , 0
    model_0.eval()
    with torch.inference_mode():
        for X_test,y_test in test_dataloader:
            test_pred = model_0(X_test)
            test_loss += loss_fn(test_pred,y_test)
            test_acc += accuracy_fn(y_true=y_test,y_pred=test_pred.argmax(dim=1))
        test_loss /= len(test_data)
        test_acc /= len(test_dataloader)

    print(f"{train_loss} , {test_loss} , {test_acc}")

0.45463812351226807 , 0.014714744873344898 , 83.6261980830671
0.44458329677581787 , 0.014390682801604271 , 83.89576677316293


In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [24]:
class FashionMNISTModelV1(nn.Module):
    def __init__(self,input_shape:int,hidden_units:int,output_shape:int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape,out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units,out_features=output_shape),
            nn.ReLU()
        )

    def forward(self,x: torch.Tensor):
        return self.layer_stack(x)

In [25]:
model_1 = FashionMNISTModelV1(input_shape=784,hidden_units=10,output_shape=len(class_names)).to(device)
next(model_1.parameters()).device

device(type='cuda', index=0)

In [28]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(),lr=0.1)

In [None]:
epochs = 5

for epoch in range(epochs):
    train_loss = 0
    for batch , (X,y) in enumerate(train_dataloader):
        model_1.train()
        X , y = X.to(device) , y.to(device)
        y_pred = model_1(X)
        loss = loss_fn(y_pred,y)
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= len(train_dataloader)
    test_loss , test_acc = 0 , 0
    model_1.eval()
    with torch.inference_mode():
        for X_test , y_test in test_dataloader:
            X_test , y_test = X_test.to(device) , y_test.to(device)
            test_pred = model_1(X_test)
            test_loss += loss_fn(test_pred,y_test)
            test_acc += accuracy_fn(y_true=y_test,y_pred=test_pred.argmax(dim=1))
        test_loss /= len(test_data)
        test_acc /= len(test_dataloader)

    print(f"{train_loss} || {test_loss} || {test_acc}")

0.882680356502533 || 0.02306642010807991 || 73.19289137380191
0.6994467973709106 || 0.021781934425234795 || 74.46086261980831
0.6722455620765686 || 0.021867996081709862 || 74.37100638977635
0.6542568206787109 || 0.02315579727292061 || 72.01477635782747
