In [13]:
from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from helper_functions import accuracy_fn
from safetensors.torch import save_model

In [14]:
# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

### Data Processing

In [15]:
# Preprocessing pipeline applied to every image: a single step that converts
# the image to a tensor.
transform_steps = [transforms.ToTensor()]
data_transforms = transforms.Compose(transform_steps)

In [16]:
# Arguments shared by both MNIST splits: storage directory, download-if-missing,
# and the preprocessing pipeline defined above.
_mnist_kwargs = dict(root="data", download=True, transform=data_transforms)

# train=True selects the training split, train=False the held-out test split.
train_data = datasets.MNIST(train=True, **_mnist_kwargs)
test_data = datasets.MNIST(train=False, **_mnist_kwargs)

In [17]:
batch_size = 16

# Wrap both MNIST splits in DataLoaders that yield mini-batches of `batch_size`.
# Only the training loader is shuffled: reshuffling each epoch helps SGD, whereas
# the evaluation metrics are averages over the whole test set and do not depend
# on sample order, so the test loader keeps a fixed, reproducible order.
train_dl = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [18]:
# Show the label names of the dataset together with how many classes there are.
class_names = train_data.classes
class_names, len(class_names)

(['0 - zero',
  '1 - one',
  '2 - two',
  '3 - three',
  '4 - four',
  '5 - five',
  '6 - six',
  '7 - seven',
  '8 - eight',
  '9 - nine'],
 10)

In [19]:
# Pull one batch from the training loader and inspect the tensor shapes
# of the images and of their labels.
first_batch = next(iter(train_dl))
feature, label = first_batch
feature.shape, label.shape

(torch.Size([16, 1, 28, 28]), torch.Size([16]))

### Create Model

In [20]:
class TinyVGG(nn.Module):
    """Small VGG-style CNN for single-channel 28x28 images such as MNIST digits.

    Architecture: two convolutional blocks (Conv-ReLU-Conv-ReLU-MaxPool-Dropout,
    10 channels each) followed by a flatten + linear classifier.

    Args:
        num_classes: Number of output logits, one per target class. Defaults to
            10 (the MNIST digits). The previous implementation produced
            ``len(train_data.classes) + 1`` = 11 logits, i.e. one output that
            never corresponds to any label, and it silently depended on the
            notebook-global ``train_data``.

    Input:  tensor of shape (N, 1, 28, 28).
    Output: raw logits of shape (N, num_classes), suitable for
            ``nn.CrossEntropyLoss``.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()

        # Block 1: 1 -> 10 channels; padding=1 keeps 28x28, MaxPool2d(2) halves it to 14x14.
        self.block_1 = nn.Sequential(
            nn.Conv2d(1, 10, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(10, 10, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(p=0.2)
        )

        # Block 2: 10 -> 10 channels; MaxPool2d(2) halves 14x14 to 7x7.
        self.block_2 = nn.Sequential(
            nn.Conv2d(10, 10, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(10, 10, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(p=0.2)
        )

        # 10 channels * 7 * 7: the 28x28 input has been max-pooled by 2 twice.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(10 * 7 * 7, num_classes)
        )

    def forward(self, x):
        """Run a batch of images through both conv blocks and the classifier.

        Args:
            x: Image batch of shape (N, 1, 28, 28).

        Returns:
            Logits of shape (N, num_classes).
        """
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x

# Build the network, then move its parameters onto the selected device
# (nn.Module.to moves the module in place and returns the same object).
model = TinyVGG()
model = model.to(device)

In [21]:
# Plain SGD over all model parameters with a fixed learning rate.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Multi-class classification on raw logits -> cross-entropy loss.
loss_fn = nn.CrossEntropyLoss()

### Train Model

In [22]:
# Seed the global RNG before training. NOTE(review): the model weights were
# already initialised in an earlier cell, so this seed only affects randomness
# consumed from here on (e.g. dropout, loader shuffling).
torch.manual_seed(20)

epochs = 5

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n------")

    # TRAINING
    train_loss, train_acc = 0.0, 0.0
    model.train()  # enable dropout
    for X, y in train_dl:

        X, y = X.to(device), y.to(device)

        # forward pass
        train_pred = model(X)

        # metrics
        loss = loss_fn(train_pred, y)
        # Accumulate a plain Python float. Adding the loss tensor itself would
        # keep every batch's autograd history referenced for the whole epoch.
        train_loss += loss.item()
        # accuracy_fn is an external helper; presumably it returns a percentage
        # per batch (the printout below appends '%') - verify in helper_functions.
        train_acc += accuracy_fn(y_true=y, y_pred=train_pred.argmax(dim=1))

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # print metrics (averaged over the number of batches)
    train_loss /= len(train_dl)
    train_acc /= len(train_dl)
    print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.2f}%")

    # TESTING
    test_loss, test_acc = 0.0, 0.0
    model.eval()  # disable dropout
    with torch.inference_mode():
        for X, y in test_dl:

            X, y = X.to(device), y.to(device)

            # forward pass
            test_pred = model(X)

            # metrics
            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

    # print metrics (averaged over the number of batches)
    test_loss /= len(test_dl)
    test_acc /= len(test_dl)
    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 0
------
Train Loss: 0.8058 | Train Accuracy: 71.27%


 20%|██        | 1/5 [00:22<01:31, 22.84s/it]

Test Loss: 0.1132 | Test Accuracy: 96.51%
Epoch: 1
------
Train Loss: 0.1577 | Train Accuracy: 94.96%


 40%|████      | 2/5 [00:45<01:07, 22.48s/it]

Test Loss: 0.0733 | Test Accuracy: 97.64%
Epoch: 2
------
Train Loss: 0.1217 | Train Accuracy: 96.21%


 60%|██████    | 3/5 [01:07<00:44, 22.39s/it]

Test Loss: 0.0599 | Test Accuracy: 98.05%
Epoch: 3
------
Train Loss: 0.1048 | Train Accuracy: 96.72%


 80%|████████  | 4/5 [01:29<00:22, 22.24s/it]

Test Loss: 0.0571 | Test Accuracy: 98.32%
Epoch: 4
------
Train Loss: 0.0940 | Train Accuracy: 97.03%


100%|██████████| 5/5 [01:51<00:00, 22.33s/it]

Test Loss: 0.0562 | Test Accuracy: 98.29%





### Save Model

In [23]:
# Inspect the learned parameters that will be written to disk.
trained_weights = model.state_dict()
trained_weights

OrderedDict([('block_1.0.weight',
              tensor([[[[ 0.3005, -0.1971,  0.2649],
                        [-0.1328, -0.2262, -0.3223],
                        [-0.3301, -0.0180,  0.0381]]],
              
              
                      [[[ 0.2602,  0.6841,  0.4374],
                        [ 0.0057,  1.0231,  0.5137],
                        [ 0.1119,  0.5229,  0.0846]]],
              
              
                      [[[ 0.2059,  0.4934,  0.4294],
                        [ 0.2622, -0.0830,  0.2392],
                        [-0.1487, -0.2794, -0.3746]]],
              
              
                      [[[ 0.3304, -0.0987,  0.2722],
                        [ 0.1458, -0.2252,  0.0206],
                        [-0.3316, -0.0030,  0.2456]]],
              
              
                      [[[-0.0084, -0.3086,  0.1821],
                        [ 0.0251,  0.0338,  0.0438],
                        [-0.2829, -0.2846, -0.3113]]],
              
              
           

In [24]:
# Persist the trained model's weights in safetensors format.
checkpoint_path = "tinyvgg.safetensors"
save_model(model, checkpoint_path)