In [14]:
import sys
import torch
from torch import nn
from torchinfo import summary
from torchvision import transforms, datasets
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader, random_split

import os
from pathlib import Path
from tqdm.auto import tqdm
from timeit import default_timer as timer

In [15]:
BATCH_SIZE = 32
NUM_WORKERS = os.cpu_count()
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [16]:
# Resizing to the 224x224 standard and using normalization 

simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_transform_trivial_augment = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])

In [17]:
venv_dir = Path(sys.prefix)
project_root = venv_dir.parent
image_path = project_root/"data/food-101-pizzasteaksushi/images"

dataset = datasets.ImageFolder(
    root=image_path,
    transform=train_transform_trivial_augment,
)

train_size = int(0.8*len(dataset))
train_dataset, test_dataset = random_split(dataset=dataset, lengths=[train_size, len(dataset)-train_size])
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)

In [18]:
def train_step(model: torch.nn.Module, dataloader: torch.utils.data.DataLoader, loss_fn: torch.nn.Module, optimizer: torch.optim.Optimizer):
    model.train()
    train_loss, train_acc = 0, 0

    for (X, y) in iter(dataloader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        y_pred = model(X)

        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    train_acc /= len(dataloader)
    train_loss /= len(dataloader)
    return train_loss, train_acc

def test_step(model: torch.nn.Module, dataloader: torch.utils.data.DataLoader, loss_fn: torch.nn.Module):
    model.eval()
    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for (X, y) in iter(dataloader):
            X, y = X.to(DEVICE), y.to(DEVICE)

            y_pred = model(X)

            loss = loss_fn(y_pred, y)
            test_loss += loss.item()

            y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            test_acc += (y_pred_class == y).sum().item()/len(y_pred)

    test_acc /= len(dataloader)
    test_loss /= len(dataloader)
    return test_loss, test_acc

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, test_dataloader: torch.utils.data.DataLoader, 
          loss_fn: torch.nn.Module, optimizer: torch.optim.Optimizer, scheduler=None,
          epochs=5):
    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": [],
    }

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer)
        test_loss, test_acc = test_step(model, test_dataloader, loss_fn)
        if scheduler is not None:
            scheduler.step()

        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results

In [19]:
model = resnet18(weights=ResNet18_Weights.DEFAULT)

# resnet18 is pretrained on 1000 classes, we will modify is final fully connected layer for 3 classes
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 3)

model = model.to(DEVICE)
print(summary(model))

# Freezing base layers parameters, to only train the final layer
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

NUM_EPOCHS = 20
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

start_time = timer()
model_results = train(model, train_dataloader, test_dataloader, loss_fn, optimizer, epochs=NUM_EPOCHS)
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─BasicBlock: 2-1                   --
│    │    └─Conv2d: 3-1                  36,864
│    │    └─BatchNorm2d: 3-2             128
│    │    └─ReLU: 3-3                    --
│    │    └─Conv2d: 3-4                  36,864
│    │    └─BatchNorm2d: 3-5             128
│    └─BasicBlock: 2-2                   --
│    │    └─Conv2d: 3-6                  36,864
│    │    └─BatchNorm2d: 3-7             128
│    │    └─ReLU: 3-8                    --
│    │    └─Conv2d: 3-9                  36,864
│    │    └─BatchNorm2d: 3-10            128
├─Sequential: 1-6                        --
│    └─BasicBlock: 2-3                   --
│    │    └─Conv2d: 3-11                 73,728

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0576 | train_acc: 0.4529 | test_loss: 0.9670 | test_acc: 0.5367
Epoch: 2 | train_loss: 0.8848 | train_acc: 0.6450 | test_loss: 0.8234 | test_acc: 0.6908
Epoch: 3 | train_loss: 0.7674 | train_acc: 0.7462 | test_loss: 0.7264 | test_acc: 0.7555
Epoch: 4 | train_loss: 0.6922 | train_acc: 0.7804 | test_loss: 0.6635 | test_acc: 0.7966
Epoch: 5 | train_loss: 0.6376 | train_acc: 0.8042 | test_loss: 0.6064 | test_acc: 0.8070
Epoch: 6 | train_loss: 0.5971 | train_acc: 0.8200 | test_loss: 0.5690 | test_acc: 0.8328
Epoch: 7 | train_loss: 0.5541 | train_acc: 0.8333 | test_loss: 0.5639 | test_acc: 0.8147
Epoch: 8 | train_loss: 0.5383 | train_acc: 0.8446 | test_loss: 0.5384 | test_acc: 0.8235
Epoch: 9 | train_loss: 0.5167 | train_acc: 0.8363 | test_loss: 0.5187 | test_acc: 0.8224
Epoch: 10 | train_loss: 0.5088 | train_acc: 0.8462 | test_loss: 0.4913 | test_acc: 0.8558
Epoch: 11 | train_loss: 0.4871 | train_acc: 0.8433 | test_loss: 0.4723 | test_acc: 0.8657
Epoch: 12 | train_l

Model achieves 85% accuracy, I will now do some fine-tuning

In [20]:
# Unfreezing the last ResNet block, lowering learning rate and giving the optimizer only the unfrozen parameters with separate learning rates

for name, param in model.named_parameters():
    if "layer4" in name or "fc" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

optimizer = torch.optim.SGD([
    {'params': model.layer4.parameters(), 'lr': 0.0001},
    {'params': model.fc.parameters(), 'lr': 0.001}
], momentum=0.9)


# Improving data augmentation
better_train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

venv_dir = Path(sys.prefix)
project_root = venv_dir.parent
image_path = project_root/"data/food-101-pizzasteaksushi/images"

dataset = datasets.ImageFolder(
    root=image_path,
    transform=better_train_transform,
)

train_size = int(0.8*len(dataset))
train_dataset, test_dataset = random_split(dataset=dataset, lengths=[train_size, len(dataset)-train_size])
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)


# Adding learning rate scheduling and using it in training loop
# Gives smoother scheduling compared to StepLR
scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCHS)

NUM_EPOCHS = 20

start_time = timer()
model_results = train(model, train_dataloader, test_dataloader, loss_fn, optimizer, scheduler=scheduler, epochs=NUM_EPOCHS)
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.4634 | train_acc: 0.8192 | test_loss: 0.4122 | test_acc: 0.8520
Epoch: 2 | train_loss: 0.4248 | train_acc: 0.8425 | test_loss: 0.3813 | test_acc: 0.8640
Epoch: 3 | train_loss: 0.3908 | train_acc: 0.8446 | test_loss: 0.3939 | test_acc: 0.8492
Epoch: 4 | train_loss: 0.3686 | train_acc: 0.8583 | test_loss: 0.3618 | test_acc: 0.8536
Epoch: 5 | train_loss: 0.3769 | train_acc: 0.8554 | test_loss: 0.4615 | test_acc: 0.8213
Epoch: 6 | train_loss: 0.3587 | train_acc: 0.8625 | test_loss: 0.3749 | test_acc: 0.8487
Epoch: 7 | train_loss: 0.3293 | train_acc: 0.8767 | test_loss: 0.3279 | test_acc: 0.8586
Epoch: 8 | train_loss: 0.3237 | train_acc: 0.8796 | test_loss: 0.3443 | test_acc: 0.8602
Epoch: 9 | train_loss: 0.3373 | train_acc: 0.8717 | test_loss: 0.3524 | test_acc: 0.8635
Epoch: 10 | train_loss: 0.3171 | train_acc: 0.8838 | test_loss: 0.3156 | test_acc: 0.8695
Epoch: 11 | train_loss: 0.3034 | train_acc: 0.8733 | test_loss: 0.3232 | test_acc: 0.8668
Epoch: 12 | train_l

In [22]:
venv_dir = Path(sys.prefix)
project_root = venv_dir.parent
models_path = project_root/"trained_models"

torch.save(model.state_dict(), models_path/"resnet18_pizzasushisteak.pth")