In [32]:
import torch
import torchvision
from torchvision import datasets, transforms as T
import torchvision.models as models

In [33]:
model = models.densenet121(pretrained=True)

In [34]:
!pip install torchsummary



In [36]:
transforms = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [38]:
train_data = torchvision.datasets.CIFAR100(
    root="./data",
    train=True,
    transform=transforms,
    download=True
)
test_data = torchvision.datasets.CIFAR100(
    root="./data",
    train=False,
    transform=transforms,
    download=True
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:08<00:00, 20656348.60it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [39]:
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=True)

In [45]:
from torchinfo import summary

def show_summary():
    summary(model,
            input_size=(64, 3, 224, 224),
            col_names=["input_size", "output_size", "num_params", "trainable"],
            col_width=20,
            row_settings=["var_names"]
           )

In [47]:
model.classifier = torch.nn.Linear(1024, 100)

In [50]:
train = False
for name, param in model.named_parameters():
    if "denseblock4" in name or train:
        param.requires_grad = True
        train = True
    else:
        param.requires_grad = False

In [52]:
summary(model,
        input_size=(64, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
       )

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
DenseNet (DenseNet)                           [64, 3, 224, 224]    [64, 100]            --                   Partial
├─Sequential (features)                       [64, 3, 224, 224]    [64, 1024, 7, 7]     --                   Partial
│    └─Conv2d (conv0)                         [64, 3, 224, 224]    [64, 64, 112, 112]   (9,408)              False
│    └─BatchNorm2d (norm0)                    [64, 64, 112, 112]   [64, 64, 112, 112]   (128)                False
│    └─ReLU (relu0)                           [64, 64, 112, 112]   [64, 64, 112, 112]   --                   --
│    └─MaxPool2d (pool0)                      [64, 64, 112, 112]   [64, 64, 56, 56]     --                   --
│    └─_DenseBlock (denseblock1)              [64, 64, 56, 56]     [64, 256, 56, 56]    --                   False
│    │    └─_DenseLayer (denselayer1)         [64, 64, 56, 56]     [64, 32, 56

In [57]:
import torch

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """
  Trains a PyTorch model for a single epoch.

  Turns a target PyTorch model to training mode and then
  runs through all of the required training steps (forward
  pass, loss calculations, optimizer step).

  Args:
    model: A PyTorch model to be trained.
    dataloader: A DataLoader instance for the model to be trained on.
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    device: A target device to compute on (e.g. "cuda" or "cpu")

  Returns:
    A tuple of training loss and training accuracy metrics.
    In the form (train_loss, train_accuracy). For example:
    (0.1112, 0.8765)
  """
  # Put the model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0, 0

  # Loop through data loader data batches
  for batch, (X, y) in enumerate(dataloader):
    # Send data to target device
    X, y = X.to(device), y.to(device)

    # Forward pass
    y_pred = model(X)

    # Calculate and accumulate loss
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # Optimizer zero grad
    optimizer.zero_grad()

    # Loss backward
    loss.backward()

    # Optimzer Step
    optimizer.step()

    # Calculate and accumulate accuracy metric across all batches
    y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss /= len(dataloader)
  train_acc /= len(dataloader)

  return train_loss, train_acc


def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
  """
  Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    los_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). For example:
    (0.0223, 0.8976)
  """
  # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0, 0

  # Turn on inference context manager
  with torch.inference_mode():
    # Loop through DataLoader batches
    for batch, (X, y) in enumerate(dataloader):
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # Forward pass
      test_pred_logits = model(X)

      # Calculate and accumulate loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss

      # Calculate and accumulate accuracy
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

    # Adjust metrics to get average loss and accuracy per batch
    test_loss /= len(dataloader)
    test_acc /= len(dataloader)
  return test_loss, test_acc


def train(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int,
    device: torch.device
) -> Dict[str, List]:
  """
  Trains and tests a PyTorch model.

  Passes a target PyTorch models through train_step() and test_step()
  functions for a number of epochs, training and testing the model in the
  same epoch loop.

  Calculates, prints and stores evaluation metrics throughout.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader intance for the model to be tested on.
    optimizer: A PyTorch optimzier to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to comput on (e.g. "cuda" or "cpu").

  Returns:
    A dictionary of training and testing loss as well as training and testing
    accuracy metrics. Each metric has a value in a list for each epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]}
    For example if training for epochs=2:
                  {train_loss: [2.0492, 1.2849],
                  train_acc: [0.8029, 0.8723],
                  test_loss: [2.3482, 1.9870],
                  test_acc: [0.7425, 0.8029]}
  """
  # Create empty dictionary
  results = {"train_loss": [],
             "train_acc": [],
             "test_loss": [],
             "test_acc": []}

  # Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model=model,
                                       dataloader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer,
                                       device=device)
    test_loss, test_acc = test_step(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)

    print(
        f"Epoch: {epoch + 1} | "
        f"train_loss: {train_loss:.4f} |"
        f"train_acc: {train_acc:.4f} | "
        f"test_loss: {test_loss:.4f} | "
        f"test_acc: {test_acc:.4f}"
      )

    # Update results dictionary
    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)


  return results

In [54]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

In [55]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [58]:
train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=10,
    device=device
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6022 |train_acc: 0.5715 | test_loss: 1.2449 | test_acc: 0.6390
Epoch: 2 | train_loss: 1.0298 |train_acc: 0.7042 | test_loss: 1.1634 | test_acc: 0.6669
Epoch: 3 | train_loss: 0.8780 |train_acc: 0.7439 | test_loss: 1.1105 | test_acc: 0.6806
Epoch: 4 | train_loss: 0.7845 |train_acc: 0.7724 | test_loss: 1.0986 | test_acc: 0.6843
Epoch: 5 | train_loss: 0.7040 |train_acc: 0.7976 | test_loss: 1.1158 | test_acc: 0.6772
Epoch: 6 | train_loss: 0.6580 |train_acc: 0.8102 | test_loss: 1.1513 | test_acc: 0.6741
Epoch: 7 | train_loss: 0.5981 |train_acc: 0.8274 | test_loss: 1.1347 | test_acc: 0.6796
Epoch: 8 | train_loss: 0.5629 |train_acc: 0.8372 | test_loss: 1.1439 | test_acc: 0.6845
Epoch: 9 | train_loss: 0.5329 |train_acc: 0.8443 | test_loss: 1.1504 | test_acc: 0.6768
Epoch: 10 | train_loss: 0.4930 |train_acc: 0.8582 | test_loss: 1.1500 | test_acc: 0.6830


{'train_loss': [1.6021720137437592,
  1.0298484978468523,
  0.8780472033164081,
  0.7845490561116992,
  0.7040203647387911,
  0.6580161517843262,
  0.598137865667148,
  0.5629408092754881,
  0.5329171627988596,
  0.49302905889423304],
 'train_acc': [0.5715033567774935,
  0.7041839833759591,
  0.7438539002557545,
  0.7723505434782608,
  0.7976102941176472,
  0.8102461636828644,
  0.8274376598465473,
  0.8371683184143223,
  0.8442655051150896,
  0.8581881393861892],
 'test_loss': [tensor(1.2449, device='cuda:0'),
  tensor(1.1634, device='cuda:0'),
  tensor(1.1105, device='cuda:0'),
  tensor(1.0986, device='cuda:0'),
  tensor(1.1158, device='cuda:0'),
  tensor(1.1513, device='cuda:0'),
  tensor(1.1347, device='cuda:0'),
  tensor(1.1439, device='cuda:0'),
  tensor(1.1504, device='cuda:0'),
  tensor(1.1500, device='cuda:0')],
 'test_acc': [0.6390427215189873,
  0.6669303797468354,
  0.6805775316455697,
  0.6843354430379747,
  0.6772151898734177,
  0.6740506329113924,
  0.6795886075949367,
 