# CNNs for Scene Recognition

In [1]:
import torch
import logging

from recognizer.data import ProjectDataset
from recognizer.utils.visualise import Visualise
from recognizer.model import ResNetFineTunedClassifier
from recognizer.trainer import TorchRunner

## Setup

In [2]:
# Notebook-wide switches read by the cells below.
DEBUG_MODE = False
RESTORE_CHECKPOINT = True

# Dataset locations, expressed relative to the notebook's working directory.
datasets_dir = "../data"
training_dataset = datasets_dir + "/train"
testing_dataset = datasets_dir + "/test"

# Let INFO-level (and more severe) log records through the root logger.
logging.basicConfig(level=logging.INFO)

# Compute device preference: CUDA first, then Apple MPS, otherwise the CPU.
# The conditional expression short-circuits, so the MPS probe only runs when
# CUDA is unavailable.
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [3]:
# Unpack the three objects returned by ProjectDataset.get_loaders() into the
# train / validation / test loaders used by every later cell.
# NOTE(review): get_loaders() is called with no arguments, so datasets_dir,
# training_dataset and testing_dataset from the setup cell are not passed in
# here — confirm it resolves the dataset location on its own.
train_loader, val_loader, test_loader = ProjectDataset.get_loaders()

In [4]:
# Debug-only visual check of the training loader; skipped unless DEBUG_MODE is on.
# presumably renders one example image per class — verify against Visualise.
if DEBUG_MODE:
    Visualise.display_sample_per_class(train_loader)

In [5]:
# Instantiate the classifier with its default constructor arguments.
classifier = ResNetFineTunedClassifier()
# Print a per-layer summary (output shape and parameter count) for a
# (3, 244, 244) input — presumably (channels, height, width).
# NOTE(review): 244 is unusual; ResNet pipelines commonly use 224 — confirm this
# matches the image size produced by the data loaders.
TorchRunner.get_summary(classifier, (3, 244, 244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 122, 122]           9,408
       BatchNorm2d-2         [-1, 64, 122, 122]             128
              ReLU-3         [-1, 64, 122, 122]               0
         MaxPool2d-4           [-1, 64, 61, 61]               0
            Conv2d-5           [-1, 64, 61, 61]          36,864
       BatchNorm2d-6           [-1, 64, 61, 61]             128
              ReLU-7           [-1, 64, 61, 61]               0
            Conv2d-8           [-1, 64, 61, 61]          36,864
       BatchNorm2d-9           [-1, 64, 61, 61]             128
             ReLU-10           [-1, 64, 61, 61]               0
       BasicBlock-11           [-1, 64, 61, 61]               0
           Conv2d-12           [-1, 64, 61, 61]          36,864
      BatchNorm2d-13           [-1, 64, 61, 61]             128
             ReLU-14           [-1, 64,

In [6]:
# Optionally warm-start the classifier from the checkpoint that the last cell of
# this notebook writes. On a fresh checkout that file does not exist yet, so a
# missing checkpoint is reported and skipped instead of aborting "Run All".
if RESTORE_CHECKPOINT:
    from pathlib import Path

    checkpoint_path = "../checkpoints/models/notebook-latest.safetensors"
    if Path(checkpoint_path).is_file():
        classifier.load(checkpoint_path)
    else:
        logging.warning(
            "Checkpoint %s not found; continuing without restoring it.",
            checkpoint_path,
        )

In [7]:
# Baseline metrics before any training happens in this session.
# TorchRunner.test returns a (loss, accuracy) pair for the given loader; each
# split is logged under its own label so the two records can be told apart.

# Validation split.
_pre_val_loss, _pre_val_acc = TorchRunner.test(
    loader=val_loader,
    model=classifier,
    loss_fn=torch.nn.functional.cross_entropy,
    device=device,
)
logging.info(
    f"\nPre-training validation accuracy: {round(_pre_val_acc, 2)}%\nPre-training validation loss: {round(_pre_val_loss, 4)}\n"
)

# Test split.
_pre_test_loss, _pre_test_acc = TorchRunner.test(
    loader=test_loader,
    model=classifier,
    loss_fn=torch.nn.functional.cross_entropy,
    device=device,
)
logging.info(
    f"\nPre-training test accuracy: {round(_pre_test_acc, 2)}%\nPre-training test loss: {round(_pre_test_loss, 4)}"
)

INFO:recognizer.trainer:Validation/Test: Average loss: 2.1513, Accuracy: 3142/8000 (39.27%)
INFO:root:
Pre-training accuracy: 39.27%
Pre-training loss: 2.1513

INFO:recognizer.trainer:Validation/Test: Average loss: 3.6894, Accuracy: 15/54 (27.78%)
INFO:root:
Pre-training accuracy: 27.78%
Pre-training loss: 3.6894


In [7]:
# Debug-only smoke test: run a single training step through TorchRunner.
if DEBUG_MODE:
    TorchRunner.training_step(
        model=classifier,
        train_loader=train_loader,
        device=device,
        loss_fn=torch.nn.functional.cross_entropy,
        # Throwaway Adam optimiser that exists only for this call.
        optimizer=torch.optim.Adam(
            params=classifier.parameters(),
            lr=1e-3,
            weight_decay=5e-4,
        ),
    )

In [9]:
# Trainer
# Adam over every classifier parameter, with weight decay as regularisation.
optimiser = torch.optim.Adam(classifier.parameters(), lr=0.001, weight_decay=0.0005)
# The only scheduler attached to the optimiser is the one handed to the training
# loop: it scales the learning rate by 0.2 once the monitored value has gone
# `patience` steps without decreasing (mode="min"), and never below 1e-6.
# NOTE(review): which metric is fed to the scheduler is decided inside
# TorchRunner.train — presumably the validation loss; confirm.
plateau_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimiser, mode="min", factor=0.2, patience=10, min_lr=0.000001
)
TorchRunner.train(
    train_loader=train_loader,
    val_loader=val_loader,
    model=classifier,
    loss_fn=torch.nn.functional.cross_entropy,
    optimizer=optimiser,
    scheduler=plateau_scheduler,
    epochs=1,
    device=device,
)

INFO:recognizer.trainer:Epoch: 1/1
  0%|          | 0/500 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x126c5aac0>
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/scene-recognition/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/opt/homebrew/anaconda3/envs/scene-recognition/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 1443, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/homebrew/anaconda3/envs/scene-recognition/lib/python3.11/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/anaconda3/envs/scene-recognition/lib/python3.11/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/anaconda3/envs/scene

KeyboardInterrupt: 

In [10]:
# Debug-only: (re)load the TensorBoard notebook extension and open it inline on
# the ../checkpoints/runs log directory.
# The % lines are IPython magics, so this cell only runs inside IPython/Jupyter.
# NOTE(review): presumably TorchRunner writes its event files to that
# directory — confirm.
if DEBUG_MODE:
    %reload_ext tensorboard
    %tensorboard --logdir ../checkpoints/runs

In [11]:
# Score the validation split again now that the training cell has run, and log
# the resulting (loss, accuracy) pair.
_val_loss, _val_acc = TorchRunner.test(
    model=classifier,
    loader=val_loader,
    device=device,
    loss_fn=torch.nn.functional.cross_entropy,
)
logging.info(
    f"\nPost-training accuracy: {round(_val_acc, 2)}%"
    f"\nPost-training loss: {round(_val_loss, 4)}"
)

INFO:recognizer.trainer:Validation/Test: Average loss: 2.1015, Accuracy: 3176/8000 (39.70%)
INFO:root:
Post-training accuracy: 39.7%
Post-training loss: 2.1015


In [12]:
# Top-1 / top-k (k=5) evaluation on the validation split; the accuracies are
# logged by TorchRunner itself.
eval_results = TorchRunner.evaluate(
    loader=val_loader, model=classifier, k=5, device=device
)
# Display only the available keys. Besides the two accuracy figures, the dict
# holds 'all_targets', 'data', 'top_k_scores' and 'top_k_predictions', which
# would flood the cell output if the whole dict were echoed.
eval_results.keys()

INFO:recognizer.trainer:Top-1 accuracy: 40.56%
Top-5 accuracy: 75.19%


dict_keys(['top_1_acc', 'top_n_acc', 'all_targets', 'data', 'top_k_scores', 'top_k_predictions'])

In [13]:
# Persist the classifier to the same path the RESTORE_CHECKPOINT cell loads
# from, so that a later run of this notebook can pick up from this state.
classifier.save("../checkpoints/models/notebook-latest.safetensors")