In [1]:
import logging

import torch
import torch.nn.functional as F

from lightning.pytorch import Trainer
from lightning.pytorch.tuner import Tuner

from model import FasterRCNN
from data import VOCDataModule
from helper import show_image, show_image_and_bounding_box, show_worst_image_predictions, show_confusion_matrix, get_batch, MyProgressBar
from helper import get_sample, convert_predictions

# Notebook-level logger, named after the running module.
log = logging.getLogger(name=__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build a VOC data module with a batch size of 16.
# NOTE(review): the later cells visible in this notebook use `data_module`,
# not `voc_data_module` — confirm whether this instance is still needed.
voc_data_module = VOCDataModule(
    data_dir='data/VOC',
    batch_size=16,
)

In [3]:
# Example usage: create the data module and the detector that the training
# and inference cells below operate on.
data_module = VOCDataModule(
    data_dir='data/VOC',
    batch_size=16,
    num_workers=4,
)
# NOTE(review): 21 presumably covers the VOC object classes plus background — confirm in model.py.
model = FasterRCNN(num_classes=21)



In [None]:
# Train for up to 5 epochs with the MyProgressBar callback from helper attached.
trainer = Trainer(
    max_epochs=5,
    callbacks=[MyProgressBar()],
)
# Fit the detector on the data supplied by the data module.
trainer.fit(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Using downloaded and verified file: data/VOC/VOCtrainval_11-May-2012.tar
Extracting data/VOC/VOCtrainval_11-May-2012.tar to data/VOC


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                 | Params | Mode 
------------------------------------------------------------
0 | map_metric | MeanAveragePrecision | 0      | train
1 | model      | FasterRCNN           | 41.4 M | train
------------------------------------------------------------
41.2 M    Trainable params
222 K     Non-trainable params
41.4 M    Total params
165.586   Total estimated model params size (MB)


Epoch 0: 100%|██████████████████████████████| 358/358 [03:35<00:00,  1.66it/s, v_num=74, train_loss=0.747]
Epoch 1:  93%|▉| 334/358 [03:23<00:14,  1.64it/s, v_num=74, train_loss=0.383, val_map=0.0921, val_map_50=0[A

In [None]:
# Fetch one (image, annotation) pair; val=True is passed to request a validation sample.
image, bbox = get_sample(
    data_module,
    val=True,
)

In [None]:
# Display the sampled image together with the objects listed in its annotation.
show_image_and_bounding_box(
    image,
    bbox['annotation']['object'],
)

In [None]:
# Run a forward-only pass of the trainer's model on the sampled image.
# eval() puts layers such as dropout and batch-norm into inference behaviour
# (the module may still be in train mode after fitting), and no_grad() skips
# autograd bookkeeping that a prediction pass does not need.
# NOTE(review): assumes the model returns detections (not losses) in eval mode,
# which is what convert_predictions consumes in the next cell — confirm in model.py.
trainer.model.eval()
with torch.no_grad():
    pred = trainer.model([image])

In [None]:
# Look up the index -> class mapping exposed by the data module.
idx_to_class = data_module.get_idx_to_class()

# Convert the raw model output using that mapping, passing threshold=0.3
# to the helper.
converted_predictions = convert_predictions(
    pred,
    idx_to_class,
    threshold=0.3,
)

# Overlay the converted predictions on the same sampled image.
show_image_and_bounding_box(
    image,
    converted_predictions,
)