# Model Training

In [22]:
import os
import torch
from torchvision.transforms import v2

from detectors import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
root_dir = os.getcwd()
dataset_name = "7s"
num_classes = 10
data_root_dir = root_dir + f"\\..\\DETR\\data\\{dataset_name}"
train_data_dir = f"{data_root_dir}\\train2017"
train_annotation_file = f"{data_root_dir}\\annotations\\instances_train2017.json"
val_data_dir = f"{data_root_dir}\\val2017"
val_annotation_file = f"{data_root_dir}\\annotations\\instances_val2017.json"
device = "cuda" if torch.cuda.is_available() else "cpu"

train_transforms = v2.Compose([
    v2.ToTensor(),
    v2.ToDtype(torch.float32),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transforms = v2.Compose([
    v2.ToTensor(),
])



In [3]:
detector = FasterRCNNV2Detector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

In [4]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.10s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\fasterrcnn_resnet50_fpn_v2
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-06T20:13:13.286627+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=183.4164936542511, loss_classifier=84.17560887336731, loss_box_reg=74.8349797129631, loss_objectness=15.53691529482603, loss_rpn_box_reg=8.868989635258913, epoch_end=2024-01-06T20:15:34.211990+01:00, train_map50=0.256330668926239, train_mAP50_95=0.10410401225090027, val_map50=0.030000001192092896, val_mAP50_95=0.017875000834465027, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: le

In [5]:
detector = RetinaNetResNet50FPNDetector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

In [6]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.17s)
creating index...
index created!
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\retinanet_resnet50_fpn
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-06T22:03:23.610159+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=165.92778837680817, loss_classification=105.10929262638092, loss_bbox_regression=60.81849581003189, epoch_end=2024-01-06T22:04:38.738319+01:00, train_map50=0.0, train_mAP50_95=0.0, val_map50=0.0, val_mAP50_95=0.0, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'

In [7]:
detector = FCOSResNet50FPNDetector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)



In [8]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.34s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\fcos_resnet50_fpn
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-06T23:29:15.470699+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=166.48619902133942, loss_classification=58.72590509057045, loss_bbox_regression=47.347290605306625, loss_bbox_ctrness=60.413003742694855, epoch_end=2024-01-06T23:30:20.140012+01:00, train_map50=0.1403818428516388, train_mAP50_95=0.07240423560142517, val_map50=0.08135562390089035, val_mAP50_95=0.03473641723394394, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: learning_rate=0.001, lr_step_e

In [9]:
detector = SSD300VGG16Detector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to C:\Users\tilof/.cache\torch\hub\checkpoints\ssd300_vgg16_coco-b556d3b4.pth
100%|████████████████████████████████████████████████████████████████████████████████████████████| 136M/136M [00:21<00:00, 6.71MB/s]


In [10]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\ssd300_vgg16
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T00:33:57.269612+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=873.4659233093262, loss_classification=621.0435481071472, loss_bbox_regression=252.42237520217896, epoch_end=2024-01-07T00:34:41.416085+01:00, train_map50=0.003171532182022929, train_mAP50_95=0.0007811591494828463, val_map50=0.0, val_mAP50_95=0.0, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.op

In [11]:
detector = SSDLite320MobileNetV3LargeDetector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

In [12]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.24s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\ssdlite320_mobilenet_v3_large
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T01:36:45.919374+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=1126.3251104354858, loss_classification=798.408106803894, loss_bbox_regression=327.9169988632202, epoch_end=2024-01-07T01:37:31.927747+01:00, train_map50=0.01275361143052578, train_mAP50_95=0.0035174472723156214, val_map50=0.0068451398983597755, val_mAP50_95=0.0029061592649668455, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: learning_rate=0.001, lr_step_every=10, optim=<class

In [15]:
detector = FasterRCNNDetector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\tilof/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|████████████████████████████████████████████████████████████████████████████████████████████| 160M/160M [00:25<00:00, 6.68MB/s]


In [16]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.15s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\fasterrcnn_resnet50_fpn
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T08:14:18.711390+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=170.3774893283844, loss_classifier=77.29778480529785, loss_box_reg=63.677278101444244, loss_objectness=15.480922929942608, loss_rpn_box_reg=13.921503726392984, epoch_end=2024-01-07T08:15:43.521248+01:00, train_map50=0.1489706039428711, train_mAP50_95=0.07884575426578522, val_map50=0.16516491770744324, val_mAP50_95=0.08549725264310837, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: le

In [18]:
detector = FasterRCNNMobileNetV3LargeDetector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth" to C:\Users\tilof/.cache\torch\hub\checkpoints\fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth
100%|██████████████████████████████████████████████████████████████████████████████████████████| 74.2M/74.2M [00:11<00:00, 6.72MB/s]


In [19]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.17s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\fasterrcnn_mobilenet_v3_large_fpn
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T09:46:57.760146+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=190.15894258022308, loss_classifier=88.57986736297607, loss_box_reg=69.18051242828369, loss_objectness=24.12834208458662, loss_rpn_box_reg=8.270222466439009, epoch_end=2024-01-07T09:48:05.651654+01:00, train_map50=0.10443538427352905, train_mAP50_95=0.04304007440805435, val_map50=0.09753390401601791, val_mAP50_95=0.04634634405374527, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch

In [20]:
detector = FasterRCNNMobileNetV3Large320Detector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth" to C:\Users\tilof/.cache\torch\hub\checkpoints\fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth
100%|██████████████████████████████████████████████████████████████████████████████████████████| 74.2M/74.2M [00:11<00:00, 6.69MB/s]


In [21]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.50s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\fasterrcnn_mobilenet_v3_large_320_fpn
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T10:47:34.839848+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=128.98542857170105, loss_classifier=61.174988597631454, loss_box_reg=30.67648746073246, loss_objectness=25.833288565278053, loss_rpn_box_reg=11.300665013492107, epoch_end=2024-01-07T10:48:32.183398+01:00, train_map50=0.029907409101724625, train_mAP50_95=0.014833332970738411, val_map50=0.09718253463506699, val_mAP50_95=0.036757443100214005, epoch=1
Train ...
Validating ...
Epoch 2

In [23]:
detector = RetinaNetResNet50FPNV2Detector(
    num_classes=num_classes,
    device=device,
    root_dir=root_dir,
)

Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_v2_coco-5905b1c5.pth" to C:\Users\tilof/.cache\torch\hub\checkpoints\retinanet_resnet50_fpn_v2_coco-5905b1c5.pth
100%|████████████████████████████████████████████████████████████████████████████████████████████| 146M/146M [00:22<00:00, 6.71MB/s]


In [24]:
detector.train(
    n_epochs = 50,
    lr = 1e-3,
    batch_size = 16,
    start_epoch = 0,
    resume = None,
    save_every = 10,
    lr_step_every = 10,
    num_classes = num_classes,
    device=device,
    log_dir=os.path.join(root_dir, "logs", dataset_name, detector.name),
    train_data_dir = train_data_dir,
    train_annotation_file = train_annotation_file,
    train_transforms = train_transforms,
    val_data_dir = val_data_dir,
    val_annotation_file = val_annotation_file,
    val_transforms = val_transforms,
    val_batch_size=2,
    n_batches_validation=2,
    test_data_dir = None,
    test_annotation_file = None,
    test_transforms = None,    
)

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Logging training at C:\Users\tilof\PycharmProjects\DeepLearningProjects\MasterThesis\ObjectDetection\logs\7s\retinanet_resnet50_fpn_v2
Start training ...
Train ...
Validating ...
Epoch 1/50: Epoch 1/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch.optim.sgd.SGD'>, scheduler=<class 'torch.optim.lr_scheduler.StepLR'>, epoch_start=2024-01-07T11:44:15.411167+01:00, batch_size=16, val_batch_size=2, n_batches_validation=2, loss=153.66863131523132, loss_classification=101.26726424694061, loss_bbox_regression=52.4013674557209, epoch_end=2024-01-07T11:45:31.275640+01:00, train_map50=0.01163439080119133, train_mAP50_95=0.003830164670944214, val_map50=0.02154846116900444, val_mAP50_95=0.005589494947344065, epoch=1
Train ...
Validating ...
Epoch 2/50: Epoch 2/50: learning_rate=0.001, lr_step_every=10, optim=<class 'torch