## Importing Packages/Functions

In [1]:
import torch
import torchvision
import utils
import os
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import engine
from joblib import dump, load
from importlib import reload
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard


def custom_target_save(target, folder_name):
    """Write a detection target dict to ``folder_name``, one file per key.

    Tensor values are stored with ``torch.save`` as ``<key>.pt``; every other
    value is stored with joblib's ``dump`` as ``<key>`` (no extension).

    Args:
        target: mapping of key -> value to persist.
        folder_name: output directory; created (with parents) if missing.

    Returns:
        True once every entry has been written.
    """
    # exist_ok so that re-running the cell on an existing folder does not raise.
    os.makedirs(folder_name, exist_ok=True)

    for k, v in target.items():
        # One Tensor check is sufficient: tv_tensors.BoundingBoxes is a
        # torch.Tensor subclass, and testing against it directly raised
        # NameError because tv_tensors is never imported in this notebook.
        if isinstance(v, torch.Tensor):
            torch.save(v, f'{folder_name}/{k}.pt')
        else:
            dump(v, f'{folder_name}/{k}')

    return True

def custom_target_load(folder_name):
    """Rebuild a target dict from the per-key files found in ``folder_name``.

    'area' and 'image_id' are read with joblib's ``load`` from extension-less
    files. 'iscrowd', 'labels' and 'boxes' are read with ``torch.load`` from
    ``.pt`` files and cast to int64, int64 and float respectively.

    Args:
        folder_name: directory holding the five files.

    Returns:
        dict with keys 'area', 'iscrowd', 'labels', 'boxes', 'image_id'.
    """
    def _read_tensor(key):
        # Tensor entries live next to the joblib ones, with a .pt suffix.
        return torch.load(f'{folder_name}/{key}.pt')

    # The dict literal is evaluated top to bottom, so files are read in the
    # same order as the keys are listed.
    return {
        'area': load(f'{folder_name}/area'),
        'iscrowd': _read_tensor('iscrowd').to(torch.int64),
        'labels': _read_tensor('labels').to(torch.int64),
        'boxes': _read_tensor('boxes').float(),
        'image_id': load(f'{folder_name}/image_id'),
    }

## Note: as discussed in previous interviews, my experience with PyTorch is limited. Much of this work has been derived from [this](https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html) tutorial. Notably, the [utils](https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py), [engine](https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py), [transforms](https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py), [coco_utils](https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py) and [coco_eval](https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py) modules have been downloaded and used as-is, with the exception of engine, which has been lightly modified for the validation step in the training loop.

## Data Loaders

In [8]:
### Construct Data Loader

class Aug_CHR_Dataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-augmented samples stored on disk.

    ``augmented_folder`` must contain an ``Images`` directory and a
    ``Targets`` directory. Each entry name in ``Images`` identifies one
    sample: the image is read with ``torch.load`` from ``Images/<name>`` and
    the target with ``custom_target_load`` from ``Targets/<name>``.
    """

    def __init__(self,
                 augmented_folder,
                 ):
        self.augmented_folder = augmented_folder
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        self.uus = sorted(os.listdir(os.path.join(augmented_folder, 'Images')))

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at position ``idx``."""
        name = self.uus[idx]
        img = torch.load(
            os.path.join(self.augmented_folder, 'Images', name)
        ).float()
        target = custom_target_load(
            os.path.join(self.augmented_folder, 'Targets', name)
        )

        return img, target

    def __len__(self):
        """Number of entries found in the ``Images`` directory."""
        return len(self.uus)


batch_size = 2

# Build the paths with os.path.join instead of hard-coded backslashes so the
# notebook also runs on non-Windows machines.
aug_train_data = Aug_CHR_Dataset(os.path.join('augmented_balanced_data', 'Training_Data'))
aug_val_data = Aug_CHR_Dataset(os.path.join('augmented_balanced_data', 'Val_Data'))

# Training batches are reshuffled every epoch.
aug_train_data_loader = torch.utils.data.DataLoader(
    aug_train_data,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=utils.collate_fn
)

# Validation keeps a fixed order: the per-batch loss statistics logged for
# validation are then computed over the same batches every epoch.
aug_val_data_loader = torch.utils.data.DataLoader(
    aug_val_data,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=utils.collate_fn
)



## Model

In [3]:
num_classes = 4 ## three species plus "crowd"

## Firewall issues have prevented a direct download, so the COCO-pretrained
## weights are loaded from a local copy of the checkpoint instead.
foundation_model_weights = torch.load('fasterrcnn_resnet50_fpn_coco-258fb6c6.pth',
                                      map_location='cpu')
## weights=None / weights_backbone=None is the current spelling of the
## deprecated pretrained_backbone=False: build the architecture without
## triggering any download, then fill it from the local state dict.
foundation_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None,
                                                                        weights_backbone=None)
foundation_model.load_state_dict(foundation_model_weights)

## construct the desired model and consign it to the GPU when one is available
def get_model(model,
              num_classes):
    """Replace the box-predictor head of ``model`` and move it to a device.

    Args:
        model: detection model exposing ``roi_heads.box_predictor.cls_score``.
        num_classes: number of output classes for the new predictor head.

    Returns:
        The same ``model`` object, modified in place, on CUDA when available
        and on CPU otherwise.
    """
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                      num_classes)

    # Fall back to CPU rather than raising when CUDA is not available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return model.to(device)

## Build the classifier / bounding-box model from the foundation model
local_model = get_model(model=foundation_model, num_classes=num_classes)



In [4]:
## Optimizer and learning-rate schedule

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, local_model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Step schedule: multiply the learning rate by 0.1 every 3 scheduler steps.
# NOTE(review): this scheduler is not passed to training_loop in the calls
# below, so nothing visible in this notebook steps it -- confirm whether
# that is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


def training_loop(model, 
                  trainloader, 
                  valloader, 
                  optimizer,
                  save_name,
                  device,
                  epochs=2): 
    """Run training and validation passes for ``epochs`` epochs and log statistics.

    Each epoch calls ``engine.engage_one_epoch`` once with ``train=True`` on
    ``trainloader`` and once with ``train=False`` on ``valloader``. For every
    monitored key, the median, first quartile, third quartile and mean are read
    from the returned object's ``meters``, written to TensorBoard under
    ``logs/fit`` and appended to the returned history. Whenever the validation
    median of 'loss_classifier' improves, the whole model is saved to
    ``save_name``.

    Args:
        model: model to train; moved to ``device`` before the first epoch.
        trainloader: data loader for the training pass.
        valloader: data loader for the validation pass.
        optimizer: optimizer forwarded to ``engine.engage_one_epoch``.
        save_name: path the best model is written to with ``torch.save``.
        device: device the model is moved to and that is forwarded to engine.
        epochs: number of epochs to run.

    Returns:
        ``(tb, output_dict)``: the SummaryWriter (already closed) and a dict
        ``{'Training': {...}, 'Validation': {...}}`` mapping ``'<key>_<Stat>'``
        to a list holding one value per epoch.
    """
    ## losses/metrics to monitor in tensorboard
    toe_keys = ['lr', 'loss',
                'loss_classifier', 'loss_box_reg',
                'loss_objectness', 'loss_rpn_box_reg']

    # (label used in tags and history keys, attribute read from the meter)
    stats = (('Median', 'median'),
             ('First_Quartile', 'first_quartile'),
             ('Third_Quartile', 'third_quartile'),
             ('Mean', 'avg'))

    output_dict = {split: {f'{tk}_{label}': []
                           for tk in toe_keys
                           for label, _ in stats}
                   for split in ('Training', 'Validation')}

    # Make sure the checkpoint directory exists before any epoch runs, so a
    # missing folder cannot abort the run at the first save.
    save_dir = os.path.dirname(save_name)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Moving the model once is enough; it does not change between epochs.
    model = model.to(device)
    best_loss = torch.inf

    tb = SummaryWriter('logs/fit')
    try:
        for epoch in range(epochs): 
            print(f'Epoch {epoch}/{epochs - 1}')
            print('-' * 10)
            evals = {}
            print('train:')
            evals['Training'] = engine.engage_one_epoch(model, 
                                                        optimizer, 
                                                        trainloader, 
                                                        device, 
                                                        epoch, 
                                                        print_freq=10, 
                                                        train=True)
            print('val:')
            evals['Validation'] = engine.engage_one_epoch(model, 
                                                          optimizer, 
                                                          valloader, 
                                                          device, 
                                                          epoch, 
                                                          print_freq=10, 
                                                          train=False)

            for k, v in evals.items():
                for tk in toe_keys:
                    meter = v.meters[tk]
                    for label, attr in stats:
                        value = getattr(meter, attr)
                        tb.add_scalar(f'{tk}/{k}_{label}', value, epoch)
                        output_dict[k][f'{tk}_{label}'].append(value)

            # NOTE(review): checkpoint selection uses the classifier loss
            # only, not the total 'loss' -- confirm this is intended.
            bl = evals['Validation'].meters['loss_classifier'].median
            if bl < best_loss:
                best_loss = bl
                # Save the model that was actually trained here; the original
                # saved the notebook-global `local_model` instead.
                torch.save(model, save_name)
    finally:
        # Flush and close the event file even if an epoch raises.
        tb.close()

    return tb, output_dict

In [6]:
## Launch TensorBoard inline, pointed at logs/fit (the directory training_loop writes its SummaryWriter events to)
%tensorboard --logdir='logs/fit'

Reusing TensorBoard on port 6006 (pid 15820), started 4:52:22 ago. (Use '!kill 15820' to kill it.)

In [5]:
## Two-epoch run; the per-epoch history is then written to 'training_output' with joblib
tl, od = training_loop(
    model=local_model,
    trainloader=aug_train_data_loader,
    valloader=aug_val_data_loader,
    optimizer=optimizer,
    save_name='Models/CHR_ResNet',
    device=torch.device('cuda'),
    epochs=2,
)

dump(od, 'training_output')

Epoch 0/1
----------
train:
Epoch: [0]  [  0/546]  eta: 0:26:38  lr: 0.000014  loss: 2.4146 (2.4146)  loss_classifier: 1.4738 (1.4738)  loss_box_reg: 0.0026 (0.0026)  loss_objectness: 0.8179 (0.8179)  loss_rpn_box_reg: 0.1203 (0.1203)  time: 2.9283  data: 0.0519  max mem: 4017
Epoch: [0]  [ 10/546]  eta: 0:42:17  lr: 0.000106  loss: 2.1408 (2.1331)  loss_classifier: 1.3907 (1.3635)  loss_box_reg: 0.0341 (0.0326)  loss_objectness: 0.7149 (0.6687)  loss_rpn_box_reg: 0.0671 (0.0683)  time: 4.7333  data: 0.0187  max mem: 6241
Epoch: [0]  [ 20/546]  eta: 0:39:56  lr: 0.000197  loss: 1.5982 (1.5985)  loss_classifier: 0.9789 (1.0051)  loss_box_reg: 0.0381 (0.0413)  loss_objectness: 0.4287 (0.4821)  loss_rpn_box_reg: 0.0587 (0.0700)  time: 4.6382  data: 0.0170  max mem: 6241
Epoch: [0]  [ 30/546]  eta: 0:38:54  lr: 0.000289  loss: 0.6635 (1.2385)  loss_classifier: 0.3384 (0.7539)  loss_box_reg: 0.0781 (0.0657)  loss_objectness: 0.1029 (0.3529)  loss_rpn_box_reg: 0.0553 (0.0661)  time: 4.4098  

Epoch: [0]  [330/546]  eta: 0:18:27  lr: 0.003039  loss: 0.1564 (0.3334)  loss_classifier: 0.0503 (0.1475)  loss_box_reg: 0.0617 (0.1021)  loss_objectness: 0.0033 (0.0444)  loss_rpn_box_reg: 0.0262 (0.0394)  time: 6.1423  data: 0.0195  max mem: 8558
Epoch: [0]  [340/546]  eta: 0:17:40  lr: 0.003130  loss: 0.1650 (0.3295)  loss_classifier: 0.0580 (0.1454)  loss_box_reg: 0.0645 (0.1015)  loss_objectness: 0.0062 (0.0433)  loss_rpn_box_reg: 0.0216 (0.0392)  time: 5.9210  data: 0.0200  max mem: 8558
Epoch: [0]  [350/546]  eta: 0:16:52  lr: 0.003222  loss: 0.1533 (0.3251)  loss_classifier: 0.0580 (0.1429)  loss_box_reg: 0.0645 (0.1008)  loss_objectness: 0.0029 (0.0422)  loss_rpn_box_reg: 0.0314 (0.0392)  time: 5.7786  data: 0.0214  max mem: 8558
Epoch: [0]  [360/546]  eta: 0:16:02  lr: 0.003314  loss: 0.1457 (0.3211)  loss_classifier: 0.0500 (0.1406)  loss_box_reg: 0.0607 (0.1002)  loss_objectness: 0.0019 (0.0412)  loss_rpn_box_reg: 0.0277 (0.0391)  time: 5.6432  data: 0.0190  max mem: 8558


Epoch: [0]  [100/182]  eta: 0:02:55  lr: 0.000005  loss: 0.1610 (0.1555)  loss_classifier: 0.0484 (0.0497)  loss_box_reg: 0.0707 (0.0677)  loss_objectness: 0.0033 (0.0045)  loss_rpn_box_reg: 0.0316 (0.0336)  time: 2.4181  data: 0.0221  max mem: 10656
Epoch: [0]  [110/182]  eta: 0:02:31  lr: 0.000005  loss: 0.1487 (0.1556)  loss_classifier: 0.0438 (0.0497)  loss_box_reg: 0.0693 (0.0678)  loss_objectness: 0.0022 (0.0044)  loss_rpn_box_reg: 0.0210 (0.0336)  time: 1.9611  data: 0.0233  max mem: 10656
Epoch: [0]  [120/182]  eta: 0:02:10  lr: 0.000005  loss: 0.1492 (0.1553)  loss_classifier: 0.0349 (0.0498)  loss_box_reg: 0.0465 (0.0675)  loss_objectness: 0.0015 (0.0045)  loss_rpn_box_reg: 0.0316 (0.0336)  time: 1.9991  data: 0.0224  max mem: 10656
Epoch: [0]  [130/182]  eta: 0:01:48  lr: 0.000005  loss: 0.1492 (0.1565)  loss_classifier: 0.0459 (0.0505)  loss_box_reg: 0.0534 (0.0684)  loss_objectness: 0.0019 (0.0044)  loss_rpn_box_reg: 0.0303 (0.0332)  time: 2.0053  data: 0.0217  max mem: 10

Epoch: [1]  [230/546]  eta: 0:31:11  lr: 0.000005  loss: 0.1430 (0.1593)  loss_classifier: 0.0366 (0.0524)  loss_box_reg: 0.0463 (0.0720)  loss_objectness: 0.0021 (0.0040)  loss_rpn_box_reg: 0.0189 (0.0308)  time: 5.7113  data: 0.0119  max mem: 10844
Epoch: [1]  [240/546]  eta: 0:30:15  lr: 0.000005  loss: 0.1273 (0.1581)  loss_classifier: 0.0374 (0.0522)  loss_box_reg: 0.0460 (0.0713)  loss_objectness: 0.0031 (0.0040)  loss_rpn_box_reg: 0.0222 (0.0306)  time: 6.0717  data: 0.0118  max mem: 10844
Epoch: [1]  [250/546]  eta: 0:29:13  lr: 0.000005  loss: 0.1139 (0.1575)  loss_classifier: 0.0330 (0.0519)  loss_box_reg: 0.0444 (0.0709)  loss_objectness: 0.0031 (0.0041)  loss_rpn_box_reg: 0.0189 (0.0305)  time: 5.9431  data: 0.0120  max mem: 10844
Epoch: [1]  [260/546]  eta: 0:28:15  lr: 0.000005  loss: 0.1328 (0.1569)  loss_classifier: 0.0318 (0.0516)  loss_box_reg: 0.0388 (0.0707)  loss_objectness: 0.0025 (0.0041)  loss_rpn_box_reg: 0.0183 (0.0305)  time: 5.8965  data: 0.0120  max mem: 10

Epoch: [1]  [  0/182]  eta: 0:09:36  lr: 0.000005  loss: 0.0652 (0.0652)  loss_classifier: 0.0193 (0.0193)  loss_box_reg: 0.0281 (0.0281)  loss_objectness: 0.0066 (0.0066)  loss_rpn_box_reg: 0.0111 (0.0111)  time: 3.1695  data: 0.0299  max mem: 10844
Epoch: [1]  [ 10/182]  eta: 0:09:52  lr: 0.000005  loss: 0.1269 (0.1550)  loss_classifier: 0.0334 (0.0519)  loss_box_reg: 0.0482 (0.0750)  loss_objectness: 0.0009 (0.0026)  loss_rpn_box_reg: 0.0269 (0.0255)  time: 3.4458  data: 0.0251  max mem: 10844
Epoch: [1]  [ 20/182]  eta: 0:09:18  lr: 0.000005  loss: 0.1279 (0.1476)  loss_classifier: 0.0400 (0.0483)  loss_box_reg: 0.0455 (0.0626)  loss_objectness: 0.0009 (0.0028)  loss_rpn_box_reg: 0.0277 (0.0339)  time: 3.4617  data: 0.0228  max mem: 10844
Epoch: [1]  [ 30/182]  eta: 0:08:36  lr: 0.000005  loss: 0.1197 (0.1446)  loss_classifier: 0.0389 (0.0463)  loss_box_reg: 0.0416 (0.0587)  loss_objectness: 0.0014 (0.0036)  loss_rpn_box_reg: 0.0386 (0.0359)  time: 3.3707  data: 0.0216  max mem: 11

['training_output']

In [None]:
## Twenty-epoch run with the same model, loaders and optimizer objects as the
## two-epoch run. It reuses the same save_name and 'training_output' path, so
## the files written by the earlier run are overwritten.
## NOTE(review): the output captured below this cell logs lr: 0.000000 from
## iteration 0 -- check optimizer.param_groups[0]['lr'] before a long run.
tl, od = training_loop(
    model=local_model,
    trainloader=aug_train_data_loader,
    valloader=aug_val_data_loader,
    optimizer=optimizer,
    save_name='Models/CHR_ResNet',
    device=torch.device('cuda'),
    epochs=20,
)

dump(od, 'training_output')

Epoch 0/19
----------
train:
Epoch: [0]  [  0/534]  eta: 2:37:53  lr: 0.000000  loss: 0.1872 (0.1872)  loss_classifier: 0.0540 (0.0540)  loss_box_reg: 0.0663 (0.0663)  loss_objectness: 0.0022 (0.0022)  loss_rpn_box_reg: 0.0646 (0.0646)  time: 17.7411  data: 0.0178  max mem: 11403
Epoch: [0]  [ 10/534]  eta: 2:00:30  lr: 0.000000  loss: 0.1216 (0.1832)  loss_classifier: 0.0378 (0.0626)  loss_box_reg: 0.0486 (0.0757)  loss_objectness: 0.0022 (0.0051)  loss_rpn_box_reg: 0.0248 (0.0397)  time: 13.7978  data: 0.0187  max mem: 11403
Epoch: [0]  [ 20/534]  eta: 1:57:23  lr: 0.000000  loss: 0.1557 (0.2988)  loss_classifier: 0.0419 (0.0674)  loss_box_reg: 0.0486 (0.0747)  loss_objectness: 0.0037 (0.1065)  loss_rpn_box_reg: 0.0169 (0.0503)  time: 13.5024  data: 0.0192  max mem: 11403
Epoch: [0]  [ 30/534]  eta: 1:49:58  lr: 0.000000  loss: 0.1570 (0.2563)  loss_classifier: 0.0463 (0.0626)  loss_box_reg: 0.0518 (0.0762)  loss_objectness: 0.0030 (0.0738)  loss_rpn_box_reg: 0.0142 (0.0438)  time: 1