# Imports

In [1]:
import os
import json
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torchvision
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchmetrics.detection.map import MeanAveragePrecision
from PIL import Image
import pycocotools

In [2]:
# Imports local modules downloaded from TorchVision repo v0.8.2, references/detection
# https://github.com/pytorch/vision/tree/v0.8.2/references/detection
import utils
import transforms
import coco_eval
from engine import train_one_epoch, evaluate

In [3]:
# Imports from local lib files
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import _get_iou_types 

# Functions and initiations

## File paths

In [4]:
# Set the paths to the annotation files that will retrieve the images with the split based on the annotations
output_path = 'output'
img_folder = 'eccv_18_all_images_sm'
cis_test_ann_path = 'eccv_18_annotation_files/cis_test_annotations.json'
cis_val_ann_path = 'eccv_18_annotation_files/cis_val_annotations.json'
train_ann_path = 'eccv_18_annotation_files/train_annotations.json'
trans_test_ann_path = 'eccv_18_annotation_files/trans_test_annotations.json'
trans_val_ann_path = 'eccv_18_annotation_files/trans_val_annotations.json'


def _load_annotations(path):
    """Parse one JSON annotation file and close its handle as soon as it is read."""
    with open(path) as ann_file:
        return json.load(ann_file)


# Load the json files of the annotations to make exploring each image easier
cis_test_ann = _load_annotations(cis_test_ann_path)
cis_val_ann = _load_annotations(cis_val_ann_path)
train_ann = _load_annotations(train_ann_path)
trans_test_ann = _load_annotations(trans_test_ann_path)
trans_val_ann = _load_annotations(trans_val_ann_path)

## Utils

In [5]:
# Select the PyTorch device: the GPU when CUDA is available, the CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Transformation

In [6]:
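# Colour transformations (inversion + colour jitter) that can be passed as `transform` to a dataset for data augmentation.
# The horizontal flip with 50% chance is not built here: CustomImageDataset applies it when its `rotation` flag is set.
# In paper :  ' ... and employ horizontal flipping for data augmentation. ( for detection)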

import transforms as T   # from git hub repo
import torchvision.transforms as TorchTrans
# In paper :  ' ... and employ horizontal flipping for data augmentation. ( for detection)

# Colour augmentation pipeline, applied as a single module to an image tensor:
# an inversion step followed by a colour jitter step.
# NOTE(review): the arguments are positional; per torchvision's signatures the first
# RandomInvert argument is the probability p, and the three ColorJitter ranges are
# brightness, contrast and saturation -- confirm against the installed version.
colorTranformations = torch.nn.Sequential(
                      TorchTrans.RandomInvert(1), # 1 presumably means every image is inverted; 0.6 was the considered alternative
                      TorchTrans.ColorJitter([.2,.3], [0.7,0.9], [.1,0.12])# alternative tried: T.ColorJitter([.2,.3], [0.7,0.9],  hue=.1)
)

### Images and dataset

In [7]:
# Method that returns a list with the idx of images with at least one bounding box (img_wbbox) and a
# list with the number of bbox for each valid image (num_bbox)
def get_img_with_bbox(file_path):
    """Find the images of an annotation file that own at least one bounding box.

    Args:
        file_path (str): path to a JSON file holding an 'images' list (entries with
            an 'id') and an 'annotations' list (entries with an 'image_id' and,
            optionally, a 'bbox').

    Returns:
        tuple: ``(img_wbbox, num_bbox)``. ``img_wbbox`` lists the positions, inside
        ``file['images']``, of the images that have at least one annotation carrying
        a 'bbox' key; ``num_bbox`` gives the number of such annotations for each of
        those images, in the same order.
    """
    with open(file_path) as ann_file:
        file = json.load(ann_file)

    # Count the boxes per image id in one pass instead of rescanning every
    # annotation for every image (which is quadratic on the full dataset).
    bbox_count = {}
    for ann in file['annotations']:
        if 'bbox' in ann:
            bbox_count[ann['image_id']] = bbox_count.get(ann['image_id'], 0) + 1

    img_wbbox = []
    num_bbox = []
    for i, img in enumerate(file['images']):
        count = bbox_count.get(img['id'], 0)
        if count:
            img_wbbox.append(i)
            num_bbox.append(count)

    return img_wbbox, num_bbox

In [8]:
# Class used to create a custom dataset
class CustomImageDataset(Dataset):
    """Detection dataset built from a JSON annotation file and an image folder.

    Each item is an ``(image, target)`` pair: ``image`` is the RGB picture converted
    with ``ToTensor`` and ``target`` is a dict with the keys 'boxes', 'labels',
    'image_id', 'area' and 'iscrowd'.

    Args:
        label_path (str): path to the JSON annotation file ('images' and
            'annotations' lists).
        img_dir (str): folder containing the files named by 'file_name'.
        valid_img (list): positions, inside the 'images' list, of the images to
            expose (for instance the first list returned by get_img_with_bbox).
        transform (callable, optional): applied to the image only, after the
            optional flip (default is None).
        rotation (bool): when True, ``T.RandomHorizontalFlip(0.5)`` is applied to
            the image and its target (default is False).
    """
    def __init__(self, label_path, img_dir, valid_img, transform = None, rotation = False):
        with open(label_path) as label_file:
            self.label_file = json.load(label_file)
        self.img_dir = img_dir
        self.transform = transform
        self.valid_img = valid_img
        self.rotation = rotation
        self.rotate = T.RandomHorizontalFlip(0.5)
        self._to_tensor = torchvision.transforms.ToTensor()

        # Group the annotations by image once, so __getitem__ does not have to
        # scan the whole annotation list (twice) for every sample.
        self._ann_by_image = {}
        for ann in self.label_file['annotations']:
            self._ann_by_image.setdefault(ann['image_id'], []).append(ann)

    def __len__(self):
        return len(self.valid_img)

    def __getitem__(self, idx):

        idx = self.valid_img[idx] # consider only images with bbox annotations
        img_info = self.label_file['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        # some images have only one channel, so every image is decoded as RGB
        # (the file is read once; the former extra read_image() result was unused)
        with Image.open(img_path) as pil_image:
            image = self._to_tensor(pil_image.convert("RGB"))

        anns = self._ann_by_image.get(img_info['id'], [])
        # float32 keeps the rescaled coordinates valid even when the file stores integers
        boxes = torch.as_tensor([ann['bbox'] for ann in anns], dtype=torch.float32).reshape(-1, 4)
        label = torch.as_tensor([ann['category_id'] for ann in anns])

        # transform bbox coords to adjust for resizing
        scale_x = image.shape[2] / img_info['width']
        scale_y = image.shape[1] / img_info['height']
        boxes = torch.round(boxes * torch.tensor([scale_x, scale_y, scale_x, scale_y]))

        # [x, y, width, height] -> [x_min, y_min, x_max, y_max] (pytorch bbox format)
        boxes[:, 2:] += boxes[:, :2]

        label = torch.where(label==30,0,1)  # 0 if empty (categ id = 30), 1 if animal
        image_id = torch.tensor([idx])  # position of the image in the annotation file
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = label
        target["image_id"] = image_id
        target['area']=area
        target['iscrowd']=iscrowd

        if self.rotation:
            image, target= self.rotate(image, target)
        if self.transform:
            image = self.transform(image)
        return image, target

## Create the model

### Pre-trained models
Inspired by https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=YjNHjVMOyYlH

### Model with only the last layer to train (CNN layers)

In [9]:
# Shared builder used by the three fine-tuning variants below
def _get_model_from_pretrained(num_classes, train_roi_heads=False, train_rpn=False):
    """Build a COCO pre-trained Faster R-CNN with a new box predictor.

    All pre-trained weights are frozen first; the RoI heads and/or the RPN are
    then unfrozen on request. The replacement box predictor is created after the
    freeze, so its parameters are trainable in every variant.

    Args:
        num_classes (int): number of output classes passed to FastRCNNPredictor.
        train_roi_heads (bool): unfreeze ``model.roi_heads`` parameters.
        train_rpn (bool): unfreeze ``model.rpn`` parameters.

    Returns:
        The configured detection model.
    """
    # load a Faster R-CNN (ResNet-50 FPN) detection model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    for param in model.parameters(): # to freeze all existing weights
        param.requires_grad = False

    if train_roi_heads:
        for param in model.roi_heads.parameters():
            param.requires_grad = True

    if train_rpn:
        for param in model.rpn.parameters():
            param.requires_grad = True

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


# Get a pretrained model and set to train the last layer (predictors (base) : model 1)
def get_model_from_pretrained_base(num_classes):
    """Model 1: everything frozen except the new box predictor."""
    return _get_model_from_pretrained(num_classes)


# Get a pretrained model and set to train the last 2 layers (ROI + predictors : model 2)
def get_model_from_pretrained_roi(num_classes):
    """Model 2: the RoI heads and the new box predictor are trainable."""
    return _get_model_from_pretrained(num_classes, train_roi_heads=True)


# Get a pretrained model and set to train the last 3 layers (RPN + ROI + predictors : model 3)
def get_model_from_pretrained_rpn(num_classes):
    """Model 3: the RPN, the RoI heads and the new box predictor are trainable."""
    return _get_model_from_pretrained(num_classes, train_roi_heads=True, train_rpn=True)


# Create a model based on a type preference between the 3 proposed
def create_model(model_type, normalize, num_classes=2, milestones=[5, 10]):
    """Create one of the three fine-tuning variants with its optimizer and scheduler.

    Args:
        model_type (int or str): 1/'base', 2/'roi' or 3/'rpn'.
        normalize (bool): when true, replace the image mean/std used by the model's
            input transform with the per-channel statistics of the training set.
        num_classes (int): number of classes of the new box predictor (default 2).
        milestones (list): epochs at which MultiStepLR multiplies the learning rate
            by 0.1. The default list is only read, never modified.

    Returns:
        tuple: ``(model, optimizer, lr_scheduler)``; the model is moved to ``device``.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values. The
            previous print-and-return-None made every caller fail later while
            unpacking the three return values.
    """
    # get the model from the type we want using our helper function
    if model_type==1 or model_type=='base':
        model = get_model_from_pretrained_base(num_classes)
    elif model_type==2 or model_type=='roi':
        model = get_model_from_pretrained_roi(num_classes)
    elif model_type==3 or model_type=='rpn':
        model = get_model_from_pretrained_rpn(num_classes)
    else:
        raise ValueError('Please select a valid model. 1:"base"- 2:"roi" - 3:"rpn"')

    # move model to the right device
    model.to(device)

    ## Mean and Std by channel by pixel from the training set.
    if normalize:
        model.transform.image_mean = [0.3321, 0.3406, 0.3210] # mean = [0.3321, 0.3406, 0.3210]
        model.transform.image_std = [0.2359, 0.2369, 0.2313] # std = [0.2359, 0.2369, 0.2313]

    # construct an SGD optimizer over the parameters left trainable by the helper
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

    # like in the paper, construct the scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones = milestones, gamma=0.1)

    return model, optimizer, lr_scheduler


# Save the model, the optimizer and the scheduler into 3 separate files (~165MB)
def save_model(file_name = None):
    """Save the global model, optimizer and lr_scheduler state dicts.

    Args:
        file_name (str, optional): prefix of the three files written in
            'saved_models/'. Defaults to the timestamp of the call. It was
            previously computed once, when the cell defining the function ran, so
            successive default saves overwrote each other.

    Returns:
        None
    """
    if file_name is None:
        file_name = time.strftime("%Y%m%d_%H%M%S")

    # torch.save does not create missing folders
    os.makedirs('saved_models', exist_ok=True)

    torch.save(model.state_dict(), 'saved_models/' + file_name + '_model.pt')
    torch.save(optimizer.state_dict(), 'saved_models/' + file_name + '_optimizer.pt')
    torch.save(lr_scheduler.state_dict(), 'saved_models/' + file_name + '_scheduler.pt')
    print("Succesfully saved!")
    return None


# Load a model, an optimizer and a scheduler into 3 different variables
def load_model(model_type, model_type_file_name, num_classes=2, milestones=[5, 10], normalize=False):
    """Recreate a model/optimizer/scheduler triple and restore their saved states.

    Args:
        model_type (int or str): model variant, forwarded to create_model.
        model_type_file_name (str): prefix of the files stored in 'saved_models/'.
        num_classes (int): number of classes of the box predictor (default 2).
        milestones (list): scheduler milestones, forwarded to create_model.
        normalize (bool): forwarded to create_model; create_model sets the
            normalisation statistics as plain attributes, so pass the value used
            when the model was trained (default False).

    Returns:
        tuple: ``(model, optimizer, lr_scheduler)`` with their state dicts loaded.
    """
    # Keyword arguments are required here: create_model's second positional
    # parameter is `normalize`, so the former positional call shifted
    # num_classes and milestones into the wrong parameters.
    model, optimizer, lr_scheduler = create_model(model_type, normalize,
                                                  num_classes=num_classes,
                                                  milestones=milestones)

    # load the model, the optimizer and the scheduler
    # (map_location lets a checkpoint saved on GPU be restored on the current device)
    prefix = 'saved_models/' + model_type_file_name
    model.load_state_dict(torch.load(prefix + '_model.pt', map_location=device))
    optimizer.load_state_dict(torch.load(prefix + '_optimizer.pt', map_location=device))
    lr_scheduler.load_state_dict(torch.load(prefix + '_scheduler.pt', map_location=device))

    return model, optimizer, lr_scheduler

## Create the dataloaders
To load the data of the dataset efficiently for the model

In [10]:
# Create the full/light dataloader with the full/light dataset
def create_dataloader(ann_path, batch_size, light, transform=None, rotate=True, shuffle=True, light_size=500):
    """Build a DataLoader over the images of an annotation file that have boxes.

    Args:
        ann_path (str): path to the JSON annotation file.
        batch_size (int): number of samples per batch.
        light (bool): when true, keep only a random subset of the images.
        transform (callable, optional): image transform given to the dataset.
        rotate (bool): enable the dataset's random horizontal flip (default True).
        shuffle (bool): shuffle the samples at every epoch (default True).
        light_size (int): maximum number of images kept in light mode (default 500).

    Returns:
        DataLoader: batches are collated with ``utils.collate_fn``.
    """
    images_with_bbox,_ = get_img_with_bbox(ann_path)
    if light and len(images_with_bbox) > light_size:
        # Sample without replacement: the same image must not appear twice in the
        # subset (duplicates would also duplicate image ids in a COCO evaluation).
        index = np.random.choice(len(images_with_bbox), light_size, replace=False)
        images_with_bbox = [images_with_bbox[i] for i in index]
    data = CustomImageDataset(ann_path, img_folder, images_with_bbox, transform, rotate)
    return DataLoader(data, batch_size=batch_size, shuffle=shuffle, collate_fn=utils.collate_fn)

In [11]:
def offline_augment_dataloader(batch_size, light, light_size=500):
    """Build the training DataLoader for offline data augmentation.

    The training images are used three times: unchanged, with the random
    horizontal flip enabled, and with the colour transformations.

    Args:
        batch_size (int): number of samples per batch.
        light (bool): when true, keep only a random subset of the training images.
        light_size (int): maximum number of images kept in light mode (default 500).

    Returns:
        DataLoader: shuffled loader over the concatenation of the three datasets.
    """
    train_valid_img,_ = get_img_with_bbox(train_ann_path)
    if light and len(train_valid_img) > light_size:
        # Sample without replacement so that no image is duplicated in the subset.
        index = np.random.choice(len(train_valid_img), light_size, replace=False)
        train_valid_img = [train_valid_img[i] for i in index]
    train_data = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, valid_img=train_valid_img)
    # Copy with the colour transformations (no flip)
    train_data_colored = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder,
                                            valid_img=train_valid_img,transform=colorTranformations)
    # Copy with the random horizontal flip enabled
    train_data_rotated = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder,
                                            valid_img=train_valid_img, rotation = True)

    trainFinal = ConcatDataset([train_data, train_data_rotated, train_data_colored])
    return DataLoader(trainFinal, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)

## Define the 'evaluate' function

In [12]:
# Evaluates the current model using the coco_evaluator passing through a test dataloader
def evaluate(dataloader, coco, nms=True, iou=0.35):
    """Run the global ``model`` on a dataloader and compute the COCO metrics.

    This definition replaces the ``evaluate`` imported from ``engine`` earlier in
    the notebook.

    Args:
        dataloader: loader yielding ``(images, targets)`` batches; each target
            must carry an 'image_id' tensor.
        coco: COCO API object holding the ground truth of the same dataset.
        nms (bool): apply non-maximum suppression to the predictions (default True).
        iou (float): IoU threshold used by the NMS (default 0.35).

    Returns:
        CocoEvaluator: the evaluator after accumulate() and summarize().
    """
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    model.eval()

    with torch.no_grad():
        for images, targets in dataloader:
            images = [image.to(device) for image in images]

            pred = model(images)

            if nms:
                # NMS has to be applied to every image of the batch, not only to
                # the first one (the test loaders use batches of 10 images).
                for image_pred in pred:
                    boxes_to_keep = torchvision.ops.nms(image_pred['boxes'], image_pred['scores'],
                                                        iou_threshold=iou)
                    image_pred['boxes'] = image_pred['boxes'][boxes_to_keep]
                    image_pred['labels'] = image_pred['labels'][boxes_to_keep]
                    image_pred['scores'] = image_pred['scores'][boxes_to_keep]

            outputs = [{k: v.cpu() for k, v in t.items()} for t in pred]
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    return coco_evaluator

## Logs utils

#### Train logs utils

In [13]:
# Converts a smoothed value to a dictionary of its statistics
def smoothed_value_to_str(smoothed_value):
    """Return the median, avg, global_avg, max and value attributes of a meter as a dict."""
    stat_names = ('median', 'avg', 'global_avg', 'max', 'value')
    return {stat: getattr(smoothed_value, stat) for stat in stat_names}


# Converts the train logs from MetricLogger to list
def train_logs_to_lst(logs):
    """Convert per-epoch metric loggers to a list of plain dictionaries.

    Args:
        logs (list): objects exposing a ``meters`` dict (one per epoch).

    Returns:
        list: one dict per epoch mapping each meter name to the dict produced by
        smoothed_value_to_str.
    """
    return [{key: smoothed_value_to_str(meter) for key, meter in log.meters.items()}
            for log in logs]


# Puts the training logs into a json file with time dependent file name
def train_logs_to_json(logs, ftime=None):
    """Write the training logs to 'saved_logs/<ftime>_train_logs.json'.

    Args:
        logs (list): per-epoch metric loggers, see train_logs_to_lst.
        ftime (str, optional): file name prefix. Defaults to the timestamp of the
            call (it used to be frozen at the time the function was defined).

    Returns:
        None
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    train_metric_logs = train_logs_to_lst(logs)
    filename = ftime + "_train_logs.json"

    # open() does not create missing folders
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(train_metric_logs, f, ensure_ascii=False, indent=4)
    return None

#### Valid logs utils

In [14]:
# Merge the dicts of a list
def merge_dict(logs):
    """Merge the single-loss dictionaries of every log entry into one dictionary.

    Args:
        logs (list): each entry is either a sequence of dictionaries (any number
            of them, not only four) or a dictionary that is already merged.

    Returns:
        list: one merged dictionary per entry; entries that are already
        dictionaries are passed through unchanged.
    """
    logs_better = []
    for entry in logs:
        if isinstance(entry, dict):
            # already merged: explicit check instead of a bare except
            logs_better.append(entry)
            continue
        merged = {}
        for part in entry:
            merged.update(part)
        logs_better.append(merged)
    return logs_better


# Converts the valid logs from a list of dictionaries of tensors to plain Python values
# TODO: add if type == list to not do anything if its already a list
def valid_logs_to_lst(valid_logs):
    """Merge each validation log entry and convert every tensor value with .tolist()."""
    merged_logs = merge_dict(valid_logs)
    return [{name: merged_logs[pos][name].cpu().numpy().tolist() for name in merged_logs[pos].keys()}
            for pos in range(len(merged_logs))]


# Puts the cis validation logs into a json file with time dependent file name
def cis_valid_logs_to_json(logs, ftime=None):
    """Write the cis validation logs to 'saved_logs/<ftime>_cis_valid_logs.json'.

    Args:
        logs (list): validation logs, converted with valid_logs_to_lst.
        ftime (str, optional): file name prefix. Defaults to the timestamp of the
            call (it used to be frozen at the time the function was defined).

    Returns:
        None
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_cis_valid_logs.json"

    # open() does not create missing folders
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)
    return None


# Puts the trans validation logs into a json file with time dependent file name
def trans_valid_logs_to_json(logs, ftime=None):
    """Write the trans validation logs to 'saved_logs/<ftime>_trans_valid_logs.json'.

    Args:
        logs (list): validation logs, converted with valid_logs_to_lst.
        ftime (str, optional): file name prefix. Defaults to the timestamp of the
            call (it used to be frozen at the time the function was defined).

    Returns:
        None
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_trans_valid_logs.json"

    # open() does not create missing folders
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)
    return None

## Training the model

In [15]:
def train(dataloader, num_epochs, save_logs=True, save_model=True, print_freq=100):
    """Train the global ``model`` and record the train and validation losses.

    After every epoch the learning-rate scheduler is stepped and the losses are
    computed on the global ``trans_valid_dataloader`` and ``cis_valid_dataloader``.

    Args:
        dataloader: training DataLoader.
        num_epochs (int): number of epochs to run.
        save_logs (bool): write the three log files to 'saved_logs/' (default True).
        save_model (bool): write the model, optimizer and scheduler state dicts to
            'saved_models/' (default True). Inside this function the name hides
            the module-level ``save_model`` function.
        print_freq (int): print frequency forwarded to train_one_epoch.

    Returns:
        tuple: ``(all_train_logs, all_trans_valid_logs, all_cis_valid_logs)``.
        Each validation list holds, per batch and per epoch, a list of
        single-loss dictionaries moved to the CPU.
    """

    def _validation_losses(valid_dataloader):
        """Return the per-batch loss dictionaries of one pass over a validation loader."""
        batch_logs = []
        for images, targets in valid_dataloader: # can do batch of 10 prob.
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.no_grad():
                loss_dict = model(images, targets)
            batch_logs.append([{k: loss.to('cpu')} for k, loss in loss_dict.items()])
        return batch_logs

    # Create the output folders before training: a missing folder would
    # otherwise only be noticed after every epoch has already run.
    if save_logs:
        os.makedirs('saved_logs', exist_ok=True)
    if save_model:
        os.makedirs('saved_models', exist_ok=True)

    # NOTE(review): the model is never switched to eval mode here; presumably this
    # is deliberate so that model(images, targets) keeps returning the loss
    # dictionary during validation -- confirm.
    model.train()

    all_train_logs = []
    all_cis_valid_logs = []
    all_trans_valid_logs = []

    for epoch in range(num_epochs):

        # train for one epoch, printing every `print_freq` iterations
        train_logs = train_one_epoch(model, optimizer, dataloader, device, epoch, print_freq)
        all_train_logs.append(train_logs)

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the validation datasets after training one epoch
        all_trans_valid_logs.extend(_validation_losses(trans_valid_dataloader))
        all_cis_valid_logs.extend(_validation_losses(cis_valid_dataloader))

    filetime = time.strftime("%Y%m%d_%H%M%S")

    if save_logs:

        # save the train, cis valid and trans valid logs
        train_logs_to_json(all_train_logs, filetime)
        cis_valid_logs_to_json(all_cis_valid_logs, filetime)
        trans_valid_logs_to_json(all_trans_valid_logs, filetime)

    if save_model:

        # save the model, the optimizer and the scheduler
        torch.save(model.state_dict(), 'saved_models/' + filetime + '_model.pt')
        torch.save(optimizer.state_dict(), 'saved_models/' + filetime + '_optimizer.pt')
        torch.save(lr_scheduler.state_dict(), 'saved_models/' + filetime + '_scheduler.pt')

    return all_train_logs, all_trans_valid_logs, all_cis_valid_logs


# Interactive Part
#### Parameters before training

In [29]:
# Data augmentation mode: 'none', 'offline' or 'online'
data_augmentation_mode = 'none'

# Model type (1:'predictors', 2:'ROI', 3:'RPN')
model_depth = 3

# Number of epochs
# Time of training to expect (12909 train images ~ 23 minutes/epoch on GTX 1080 Ti, 1000 train images ~ 2.75 minutes/epoch)
num_epochs = 10

# The specific normalization is only used together with data augmentation
normalize = data_augmentation_mode != 'none'

#### Using lightweight mode

In [30]:
# Lightweight configuration: use a subset of the data and few epochs
# to quickly test the code for evaluation (False:0, True:1)
lightweight_mode = 1

light = 1 if lightweight_mode else 0
if light:
    num_epochs = 10
print("Using lightweight mode" if light else "Using non-lightweight mode")

Using lightweight mode


In [31]:
# Check if using the right device before training
# (guarded so the cell also runs on a machine without CUDA instead of raising)
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No CUDA device available: running on CPU'

'NVIDIA GeForce GTX 1080 Ti'

#### Initiation of the dataloaders

In [32]:
# Initiate the dataloaders with batch size from the paper for better comparison and the right options
valid = True
if data_augmentation_mode == 'none':
    train_dataloader = create_dataloader(train_ann_path, 1, light)
elif data_augmentation_mode == 'online':
    train_dataloader = create_dataloader(train_ann_path, 1, light, transform=colorTranformations)
elif data_augmentation_mode == 'offline':
    train_dataloader = offline_augment_dataloader(1, light)
else:
    valid = False
    print('Please enter a valid data_augmentation mode')

if valid:
    # Validation and test data must be deterministic: no random flip and no
    # shuffling. The COCO ground truth is built later by reading the test
    # datasets once, so a random flip would make it disagree with the images
    # the model actually sees during evaluation.
    cis_valid_dataloader = create_dataloader(cis_val_ann_path, 10, light, rotate=False, shuffle=False)
    trans_valid_dataloader = create_dataloader(trans_val_ann_path, 10, light, rotate=False, shuffle=False)
    cis_test_dataloader = create_dataloader(cis_test_ann_path, 10, light, rotate=False, shuffle=False)
    trans_test_dataloader = create_dataloader(trans_test_ann_path, 10, light, rotate=False, shuffle=False)

In [33]:
# Build the COCO API objects of both test datasets for the COCO evaluation later on (takes time)
cis_coco, trans_coco = [get_coco_api_from_dataset(test_loader.dataset)
                        for test_loader in (cis_test_dataloader, trans_test_dataloader)]

creating index...
index created!
creating index...
index created!


#### Specify the model to create and the parameters

In [34]:
# # BEFORE trying a second model in the same kernel, use this to clear the memory:

# model = None
# optimizer = None
# lr_scheduler = None
# with torch.no_grad():
#     torch.cuda.empty_cache()

In [35]:
# Create the model variant selected by `model_depth`, with its optimizer and LR scheduler;
# these three globals are the ones used by train(), evaluate() and save_model().
model, optimizer, lr_scheduler = create_model(model_depth, normalize=normalize)

### This next cell starts the training of the model

In [36]:
# TRAIN: runs `num_epochs` epochs on the training dataloader and keeps the three lists of logs returned by train()
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

Epoch: [0]  [  0/500]  eta: 0:01:18  lr: 0.000001  loss: 0.9548 (0.9548)  loss_classifier: 0.7870 (0.7870)  loss_box_reg: 0.1651 (0.1651)  loss_objectness: 0.0001 (0.0001)  loss_rpn_box_reg: 0.0026 (0.0026)  time: 0.1570  data: 0.0325  max mem: 878
Epoch: [0]  [100/500]  eta: 0:00:51  lr: 0.000061  loss: 0.2581 (0.4946)  loss_classifier: 0.1231 (0.3252)  loss_box_reg: 0.1042 (0.1223)  loss_objectness: 0.0289 (0.0403)  loss_rpn_box_reg: 0.0035 (0.0069)  time: 0.1278  data: 0.0302  max mem: 935
Epoch: [0]  [200/500]  eta: 0:00:39  lr: 0.000121  loss: 0.2456 (0.3910)  loss_classifier: 0.1097 (0.2232)  loss_box_reg: 0.1296 (0.1289)  loss_objectness: 0.0020 (0.0332)  loss_rpn_box_reg: 0.0020 (0.0058)  time: 0.1336  data: 0.0335  max mem: 935
Epoch: [0]  [300/500]  eta: 0:00:26  lr: 0.000181  loss: 0.2492 (0.3447)  loss_classifier: 0.0871 (0.1810)  loss_box_reg: 0.1142 (0.1276)  loss_objectness: 0.0043 (0.0303)  loss_rpn_box_reg: 0.0027 (0.0059)  time: 0.1358  data: 0.0338  max mem: 935
Epoc

Epoch: [5]  [200/500]  eta: 0:00:39  lr: 0.000030  loss: 0.0983 (0.1329)  loss_classifier: 0.0379 (0.0507)  loss_box_reg: 0.0556 (0.0624)  loss_objectness: 0.0096 (0.0152)  loss_rpn_box_reg: 0.0013 (0.0046)  time: 0.1312  data: 0.0328  max mem: 4147
Epoch: [5]  [300/500]  eta: 0:00:26  lr: 0.000030  loss: 0.1167 (0.1328)  loss_classifier: 0.0375 (0.0510)  loss_box_reg: 0.0535 (0.0632)  loss_objectness: 0.0067 (0.0142)  loss_rpn_box_reg: 0.0021 (0.0044)  time: 0.1413  data: 0.0362  max mem: 4147
Epoch: [5]  [400/500]  eta: 0:00:13  lr: 0.000030  loss: 0.1319 (0.1307)  loss_classifier: 0.0385 (0.0499)  loss_box_reg: 0.0610 (0.0619)  loss_objectness: 0.0099 (0.0149)  loss_rpn_box_reg: 0.0017 (0.0040)  time: 0.1384  data: 0.0365  max mem: 4147
Epoch: [5]  [499/500]  eta: 0:00:00  lr: 0.000030  loss: 0.1265 (0.1310)  loss_classifier: 0.0430 (0.0499)  loss_box_reg: 0.0636 (0.0617)  loss_objectness: 0.0060 (0.0149)  loss_rpn_box_reg: 0.0018 (0.0044)  time: 0.1348  data: 0.0346  max mem: 4147


# Checking the log results

##### Ensures that if you hit the training cell, you don't lose the variables containing the logs from the last run

In [None]:
# Keep a second reference to the logs of the last run under separate names
last_train_logs, last_trans_valid_logs, last_cis_valid_logs = (
    all_train_logs,
    all_trans_valid_logs,
    all_cis_valid_logs,
)

##### Converts the logs to lists and the tensors to numpy 

In [None]:
# Convert the logs of the last run to plain Python structures:
# train logs become one dict of meter statistics per epoch, validation logs one dict of loss values per batch.
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

## Results

In [None]:
# To confirm that the data is loaded properly: n is the number of entries in the
# training logs (one per epoch) and is reused by the result and plot cells below
n = len(train_logs)
print(n)

In [None]:
# Train loss to print (here we use global_avg but we can use: value, median, avg, max or global_avg)
# The same loss component is read for training and validation so that it matches
# the 'loss_box_reg' axis label of the plots (the validation curves used to read
# 'loss_rpn_box_reg').
loss_key = 'loss_box_reg'

results_train_loss = [train_logs[i][loss_key]['global_avg'] for i in range(n)]

# Cis valid loss to print (sum over the validation batches of each epoch)
cis_batches = len(cis_valid_dataloader)
results_cis_valid_loss = [] # cis

for i in range(n):
    results_cis_valid_loss.append(
        sum(cis_valid_logs[(cis_batches * i) + j][loss_key] for j in range(cis_batches)))

# Trans valid loss to print (sum over the validation batches of each epoch)
trans_batches = len(trans_valid_dataloader)
results_trans_valid_loss = [] # trans

for i in range(n):
    results_trans_valid_loss.append(
        sum(trans_valid_logs[(trans_batches * i) + j][loss_key] for j in range(trans_batches)))

### Training and valid Plots

In [None]:
# Printing the different plots: training loss on the left, validation losses on the right
fig, ax = plt.subplots(1,2, figsize=(20,6))
epochs = np.arange(1, n + 1)

ax[0].plot(epochs, results_train_loss, label='train')
ax[0].set_title('Train loss per epoch')
ax[0].set_ylabel('loss_box_reg')
ax[0].set_xlabel('epoch')
ax[0].legend()

# (the former plt.title() call here wrote the train title on the current axes
# and was overwritten immediately afterwards, so it has been removed)
ax[1].plot(epochs, results_cis_valid_loss, label='cis')
ax[1].plot(epochs, results_trans_valid_loss, label='trans')
ax[1].set_title('Valid loss per epoch')
ax[1].set_ylabel('loss_box_reg')
ax[1].set_xlabel('epoch')
ax[1].legend()

#### Save the figure to pdf format in the figures folder

In [None]:
# savefig does not create missing folders, so make sure the target folder exists first
os.makedirs("saved_figures", exist_ok=True)
fig.savefig("saved_figures/" + time.strftime("%Y%m%d_%H%M%S") + "_figure.pdf")

# Evaluate on COCO detection metrics

### Test on COCO metrics from data loaders
##### 'For evaluation, we consider a detected box to be correct if its IoU ≥ 0.5 with a ground truth box.'

We need to look at the precision score with IoU=0.5, area=all and maxDets=100.
For the recall score, the default IoU range is IoU=0.5:0.95.

In [None]:
# COCO evaluation of the current model on the cis test set (NMS enabled with the default threshold of evaluate)
# takes +- 25min to run on cis_test with full dataloader
cis_coco_evaluator = evaluate(cis_test_dataloader, cis_coco)

In [None]:
# COCO evaluation of the current model on the trans test set (NMS enabled with the default threshold of evaluate)
# takes +- 25min to run on trans_test with full dataloader
trans_coco_evaluator = evaluate(trans_test_dataloader, trans_coco)

In [None]:
# Print the COCO summary of the cis test set again under a descriptive heading.
# NOTE(review): the heading is hard-coded; it does not follow num_epochs, model_depth or
# data_augmentation_mode (set to 'none' above) -- update it by hand to match the run.
print('cis test 10 epochs rpn + roi online data augmentation')
print('_'*80)
cis_coco_evaluator.summarize()

In [None]:
# Print the COCO summary of the trans test set again under a descriptive heading.
# NOTE(review): the heading is hard-coded; it does not follow num_epochs, model_depth or
# data_augmentation_mode (set to 'none' above) -- update it by hand to match the run.
print('trans test 10 epochs rpn + roi online data augmentation')
print('_'*80)
trans_coco_evaluator.summarize()

## (OPTIONAL) Make Predictions with a model

### Load 10 random predictions

In [None]:
# Take one batch of the trans validation dataloader and run the model on it
# (despite their names, train_features/train_labels hold validation images and targets)
# WARNING: Takes GPU ram space
train_features, train_labels = next(iter(trans_valid_dataloader))
image = [batch_image.to(device) for batch_image in train_features]

model.eval()
with torch.no_grad():
    pred = model(image)

In [None]:
# Prints 10 images with the ground truth and the predictions before and after NMS
def _draw_boxes(axis, boxes):
    """Overlay every [x_min, y_min, x_max, y_max] box of `boxes` on `axis` as a red rectangle."""
    for x_min, y_min, x_max, y_max in boxes.cpu().tolist():
        axis.add_patch(patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                         linewidth=2, edgecolor='r', facecolor='none'))


for image_i in range(len(image)):
    fig, ax = plt.subplots(1,3,figsize=(24,16))
    for axis in ax:
        axis.imshow(train_features[image_i][0].squeeze(),cmap="gray")

    # Ground truth: draw every annotated box, not only the first one
    _draw_boxes(ax[0], train_labels[image_i]['boxes'])
    ax[0].set_title('Ground truth')

    # Predictions
    _draw_boxes(ax[1], pred[image_i]['boxes'])
    ax[1].set_title('Pred')

    # Predictions after NMS
    iou_threshold = 0.001 # param to tune
    boxes_to_keep = torchvision.ops.nms(pred[image_i]['boxes'], pred[image_i]['scores'], iou_threshold = iou_threshold)
    _draw_boxes(ax[2], pred[image_i]['boxes'][boxes_to_keep])
    ax[2].set_title('After NMS')

In [None]:
# Show one image of the last batch of predictions, chosen by index, in three panels:
# ground truth, raw predictions and predictions kept by NMS
image_i = 3 # from 0 to 9 included

fig, ax = plt.subplots(1,3,figsize=(24,16))

gt_boxes = train_labels[image_i]['boxes']
pred_boxes = pred[image_i]['boxes']

# Predictions after NMS
iou_threshold = 0.01 # param to tune
boxes_to_keep = torchvision.ops.nms(pred_boxes, pred[image_i]['scores'], iou_threshold = iou_threshold).cpu()

# One (title, boxes, indices of the boxes to draw) triple per panel, left to right
panels = (
    ('Ground truth', gt_boxes, range(len(gt_boxes))),
    ('Pred', pred_boxes, range(len(pred_boxes))),
    ('After NMS', pred_boxes, boxes_to_keep),
)

for axis, (panel_title, panel_boxes, box_indices) in zip(ax, panels):
    axis.imshow(train_features[image_i][0].squeeze(),cmap="gray")
    for i in box_indices:
        box = panel_boxes[i]
        rect = patches.Rectangle((box[0].cpu(), box[1].cpu()),
                                 (box[2]-box[0]).cpu(),
                                 (box[3]-box[1]).cpu(),
                                 linewidth=2, edgecolor='r', facecolor='none')
        axis.add_patch(rect)
    axis.set_title(panel_title)

# Method 1 (Subspace alignment based Domain adaptation)

## Some new imports

In [None]:
# More imports needed to use the method
import torchvision.ops.boxes as bops
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from torch import nn

### Some utility functions

In [None]:
def center_data(X, center_row=False):
    """
    Standardize a 2-D data matrix with scikit-learn's ``StandardScaler``.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        center_row (bool): when True, each row is standardized first (by
            fitting the scaler on ``X.T``) and the columns are standardized
            afterwards; when False only the columns are standardized.

    Returns:
        The standardized matrix as returned by ``StandardScaler.fit_transform``,
        with the same shape as ``X``.
    """
    if not center_row:
        # Column-wise standardization only
        return StandardScaler().fit_transform(X)

    # Row-wise pass: the scaler works per column, so feed it the transpose
    rows_standardized = StandardScaler().fit_transform(X.T)

    # Column-wise pass on the matrix put back in its original orientation
    return StandardScaler().fit_transform(rows_standardized.T)

In [None]:
class FastRCNNPredictor_custom(nn.Module):
    """
    Classification + bounding box regression layers for Fast R-CNN, each
    preceded by a frozen, bias-free linear projection.

    Both heads first multiply the input features by the fixed matrix
    ``m_transfo`` and then apply a trainable linear layer.

    Args:
        in_channels (int): size of the projected features, i.e. the number of
            rows of ``m_transfo``
        num_classes (int): number of output classes (including background)
        m_transfo (Tensor): projection matrix of shape
            (in_channels, feature_dim); stored as a non-trainable weight

    Raises:
        ValueError: if ``m_transfo`` is not 2-D or its first dimension differs
            from ``in_channels``.
    """

    def __init__(self, in_channels, num_classes, m_transfo):
        super(FastRCNNPredictor_custom, self).__init__()

        if m_transfo.dim() != 2 or m_transfo.shape[0] != in_channels:
            raise ValueError(
                "m_transfo must have shape (in_channels, feature_dim); got "
                f"{tuple(m_transfo.shape)} with in_channels={in_channels}")

        # Input width comes from the projection matrix rather than a hard-coded 1024
        feature_dim = m_transfo.shape[1]

        self.cls_score = nn.Sequential(nn.Linear(in_features=feature_dim,
                                                 out_features=in_channels,
                                                 bias=False),
                                       nn.Linear(in_channels, num_classes))

        self.bbox_pred = nn.Sequential(nn.Linear(in_features=feature_dim,
                                                 out_features=in_channels,
                                                 bias=False),
                                       nn.Linear(in_channels, num_classes * 4))

        # Each head gets its own frozen copy so the two parameters never share storage
        self.cls_score[0].weight = nn.Parameter(m_transfo.detach().clone(), requires_grad=False)
        self.bbox_pred[0].weight = nn.Parameter(m_transfo.detach().clone(), requires_grad=False)

    def forward(self, x):
        """Return (class scores, box regression deltas) for a batch of box features."""
        if x.dim() == 4:
            # 4-D inputs are only accepted when the spatial dimensions are 1x1
            assert list(x.shape[2:]) == [1, 1]
        x = x.flatten(start_dim=1)
        scores = self.cls_score(x)
        bbox_deltas = self.bbox_pred(x)

        return scores, bbox_deltas

### Using the same model that we just trained

Otherwise, you can just load a model by uncommenting the following code

In [None]:
# # UNCOMMENT THE FOLLOWING LINE TO LOAD A MODEL:
# model, optimizer, lr_scheduler = load_model(3, "10_rpn_roi_4")

In [None]:
# Number of principal components kept by PCA (used for both the source and the target matrix)
d_pca = 512

# Saving switches:
#   save_matrixes           -> gates saving of the raw feature matrices (X_source / X_target)
#   save_projected_matrixes -> gates saving of the PCA outputs (X_source_proj / X_target_proj)
save_matrixes = False
save_projected_matrixes = True
# Suffix appended to the name of every saved matrix file
save_name = 'col_10_rpn_roi_online_512'

**Construct source matrix:** 

We keep the output of `model.roi_heads.box_head` (a vector of size 1024) as the feature representation of each bounding box extracted by the RPN (region proposal network). A box representation is stacked onto the source matrix only if the box has an IoU >= `thres_IoU` with at least one ground-truth box of the given image.

In [None]:
# Minimum IoU with a ground-truth box for a proposal's features to be kept
thres_IoU = 0.50
count = 0

model.eval()

# Forward hooks capture, for every forward pass:
#   - the RPN output (its first element is the list of per-image proposal boxes)
#   - the box_head output (one feature vector per proposal, all images stacked)
rpn_outputs = []
box_head_outputs = []
rpn_hook = model.rpn.register_forward_hook(
    lambda module, inputs, output: rpn_outputs.append(output))
box_head_hook = model.roi_heads.box_head.register_forward_hook(
    lambda module, inputs, output: box_head_outputs.append(output))

# Chunks are collected in a list and concatenated once at the end
source_chunks = []

try:
    for images, targets in train_dataloader:
        images = [image.to(device) for image in images]

        count += 1

        if count%100 == 0:
            print(count)

        rpn_outputs.clear()
        box_head_outputs.clear()

        with torch.no_grad():
            model(images)

        # One proposal tensor per image; the number of proposals is read from the
        # tensors instead of assuming exactly 1000
        proposals = [p.cpu() for p in rpn_outputs[0][0]]
        features = box_head_outputs[0].cpu().split([len(p) for p in proposals])

        # NOTE(review): proposals are expressed in the frame of the image as seen by
        # the RPN; this assumes the ground-truth boxes share that frame (i.e. the
        # model's internal transform does not rescale the images) — confirm.
        for coords, feat, target in zip(proposals, features, targets):
            gt = target['boxes'].cpu().reshape(-1, 4)

            # Keep a proposal if it overlaps enough with at least one ground-truth box
            keep = (bops.box_iou(gt, coords) >= thres_IoU).any(dim=0)
            source_chunks.append(feat[keep])
finally:
    # Always detach the hooks, even if a forward pass fails
    rpn_hook.remove()
    box_head_hook.remove()

X_source = torch.cat(source_chunks, dim=0) if source_chunks else torch.tensor([])

#### Save 1

In [None]:
# Save the raw source feature matrix (only when save_matrixes is enabled)
if save_matrixes:
    # torch.save does not create missing directories
    os.makedirs('saved_matrixes', exist_ok=True)
    torch.save(X_source, 'saved_matrixes/X_source_05_' + save_name + '.pt')

In [None]:
# Standardize the source features column-wise with center_data (StandardScaler with
# center_row left at False)
X_source_scaled = center_data(X_source)

In [None]:
# Fit PCA on the standardized source features and keep the first d_pca components.
# `components_` holds the principal axes as rows, shape (d_pca, n_features): this is
# the basis of the source subspace, not the projected data.
# random_state is fixed so the fit is reproducible when PCA picks its randomized solver.
pca = PCA(n_components=d_pca, random_state=0)
pca.fit(X_source_scaled)

X_source_proj = torch.from_numpy(pca.components_)

#### Save 2

In [None]:
# Save the source PCA basis (only when save_projected_matrixes is enabled)
if save_projected_matrixes:
    # torch.save does not create missing directories
    os.makedirs('saved_matrixes', exist_ok=True)
    torch.save(X_source_proj, 'saved_matrixes/X_source_proj_05_' + save_name + '.pt')

### Target data with batch size 1

In [None]:
# Dataloader over the trans test annotations with a batch size of 1 (second argument),
# so that each forward pass below handles a single image
trans_test_batch1_dataloader = create_dataloader(trans_test_ann_path, 1, light=light)

 **Construct target matrix:** 
 
We keep the output of `model.roi_heads.box_head` (a vector of size 1024) as the feature representation of each bounding box
 extracted by the RPN (region proposal network). A box representation is stacked onto the target matrix only if the predicted bounding box associated with the feature has a confidence score >= `thres_conf_score` (since we don't use target labels, we can't use the IoU here).


In [None]:
# 30 minutes
# Minimum confidence score for a detection's features to be kept
thres_conf_score = 0.50
count = 0

model.eval()

# Forward hook capturing the backbone feature maps of every forward pass
backbone_outputs = []
backbone_hook = model.backbone.register_forward_hook(
    lambda module, inputs, output: backbone_outputs.append(output))

# Chunks are collected in a list and concatenated once at the end
target_chunks = []

try:
    for images, targets in trans_test_batch1_dataloader: # trans location valid AND test ?
        # Target labels are deliberately unused here
        images = [image.to(device) for image in images]

        count += 1

        if count%100 == 0:
            print(count)

        backbone_outputs.clear()

        with torch.no_grad():
            res = model(images)

            # Re-pool the features of the final detections and run them through box_head.
            # NOTE(review): this assumes the boxes in `res` and the captured backbone
            # features share the same coordinate frame as `images` (no rescaling inside
            # the model's transform) — confirm.
            box_features = model.roi_heads.box_roi_pool(
                backbone_outputs[0],
                [r['boxes'] for r in res],
                [img.shape[-2:] for img in images])
            box_features = model.roi_heads.box_head(box_features)

        # One mask entry per detection, in the order the boxes were passed to the pooler
        keep = torch.cat([r['scores'] >= thres_conf_score for r in res])
        target_chunks.append(box_features[keep].cpu())
finally:
    # Always detach the hook, even if a forward pass fails
    backbone_hook.remove()

X_target = torch.cat(target_chunks, dim=0) if target_chunks else torch.tensor([])


In [None]:
# Save the raw target feature matrix (only when save_matrixes is enabled)
if save_matrixes:
    # torch.save does not create missing directories
    os.makedirs('saved_matrixes', exist_ok=True)
    torch.save(X_target, 'saved_matrixes/X_target_05_' + save_name + '.pt')

In [None]:
# Standardize the target features column-wise with center_data (StandardScaler with
# center_row left at False)
X_target_scaled = center_data(X_target)

In [None]:
# Fit PCA on the standardized target features and keep the first d_pca components.
# `components_` holds the principal axes as rows, shape (d_pca, n_features): this is
# the basis of the target subspace, not the projected data.
# random_state is fixed so the fit is reproducible when PCA picks its randomized solver.
pca_proj = PCA(n_components=d_pca, random_state=0)
pca_proj.fit(X_target_scaled)

X_target_proj = torch.from_numpy(pca_proj.components_)

In [None]:
# Share of variance explained by each of the d_pca target components that were kept
plt.plot(pca_proj.explained_variance_ratio_) # we keep d dimensions
plt.xlabel('principal component index')
plt.ylabel('explained variance ratio')
plt.title('Target PCA: explained variance ratio per component')
plt.grid()

In [None]:
# Save the target PCA basis (only when save_projected_matrixes is enabled)
if save_projected_matrixes:
    # torch.save does not create missing directories
    os.makedirs('saved_matrixes', exist_ok=True)
    torch.save(X_target_proj, 'saved_matrixes/X_target_proj_05_' + save_name + '.pt')

### Transformation matrix M

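$M$ is obtained by minimizing the following Bregman matrix divergence, for which the paper gives a closed-form solution:

$$M^* = \arg\min_M \lVert X_S M - X_T \rVert_F^2 = X_S^\top X_T$$

where the columns of $X_S$ and $X_T$ are the source and target PCA bases (in the code, `X_source_proj` and `X_target_proj` store these bases as rows).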

In [None]:
# Alignment matrix: source PCA basis times the transposed target PCA basis.
# Both operands come from PCA `components_` (one component per row), so M is square
# with one row/column per kept component.
M = torch.matmul(X_source_proj, X_target_proj.T)

### Project source data into target aligned source subspace

In [None]:
# Target-aligned source basis: the transposed source PCA basis multiplied by M.
# Its transpose is later used as the frozen projection weight of the new box predictor.
Xa = torch.matmul(X_source_proj.T,M)

### Make the new model

In [None]:
# Select the PyTorch device: the GPU when CUDA is available, the CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Tensor.to() is not in-place, so the former bare `M.to(device)` / `Xa.to(device)`
# calls had no effect and were dropped. The projection weights built from Xa are moved
# to the device together with the rest of the model by model.to(device) below.

# two classes: background plus a single foreground class
num_classes = 2

# get the model using our helper function
model = get_model_from_pretrained_rpn(num_classes)

# move model to the right device
model.to(device)

# load fine-tuned weights from the model of the projections
# (map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine)
model.load_state_dict(torch.load('saved_models/10_rpn_roi_4_model.pt', map_location=device))

# freeze all existing weights
for param in model.parameters():
    param.requires_grad = False

# Replace the box predictor: features are first projected with the frozen matrix Xa.T
# down to M.shape[0] dimensions, then fed to the new trainable layers
model.roi_heads.box_predictor = FastRCNNPredictor_custom(M.shape[0], num_classes, Xa.T.float())

# move model (including the new predictor) to the right device
model.to(device)

# construct an optimizer
# We will only retrain model.roi_heads.box_predictor (2 last layers)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

# Multiply the learning rate by 0.1 at epochs 5 and 10
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[5,10], gamma=0.1)

In [None]:
# weights to learn: print the shape of every trainable parameter
# (iterates over the list itself rather than assuming it holds exactly 4 tensors)
for p in params:
    print(p.shape)

In [None]:
# Shapes of the tensors registered in the optimizer's first parameter group
for weight in optimizer.param_groups[0]['params']:
    print(weight.shape)

## Before training

In [None]:
# PARAMETERS TO TUNE BEFORE TRAINING
num_epochs = 15

# CHECK DEVICE BEFORE TRAINING
# get_device_name raises when no CUDA device is present, so fall back to 'cpu'
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'

### This next cell starts the training of the model

In [None]:
# TRAIN
# Calls `train` on the training dataloader for `num_epochs` epochs and unpacks the three
# returned values (presumably the train, trans-validation and cis-validation logs,
# going by the variable names — confirm against `train`).
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

## Checking the log results

##### Ensures that if you hit the training cell, you don't lose the variables containing the logs from the last run

In [None]:
# Keep the logs of the latest run under separate `last_*` names
last_train_logs = all_train_logs
last_trans_valid_logs = all_trans_valid_logs
last_cis_valid_logs = all_cis_valid_logs

##### Converts the logs to lists and the tensors to numpy 

In [None]:
# Convert the raw logs with the train_logs_to_lst / valid_logs_to_lst helpers
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

In [None]:
# Keep the logs of the latest run under separate `last_*` names
# (the repeated `last_train_logs` assignment was removed)
last_train_logs = all_train_logs
last_trans_valid_logs = all_trans_valid_logs
last_cis_valid_logs = all_cis_valid_logs

In [None]:
# Convert the raw logs with the train_logs_to_lst / valid_logs_to_lst helpers
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

In [None]:
def _sum_loss_per_epoch(valid_logs, n_epochs, key='loss_box_reg'):
    """Sum `key` over the validation batches of each epoch.

    `valid_logs` is a flat list with one entry per validation batch, all epochs
    concatenated. The number of batches per epoch is derived from the list length
    instead of being hard-coded.

    Raises:
        ValueError: if the log length is not a multiple of `n_epochs`.
    """
    n_batches, remainder = divmod(len(valid_logs), n_epochs)
    if remainder:
        raise ValueError(
            f"{len(valid_logs)} validation log entries cannot be split evenly "
            f"into {n_epochs} epochs")
    return [sum(entry[key] for entry in valid_logs[epoch * n_batches:(epoch + 1) * n_batches])
            for epoch in range(n_epochs)]


# Train loss to print (here we use global_avg but we can use: value, median, avg, max or global_avg)
results_train_loss = [train_logs[i]['loss_box_reg']['global_avg'] for i in range(num_epochs)]

# Cis valid loss to print (summed over the validation batches of each epoch)
results_cis_valid_loss = _sum_loss_per_epoch(cis_valid_logs, num_epochs)

# Trans valid loss to print (summed over the validation batches of each epoch)
results_trans_valid_loss = _sum_loss_per_epoch(trans_valid_logs, num_epochs)

In [None]:
# Plot the train loss (left) and the cis/trans validation losses (right) per epoch
fig, ax = plt.subplots(1,2, figsize=(20,6))
epoch_axis = np.arange(1, num_epochs + 1)

ax[0].plot(epoch_axis, results_train_loss, label='train')
ax[0].set_title('Train loss per epoch')
ax[0].set_ylabel('loss_box_reg')
ax[0].set_xlabel('epoch')
ax[0].legend()

# The former plt.title(...) call was dropped: it targeted the right-hand axes and was
# immediately overwritten by the set_title below.
ax[1].plot(epoch_axis, results_cis_valid_loss, label='cis')
ax[1].plot(epoch_axis, results_trans_valid_loss, label='trans')
ax[1].set_title('Valid loss per epoch')
ax[1].set_ylabel('loss_box_reg')
ax[1].set_xlabel('epoch')
ax[1].legend()

In [None]:
# Save the loss figure under a timestamped name; savefig does not create missing directories
os.makedirs("saved_figures", exist_ok=True)
fig.savefig("saved_figures/" + time.strftime("%Y%m%d_%H%M%S") + "_figure.pdf")

## Evaluate the new model

In [None]:
# takes +- 15min to run on cis_test
# NOTE(review): `evaluate` is called here as evaluate(dataloader, coco); it is
# presumably a notebook-local definition shadowing the `evaluate` imported from
# `engine` at the top of the notebook — confirm.
cis_coco_evaluator_method = evaluate(cis_test_dataloader, cis_coco)

In [None]:
# trans with method 3
# Replace the frozen projection (first Linear layer of both heads) with the target PCA
# basis before evaluating on the trans split. This mutates `model` in place: every
# evaluation run after this cell uses the target projection, so run the cis evaluation
# above first if it should use the original projection.
model.roi_heads.box_predictor.cls_score[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False) 
model.roi_heads.box_predictor.bbox_pred[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False)
# Move the newly assigned weights to the same device as the rest of the model
model.to(device)

In [None]:
# Evaluate on the trans test split (the cis_test evaluation above takes +- 15min;
# expect a comparable run time here — TODO confirm)
trans_coco_evaluator_method = evaluate(trans_test_dataloader, trans_coco)

In [None]:
# Header built from the actual settings (the old text hard-coded "15 epochs & d=100")
print(f'cis test 10 epochs rpn roi 4, method3.1 with {num_epochs} epochs & d={d_pca}')
print('_'*80)
cis_coco_evaluator_method.summarize()

In [None]:
# Header built from the actual settings (the old text hard-coded "15 epochs & d=100")
print(f'trans test 10 epochs rpn+roi 4, method3.1 with {num_epochs} epochs & d={d_pca}')
print('_'*80)
trans_coco_evaluator_method.summarize()