# Imports

In [1]:
import os
import json
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torchvision
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from torchmetrics.detection.map import MeanAveragePrecision
from PIL import Image
import pycocotools

In [2]:
# Imports local modules downloaded from TorchVision repo v0.8.2, references/detection
# https://github.com/pytorch/vision/tree/v0.8.2/references/detection
import utils
import transforms
import coco_eval
from engine import train_one_epoch, evaluate

In [3]:
# Imports from local lib files
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import _get_iou_types 

# Functions and initiations

## File paths

In [4]:
# Set the paths to the annotation files that will retrieve the images with the split based on the annotations
output_path = 'output'
img_folder = 'eccv_18_all_images_sm'
cis_test_ann_path = 'eccv_18_annotation_files/cis_test_annotations.json'
cis_val_ann_path = 'eccv_18_annotation_files/cis_val_annotations.json'
train_ann_path = 'eccv_18_annotation_files/train_annotations.json'
trans_test_ann_path = 'eccv_18_annotation_files/trans_test_annotations.json'
trans_val_ann_path = 'eccv_18_annotation_files/trans_val_annotations.json'

# Load the json files of the annotations for better exploring of each images
cis_test_ann = json.load(open(cis_test_ann_path))
cis_val_ann = json.load(open(cis_val_ann_path))
train_ann = json.load(open(train_ann_path))
trans_test_ann = json.load(open(trans_test_ann_path))
trans_val_ann = json.load(open(trans_val_ann_path))

## Utils

In [5]:
# Sets the device for pytorch
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Transformation

In [6]:
# Make and horizontal flip data transformation with 50% chance to use as data augmentation in a data loader
# In paper :  ' ... and employ horizontal flipping for data augmentation. ( for detection)

import transforms as T   # from git hub repo
import torchvision.transforms as TorchTrans
# In paper :  ' ... and employ horizontal flipping for data augmentation. ( for detection)

colorTranformations = torch.nn.Sequential(
                      TorchTrans.RandomInvert(1), # or 0.6
                      TorchTrans.ColorJitter([.2,.3], [0.7,0.9], [.1,0.12])#jitter2 = T.ColorJitter([.2,.3], [0.7,0.9],  hue=.1)
)

### Images and dataset

In [7]:
# Method that returns a list with the idx of images with at least one bounding box (img_wbbox) and a 
# list with the number of bbox for each valid image (num_bbox)
def get_img_with_bbox(file_path):
  
    file = json.load(open(file_path))
    img_wbbox = []
    num_bbox = []

    for i in range(len(file['images'])):
        bboxes = [file['annotations'][j]['bbox'] 
                  for j in range(len(file['annotations'])) 
                  if file['annotations'][j]['image_id']==file['images'][i]['id'] 
                  and 'bbox' in file['annotations'][j].keys()]

        if len(bboxes)!=0:
            img_wbbox.append(i)

            num_bbox.append(len(bboxes))

    return img_wbbox, num_bbox

In [10]:
# Class used to create a custom dataset
class CustomImageDataset(Dataset):
    def __init__(self, label_path, img_dir, valid_img, transform = None, rotation = False):
        self.label_file = json.load(open(label_path))
        self.img_dir = img_dir
        self.transform = transform
        self.valid_img = valid_img
        self.rotation = rotation  
        self.rotate = T.RandomHorizontalFlip(0.5)
    
    def __len__(self):
        return len(self.valid_img)

    def __getitem__(self, idx):
        
        idx = self.valid_img[idx] # consider only images with bbox annotations
        img_path = os.path.join(self.img_dir, self.label_file['images'][idx]['file_name'])
        image = read_image(img_path)

        conv = torchvision.transforms.ToTensor()
        # if image.shape[0]==1:
        # some images have only one channel, we convert them to rgb
        image = Image.open(img_path).convert("RGB")
        image = conv(image)

        boxes = [self.label_file['annotations'][j]['bbox'] 
                 for j in range(len(self.label_file['annotations'])) 
                 if self.label_file['annotations'][j]['image_id']==self.label_file['images'][idx]['id']]
        
        label = [self.label_file['annotations'][j]['category_id'] 
                 for j in range(len(self.label_file['annotations'])) 
                 if self.label_file['annotations'][j]['image_id']==self.label_file['images'][idx]['id']]

        # transform bbox coords to adjust for resizing
        scale_x = image.shape[2] / self.label_file['images'][idx]['width'] 
        scale_y = image.shape[1] / self.label_file['images'][idx]['height']

        boxes = torch.as_tensor(boxes)
        for i in range(boxes.shape[0]):
            boxes[i][0] = torch.round(boxes[i][0] * scale_x)
            boxes[i][1] = torch.round(boxes[i][1] * scale_y)
            boxes[i][2] = torch.round(boxes[i][2] * scale_x)
            boxes[i][3] = torch.round(boxes[i][3] * scale_y)

            boxes[i][2] = boxes[i][0] + boxes[i][2] # to transform to pytorch bbox format
            boxes[i][3] = boxes[i][1] + boxes[i][3]

        label = torch.as_tensor(label)
        label = torch.where(label==30,0,1)  # 0 if empty (categ id = 30), 1 if animal
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = label
        target["image_id"] = image_id
        target['area']=area
        target['iscrowd']=iscrowd
        
        
        if self.rotation:
            image, target= self.rotate(image, target)
        if self.transform:
            image = self.transform(image)
        return image, target

## Create the model

### Pre-trained models
Inspred from https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=YjNHjVMOyYlH

### Model with only the last layer to train (CNN layers)

In [11]:
# Get a pretrained model and set to train the last layer (CNN : model 1)
def get_model_from_pretrained_cnn(num_classes):

    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    for param in model.parameters(): # to freeze all existing weights
        param.requires_grad = False

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


# Get a pretrained model and set to train the last 2 layers (ROI + CNN : model 2)
def get_model_from_pretrained_roi(num_classes):

    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    for param in model.parameters(): # to freeze all existing weights
        param.requires_grad = False

    for param in model.roi_heads.parameters():
        param.requires_grad = True

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


# Get a pretrained model and set to train the last 3 layers (RPN + ROI + CNN : model 3)
def get_model_from_pretrained_rpn(num_classes):

    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    for param in model.parameters(): # to freeze all existing weights

        param.requires_grad = False

    for param in model.roi_heads.parameters():

        param.requires_grad = True

    for param in model.rpn.parameters():

        param.requires_grad = True

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


# Create a model based on a type preference between the 3 proposed
def create_model(model_type, normalize=False, num_classes=2, milestones=[5, 10]):

    # our dataset has two classes only - background and person
    num_classes = num_classes

    # get the model from the type we want using our helper function
    if model_type==1 or model_type=='cnn':
        model = get_model_from_pretrained_cnn(num_classes)
    elif model_type==2 or model_type=='roi':
        model = get_model_from_pretrained_roi(num_classes)
    elif model_type==3 or model_type=='rpn':
        model = get_model_from_pretrained_rpn(num_classes)
    else:
        return 'Please select a valid model. 1:CNN - 2:ROI - 3:RPN'

    # move model to the right device
    model.to(device)
    
    ## Mean and Std by chanel by pixel from the training set.  
    if normalize:
        model.transform.image_mean = [0.3321, 0.3406, 0.3210] # mean = [0.3321, 0.3406, 0.3210]
        model.transform.image_std = [0.2359, 0.2369, 0.2313] # std = [0.2359, 0.2369, 0.2313]

    # construct an SGD optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

    # like in the paper, construct the scheduler
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones = milestones, gamma=0.1)
    
    return model, optimizer, lr_scheduler


# Save the model, the optimizer and the scheduler into 3 separate files (~165MB)
def save_model(file_name = time.strftime("%Y%m%d_%H%M%S")):
    filename = file_name

    torch.save(model.state_dict(), 'saved_models/' + filename + '_model.pt')
    torch.save(optimizer.state_dict(), 'saved_models/' + filename + '_optimizer.pt')
    torch.save(lr_scheduler.state_dict(), 'saved_models/' + filename + '_scheduler.pt')
    print("Succesfully saved!")
    return None


# Load a model, an optimizer and a schduler into 3 different variables
def load_model(model_type, model_type_file_name, num_classes=2, milestones=[5, 10]):
    model, optimizer, lr_scheduler = create_model(model_type, num_classes, milestones)
    
    # load the model, the optimizer and the scheduler
    model.load_state_dict(torch.load('saved_models/' + model_type_file_name + '_model.pt'))
    optimizer.load_state_dict(torch.load('saved_models/' + model_type_file_name + '_optimizer.pt'))
    lr_scheduler.load_state_dict(torch.load('saved_models/' + model_type_file_name + '_scheduler.pt'))
    
    return model, optimizer, lr_scheduler

## Create the dataloaders
To load the data of the dataset efficiently for the model

In [12]:
# Create the full/light dataloader with the full/light dataset
def create_dataloader(ann_path, batch_size, transform = None, rotate=True, light=False, shuffle=True):
    images_with_bbox,_ = get_img_with_bbox(ann_path)
    if light:
        index = np.random.choice(range(len(images_with_bbox)), 100)
        images_with_bbox = [images_with_bbox[i] for i in index]
    data = CustomImageDataset(ann_path, img_folder, images_with_bbox, transform, rotate)
    return DataLoader(data, batch_size=batch_size, shuffle=shuffle, collate_fn=utils.collate_fn)

## Define the 'evaluate' fonction

In [13]:
# Evaluates the current model using the coco_evaluator passing through a test dataloader
def evaluate(dataloader, coco, nms=True, iou=0.35):
    apply_nms = nms
    iou_threshold = iou # param to potentially tune (threshold for nms)
    the_data_loader = dataloader # change to test set
    
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    model.eval()

    for images, targets in the_data_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        with torch.no_grad():

            pred=model(images)

            if apply_nms:
                boxes_to_keep = torchvision.ops.nms(pred[0]['boxes'], pred[0]['scores'], iou_threshold=iou_threshold).cpu()
                pred[0]['boxes'] = pred[0]['boxes'][boxes_to_keep]
                pred[0]['labels'] = pred[0]['labels'][boxes_to_keep]
                pred[0]['scores'] = pred[0]['scores'][boxes_to_keep]

            outputs = [{k: v.cpu() for k, v in t.items()} for t in pred]
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)
    
    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    
    return coco_evaluator

## Logs utils

#### Train logs utils

In [14]:
# Converts the smoothed values to a dictionnary of each values
def smoothed_value_to_str(smoothed_value):
    d_values = {}
    d_values['median'] = smoothed_value.median
    d_values['avg'] = smoothed_value.avg
    d_values['global_avg'] = smoothed_value.global_avg
    d_values['max'] = smoothed_value.max
    d_values['value'] = smoothed_value.value
    return d_values


# Converts the train logs from MetricLogger to list
def train_logs_to_lst(logs):
    lst = []
    for i in range(len(logs)):
        d = {}
        for key in logs[i].meters.keys():
            d[key] = smoothed_value_to_str(logs[i].meters[key])
        lst.append(d)
    return lst


# Puts the training logs into a json file with time dependent file name
def train_logs_to_json(logs, ftime=time.strftime("%Y%m%d_%H%M%S")):
    train_metric_logs = train_logs_to_lst(logs)
    filename = ftime + "_train_logs.json"
    
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(train_metric_logs, f, ensure_ascii=False, indent=4)
    return None


# Converts the train logs from MetricLogger to list
def train_logs_to_lst(logs):
    lst = []
    for i in range(len(logs)):
        d = {}
        for key in logs[i].meters.keys():
            d[key] = smoothed_value_to_str(logs[i].meters[key])
        lst.append(d)
    return lst


# Puts the training logs into a json file with time dependent file name
def train_logs_to_json(logs, ftime=time.strftime("%Y%m%d_%H%M%S")):
    train_metric_logs = train_logs_to_lst(logs)
    filename = ftime + "_train_logs.json"
    
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(train_metric_logs, f, ensure_ascii=False, indent=4)
    return None

#### Valid logs utils

In [15]:
# Merge the dicts of a list 
def merge_dict(logs):
    logs_better = []
    try:
        for i in range(len(logs)):
            logs_better.append({**logs[i][0], **logs[i][1], **logs[i][2], **logs[i][3]})
        return logs_better
    except:
        print(logs[0])
        logs_better = logs
        return logs_better
    return None


# Converts the valid logs from list of dictionnaries to string
# TODO: add if type == list to not do anything if its already a list
def valid_logs_to_lst(valid_logs):
    logs = merge_dict(valid_logs)
    lst = []
    for i in range(len(logs)):
        d = {}
        for key in logs[i].keys():
            d[key] = logs[i][key].cpu().numpy().tolist()
        lst.append(d)
    return lst


# Puts the cis validation logs into a json file with time dependent file name
def cis_valid_logs_to_json(logs, ftime=time.strftime("%Y%m%d_%H%M%S")):
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_cis_valid_logs.json"
    
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)
    return None


# Puts the trans validation logs into a json file with time dependent file name
def trans_valid_logs_to_json(logs, ftime=time.strftime("%Y%m%d_%H%M%S")):
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_trans_valid_logs.json"
    
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)
    return None

## Training the model

In [16]:
def train(dataloader, num_epochs, save_logs=True, save_model=True, print_freq=100):
    
    model.train()
    
    all_train_logs = []
    all_cis_valid_logs = []
    all_trans_valid_logs = []

    for epoch in range(num_epochs):
        
        # train for one epoch, printing every 100 images
        train_logs = train_one_epoch(model, optimizer, dataloader, device, epoch, print_freq)
        all_train_logs.append(train_logs)
        
        # update the learning rate
        lr_scheduler.step()
        
        # evaluate on the validation dataset after training one epoch
        for images, targets in trans_valid_dataloader: # can do batch of 10 prob.
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.no_grad():
                trans_loss_dict = model(images, targets)
                trans_loss_dict = [{k: loss.to('cpu')} for k, loss in trans_loss_dict.items()]
                all_trans_valid_logs.append(trans_loss_dict)


        for images, targets in cis_valid_dataloader: # can do batch of 10 prob.
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.no_grad():
                cis_loss_dict = model(images, targets)
                cis_loss_dict = [{k: loss.to('cpu')} for k, loss in cis_loss_dict.items()]
                all_cis_valid_logs.append(cis_loss_dict)
    
    filetime = time.strftime("%Y%m%d_%H%M%S")
    
    if save_logs:
        
        # save the train, cis valid and trans valid logs
        train_logs_to_json(all_train_logs, filetime)
        cis_valid_logs_to_json(all_cis_valid_logs, filetime)
        trans_valid_logs_to_json(all_trans_valid_logs, filetime)
        
    if save_model:
        
        # save the model, the optimizer and the scheduler
        torch.save(model.state_dict(), 'saved_models/' + filetime + '_model.pt')
        torch.save(optimizer.state_dict(), 'saved_models/' + filetime + '_optimizer.pt')
        torch.save(lr_scheduler.state_dict(), 'saved_models/' + filetime + '_scheduler.pt')
    
    
    
    return all_train_logs, all_trans_valid_logs, all_cis_valid_logs


# Interactive Part
#### Before training

In [17]:
# Set the lightweight configuration mode to use subset of data, simpler architecture and few epochs
# to quickly test the code for evaluation (False:0, True:1)
lightweight_mode = 0

# Set the data aumentation mode ('none', 'offline', 'online')
data_augmentation_mode = 'none'

#### You can specify the data augmentation transformation at will

In [None]:
def offline_augment_dataloader(light):
    if light:
        train_valid_img,_ = get_img_with_bbox(train_ann_path)
        train_data = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, valid_img=train_valid_img)
        train_data_colored = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, 
                                                # Transformations applied: transform=colorTranformations, rotate = True
                                                valid_img=train_valid_img,transform=colorTranformations)
        train_data_rotated = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, 
                                                valid_img=train_valid_img, rotation = True) 

        trainFinal = ConcatDataset([train_data, train_data_rotated, train_data_colored])
        return DataLoader(trainFinal, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)
    else:
        train_valid_img,_ = get_img_with_bbox(train_ann_path)
        train_data = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, valid_img=train_valid_img)
        train_data_colored = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, 
                                                # Transformations applied: transform=colorTranformations, rotate = True
                                                valid_img=train_valid_img,transform=colorTranformations)
        train_data_rotated = CustomImageDataset(label_path=train_ann_path, img_dir=img_folder, 
                                                valid_img=train_valid_img, rotation = True) 

        trainFinal = ConcatDataset([train_data,train_data_rotated, train_data_colored])
        return DataLoader(trainFinal, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)

In [18]:
# Initiate the dataloaders with batch size from the paper for better comparison

if lightweight_mode:
    valid = True
    if data_augmentation_mode == 'none':
        train_dataloader = create_dataloader(train_ann_path, 1, light=True)
    if data_augmentation_mode == 'online':
        train_dataloader = create_dataloader(train_ann_path, 1, light=True, transform=colorTranformations)
    if data_augmentation_mode == 'offline':
        train_dataloader = create_dataloader(train_ann_path, 1, light=True, transform=colorTranformations)
    else:
        valid = False
        print('Please enter a valid data_augmentation mode')
    
    if valid:
        cis_valid_dataloader = create_dataloader(cis_val_ann_path, 10, light=True)
        trans_valid_dataloader = create_dataloader(trans_val_ann_path, 10, light=True)
        cis_test_dataloader = create_dataloader(cis_test_ann_path, 10, light=True)
        trans_test_dataloader = create_dataloader(trans_test_ann_path, 10, light=True)
elif not lightweight_mode:
    valid = True
    if data_augmentation_mode == 'none':
        train_dataloader = create_dataloader(train_ann_path, 1)
    if data_augmentation_mode == 'online'
        train_dataloader = create_dataloader(train_ann_path, 1, transform=colorTranformations)
    if data_augmentation_mode == 'offline'
        train_dataloader = create_dataloader(train_ann_path, 1, transform=colorTranformations)
    else:
        valid = False
        print('Please enter a valid data_augmentation mode')
    if valid:
        cis_valid_dataloader = create_dataloader(cis_val_ann_path, 10)
        trans_valid_dataloader = create_dataloader(trans_val_ann_path, 10)
        cis_test_dataloader = create_dataloader(cis_test_ann_path, 10)
        trans_test_dataloader = create_dataloader(trans_test_ann_path, 10)
else:
    print("Please enter a valid lightweight option."))

In [None]:
# Loads the test dataset for coco evaluation later on
cis_coco = get_coco_api_from_dataset(cis_test_dataloader.dataset)
trans_coco = get_coco_api_from_dataset(trans_test_dataloader.dataset)

#### Specify the model to create and the parameters

In [19]:
model, optimizer, lr_scheduler = create_model(3, normalize=True)

In [20]:
# Parameters before training
num_epochs = 10

# Check if using the right device before training
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1080 Ti'

### This next cell starts the training of the model

In [None]:
model, optimizer, lr_scheduler = load_model(3, "20220430_071543")

In [None]:
# TRAIN
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [    0/12099]  eta: 8:03:15  lr: 0.000001  loss: 1.3731 (1.3731)  loss_classifier: 1.1648 (1.1648)  loss_box_reg: 0.0892 (0.0892)  loss_objectness: 0.1130 (0.1130)  loss_rpn_box_reg: 0.0060 (0.0060)  time: 2.3965  data: 0.0795  max mem: 594
Epoch: [0]  [  100/12099]  eta: 0:39:23  lr: 0.000031  loss: 0.3243 (0.6969)  loss_classifier: 0.1266 (0.4951)  loss_box_reg: 0.0966 (0.0884)  loss_objectness: 0.0587 (0.1027)  loss_rpn_box_reg: 0.0035 (0.0107)  time: 0.1744  data: 0.0636  max mem: 652
Epoch: [0]  [  200/12099]  eta: 0:36:52  lr: 0.000061  loss: 0.2225 (0.4770)  loss_classifier: 0.0729 (0.2950)  loss_box_reg: 0.0499 (0.0828)  loss_objectness: 0.0491 (0.0898)  loss_rpn_box_reg: 0.0077 (0.0094)  time: 0.1764  data: 0.0647  max mem: 652
Epoch: [0]  [  300/12099]  eta: 0:35:49  lr: 0.000091  loss: 0.2261 (0.4068)  loss_classifier: 0.0690 (0.2264)  loss_box_reg: 0.0525 (0.0811)  loss_objectness: 0.0507 (0.0889)  loss_rpn_box_reg: 0.0049 (0.0103)  time: 0.1733  data: 0.0626  m

Epoch: [0]  [ 3300/12099]  eta: 0:25:27  lr: 0.000300  loss: 0.1627 (0.1933)  loss_classifier: 0.0641 (0.0799)  loss_box_reg: 0.0579 (0.0654)  loss_objectness: 0.0220 (0.0396)  loss_rpn_box_reg: 0.0038 (0.0083)  time: 0.1713  data: 0.0648  max mem: 652
Epoch: [0]  [ 3400/12099]  eta: 0:25:09  lr: 0.000300  loss: 0.1337 (0.1919)  loss_classifier: 0.0473 (0.0792)  loss_box_reg: 0.0406 (0.0651)  loss_objectness: 0.0139 (0.0393)  loss_rpn_box_reg: 0.0026 (0.0083)  time: 0.1698  data: 0.0644  max mem: 652
Epoch: [0]  [ 3500/12099]  eta: 0:24:51  lr: 0.000300  loss: 0.1436 (0.1909)  loss_classifier: 0.0595 (0.0787)  loss_box_reg: 0.0464 (0.0649)  loss_objectness: 0.0170 (0.0390)  loss_rpn_box_reg: 0.0025 (0.0083)  time: 0.1709  data: 0.0650  max mem: 652
Epoch: [0]  [ 3600/12099]  eta: 0:24:33  lr: 0.000300  loss: 0.1450 (0.1897)  loss_classifier: 0.0456 (0.0781)  loss_box_reg: 0.0439 (0.0647)  loss_objectness: 0.0153 (0.0386)  loss_rpn_box_reg: 0.0034 (0.0083)  time: 0.1685  data: 0.0623  m

Epoch: [0]  [ 6600/12099]  eta: 0:15:46  lr: 0.000300  loss: 0.1279 (0.1742)  loss_classifier: 0.0479 (0.0705)  loss_box_reg: 0.0608 (0.0640)  loss_objectness: 0.0101 (0.0321)  loss_rpn_box_reg: 0.0023 (0.0077)  time: 0.1699  data: 0.0635  max mem: 652
Epoch: [0]  [ 6700/12099]  eta: 0:15:29  lr: 0.000300  loss: 0.1407 (0.1739)  loss_classifier: 0.0455 (0.0703)  loss_box_reg: 0.0656 (0.0640)  loss_objectness: 0.0169 (0.0319)  loss_rpn_box_reg: 0.0030 (0.0076)  time: 0.1711  data: 0.0643  max mem: 652
Epoch: [0]  [ 6800/12099]  eta: 0:15:12  lr: 0.000300  loss: 0.1220 (0.1734)  loss_classifier: 0.0447 (0.0701)  loss_box_reg: 0.0619 (0.0640)  loss_objectness: 0.0138 (0.0317)  loss_rpn_box_reg: 0.0020 (0.0076)  time: 0.1707  data: 0.0641  max mem: 652
Epoch: [0]  [ 6900/12099]  eta: 0:14:54  lr: 0.000300  loss: 0.1196 (0.1732)  loss_classifier: 0.0432 (0.0700)  loss_box_reg: 0.0411 (0.0640)  loss_objectness: 0.0177 (0.0316)  loss_rpn_box_reg: 0.0016 (0.0076)  time: 0.1696  data: 0.0630  m

Epoch: [0]  [ 9900/12099]  eta: 0:06:18  lr: 0.000300  loss: 0.1327 (0.1683)  loss_classifier: 0.0479 (0.0672)  loss_box_reg: 0.0477 (0.0649)  loss_objectness: 0.0195 (0.0289)  loss_rpn_box_reg: 0.0023 (0.0073)  time: 0.1722  data: 0.0648  max mem: 652
Epoch: [0]  [10000/12099]  eta: 0:06:01  lr: 0.000300  loss: 0.1494 (0.1684)  loss_classifier: 0.0516 (0.0672)  loss_box_reg: 0.0868 (0.0650)  loss_objectness: 0.0131 (0.0289)  loss_rpn_box_reg: 0.0024 (0.0073)  time: 0.1798  data: 0.0725  max mem: 652
Epoch: [0]  [10100/12099]  eta: 0:05:44  lr: 0.000300  loss: 0.1847 (0.1685)  loss_classifier: 0.0673 (0.0672)  loss_box_reg: 0.0677 (0.0651)  loss_objectness: 0.0173 (0.0289)  loss_rpn_box_reg: 0.0031 (0.0073)  time: 0.1705  data: 0.0651  max mem: 652
Epoch: [0]  [10200/12099]  eta: 0:05:26  lr: 0.000300  loss: 0.1502 (0.1683)  loss_classifier: 0.0620 (0.0672)  loss_box_reg: 0.0699 (0.0651)  loss_objectness: 0.0125 (0.0288)  loss_rpn_box_reg: 0.0028 (0.0072)  time: 0.1876  data: 0.0746  m

Epoch: [1]  [ 1000/12099]  eta: 0:31:29  lr: 0.000300  loss: 0.1482 (0.1564)  loss_classifier: 0.0559 (0.0600)  loss_box_reg: 0.0736 (0.0688)  loss_objectness: 0.0126 (0.0214)  loss_rpn_box_reg: 0.0031 (0.0061)  time: 0.1804  data: 0.0677  max mem: 3863
Epoch: [1]  [ 1100/12099]  eta: 0:31:19  lr: 0.000300  loss: 0.1458 (0.1573)  loss_classifier: 0.0517 (0.0600)  loss_box_reg: 0.0589 (0.0689)  loss_objectness: 0.0107 (0.0221)  loss_rpn_box_reg: 0.0024 (0.0063)  time: 0.1728  data: 0.0624  max mem: 3863
Epoch: [1]  [ 1200/12099]  eta: 0:31:05  lr: 0.000300  loss: 0.1593 (0.1571)  loss_classifier: 0.0615 (0.0600)  loss_box_reg: 0.0666 (0.0691)  loss_objectness: 0.0124 (0.0218)  loss_rpn_box_reg: 0.0023 (0.0061)  time: 0.1772  data: 0.0658  max mem: 3863
Epoch: [1]  [ 1300/12099]  eta: 0:30:55  lr: 0.000300  loss: 0.1407 (0.1573)  loss_classifier: 0.0462 (0.0601)  loss_box_reg: 0.0725 (0.0691)  loss_objectness: 0.0155 (0.0218)  loss_rpn_box_reg: 0.0038 (0.0063)  time: 0.1738  data: 0.0683

Epoch: [1]  [ 4300/12099]  eta: 0:22:04  lr: 0.000300  loss: 0.1497 (0.1565)  loss_classifier: 0.0577 (0.0604)  loss_box_reg: 0.0619 (0.0690)  loss_objectness: 0.0139 (0.0208)  loss_rpn_box_reg: 0.0014 (0.0062)  time: 0.1622  data: 0.0655  max mem: 3863
Epoch: [1]  [ 4400/12099]  eta: 0:21:46  lr: 0.000300  loss: 0.1465 (0.1566)  loss_classifier: 0.0431 (0.0603)  loss_box_reg: 0.0688 (0.0691)  loss_objectness: 0.0083 (0.0209)  loss_rpn_box_reg: 0.0030 (0.0063)  time: 0.1615  data: 0.0607  max mem: 3863
Epoch: [1]  [ 4500/12099]  eta: 0:21:29  lr: 0.000300  loss: 0.1436 (0.1565)  loss_classifier: 0.0491 (0.0603)  loss_box_reg: 0.0614 (0.0691)  loss_objectness: 0.0153 (0.0208)  loss_rpn_box_reg: 0.0026 (0.0063)  time: 0.1635  data: 0.0627  max mem: 3863
Epoch: [1]  [ 4600/12099]  eta: 0:21:11  lr: 0.000300  loss: 0.1487 (0.1566)  loss_classifier: 0.0566 (0.0603)  loss_box_reg: 0.0724 (0.0692)  loss_objectness: 0.0189 (0.0208)  loss_rpn_box_reg: 0.0017 (0.0063)  time: 0.1649  data: 0.0623

Epoch: [1]  [ 7600/12099]  eta: 0:12:46  lr: 0.000300  loss: 0.1004 (0.1561)  loss_classifier: 0.0348 (0.0600)  loss_box_reg: 0.0594 (0.0694)  loss_objectness: 0.0109 (0.0206)  loss_rpn_box_reg: 0.0025 (0.0062)  time: 0.1588  data: 0.0613  max mem: 3863
Epoch: [1]  [ 7700/12099]  eta: 0:12:28  lr: 0.000300  loss: 0.1499 (0.1561)  loss_classifier: 0.0553 (0.0600)  loss_box_reg: 0.0652 (0.0694)  loss_objectness: 0.0135 (0.0206)  loss_rpn_box_reg: 0.0034 (0.0062)  time: 0.1634  data: 0.0623  max mem: 3863
Epoch: [1]  [ 7800/12099]  eta: 0:12:11  lr: 0.000300  loss: 0.1080 (0.1560)  loss_classifier: 0.0387 (0.0599)  loss_box_reg: 0.0539 (0.0693)  loss_objectness: 0.0118 (0.0206)  loss_rpn_box_reg: 0.0018 (0.0062)  time: 0.1611  data: 0.0630  max mem: 3863
Epoch: [1]  [ 7900/12099]  eta: 0:11:54  lr: 0.000300  loss: 0.1406 (0.1560)  loss_classifier: 0.0485 (0.0599)  loss_box_reg: 0.0633 (0.0693)  loss_objectness: 0.0123 (0.0205)  loss_rpn_box_reg: 0.0028 (0.0062)  time: 0.1691  data: 0.0658

Epoch: [1]  [10900/12099]  eta: 0:03:25  lr: 0.000300  loss: 0.1571 (0.1566)  loss_classifier: 0.0605 (0.0601)  loss_box_reg: 0.0666 (0.0699)  loss_objectness: 0.0129 (0.0204)  loss_rpn_box_reg: 0.0027 (0.0063)  time: 0.1595  data: 0.0628  max mem: 3863
Epoch: [1]  [11000/12099]  eta: 0:03:08  lr: 0.000300  loss: 0.1210 (0.1566)  loss_classifier: 0.0467 (0.0601)  loss_box_reg: 0.0570 (0.0699)  loss_objectness: 0.0100 (0.0204)  loss_rpn_box_reg: 0.0031 (0.0063)  time: 0.1834  data: 0.0798  max mem: 3863
Epoch: [1]  [11100/12099]  eta: 0:02:51  lr: 0.000300  loss: 0.1298 (0.1565)  loss_classifier: 0.0432 (0.0600)  loss_box_reg: 0.0663 (0.0698)  loss_objectness: 0.0090 (0.0204)  loss_rpn_box_reg: 0.0019 (0.0063)  time: 0.1591  data: 0.0630  max mem: 3863
Epoch: [1]  [11200/12099]  eta: 0:02:33  lr: 0.000300  loss: 0.1462 (0.1565)  loss_classifier: 0.0573 (0.0600)  loss_box_reg: 0.0699 (0.0699)  loss_objectness: 0.0099 (0.0204)  loss_rpn_box_reg: 0.0015 (0.0063)  time: 0.1582  data: 0.0622

Epoch: [2]  [ 2000/12099]  eta: 0:27:00  lr: 0.000300  loss: 0.1646 (0.1531)  loss_classifier: 0.0459 (0.0580)  loss_box_reg: 0.0699 (0.0697)  loss_objectness: 0.0114 (0.0194)  loss_rpn_box_reg: 0.0017 (0.0060)  time: 0.1630  data: 0.0648  max mem: 3863
Epoch: [2]  [ 2100/12099]  eta: 0:26:44  lr: 0.000300  loss: 0.1334 (0.1530)  loss_classifier: 0.0450 (0.0580)  loss_box_reg: 0.0712 (0.0696)  loss_objectness: 0.0122 (0.0194)  loss_rpn_box_reg: 0.0033 (0.0060)  time: 0.1617  data: 0.0644  max mem: 3863
Epoch: [2]  [ 2200/12099]  eta: 0:26:28  lr: 0.000300  loss: 0.1535 (0.1532)  loss_classifier: 0.0603 (0.0580)  loss_box_reg: 0.0779 (0.0697)  loss_objectness: 0.0139 (0.0194)  loss_rpn_box_reg: 0.0024 (0.0060)  time: 0.1577  data: 0.0615  max mem: 3863
Epoch: [2]  [ 2300/12099]  eta: 0:26:12  lr: 0.000300  loss: 0.1529 (0.1532)  loss_classifier: 0.0484 (0.0579)  loss_box_reg: 0.0721 (0.0696)  loss_objectness: 0.0111 (0.0197)  loss_rpn_box_reg: 0.0021 (0.0061)  time: 0.1603  data: 0.0635

Epoch: [2]  [ 5300/12099]  eta: 0:18:52  lr: 0.000300  loss: 0.1842 (0.1548)  loss_classifier: 0.0633 (0.0585)  loss_box_reg: 0.0598 (0.0708)  loss_objectness: 0.0209 (0.0194)  loss_rpn_box_reg: 0.0034 (0.0061)  time: 0.1738  data: 0.0692  max mem: 3863
Epoch: [2]  [ 5400/12099]  eta: 0:18:36  lr: 0.000300  loss: 0.1282 (0.1548)  loss_classifier: 0.0504 (0.0585)  loss_box_reg: 0.0580 (0.0708)  loss_objectness: 0.0088 (0.0194)  loss_rpn_box_reg: 0.0023 (0.0061)  time: 0.1763  data: 0.0698  max mem: 3863
Epoch: [2]  [ 5500/12099]  eta: 0:18:20  lr: 0.000300  loss: 0.1282 (0.1549)  loss_classifier: 0.0512 (0.0585)  loss_box_reg: 0.0525 (0.0709)  loss_objectness: 0.0177 (0.0194)  loss_rpn_box_reg: 0.0023 (0.0061)  time: 0.1625  data: 0.0635  max mem: 3863
Epoch: [2]  [ 5600/12099]  eta: 0:18:03  lr: 0.000300  loss: 0.1392 (0.1552)  loss_classifier: 0.0469 (0.0586)  loss_box_reg: 0.0668 (0.0710)  loss_objectness: 0.0139 (0.0195)  loss_rpn_box_reg: 0.0025 (0.0061)  time: 0.1677  data: 0.0679

Epoch: [2]  [ 8600/12099]  eta: 0:09:41  lr: 0.000300  loss: 0.1450 (0.1541)  loss_classifier: 0.0573 (0.0581)  loss_box_reg: 0.0753 (0.0707)  loss_objectness: 0.0098 (0.0193)  loss_rpn_box_reg: 0.0026 (0.0061)  time: 0.1620  data: 0.0655  max mem: 3863
Epoch: [2]  [ 8700/12099]  eta: 0:09:25  lr: 0.000300  loss: 0.1168 (0.1539)  loss_classifier: 0.0358 (0.0580)  loss_box_reg: 0.0502 (0.0706)  loss_objectness: 0.0095 (0.0193)  loss_rpn_box_reg: 0.0020 (0.0061)  time: 0.1599  data: 0.0639  max mem: 3863
Epoch: [2]  [ 8800/12099]  eta: 0:09:08  lr: 0.000300  loss: 0.1372 (0.1539)  loss_classifier: 0.0560 (0.0580)  loss_box_reg: 0.0614 (0.0706)  loss_objectness: 0.0076 (0.0193)  loss_rpn_box_reg: 0.0019 (0.0061)  time: 0.1771  data: 0.0733  max mem: 3863
Epoch: [2]  [ 8900/12099]  eta: 0:08:51  lr: 0.000300  loss: 0.1475 (0.1540)  loss_classifier: 0.0492 (0.0580)  loss_box_reg: 0.0760 (0.0706)  loss_objectness: 0.0188 (0.0193)  loss_rpn_box_reg: 0.0052 (0.0061)  time: 0.1711  data: 0.0690

Epoch: [2]  [11900/12099]  eta: 0:00:32  lr: 0.000300  loss: 0.1229 (0.1531)  loss_classifier: 0.0322 (0.0577)  loss_box_reg: 0.0327 (0.0703)  loss_objectness: 0.0133 (0.0191)  loss_rpn_box_reg: 0.0022 (0.0060)  time: 0.1641  data: 0.0659  max mem: 3863
Epoch: [2]  [12000/12099]  eta: 0:00:16  lr: 0.000300  loss: 0.1704 (0.1532)  loss_classifier: 0.0576 (0.0577)  loss_box_reg: 0.0623 (0.0703)  loss_objectness: 0.0118 (0.0191)  loss_rpn_box_reg: 0.0020 (0.0060)  time: 0.1592  data: 0.0625  max mem: 3863
Epoch: [2]  [12098/12099]  eta: 0:00:00  lr: 0.000300  loss: 0.1626 (0.1532)  loss_classifier: 0.0507 (0.0577)  loss_box_reg: 0.0749 (0.0704)  loss_objectness: 0.0145 (0.0191)  loss_rpn_box_reg: 0.0037 (0.0060)  time: 0.1601  data: 0.0636  max mem: 3863
Epoch: [2] Total time: 0:33:19 (0.1652 s / it)
Epoch: [3]  [    0/12099]  eta: 0:31:45  lr: 0.000300  loss: 0.2013 (0.2013)  loss_classifier: 0.0821 (0.0821)  loss_box_reg: 0.0968 (0.0968)  loss_objectness: 0.0162 (0.0162)  loss_rpn_box_r

Epoch: [3]  [ 3000/12099]  eta: 0:24:32  lr: 0.000300  loss: 0.1409 (0.1512)  loss_classifier: 0.0490 (0.0567)  loss_box_reg: 0.0738 (0.0704)  loss_objectness: 0.0100 (0.0180)  loss_rpn_box_reg: 0.0024 (0.0060)  time: 0.1593  data: 0.0630  max mem: 3863
Epoch: [3]  [ 3100/12099]  eta: 0:24:15  lr: 0.000300  loss: 0.1282 (0.1512)  loss_classifier: 0.0406 (0.0568)  loss_box_reg: 0.0626 (0.0704)  loss_objectness: 0.0096 (0.0180)  loss_rpn_box_reg: 0.0018 (0.0060)  time: 0.1620  data: 0.0646  max mem: 3863
Epoch: [3]  [ 3200/12099]  eta: 0:23:59  lr: 0.000300  loss: 0.1253 (0.1511)  loss_classifier: 0.0406 (0.0567)  loss_box_reg: 0.0604 (0.0704)  loss_objectness: 0.0094 (0.0180)  loss_rpn_box_reg: 0.0020 (0.0060)  time: 0.1589  data: 0.0630  max mem: 3863
Epoch: [3]  [ 3300/12099]  eta: 0:23:43  lr: 0.000300  loss: 0.1533 (0.1510)  loss_classifier: 0.0549 (0.0567)  loss_box_reg: 0.0678 (0.0704)  loss_objectness: 0.0132 (0.0180)  loss_rpn_box_reg: 0.0021 (0.0060)  time: 0.1611  data: 0.0644

Epoch: [3]  [ 6300/12099]  eta: 0:15:40  lr: 0.000300  loss: 0.1310 (0.1519)  loss_classifier: 0.0517 (0.0567)  loss_box_reg: 0.0703 (0.0709)  loss_objectness: 0.0121 (0.0183)  loss_rpn_box_reg: 0.0025 (0.0060)  time: 0.1603  data: 0.0647  max mem: 3863
Epoch: [3]  [ 6400/12099]  eta: 0:15:24  lr: 0.000300  loss: 0.1177 (0.1518)  loss_classifier: 0.0377 (0.0567)  loss_box_reg: 0.0597 (0.0709)  loss_objectness: 0.0142 (0.0183)  loss_rpn_box_reg: 0.0019 (0.0059)  time: 0.1649  data: 0.0658  max mem: 3863
Epoch: [3]  [ 6500/12099]  eta: 0:15:08  lr: 0.000300  loss: 0.1321 (0.1519)  loss_classifier: 0.0506 (0.0567)  loss_box_reg: 0.0533 (0.0709)  loss_objectness: 0.0106 (0.0183)  loss_rpn_box_reg: 0.0030 (0.0059)  time: 0.1615  data: 0.0652  max mem: 3863
Epoch: [3]  [ 6600/12099]  eta: 0:14:51  lr: 0.000300  loss: 0.1183 (0.1517)  loss_classifier: 0.0457 (0.0566)  loss_box_reg: 0.0566 (0.0708)  loss_objectness: 0.0097 (0.0183)  loss_rpn_box_reg: 0.0025 (0.0059)  time: 0.1601  data: 0.0640

Epoch: [3]  [ 9600/12099]  eta: 0:06:48  lr: 0.000300  loss: 0.1450 (0.1512)  loss_classifier: 0.0513 (0.0564)  loss_box_reg: 0.0667 (0.0707)  loss_objectness: 0.0116 (0.0183)  loss_rpn_box_reg: 0.0019 (0.0058)  time: 0.1626  data: 0.0663  max mem: 3863
Epoch: [3]  [ 9700/12099]  eta: 0:06:32  lr: 0.000300  loss: 0.0943 (0.1512)  loss_classifier: 0.0336 (0.0563)  loss_box_reg: 0.0455 (0.0707)  loss_objectness: 0.0122 (0.0184)  loss_rpn_box_reg: 0.0021 (0.0059)  time: 0.1605  data: 0.0639  max mem: 3863
Epoch: [3]  [ 9800/12099]  eta: 0:06:15  lr: 0.000300  loss: 0.1530 (0.1512)  loss_classifier: 0.0472 (0.0563)  loss_box_reg: 0.0784 (0.0706)  loss_objectness: 0.0112 (0.0184)  loss_rpn_box_reg: 0.0015 (0.0059)  time: 0.1620  data: 0.0656  max mem: 3863
Epoch: [3]  [ 9900/12099]  eta: 0:05:59  lr: 0.000300  loss: 0.1362 (0.1512)  loss_classifier: 0.0475 (0.0563)  loss_box_reg: 0.0723 (0.0707)  loss_objectness: 0.0065 (0.0184)  loss_rpn_box_reg: 0.0014 (0.0058)  time: 0.1608  data: 0.0640

Epoch: [4]  [  700/12099]  eta: 0:33:04  lr: 0.000300  loss: 0.1194 (0.1523)  loss_classifier: 0.0443 (0.0577)  loss_box_reg: 0.0482 (0.0722)  loss_objectness: 0.0079 (0.0172)  loss_rpn_box_reg: 0.0017 (0.0053)  time: 0.1630  data: 0.0645  max mem: 3863
Epoch: [4]  [  800/12099]  eta: 0:32:29  lr: 0.000300  loss: 0.1399 (0.1522)  loss_classifier: 0.0506 (0.0575)  loss_box_reg: 0.0725 (0.0717)  loss_objectness: 0.0095 (0.0177)  loss_rpn_box_reg: 0.0032 (0.0053)  time: 0.1664  data: 0.0681  max mem: 3863
Epoch: [4]  [  900/12099]  eta: 0:31:56  lr: 0.000300  loss: 0.1176 (0.1513)  loss_classifier: 0.0467 (0.0567)  loss_box_reg: 0.0611 (0.0713)  loss_objectness: 0.0081 (0.0179)  loss_rpn_box_reg: 0.0009 (0.0053)  time: 0.1613  data: 0.0634  max mem: 3863
Epoch: [4]  [ 1000/12099]  eta: 0:31:25  lr: 0.000300  loss: 0.1287 (0.1507)  loss_classifier: 0.0418 (0.0565)  loss_box_reg: 0.0688 (0.0713)  loss_objectness: 0.0088 (0.0176)  loss_rpn_box_reg: 0.0022 (0.0054)  time: 0.1598  data: 0.0637

Epoch: [4]  [ 4000/12099]  eta: 0:22:13  lr: 0.000300  loss: 0.1235 (0.1473)  loss_classifier: 0.0374 (0.0549)  loss_box_reg: 0.0541 (0.0692)  loss_objectness: 0.0083 (0.0175)  loss_rpn_box_reg: 0.0017 (0.0057)  time: 0.1656  data: 0.0665  max mem: 3863
Epoch: [4]  [ 4100/12099]  eta: 0:21:57  lr: 0.000300  loss: 0.1249 (0.1472)  loss_classifier: 0.0409 (0.0549)  loss_box_reg: 0.0652 (0.0693)  loss_objectness: 0.0101 (0.0174)  loss_rpn_box_reg: 0.0049 (0.0057)  time: 0.1646  data: 0.0657  max mem: 3863
Epoch: [4]  [ 4200/12099]  eta: 0:21:40  lr: 0.000300  loss: 0.1493 (0.1471)  loss_classifier: 0.0505 (0.0548)  loss_box_reg: 0.0694 (0.0692)  loss_objectness: 0.0104 (0.0174)  loss_rpn_box_reg: 0.0039 (0.0057)  time: 0.1633  data: 0.0665  max mem: 3863
Epoch: [4]  [ 4300/12099]  eta: 0:21:22  lr: 0.000300  loss: 0.1207 (0.1472)  loss_classifier: 0.0560 (0.0548)  loss_box_reg: 0.0589 (0.0692)  loss_objectness: 0.0095 (0.0174)  loss_rpn_box_reg: 0.0026 (0.0057)  time: 0.1591  data: 0.0635

Epoch: [4]  [ 7300/12099]  eta: 0:13:08  lr: 0.000300  loss: 0.1188 (0.1480)  loss_classifier: 0.0458 (0.0548)  loss_box_reg: 0.0590 (0.0698)  loss_objectness: 0.0102 (0.0176)  loss_rpn_box_reg: 0.0020 (0.0057)  time: 0.1618  data: 0.0638  max mem: 3863
Epoch: [4]  [ 7400/12099]  eta: 0:12:52  lr: 0.000300  loss: 0.1524 (0.1479)  loss_classifier: 0.0496 (0.0548)  loss_box_reg: 0.0725 (0.0699)  loss_objectness: 0.0089 (0.0176)  loss_rpn_box_reg: 0.0036 (0.0057)  time: 0.1604  data: 0.0641  max mem: 3863
Epoch: [4]  [ 7500/12099]  eta: 0:12:35  lr: 0.000300  loss: 0.1452 (0.1478)  loss_classifier: 0.0442 (0.0547)  loss_box_reg: 0.0780 (0.0698)  loss_objectness: 0.0120 (0.0175)  loss_rpn_box_reg: 0.0034 (0.0057)  time: 0.1570  data: 0.0621  max mem: 3863
Epoch: [4]  [ 7600/12099]  eta: 0:12:18  lr: 0.000300  loss: 0.1489 (0.1479)  loss_classifier: 0.0481 (0.0547)  loss_box_reg: 0.0671 (0.0698)  loss_objectness: 0.0125 (0.0176)  loss_rpn_box_reg: 0.0026 (0.0057)  time: 0.1588  data: 0.0627

Epoch: [4]  [10600/12099]  eta: 0:04:05  lr: 0.000300  loss: 0.1707 (0.1483)  loss_classifier: 0.0523 (0.0547)  loss_box_reg: 0.0744 (0.0703)  loss_objectness: 0.0089 (0.0176)  loss_rpn_box_reg: 0.0021 (0.0057)  time: 0.1660  data: 0.0672  max mem: 3863
Epoch: [4]  [10700/12099]  eta: 0:03:48  lr: 0.000300  loss: 0.1221 (0.1483)  loss_classifier: 0.0336 (0.0547)  loss_box_reg: 0.0659 (0.0703)  loss_objectness: 0.0115 (0.0176)  loss_rpn_box_reg: 0.0018 (0.0057)  time: 0.1664  data: 0.0674  max mem: 3863
Epoch: [4]  [10800/12099]  eta: 0:03:32  lr: 0.000300  loss: 0.1017 (0.1482)  loss_classifier: 0.0355 (0.0547)  loss_box_reg: 0.0505 (0.0703)  loss_objectness: 0.0117 (0.0176)  loss_rpn_box_reg: 0.0020 (0.0057)  time: 0.1612  data: 0.0644  max mem: 3863
Epoch: [4]  [10900/12099]  eta: 0:03:16  lr: 0.000300  loss: 0.1309 (0.1483)  loss_classifier: 0.0406 (0.0547)  loss_box_reg: 0.0645 (0.0703)  loss_objectness: 0.0123 (0.0176)  loss_rpn_box_reg: 0.0029 (0.0057)  time: 0.1589  data: 0.0628

Epoch: [5]  [ 1700/12099]  eta: 0:29:48  lr: 0.000030  loss: 0.1482 (0.1455)  loss_classifier: 0.0445 (0.0523)  loss_box_reg: 0.0820 (0.0695)  loss_objectness: 0.0111 (0.0180)  loss_rpn_box_reg: 0.0032 (0.0056)  time: 0.1785  data: 0.0740  max mem: 3863
Epoch: [5]  [ 1800/12099]  eta: 0:29:36  lr: 0.000030  loss: 0.1445 (0.1454)  loss_classifier: 0.0466 (0.0524)  loss_box_reg: 0.0661 (0.0694)  loss_objectness: 0.0125 (0.0181)  loss_rpn_box_reg: 0.0015 (0.0056)  time: 0.1806  data: 0.0736  max mem: 3863
Epoch: [5]  [ 1900/12099]  eta: 0:29:24  lr: 0.000030  loss: 0.1369 (0.1451)  loss_classifier: 0.0487 (0.0524)  loss_box_reg: 0.0699 (0.0692)  loss_objectness: 0.0096 (0.0179)  loss_rpn_box_reg: 0.0014 (0.0056)  time: 0.1915  data: 0.0825  max mem: 3863
Epoch: [5]  [ 2000/12099]  eta: 0:29:10  lr: 0.000030  loss: 0.1204 (0.1450)  loss_classifier: 0.0312 (0.0525)  loss_box_reg: 0.0568 (0.0690)  loss_objectness: 0.0090 (0.0178)  loss_rpn_box_reg: 0.0023 (0.0056)  time: 0.1765  data: 0.0697

Epoch: [5]  [ 5000/12099]  eta: 0:19:54  lr: 0.000030  loss: 0.1348 (0.1457)  loss_classifier: 0.0450 (0.0529)  loss_box_reg: 0.0785 (0.0697)  loss_objectness: 0.0073 (0.0174)  loss_rpn_box_reg: 0.0021 (0.0056)  time: 0.1818  data: 0.0756  max mem: 3863
Epoch: [5]  [ 5100/12099]  eta: 0:19:38  lr: 0.000030  loss: 0.1128 (0.1454)  loss_classifier: 0.0305 (0.0528)  loss_box_reg: 0.0613 (0.0697)  loss_objectness: 0.0125 (0.0173)  loss_rpn_box_reg: 0.0017 (0.0056)  time: 0.1754  data: 0.0731  max mem: 3863
Epoch: [5]  [ 5200/12099]  eta: 0:19:21  lr: 0.000030  loss: 0.1523 (0.1454)  loss_classifier: 0.0437 (0.0528)  loss_box_reg: 0.0610 (0.0696)  loss_objectness: 0.0121 (0.0174)  loss_rpn_box_reg: 0.0029 (0.0056)  time: 0.1704  data: 0.0668  max mem: 3863
Epoch: [5]  [ 5300/12099]  eta: 0:19:04  lr: 0.000030  loss: 0.1163 (0.1455)  loss_classifier: 0.0382 (0.0529)  loss_box_reg: 0.0575 (0.0696)  loss_objectness: 0.0063 (0.0174)  loss_rpn_box_reg: 0.0021 (0.0056)  time: 0.1599  data: 0.0636

# Checking the log results

##### Ensures that if you hit the training cell, you don't lose the variables containing the logs from the last run

In [None]:
last_train_logs = all_train_logs
last_trans_valid_logs = all_trans_valid_logs
last_cis_valid_logs = all_cis_valid_logs

##### Converts the logs to lists and the tensors to numpy 

In [None]:
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

## Results

In [None]:
# To confirm that the data is loaded properly
n = len(train_logs)
print(n)

In [None]:
# Train loss to print (here we use global_avg but we can use: value, median, avg, max or global_avg)

results_train_loss = []

for i in range(n):
    results_train_loss.append(train_logs[i]['loss_box_reg']['global_avg'])
    
# Cis valid loss to print
results_cis_valid_loss = [] # cis

for i in range(n):
    loss_interm = 0
    for j in range(len(cis_valid_dataloader)):
        loss_interm += cis_valid_logs[(len(cis_valid_dataloader) * i) + j]['loss_rpn_box_reg']
    results_cis_valid_loss.append(loss_interm)

# Trans valid loss to print
results_trans_valid_loss = [] # trans

for i in range(n):
    loss_interm = 0
    for j in range(len(trans_valid_dataloader)):
        loss_interm += trans_valid_logs[(len(trans_valid_dataloader) * i) + j]['loss_rpn_box_reg']
    results_trans_valid_loss.append(loss_interm)

### Training and valid Plots

In [None]:
# Printing the different plots
fig, ax = plt.subplots(1,2, figsize=(20,6))

ax[0].plot(np.arange(1, n + 1), results_train_loss, label='train')
ax[0].set_title('Train loss per epoch')
ax[0].set_ylabel('loss_box_reg')
ax[0].set_xlabel('epoch')

plt.title('Train loss per epoch')
ax[1].plot(np.arange(1, n + 1), results_cis_valid_loss, label='cis')
ax[1].plot(np.arange(1, n + 1), results_trans_valid_loss, label='trans')
ax[1].set_title('Valid loss per epoch')
ax[1].set_ylabel('loss_box_reg')
ax[1].set_xlabel('epoch')
ax[1].legend()

#### Save the figure to pdf format in the figures folder

In [None]:
fig.savefig("saved_figures/" + time.strftime("%Y%m%d_%H%M%S") + "_figure.pdf")

# Evaluate on COCO detection metrics

### Test on COCO metrics from data loaders
##### 'For evaluation, we consider a detected box to be correct if its IoU ≥ 0.5 with a ground truth box.'

We need to look at the precison score with IoU=0.5, area=all and maxDets=100.
For the recall score, by default it's IoU=0.5:IoU=0.95.

In [None]:
# takes +- 25min to run on cis_test with full dataloader
cis_coco_evaluator = evaluate(cis_test_dataloader, cis_coco)

In [None]:
# takes +- 25min to run on trans_test with full dataloader
trans_coco_evaluator = evaluate(trans_test_dataloader, trans_coco)

In [None]:
print('cis test 15 epochs rpn + roi online data augmentation')
print('_'*80)
cis_coco_evaluator.summarize()

In [None]:
print('trans test 15 epochs rpn + roi online data augmentation')
print('_'*80)
trans_coco_evaluator.summarize()

## Make Predictions with a model

### Load 10 random predictions

In [None]:
# Loads 10 images and makes the model do predictions on these images
# WARNING: Takes GPU ram space
train_features, train_labels = next(iter(trans_valid_dataloader))
image = list(image.to(device) for image in train_features)

model.eval()
with torch.no_grad():
      pred = model(image)

In [None]:
# Prints 10 images with the predictions before and after NMS
for image_i in range(len(image)):
    fig, ax = plt.subplots(1,3,figsize=(24,16))

    ax[0].imshow(train_features[image_i][0].squeeze(),cmap="gray")
    rect = patches.Rectangle((train_labels[image_i]['boxes'][0][0], 
                              train_labels[image_i]['boxes'][0][1]), 
                             train_labels[image_i]['boxes'][0][2]-train_labels[image_i]['boxes'][0][0], 
                             train_labels[image_i]['boxes'][0][3]-train_labels[image_i]['boxes'][0][1], 
                             linewidth=2, edgecolor='r', facecolor='none')
    ax[0].add_patch(rect)
    ax[0].set_title('Ground truth')

    # Predictions
    ax[1].imshow(train_features[image_i][0].squeeze(),cmap="gray")
    for i in range(len(pred[image_i]['boxes'])):
        rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                                  pred[image_i]['boxes'][i][1].cpu()), 
                                 (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                                 (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                                 linewidth=2, edgecolor='r', facecolor='none')
        ax[1].add_patch(rect)
    ax[1].set_title('Pred')

    # Predictions after NMS
    iou_threshold = 0.001 # param to tune
    boxes_to_keep = torchvision.ops.nms(pred[image_i]['boxes'], pred[image_i]['scores'], iou_threshold = iou_threshold).cpu()
    ax[2].imshow(train_features[image_i][0].squeeze(),cmap="gray")
    for i in boxes_to_keep:
        rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                                  pred[image_i]['boxes'][i][1].cpu()), 
                                 (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                                 (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                                 linewidth=2, edgecolor='r', facecolor='none')
        ax[2].add_patch(rect)

    ax[2].set_title('After NMS')

In [None]:
# Print a single image chosen by index from the last batch of 10 predictions
image_i = 3 # from 0 to 9 included

fig, ax = plt.subplots(1,3,figsize=(24,16))

ax[0].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in range(len(train_labels[image_i]['boxes'])):
    rect = patches.Rectangle((train_labels[image_i]['boxes'][i][0], 
                            train_labels[image_i]['boxes'][i][1]), 
                            train_labels[image_i]['boxes'][i][2]-train_labels[image_i]['boxes'][i][0], 
                            train_labels[image_i]['boxes'][i][3]-train_labels[image_i]['boxes'][i][1], 
                            linewidth=2, edgecolor='r', facecolor='none')
    ax[0].add_patch(rect)
ax[0].set_title('Ground truth')

# Predictions
ax[1].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in range(len(pred[image_i]['boxes'])):
    rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                              pred[image_i]['boxes'][i][1].cpu()), 
                             (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                             (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                             linewidth=2, edgecolor='r', facecolor='none')
    ax[1].add_patch(rect)
ax[1].set_title('Pred')

# Predictions after NMS
iou_threshold = 0.01 # param to tune
boxes_to_keep = torchvision.ops.nms(pred[image_i]['boxes'], pred[image_i]['scores'], iou_threshold = iou_threshold).cpu()
ax[2].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in boxes_to_keep:
    rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                              pred[image_i]['boxes'][i][1].cpu()), 
                             (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                             (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                             linewidth=2, edgecolor='r', facecolor='none')
    ax[2].add_patch(rect)

ax[2].set_title('After NMS')

# Method 1 (Subspace alignment based Domain adaptation)

## Some new imports

In [None]:
# More imports needed to use the method
import torchvision.ops.boxes as bops
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from torch import nn

### Some utils method

In [None]:
def center_data(X, center_row=False):
    if center_row:
        # center data per row
        scaler_row = StandardScaler()
        X_scaled_row = scaler_row.fit_transform(X.T)

        # center data per column
        scaler_col = StandardScaler()
        X_scaled = scaler_col.fit_transform(X_scaled_row.T)
        return X_scaled
    else:
        # center data
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        return X_scaled

### Using the same model that we just trained

Otherwise, you can just load a model by uncommenting the following code

In [None]:
# # UNCOMMENT THE FOLLOWING LINE TO LOAD A MODEL:
# model, optimizer, lr_scheduler = load_model(3, "10_rpn_roi_4")

In [None]:
# Choose the number of dimensions to keep in PCA
d_pca = 512

# Tell the program to save the matrix created
save_matrixes = True
save_name = 'col_10_rpn_roi_online_512'

Papers 

 1. https://arxiv.org/pdf/1507.05578.pdf

 2.  https://openaccess.thecvf.com/content_iccv_2013/papers/Fernando_Unsupervised_Visual_Domain_2013_ICCV_paper.pdf

**Construct source matrix:** 

We keep output of model.roi_heads.box_head (vector of size 1024) as feature representations of bounding boxes extracted by the RPN (region proposal network). For us to stack a box representation to the source matrix, it has to have a IoU > thres_IoU with the ground truth of the given image. 

In [None]:
thres_IoU = 0.50
count = 0

X_source = torch.tensor([])
bbox_idx = torch.arange(1000)

model.eval()

for images, targets in train_dataloader: 
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    count += 1

    if count%100 == 0:
        print(count)

    with torch.no_grad():
        outputs = []
        hook = model.rpn.register_forward_hook(
        lambda self, input, output: outputs.append(output))

        outputs1 = []
        hook1 = model.roi_heads.box_head.register_forward_hook(
        lambda self, input, output: outputs1.append(output))

        res = model(images)
        hook.remove()
        hook1.remove()

    coords = outputs[0][0][0].cpu() # [1000,4]
    feat = outputs1[0].cpu() # [1000, 1024]

    gt = targets[0]['boxes'].cpu()

    bbox_idx_to_keep = torch.tensor([])
    for i in range(gt.shape[0]):

        IoUs = bops.box_iou(gt[i].reshape(1,4), coords)
        IoUs = IoUs.reshape(1000)
        bbox_idx_to_keep = torch.cat((bbox_idx_to_keep, bbox_idx[IoUs >= thres_IoU]),dim=0)

    X_source = torch.cat((X_source,feat[torch.unique(bbox_idx_to_keep).long()]), dim=0)

#### Save 1

In [None]:
if save_matrixes:
    torch.save(X_source, 'saved_matrixes/X_source_05_' + save_name + '.pt')

In [None]:
# Center the data
X_source_scaled = center_data(X_source)

In [None]:
# Apply PCA, keep only an amount of first components which gives the Projected source matrix

pca = PCA(n_components=d_pca)
pca.fit(X_source_scaled)

X_source_proj = pca.components_
X_source_proj = torch.from_numpy(X_source_proj)

#### Save 2

In [None]:
if save_matrixes:
    torch.save(X_source_proj, 'saved_matrixes/X_source_proj_05_' + save_name + '.pt')

### Target data with batch size 1

In [None]:
# Initiate the dataloaders with batch size from the paper for better comparison
if lightweight_mode:
    trans_test_batch1_dataloader = create_dataloader(trans_test_ann_path, 1, light=True)
else:
    trans_test_batch1_dataloader = create_dataloader(trans_test_ann_path, 1)

 **Construct target matrix:** 
 
We keep output of model.roi_heads.box_head (vector of size 1024) as feature representations of bounding boxes
 extracted by the RPN (region proposal network). For us to stack a box representation to the source matrix, the predicted bbox associated with the feature has to have a confidence score > thres_conf_score (since we don't use target labels we can't use the IoU here).


In [None]:
# 30 minutes
thres_conf_score= 0.50 
count=0

X_target=torch.tensor([])

model.eval()

for images, targets in trans_test_batch1_dataloader: # trans location valid AND test ?
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    count+=1

    if count%100==0:
        print(count)

    with torch.no_grad():

        outputs = []
        hook = model.backbone.register_forward_hook(
        lambda self, input, output: outputs.append(output))
        res = model(images)
        hook.remove()

        box_features = model.roi_heads.box_roi_pool(outputs[0], [r['boxes'] for r in res], [i.shape[-2:] for i in images])
        box_features = model.roi_heads.box_head(box_features)

    X_target = torch.cat((X_target,box_features[res[0]['scores']>=thres_conf_score].cpu()), dim=0)


In [None]:
if save_matrixes:
    torch.save(X_target, 'saved_matrixes/X_target_05_' + save_name + '.pt')

In [None]:
# Center the data
X_target_scaled = center_data(X_target)

In [None]:
# Apply PCA, keep only an amount of first components which gives the Projected source matrix

pca_proj = PCA(n_components=d_pca)
pca_proj.fit(X_target_scaled)

X_target_proj = pca_proj.components_
X_target_proj = torch.from_numpy(X_target_proj)

In [None]:
plt.plot(pca_proj.explained_variance_ratio_) # we keep d dimensions
plt.grid()

In [None]:
X_target_proj.shape

In [None]:
if save_matrixes:
    torch.save(X_target_proj, 'saved_matrixes/X_target_proj_05_' + save_name + '.pt')

### Transformation matrix M

𝑀 is obtained by minimizing the following Bregman matrix divergence (following closed-form solution given in the paper)

In [None]:
M = torch.matmul(X_source_proj, X_target_proj.T) 

In [None]:
M.shape

### Project source data into target aligned source subspace

In [None]:
Xa = torch.matmul(X_source_proj.T,M)

In [None]:
Xa.shape

In [None]:
M.to(device)

In [None]:
Xa.to(device)

### Make the new model

In [None]:
class FastRCNNPredictor_custom(nn.Module):
    """
    Standard classification + bounding box regression layers
    for Fast R-CNN.

    Args:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes, m_transfo):
        super(FastRCNNPredictor_custom, self).__init__()
        
        self.cls_score = nn.Sequential(nn.Linear(in_features=1024, 
                                                 out_features = in_channels, 
                                                 bias=False), 
                                       nn.Linear(in_channels, num_classes))
        
        self.bbox_pred = nn.Sequential(nn.Linear(in_features=1024, 
                                                 out_features = in_channels, 
                                                 bias=False), 
                                       nn.Linear(in_channels, num_classes * 4))
        
        self.cls_score[0].weight = nn.Parameter(m_transfo, requires_grad = False)
        self.bbox_pred[0].weight = nn.Parameter(m_transfo, requires_grad = False)

    def forward(self, x):
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        x = x.flatten(start_dim=1)
        scores = self.cls_score(x)
        bbox_deltas = self.bbox_pred(x)

        return scores, bbox_deltas

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and person
num_classes = 2

# get the model using our helper function
model = get_model_from_pretrained_rpn(num_classes)

# move model to the right device
model.to(device)

# load fine-tuned weights from the model of the projections
model.load_state_dict(torch.load('saved_models/10_rpn_roi_4_model.pt'))

for param in model.parameters(): # to freeze all existing weights

    param.requires_grad = False

# vector are of size 100 after the transformation
model.roi_heads.box_predictor = FastRCNNPredictor_custom(M.shape[0], 2, Xa.T.float())
# model.roi_heads.box_predictor = FastRCNNPredictor_custom(in_channels=100, num_classes=2, m_transfo=Xa.T.float()) 

# move model to the right device
model.to(device)

# construct an optimizer
# We will only retrain model.roi_heads.box_predictor (2 last layers)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[5,10], gamma=0.1)

In [None]:
# weights to learn
for i in range(4):
    print(params[i].shape)

In [None]:
# Nb of weights in the optimizer
for i in range(len(optimizer.param_groups[0]['params'])):
    print(optimizer.param_groups[0]['params'][i].shape)

## Before training

In [None]:
# PARAMETERS TO TUNE BEFORE TRAINING
num_epochs = 15

# CHECK DEVICE BEFORE TRAINING
torch.cuda.get_device_name(0)

### This next cell starts the training of the model

In [None]:
# TRAIN
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

## Checking the log results

##### Ensures that if you hit the training cell, you don't lose the variables containing the logs from the last run

In [None]:
last_train_logs = all_train_logs
last_trans_valid_logs = all_trans_valid_logs
last_cis_valid_logs = all_cis_valid_logs

##### Converts the logs to lists and the tensors to numpy 

In [None]:
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

In [None]:
last_train_logs = all_train_logs
last_train_logs = all_train_logs
last_trans_valid_logs = all_trans_valid_logs
last_cis_valid_logs = all_cis_valid_logs

In [None]:
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

In [None]:
# Train loss to print (here we use global_avg but we can use: value, median, avg, max or global_avg)
results_train_loss = []

for i in range(num_epochs):
    results_train_loss.append(train_logs[i]['loss_box_reg']['global_avg'])
    
# Cis valid loss to print
results_cis_valid_loss = [] # cis

for i in range(num_epochs):
    loss_interm = 0
    for j in range(167):
        loss_interm += cis_valid_logs[(167 * i) + j]['loss_box_reg']
    results_cis_valid_loss.append(loss_interm)

# Trans valid loss to print
results_trans_valid_loss = [] # cis

for i in range(num_epochs):
    loss_interm = 0
    for j in range(154):
        loss_interm += trans_valid_logs[(154 * i) + j]['loss_box_reg']
    results_trans_valid_loss.append(loss_interm)

In [None]:
# Printing the different plots
fig, ax = plt.subplots(1,2, figsize=(20,6))

ax[0].plot(np.arange(1, num_epochs + 1), results_train_loss, label='train')
ax[0].set_title('Train loss per epoch')
ax[0].set_ylabel('loss_box_reg')
ax[0].set_xlabel('epoch')

plt.title('Train loss per epoch')
ax[1].plot(np.arange(1, num_epochs + 1), results_cis_valid_loss, label='cis')
ax[1].plot(np.arange(1, num_epochs + 1), results_trans_valid_loss, label='trans')
ax[1].set_title('Valid loss per epoch')
ax[1].set_ylabel('loss_box_reg')
ax[1].set_xlabel('epoch')
ax[1].legend()

In [None]:
fig.savefig("saved_figures/" + time.strftime("%Y%m%d_%H%M%S") + "_figure.pdf")

## Evaluate the new model

In [None]:
# takes +- 15min to run on cis_test
cis_coco_evaluator_method = evaluate(cis_test_dataloader, cis_coco)

In [None]:
# trans with method 3
model.roi_heads.box_predictor.cls_score[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False) 
model.roi_heads.box_predictor.bbox_pred[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False)
model.to(device)

In [None]:
# takes +- 15min to run on cis_test
trans_coco_evaluator_method = evaluate(trans_test_dataloader, trans_coco)

In [None]:
print('cis test 10 epochs rpn roi 4, method3.1 with 15 epochs & d=100')
print('_'*80)
cis_coco_evaluator_method.summarize()

In [None]:
print('trans test 10 epochs rpn+roi 4, method3.1 with 15 epochs & d=100')
print('_'*80)
trans_coco_evaluator_method.summarize()

### Load a model with Method 3.2

In [None]:
class FastRCNNPredictor_custom(nn.Module):
    """
    Standard classification + bounding box regression layers
    for Fast R-CNN.

    Args:
        in_channels (int): number of input channels
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes, m_transfo):
        super(FastRCNNPredictor_custom, self).__init__()
        self.cls_score = nn.Sequential(nn.Linear(in_features=1024, out_features = in_channels, bias=False),nn.Linear(in_channels, num_classes))
        self.bbox_pred = nn.Sequential(nn.Linear(in_features=1024, out_features = in_channels, bias=False), nn.Linear(in_channels, num_classes * 4))
        self.cls_score[0].weight = nn.Parameter(m_transfo, requires_grad = False)
        self.bbox_pred[0].weight = nn.Parameter(m_transfo, requires_grad = False)

    def forward(self, x):
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        x = x.flatten(start_dim=1)
        scores = self.cls_score(x)
        bbox_deltas = self.bbox_pred(x)

        return scores, bbox_deltas

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and person
num_classes = 2

# get the model using our helper function
model = get_model_from_pretrained(num_classes)

# move model to the right device
model.to(device)

for param in model.parameters(): # to freeze all existing weights

    param.requires_grad = False

# vector are of size 100 after the transformation
model.roi_heads.box_predictor = FastRCNNPredictor_custom(M.shape[0], 2, Xa.T.float())
# model.roi_heads.box_predictor = FastRCNNPredictor_custom(in_channels=100, num_classes=2, m_transfo=Xa.T.float()) 

# move model to the right device
model.to(device)

# construct an optimizer
# We will only retrain model.roi_heads.box_predictor (2 last layers)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[5,10], gamma=0.1)

In [None]:
# load fine-tuned weights from the model of the projections
model.load_state_dict(torch.load('saved_models/50_rpn_roi_1_method3.2_512_model.pt'))
optimizer.load_state_dict(torch.load('saved_models/50_rpn_roi_1_method3.2_512_optimizer.pt'))
lr_scheduler.load_state_dict(torch.load('saved_models/50_rpn_roi_1_method3.2_512_scheduler.pt'))