# Imports

In [1]:
import time
import json
import matplotlib.pyplot as plt
import torch

import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import os
from torchvision.io import read_image
from PIL import Image

import numpy as np
import matplotlib.patches as patches
from torchvision import transforms

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torchmetrics.detection.map import MeanAveragePrecision

import pycocotools

In [2]:
# imports from local lib files
import utils
import transforms
import coco_eval
from engine import train_one_epoch, evaluate

In [3]:
# Imports for evaluation from local lib files
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import _get_iou_types 

# Baseline

## File paths

In [4]:
output_path = 'output'
img_folder = 'eccv_18_all_images_sm'

# Every annotation file sits in the same folder; only the split name differs.
_ann_dir = 'eccv_18_annotation_files'

cis_test_ann_path = _ann_dir + '/cis_test_annotations.json'
cis_val_ann_path = _ann_dir + '/cis_val_annotations.json'
train_ann_path = _ann_dir + '/train_annotations.json'
trans_test_ann_path = _ann_dir + '/trans_test_annotations.json'
trans_val_ann_path = _ann_dir + '/trans_val_annotations.json'

## Basic data exploration

In [5]:
def _load_annotations(path):
    """Parse the JSON annotation file at `path` and return its content.

    The file is opened in a `with` block so the handle is closed as soon as
    parsing is done instead of being left to the garbage collector.
    """
    with open(path) as f:
        return json.load(f)


# One parsed annotation dict per data split.
cis_test_ann = _load_annotations(cis_test_ann_path)
cis_val_ann = _load_annotations(cis_val_ann_path)
train_ann = _load_annotations(train_ann_path)
trans_test_ann = _load_annotations(trans_test_ann_path)
trans_val_ann = _load_annotations(trans_val_ann_path)

In [6]:
# Number of images listed in each annotation split.
for split_name, split_ann in (
    ('cis test', cis_test_ann),
    ('cis val', cis_val_ann),
    ('train', train_ann),
    ('trans test', trans_test_ann),
    ('trans val', trans_val_ann),
):
    print(split_name + ' set length:', len(split_ann['images']))

cis test set length: 15827
cis val set length: 3484
train set length: 13553
trans test set length: 23275
trans val set length: 1725


In [7]:
# i = 0

# boxes = [trans_val_ann['annotations'][j]['bbox'] for j in range(len(trans_val_ann['annotations'])) 
#          if trans_val_ann['annotations'][j]['image_id']==trans_val_ann['images'][i]['id'] 
#          and 'bbox' in trans_val_ann['annotations'][j].keys()]

# img_path = os.path.join('eccv_18_all_images_sm', trans_val_ann['images'][i]['file_name']) # to change

# image = read_image(img_path)

# fig, ax = plt.subplots()
# ax.imshow(image[0].squeeze(),cmap="gray")

# scale_x = image.shape[2] / trans_val_ann['images'][i]['width'] 
# scale_y = image.shape[1] / trans_val_ann['images'][i]['height']

# boxes = torch.as_tensor(boxes)

# for i in range(boxes.shape[0]):
#     boxes[i][0] = torch.round(boxes[i][0] * scale_x)
#     boxes[i][1] = torch.round(boxes[i][1] * scale_y)
#     boxes[i][2] = torch.round(boxes[i][2] * scale_x)
#     boxes[i][3] = torch.round(boxes[i][3] * scale_y)

#     boxes[i][2] = boxes[i][0] + boxes[i][2]
#     boxes[i][3] = boxes[i][1] + boxes[i][3]

# target = {}
# target["boxes"] = boxes

# rect = patches.Rectangle((boxes[0][0], boxes[0][1]), boxes[0][2]-boxes[0][0], 
#                          boxes[0][3]-boxes[0][1], linewidth=2, edgecolor='r', facecolor='none')
# ax.add_patch(rect)

## Utils

In [8]:
# From the paper: "... and employ horizontal flipping for data augmentation" (for detection).
# `T` is the local `transforms` module (taken from the GitHub repo), not `torchvision.transforms`.

import transforms as T   # from git hub repo

# Augmentation used for the 'train' split: random horizontal flip, constructed with 0.5.
data_transform = {'train': T.RandomHorizontalFlip(0.5)}

In [9]:
def get_img_with_bbox(file_path):
    """Find the images of an annotation file that have at least one bounding box.

    Args:
        file_path: path to a JSON annotation file with an 'images' list
            (entries with an 'id') and an 'annotations' list (entries with an
            'image_id' and, optionally, a 'bbox').

    Returns:
        (img_wbbox, num_bbox): `img_wbbox` holds the positions, in
        `file['images']`, of the images with at least one 'bbox' annotation;
        `num_bbox` holds the number of such annotations for each of them,
        in the same order.
    """
    with open(file_path) as f:
        file = json.load(f)

    # Count the bbox annotations per image id in a single pass, instead of
    # rescanning every annotation for every image.
    bbox_count = {}
    for ann in file['annotations']:
        if 'bbox' in ann:
            bbox_count[ann['image_id']] = bbox_count.get(ann['image_id'], 0) + 1

    img_wbbox = []
    num_bbox = []
    for i, img in enumerate(file['images']):
        n_boxes = bbox_count.get(img['id'], 0)
        if n_boxes != 0:
            img_wbbox.append(i)
            num_bbox.append(n_boxes)

    return img_wbbox, num_bbox

In [10]:
class CustomImageDataset(Dataset):
    """Detection dataset built from an annotation JSON file and an image folder.

    Only the images listed in `valid_img` are exposed. Each item is an
    `(image, target)` pair where `image` is an RGB float tensor and `target`
    is a dict with the keys 'boxes', 'labels', 'image_id', 'area' and 'iscrowd'.
    """

    def __init__(self, label_path, img_dir, valid_img, transform = None):
        """
        Args:
            label_path: path to the JSON annotation file.
            img_dir: folder that contains the image files.
            valid_img: positions (in the file's 'images' list) of the images
                to expose, e.g. the first output of `get_img_with_bbox`.
            transform: optional callable `(image, target) -> (image, target)`.
        """
        with open(label_path) as f:
            self.label_file = json.load(f)
        self.img_dir = img_dir
        self.transform = transform
        self.valid_img = valid_img

        # Index the annotations that carry a bounding box by image id once,
        # so __getitem__ does not rescan the whole annotation list.
        self._ann_by_image = {}
        for ann in self.label_file['annotations']:
            if 'bbox' in ann:
                self._ann_by_image.setdefault(ann['image_id'], []).append(ann)

    def __len__(self):
        return len(self.valid_img)

    def __getitem__(self, idx):
        idx = self.valid_img[idx] # consider only images with bbox annotations
        img_info = self.label_file['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        # some images have only one channel, so everything is converted to RGB
        image = Image.open(img_path).convert("RGB")
        image = torchvision.transforms.ToTensor()(image)

        # Annotations without a 'bbox' key are skipped (same filter as get_img_with_bbox).
        anns = self._ann_by_image.get(img_info['id'], [])
        boxes = torch.as_tensor([ann['bbox'] for ann in anns], dtype=torch.float32).reshape(-1, 4)
        label = torch.as_tensor([ann['category_id'] for ann in anns], dtype=torch.int64)

        # The annotated width/height may differ from the size of the stored
        # image, so the bbox coordinates are rescaled to the loaded image.
        scale_x = image.shape[2] / img_info['width']
        scale_y = image.shape[1] / img_info['height']
        scale = torch.tensor([scale_x, scale_y, scale_x, scale_y], dtype=torch.float32)
        boxes = torch.round(boxes * scale)

        # [x, y, w, h] -> [x1, y1, x2, y2] (pytorch bbox format)
        boxes[:, 2:] += boxes[:, :2]

        label = torch.where(label==30,0,1)  # 0 if empty (categ id = 30), 1 if animal
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = label
        target["image_id"] = image_id
        target['area']=area
        target['iscrowd']=iscrowd

        # TO DO : resize all to same size

        if self.transform:
            # transform image AND target
            image, target = self.transform(image, target)

        return image, target

## Pre-trained models
Inspired by https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=YjNHjVMOyYlH

### Model with only the last layer to train

In [31]:
def get_model_from_pretrained(num_classes):
    """Build a pre-trained Faster R-CNN (ResNet-50 FPN) with a new box predictor.

    All pre-trained weights are frozen; the box predictor is then replaced by
    a fresh `FastRCNNPredictor` with `num_classes` outputs.

    Args:
        num_classes: number of classes of the new predictor.

    Returns:
        The modified detection model.
    """
    # load an object detection model pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # freeze all existing weights
    detector.requires_grad_(False)

    # the new head takes the same number of input features as the old one
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

### Model with deeper layers to train

In [None]:
def get_model_from_pretrained(num_classes):
    """Build a pre-trained Faster R-CNN (ResNet-50 FPN) with trainable ROI heads.

    All pre-trained weights are frozen except those of `roi_heads`; the box
    predictor is then replaced by a fresh `FastRCNNPredictor` with
    `num_classes` outputs. Running this cell replaces any earlier definition
    of `get_model_from_pretrained`.

    Args:
        num_classes: number of classes of the new predictor.

    Returns:
        The modified detection model.
    """
    # load an object detection model pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # freeze all existing weights, then unfreeze the ROI heads
    detector.requires_grad_(False)
    detector.roi_heads.requires_grad_(True)

    # the new head takes the same number of input features as the old one
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

### Model with even deeper layers to train

In [None]:
def get_model_from_pretrained(num_classes):
    """Build a pre-trained Faster R-CNN (ResNet-50 FPN) with trainable RPN and ROI heads.

    All pre-trained weights are frozen except those of `roi_heads` and `rpn`;
    the box predictor is then replaced by a fresh `FastRCNNPredictor` with
    `num_classes` outputs. Running this cell replaces any earlier definition
    of `get_model_from_pretrained`.

    Args:
        num_classes: number of classes of the new predictor.

    Returns:
        The modified detection model.
    """
    # load an object detection model pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # freeze all existing weights, then unfreeze the ROI heads and the RPN
    detector.requires_grad_(False)
    detector.roi_heads.requires_grad_(True)
    detector.rpn.requires_grad_(True)

    # the new head takes the same number of input features as the old one
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

### Params

In [None]:
# use the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# the dataset labels are 0 (empty, category id 30) and 1 (animal)
num_classes = 2

# build the model with the helper defined above and move it to the device
model = get_model_from_pretrained(num_classes)
model.to(device)

# only the parameters that are not frozen are given to the optimizer
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

# like in the paper: the learning rate is multiplied by 0.1 at epochs 5 and 10
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10], gamma=0.1)

## OPTIONAL - Loading/Importing a model
#### The model, the optimizer and the scheduler must be initialized before loading

In [None]:
# The model, the optimizer and the scheduler must be created beforehand
# (see the "Params" cell); this cell only restores their saved states.
# `map_location=device` lets a checkpoint that was saved on a GPU be loaded
# on a CPU-only machine as well.
model.load_state_dict(torch.load('saved_models/50_rpn_roi_1_model.pt', map_location=device))
optimizer.load_state_dict(torch.load('saved_models/50_rpn_roi_1_optimizer.pt', map_location=device))
lr_scheduler.load_state_dict(torch.load('saved_models/50_rpn_roi_1_scheduler.pt', map_location=device))

## Create the dataloaders
To load the data of the dataset efficiently for the model

In [11]:
def create_dataloader(ann_path, batch_size, shuffle=True, transform=None):
    """Create a DataLoader over the images of `ann_path` that have a bounding box.

    Args:
        ann_path: path to the JSON annotation file.
        batch_size: number of images per batch.
        shuffle: forwarded to the DataLoader.
        transform: optional `(image, target)` transform given to the dataset.

    Returns:
        A DataLoader over a `CustomImageDataset` reading from the global
        `img_folder`, batched with `utils.collate_fn`.
    """
    valid_indices = get_img_with_bbox(ann_path)[0]
    dataset = CustomImageDataset(ann_path, img_folder, valid_indices, transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=utils.collate_fn)

#### You can specify the data augmentation transformation at will

In [12]:
# Training uses one image per batch; validation and test use batches of 10.
# No transform is passed here, so no data augmentation is applied.
train_dataloader = create_dataloader(train_ann_path, batch_size=1)
cis_valid_dataloader = create_dataloader(cis_val_ann_path, batch_size=10)
trans_valid_dataloader = create_dataloader(trans_val_ann_path, batch_size=10)
cis_test_dataloader = create_dataloader(cis_test_ann_path, batch_size=10)
trans_test_dataloader = create_dataloader(trans_test_ann_path, batch_size=10)

## Logs utils

#### Train logs utils

In [13]:
import time

In [14]:
def smoothed_value_to_str(smoothed_value):
    """Collect the statistics of a smoothed value into a plain dict.

    Despite the name, the result is a dict (not a string) with the keys
    'median', 'avg', 'global_avg', 'max' and 'value', each read from the
    attribute of the same name on `smoothed_value`.
    """
    stat_names = ('median', 'avg', 'global_avg', 'max', 'value')
    return {stat: getattr(smoothed_value, stat) for stat in stat_names}


def train_logs_to_lst(logs):
    """Convert the per-epoch training loggers into a list of plain dicts.

    Args:
        logs: sequence of objects with a `meters` mapping (one per epoch).

    Returns:
        One dict per entry of `logs`, mapping every meter name to the dict
        produced by `smoothed_value_to_str` for that meter.
    """
    return [
        {name: smoothed_value_to_str(meter) for name, meter in epoch_log.meters.items()}
        for epoch_log in logs
    ]


def train_logs_to_json(logs, ftime=None):
    """Write the training logs to 'saved_logs/<ftime>_train_logs.json'.

    Args:
        logs: per-epoch training loggers, converted with `train_logs_to_lst`.
        ftime: file name prefix. Defaults to the time of the call; the
            previous default was computed once, when the function was
            defined, so later calls reused a stale timestamp and overwrote
            the same file.
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    train_metric_logs = train_logs_to_lst(logs)
    filename = ftime + "_train_logs.json"

    # make sure the output folder exists before writing
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(train_metric_logs, f, ensure_ascii=False, indent=4)

In [15]:
def train_logs_to_lst(logs):
    """Convert the per-epoch training loggers into a list of plain dicts.

    This cell redefines the function of the same name from the previous cell.

    Args:
        logs: sequence of objects with a `meters` mapping (one per epoch).

    Returns:
        One dict per entry of `logs`, mapping every meter name to the dict
        produced by `smoothed_value_to_str` for that meter.
    """
    return [
        {name: smoothed_value_to_str(meter) for name, meter in epoch_log.meters.items()}
        for epoch_log in logs
    ]

In [16]:
def train_logs_to_json(logs, ftime=None):
    """Write the training logs to 'saved_logs/<ftime>_train_logs.json'.

    This cell redefines the function of the same name from an earlier cell,
    so this is the definition in effect after a full run.

    Args:
        logs: per-epoch training loggers, converted with `train_logs_to_lst`.
        ftime: file name prefix. Defaults to the time of the call; the
            previous default was computed once, when the function was
            defined, so later calls reused a stale timestamp and overwrote
            the same file.
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    train_metric_logs = train_logs_to_lst(logs)
    filename = ftime + "_train_logs.json"

    # make sure the output folder exists before writing
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(train_metric_logs, f, ensure_ascii=False, indent=4)

#### Valid logs utils

In [17]:
def merge_dict(logs):
    """Merge the dicts of every entry of `logs` into a single dict per entry.

    Args:
        logs: list whose entries are lists (or tuples) of dicts, e.g. one
            single-key dict per loss. Any number of dicts per entry is
            accepted (the previous version required exactly four).

    Returns:
        A new list with one merged dict per entry. If the entries are not
        lists/tuples of dicts (for instance because the logs were already
        merged), `logs` is returned unchanged.
    """
    # Entries that are not sequences are treated as already merged.
    if not all(isinstance(entry, (list, tuple)) for entry in logs):
        return logs

    merged_logs = []
    try:
        for entry in logs:
            merged = {}
            for part in entry:
                merged.update(part)
            merged_logs.append(merged)
    except (TypeError, ValueError):
        # An entry held something that is not a dict: keep the input as is.
        return logs
    return merged_logs

In [18]:
# TODO: add if type == list to not do anything if its already a list
def valid_logs_to_lst(valid_logs):
    """Convert the validation logs into a list of JSON-serialisable dicts.

    The logs are first merged with `merge_dict`; every value is then moved
    to the CPU and converted through numpy with `.cpu().numpy().tolist()`.

    Returns:
        One dict per merged entry, with the same keys and converted values.
    """
    return [
        {name: value.cpu().numpy().tolist() for name, value in entry.items()}
        for entry in merge_dict(valid_logs)
    ]

In [19]:
def cis_valid_logs_to_json(logs, ftime=None):
    """Write the cis validation logs to 'saved_logs/<ftime>_cis_valid_logs.json'.

    Args:
        logs: cis validation logs, converted with `valid_logs_to_lst`.
        ftime: file name prefix. Defaults to the time of the call; the
            previous default was computed once, when the function was
            defined, so later calls reused a stale timestamp.
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_cis_valid_logs.json"

    # make sure the output folder exists before writing
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)

In [20]:
def trans_valid_logs_to_json(logs, ftime=None):
    """Write the trans validation logs to 'saved_logs/<ftime>_trans_valid_logs.json'.

    Args:
        logs: trans validation logs, converted with `valid_logs_to_lst`.
        ftime: file name prefix. Defaults to the time of the call; the
            previous default was computed once, when the function was
            defined, so later calls reused a stale timestamp.
    """
    if ftime is None:
        ftime = time.strftime("%Y%m%d_%H%M%S")
    valid_metric_logs = valid_logs_to_lst(logs)
    filename = ftime + "_trans_valid_logs.json"

    # make sure the output folder exists before writing
    os.makedirs('saved_logs', exist_ok=True)
    with open('saved_logs/' + filename, 'w', encoding='utf-8') as f:
        json.dump(valid_metric_logs, f, ensure_ascii=False, indent=4)

# Training the model

In [21]:
def train(dataloader, num_epochs, save_logs=True, save_model=True, print_freq=100):
    """Train the global `model` and record the losses of every epoch.

    Uses the globals `model`, `optimizer`, `lr_scheduler`, `device`,
    `trans_valid_dataloader` and `cis_valid_dataloader`. The model is put in
    train mode once and stays in that mode for the validation passes, whose
    forward call with targets is used as a dict of losses.

    Args:
        dataloader: training DataLoader.
        num_epochs: number of epochs to run.
        save_logs: when true, write the three logs to 'saved_logs/'.
        save_model: when true, write the model, optimizer and scheduler
            states to 'saved_models/'.
        print_freq: forwarded to `train_one_epoch`.

    Returns:
        (all_train_logs, all_trans_valid_logs, all_cis_valid_logs)
    """

    def _record_losses(valid_loader, sink):
        # One entry is appended to `sink` per batch: a list of single-key
        # dicts, one per loss, with the loss tensors moved to the CPU.
        for images, targets in valid_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.no_grad():
                batch_losses = model(images, targets)
                sink.append([{k: loss.to('cpu')} for k, loss in batch_losses.items()])

    model.train()

    all_train_logs, all_cis_valid_logs, all_trans_valid_logs = [], [], []

    for epoch in range(num_epochs):
        # one training epoch, with progress printed every `print_freq` iterations
        all_train_logs.append(
            train_one_epoch(model, optimizer, dataloader, device, epoch, print_freq)
        )

        # update the learning rate
        lr_scheduler.step()

        # losses on both validation sets after the epoch (trans first, then cis)
        _record_losses(trans_valid_dataloader, all_trans_valid_logs)
        _record_losses(cis_valid_dataloader, all_cis_valid_logs)

    # a single timestamp is shared by every file written below
    filetime = time.strftime("%Y%m%d_%H%M%S")

    if save_logs:
        train_logs_to_json(all_train_logs, filetime)
        cis_valid_logs_to_json(all_cis_valid_logs, filetime)
        trans_valid_logs_to_json(all_trans_valid_logs, filetime)

    if save_model:
        for stateful, suffix in (
            (model, '_model.pt'),
            (optimizer, '_optimizer.pt'),
            (lr_scheduler, '_scheduler.pt'),
        ):
            torch.save(stateful.state_dict(), 'saved_models/' + filetime + suffix)

    return all_train_logs, all_trans_valid_logs, all_cis_valid_logs


### Before training

In [None]:
# PARAMETERS TO TUNE BEFORE TRAINING
num_epochs = 25

# CHECK DEVICE BEFORE TRAINING
# Displays the GPU name, or 'cpu' when no GPU is available (querying the
# device name without a GPU raises an error).
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'

### This next cell starts the training of the model

In [None]:
# TRAIN
# Runs `num_epochs` epochs on the training loader; with the default
# arguments of `train`, the logs and the model states are also saved.
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

### Saving the last training logs to variables
##### Ensures that if you hit the training cell, you don't lose the variables containing the logs from the last run

In [None]:
# Keep a second reference to the logs of the last run under separate names.
last_train_logs, last_trans_valid_logs, last_cis_valid_logs = (
    all_train_logs,
    all_trans_valid_logs,
    all_cis_valid_logs,
)

## OPTIONAL - Saving manually every logs from training to json files

In [None]:
# Saves the three logs under one shared timestamp. The timestamp is passed
# explicitly: relying on each function's default does not guarantee that the
# three file names get the same prefix.
manual_save_time = time.strftime("%Y%m%d_%H%M%S")

train_logs_to_json(last_train_logs, manual_save_time)
trans_valid_logs_to_json(last_trans_valid_logs, manual_save_time)
cis_valid_logs_to_json(last_cis_valid_logs, manual_save_time)

## OPTIONAL - Saving the model

In [None]:
# save the states of the model, the optimizer and the scheduler under one prefix
# filetime = "25_epochs_roi_3_augment_method_3"
filetime = time.strftime("%Y%m%d_%H%M%S")

for stateful, suffix in (
    (model, '_model.pt'),
    (optimizer, '_optimizer.pt'),
    (lr_scheduler, '_scheduler.pt'),
):
    torch.save(stateful.state_dict(), 'saved_models/' + filetime + suffix)

## Exploration of the raw logs
##### Only look at the MetricLogger if you just trained the model. You cannot import the model and then check the MetricLogger.

In [None]:
# All meters recorded for the first training epoch.
all_train_logs[0].meters

In [None]:
# Global average of the 'loss_box_reg' meter over the first training epoch.
all_train_logs[0].meters['loss_box_reg'].global_avg

In [None]:
# Number of batches in the training loader.
len(train_dataloader)

#### Here we check the number of logs per epoch for each category, and their type

In [None]:
# First cis validation entry, total number of entries (one per validation
# batch) and number of entries per epoch.
print(all_cis_valid_logs[0])
print("total length:", len(all_cis_valid_logs))
print("-"*8)
print("per epoch length:", len(all_cis_valid_logs)/num_epochs)

In [None]:
# First trans validation entry, total number of entries (one per validation
# batch) and number of entries per epoch.
print(all_trans_valid_logs[0])
print("total length:", len(all_trans_valid_logs))
print("-"*8)
print("per epoch length:", len(all_trans_valid_logs)/num_epochs)

## Looking at/Loading the logs in convenient ways
Here we define the variables "train_logs", "cis_valid_logs" and "trans_valid_logs" that will be used in the methods for the results and the visualisations.

We can import logs or use the ones from training.

### OPTIONAL - Can load some logs right here

In [None]:
# Imported logs - the files are named "<NAME_OR_TIME>_train_logs.json",
# "<NAME_OR_TIME>_cis_valid_logs.json" and "<NAME_OR_TIME>_trans_valid_logs.json"

file_time_or_nickname = '10_roi' # VALUE TO CHANGE TO THE IMPORTED FILES


def _read_saved_logs(suffix):
    """Load 'saved_logs/<file_time_or_nickname><suffix>' as JSON."""
    with open('saved_logs/' + file_time_or_nickname + suffix, "r") as f:
        return json.load(f)


train_logs = _read_saved_logs('_train_logs.json')
cis_valid_logs = _read_saved_logs('_cis_valid_logs.json')
trans_valid_logs = _read_saved_logs('_trans_valid_logs.json')

### Put the last trained logs into convenient list variables
#### (USE THIS CELL ONLY IF THE MODEL HAS BEEN TRAINED IN THIS KERNEL)
##### Converts the logs to lists and the tensors to numpy 

In [None]:
# Convert the in-memory logs of the last run to plain lists of dicts, the
# same three variable names that the log-import cell assigns.
train_logs = train_logs_to_lst(last_train_logs)
cis_valid_logs = valid_logs_to_lst(last_cis_valid_logs)
trans_valid_logs = valid_logs_to_lst(last_trans_valid_logs)

## Results

In [None]:
# To confirm that the data is loaded properly: one entry is expected per epoch
len(train_logs)

In [None]:
# NOTE: this overwrites the `num_epochs` set before training with the number
# of epochs found in the logs.
num_epochs = len(train_logs)

In [None]:
# Train loss to plot: 'loss_box_reg' of every epoch
# (global_avg is used here; value, median, avg and max are also available)
results_train_loss = [
    train_logs[epoch]['loss_box_reg']['global_avg'] for epoch in range(num_epochs)
]

In [None]:
# Cis valid loss to plot: sum of 'loss_box_reg' over the batches of each epoch.
# The number of batches per epoch is derived from the logs instead of being
# hard-coded (it was fixed to 167), so other validation set sizes and batch
# sizes work too.
cis_batches_per_epoch = len(cis_valid_logs) // num_epochs if num_epochs else 0

results_cis_valid_loss = [] # cis

for epoch in range(num_epochs):
    first_batch = epoch * cis_batches_per_epoch
    epoch_batches = cis_valid_logs[first_batch:first_batch + cis_batches_per_epoch]
    results_cis_valid_loss.append(sum(batch['loss_box_reg'] for batch in epoch_batches))

In [None]:
# Trans valid loss to plot: sum of 'loss_box_reg' over the batches of each epoch.
# The number of batches per epoch is derived from the logs instead of being
# hard-coded (it was fixed to 154), so other validation set sizes and batch
# sizes work too.
trans_batches_per_epoch = len(trans_valid_logs) // num_epochs if num_epochs else 0

results_trans_valid_loss = [] # trans

for epoch in range(num_epochs):
    first_batch = epoch * trans_batches_per_epoch
    epoch_batches = trans_valid_logs[first_batch:first_batch + trans_batches_per_epoch]
    results_trans_valid_loss.append(sum(batch['loss_box_reg'] for batch in epoch_batches))

### Plots

In [None]:
# Plot the train loss (left) and the two validation losses (right) per epoch
fig, ax = plt.subplots(1,2, figsize=(20,6))

# Epoch number of the first plotted point.
# NOTE(review): 51 presumably reflects a run resumed from a 50-epoch
# checkpoint; set it to 1 for a run started from scratch.
first_epoch = 51
epochs = np.arange(first_epoch, first_epoch + num_epochs)

ax[0].plot(epochs, results_train_loss, label='train')
ax[0].set_title('Train loss per epoch')
ax[0].set_ylabel('loss_box_reg')
ax[0].set_xlabel('epoch')
ax[0].legend()

# (the former stray `plt.title(...)` call was removed: it set a title that
# the right-hand panel immediately overwrote below)
ax[1].plot(epochs, results_cis_valid_loss, label='cis')
ax[1].plot(epochs, results_trans_valid_loss, label='trans')
ax[1].set_title('Valid loss per epoch')
ax[1].set_ylabel('loss_box_reg')
ax[1].set_xlabel('epoch')
ax[1].legend()


#### Save the figure to pdf format in the figures folder

In [None]:
# The file name is prefixed with the current time, so earlier figures are kept.
fig.savefig("figures/" + time.strftime("%Y%m%d_%H%M%S") + "_figure.pdf")

## Make Predictions with a model

### Load 10 random predictions

In [None]:
# Takes one batch (10 images) from the trans validation loader and runs the model on it.
# Despite their names, `train_features` / `train_labels` hold validation data.
train_features, train_labels = next(iter(trans_valid_dataloader))
image = [img.to(device) for img in train_features]

model.eval()
with torch.no_grad():
    pred = model(image)

In [None]:
# Shows, for every image of the batch, the ground truth, the raw predictions
# and the predictions kept after NMS.
# TODO: write functions to simplify the code
def _draw_boxes(axis, boxes):
    """Draw every [x1, y1, x2, y2] row of the tensor `boxes` as a red rectangle on `axis`."""
    for x1, y1, x2, y2 in boxes.cpu().tolist():
        axis.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                         linewidth=2, edgecolor='r', facecolor='none'))


iou_threshold = 0.01 # param to tune

for image_i in range(len(image)):
    fig, ax = plt.subplots(1,3,figsize=(24,16))

    # the first channel of the image is shown in gray on the three panels
    for axis in ax:
        axis.imshow(train_features[image_i][0].squeeze(),cmap="gray")

    # Ground truth: all the boxes are drawn (only the first one used to be)
    _draw_boxes(ax[0], train_labels[image_i]['boxes'])
    ax[0].set_title('Ground truth')

    # Predictions
    _draw_boxes(ax[1], pred[image_i]['boxes'])
    ax[1].set_title('Pred')

    # Predictions after NMS
    boxes_to_keep = torchvision.ops.nms(pred[image_i]['boxes'], pred[image_i]['scores'], iou_threshold = iou_threshold).cpu()
    _draw_boxes(ax[2], pred[image_i]['boxes'].cpu()[boxes_to_keep])
    ax[2].set_title('After NMS')

In [None]:
# Annotation-file entry of the image at position 3 of the batch; 'image_id'
# holds the position of the image in the 'images' list of its annotation file.
trans_val_ann['images'][train_labels[3]['image_id']]

In [None]:
# Show a single image, chosen by index from the last batch of 10 predictions,
# with its ground truth, its raw predictions and its predictions after NMS
image_i = 3 # from 0 to 9 included

fig, ax = plt.subplots(1,3,figsize=(24,16))

# Ground truth: every annotated box is drawn on the first channel of the image
ax[0].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in range(len(train_labels[image_i]['boxes'])):
    rect = patches.Rectangle((train_labels[image_i]['boxes'][i][0], 
                            train_labels[image_i]['boxes'][i][1]), 
                            train_labels[image_i]['boxes'][i][2]-train_labels[image_i]['boxes'][i][0], 
                            train_labels[image_i]['boxes'][i][3]-train_labels[image_i]['boxes'][i][1], 
                            linewidth=2, edgecolor='r', facecolor='none')
    ax[0].add_patch(rect)
ax[0].set_title('Ground truth')

# Predictions (the coordinates are moved to the CPU before being drawn)
ax[1].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in range(len(pred[image_i]['boxes'])):
    rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                              pred[image_i]['boxes'][i][1].cpu()), 
                             (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                             (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                             linewidth=2, edgecolor='r', facecolor='none')
    ax[1].add_patch(rect)
ax[1].set_title('Pred')

# Predictions after NMS: only the boxes whose index is returned by nms are drawn
iou_threshold = 0.01 # param to tune
boxes_to_keep = torchvision.ops.nms(pred[image_i]['boxes'], pred[image_i]['scores'], iou_threshold = iou_threshold).cpu()
ax[2].imshow(train_features[image_i][0].squeeze(),cmap="gray")
for i in boxes_to_keep:
    rect = patches.Rectangle((pred[image_i]['boxes'][i][0].cpu(), 
                              pred[image_i]['boxes'][i][1].cpu()), 
                             (pred[image_i]['boxes'][i][2]-pred[image_i]['boxes'][i][0]).cpu(), 
                             (pred[image_i]['boxes'][i][3]-pred[image_i]['boxes'][i][1]).cpu(), 
                             linewidth=2, edgecolor='r', facecolor='none')
    ax[2].add_patch(rect)

ax[2].set_title('After NMS')

In [None]:
# Indices of the predicted boxes kept by the last NMS call.
boxes_to_keep

In [None]:
# First predicted box of the selected image.
pred[image_i]['boxes'][0]

In [None]:
# Ground-truth boxes of the selected image.
train_labels[image_i]['boxes']

# Evaluate on COCO detection metrics

### Cis Test on COCO metrics
##### 'For evaluation, we consider a detected box to be correct if its IoU ≥ 0.5 with a ground truth box.'

We need to look at the precision score with IoU=0.5, area=all and maxDets=100.
For the recall score, the default is IoU=0.50:0.95.

In [None]:
# # TODO: method to evaluate
# def evaluate(dataloader):
#     apply_nms = True
#     iou_threshold = 0.35 # param to potentially tune
#     the_data_loader = dataloader # change to test set

#     coco = get_coco_api_from_dataset(the_data_loader.dataset)
#     iou_types = _get_iou_types(model)
#     coco_evaluator = CocoEvaluator(coco, iou_types)

#     model.eval()

#     for images, targets in the_data_loader:
#         images = [image.to(device) for image in images]
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#         with torch.no_grad():

#             pred=model(images)

#             if apply_nms:
#                 boxes_to_keep = torchvision.ops.nms(pred[0]['boxes'], pred[0]['scores'], iou_threshold=iou_threshold).cpu()
#                 pred[0]['boxes'] = pred[0]['boxes'][boxes_to_keep]
#                 pred[0]['labels'] = pred[0]['labels'][boxes_to_keep]
#                 pred[0]['scores'] = pred[0]['scores'][boxes_to_keep]

#             outputs = [{k: v.cpu() for k, v in t.items()} for t in pred]
#             res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
#             coco_evaluator.update(res)

#     coco_evaluator.synchronize_between_processes()
#     coco_evaluator.accumulate()
#     print('_'*20)
#     print('Cis Test Data - Summary')
#     print(" ")
#     coco_evaluator.summarize()


In [None]:
# evaluate(cis_test_dataloader)

In [None]:
# evaluate(trans_test_dataloader)

In [None]:
# Evaluate the performance on the COCO detection metrics (cis test set)

# takes +- 25min to run on cis_test

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import _get_iou_types 

apply_nms = True
iou_threshold = 0.35 # param to potentially tune
the_data_loader = cis_test_dataloader # change to test set

coco = get_coco_api_from_dataset(the_data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)

model.eval()

for images, targets in the_data_loader:
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    with torch.no_grad():
        pred = model(images)

    if apply_nms:
        # NMS is applied to every image of the batch. It used to be applied to
        # pred[0] only, which left the other images of each batch unfiltered.
        for image_pred in pred:
            boxes_to_keep = torchvision.ops.nms(image_pred['boxes'], image_pred['scores'], iou_threshold=iou_threshold)
            for key in ('boxes', 'labels', 'scores'):
                image_pred[key] = image_pred[key][boxes_to_keep]

    # the evaluator receives CPU tensors, keyed by the image id of each target
    outputs = [{k: v.cpu() for k, v in t.items()} for t in pred]
    res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
    coco_evaluator.update(res)

coco_evaluator.synchronize_between_processes()
coco_evaluator.accumulate()
print('_'*20)
print('Cis Test Data - Summary')
print(" ")
coco_evaluator.summarize()

### Trans Test on COCO metrics

In [None]:
# for param in model.roi_heads.box_predictor.cls_score[0].parameters():
#     print(param)

In [None]:
# model.roi_heads.box_predictor.cls_score[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False) 
# model.roi_heads.box_predictor.bbox_pred[0].weight = nn.Parameter(X_target_proj.float(), requires_grad = False)
# model.to(device)

In [None]:
# Evaluate the performance on the COCO detection metrics (trans test set)

# takes +- 25min to run on trans_test

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import _get_iou_types 

apply_nms = True
iou_threshold = 0.35 # param to potentially tune
the_data_loader = trans_test_dataloader # change to test set

coco = get_coco_api_from_dataset(the_data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)

model.eval()

for images, targets in the_data_loader:
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    with torch.no_grad():
        pred = model(images)

    if apply_nms:
        # NMS is applied to every image of the batch. It used to be applied to
        # pred[0] only, which left the other images of each batch unfiltered.
        for image_pred in pred:
            boxes_to_keep = torchvision.ops.nms(image_pred['boxes'], image_pred['scores'], iou_threshold=iou_threshold)
            for key in ('boxes', 'labels', 'scores'):
                image_pred[key] = image_pred[key][boxes_to_keep]

    # the evaluator receives CPU tensors, keyed by the image id of each target
    outputs = [{k: v.cpu() for k, v in t.items()} for t in pred]
    res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
    coco_evaluator.update(res)

coco_evaluator.synchronize_between_processes()
coco_evaluator.accumulate()
print('_'*20)
print('Trans Test Data - Summary')
print(" ")
coco_evaluator.summarize()

# Methods

## Method 3 (Subspace alignment based Domain adaptation)

In [None]:
import torchvision.ops.boxes as bops
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from torch import nn

Papers 

 1. https://arxiv.org/pdf/1507.05578.pdf

 2.  https://openaccess.thecvf.com/content_iccv_2013/papers/Fernando_Unsupervised_Visual_Domain_2013_ICCV_paper.pdf

**Construct source matrix:** 

We keep the output of `model.roi_heads.box_head` (a vector of size 1024) as the feature representation of each bounding box extracted by the RPN (region proposal network). A box representation is stacked onto the source matrix only if the box has an IoU ≥ thres_IoU with a ground-truth box of the given image.

In [None]:
# 20 minutes
thres_IoU = 0.50  # minimum IoU with a ground-truth box for a proposal to be kept
count = 0

# Per-image blocks of kept features; concatenated once after the loop instead
# of growing X_source with torch.cat at every iteration (quadratic copying).
source_feature_chunks = []

model.eval()

# Forward hooks capture the RPN output (proposals) and the box-head output
# (one feature vector per proposal). They are registered once and always
# removed, even if the loop is interrupted.
rpn_outputs = []
box_head_outputs = []
rpn_hook = model.rpn.register_forward_hook(
    lambda module, inputs, output: rpn_outputs.append(output))
box_head_hook = model.roi_heads.box_head.register_forward_hook(
    lambda module, inputs, output: box_head_outputs.append(output))

try:
    with torch.no_grad():
        for images, targets in train_dataloader:
            images = [image.to(device) for image in images]

            count += 1

            if count % 100 == 0:
                print(count)

            rpn_outputs.clear()
            box_head_outputs.clear()
            model(images)

            # Proposals of the first image of the batch: [N, 4]. N is not
            # assumed to be 1000 any more (the RPN may return fewer boxes).
            coords = rpn_outputs[0][0][0].cpu()
            # Matching box-head features: [N, feature_dim]. Only the first N
            # rows are used, as before (indices were always < number of proposals).
            feat = box_head_outputs[0][:coords.shape[0]].cpu()

            # NOTE(review): proposals are expressed in the frame of the image as
            # seen by the RPN, whereas `gt` comes straight from the dataloader.
            # If the model resizes images internally these two frames differ --
            # confirm, and rescale one of them if needed.
            gt = targets[0]['boxes'].cpu().reshape(-1, 4)

            # Keep a proposal as soon as it overlaps ANY ground-truth box enough.
            # The boolean mask selects rows in ascending index order, i.e. the
            # same rows (and order) as the former unique-index selection.
            ious = bops.box_iou(gt, coords)  # [num_gt, N]
            keep = (ious >= thres_IoU).any(dim=0)

            source_feature_chunks.append(feat[keep])
finally:
    rpn_hook.remove()
    box_head_hook.remove()

if source_feature_chunks:
    X_source = torch.cat(source_feature_chunks, dim=0)
else:
    # Empty dataloader: same placeholder the loop used to start from.
    X_source = torch.tensor([])

In [None]:
X_source.shape

### Save 1

In [None]:
torch.save(X_source, 'saved_data/X_source_05_roi_3_augment.pt')

In [None]:
# Standardize every feature dimension of the source matrix before PCA
# (StandardScaler defaults: remove the mean and scale to unit variance).
scaler = StandardScaler().fit(X_source)
X_source_scaled = scaler.transform(X_source)

In [None]:
# Fit PCA on the standardized source features and keep only the first 100
# components; the rows of `components_` form the projected source matrix.
pca = PCA(n_components=100).fit(X_source_scaled)

X_source_proj = torch.from_numpy(pca.components_)


In [None]:
X_source_proj.shape

In [None]:
# Explained variance ratio of each retained source component; labelled so the
# figure can be read on its own.
plt.plot(pca.explained_variance_ratio_)
plt.xlabel('principal component index')
plt.ylabel('explained variance ratio')
plt.title('Source features - PCA explained variance')
plt.grid()

### Save 2

In [None]:
torch.save(X_source_proj, 'saved_data/X_source_proj_05_roi_3_augment.pt')

### Target data with batch size 1

In [None]:
# Target data/distribution = trans test set, served one image per batch
trans_test_batch1_img, _ = get_img_with_bbox(trans_test_ann_path)
trans_test_batch1_data = CustomImageDataset(
    trans_test_ann_path,
    img_folder,
    trans_test_batch1_img,
)
trans_test_batch1_dataloader = DataLoader(
    trans_test_batch1_data,
    collate_fn=utils.collate_fn,
    shuffle=True,
    batch_size=1,
)

 **Construct target matrix:** 
 
We keep the output of `model.roi_heads.box_head` (a vector of size 1024) as the feature representation of each bounding box
extracted by the RPN (region proposal network). A box representation is stacked onto the target matrix only if the predicted bbox associated with the feature has a confidence score ≥ thres_conf_score (since we don't use target labels, we can't use the IoU here).


In [None]:
# 30 minutes
thres_conf_score = 0.50  # minimum detection score for a box feature to be kept
count = 0

# Per-image blocks of kept features; concatenated once after the loop instead
# of growing X_target with torch.cat at every iteration (quadratic copying).
target_feature_chunks = []

model.eval()

# Forward hook capturing the backbone feature maps of each forward pass.
# Registered once and always removed, even if the loop is interrupted.
backbone_outputs = []
backbone_hook = model.backbone.register_forward_hook(
    lambda module, inputs, output: backbone_outputs.append(output))

try:
    with torch.no_grad():
        # Target labels are deliberately unused here (unsupervised adaptation).
        for images, targets in trans_test_batch1_dataloader:  # trans location valid AND test ?
            images = [image.to(device) for image in images]

            count += 1

            if count % 100 == 0:
                print(count)

            backbone_outputs.clear()
            res = model(images)

            # Re-pool the backbone features on the final detections and run the
            # box head to get one feature vector per detected box.
            # NOTE(review): the boxes in `res` and the image shapes passed here
            # are in the frame of the input images, while the backbone features
            # come from the image as seen by the backbone -- confirm the RoI
            # pooling scale is the intended one if the model resizes its inputs.
            box_features = model.roi_heads.box_roi_pool(
                backbone_outputs[0],
                [r['boxes'] for r in res],
                [i.shape[-2:] for i in images],
            )
            box_features = model.roi_heads.box_head(box_features)

            # Batch size is 1, so the rows of box_features line up with res[0].
            confident = res[0]['scores'] >= thres_conf_score
            target_feature_chunks.append(box_features[confident].cpu())
finally:
    backbone_hook.remove()

if target_feature_chunks:
    X_target = torch.cat(target_feature_chunks, dim=0)
else:
    # Empty dataloader: same placeholder the loop used to start from.
    X_target = torch.tensor([])


In [None]:
X_target.shape

In [None]:
torch.save(X_target, 'saved_data/X_target_05_roi_3_augment.pt')

In [None]:
# Standardize every feature dimension of the target matrix before PCA
# (StandardScaler defaults: remove the mean and scale to unit variance).
scaler = StandardScaler().fit(X_target)
X_target_scaled = scaler.transform(X_target)

In [None]:
# Fit PCA on the standardized target features and keep only the first 100
# components; the rows of `components_` form the projected target matrix.
pca_proj = PCA(n_components=100).fit(X_target_scaled)

X_target_proj = torch.from_numpy(pca_proj.components_)

In [None]:
# Explained variance ratio of each retained target component (we keep 100
# dimensions); labelled so the figure can be read on its own.
plt.plot(pca_proj.explained_variance_ratio_)
plt.xlabel('principal component index')
plt.ylabel('explained variance ratio')
plt.title('Target features - PCA explained variance')
plt.grid()

In [None]:
X_target_proj.shape

In [None]:
torch.save(X_target_proj, 'saved_data/X_target_proj_05_roi_3_augment.pt')

### Transformation matrix M

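$M$ is obtained by minimizing a Bregman matrix divergence, here the Frobenius norm $\lVert X_S M - X_T \rVert_F^2$, where the columns of $X_S$ and $X_T$ are the source and target PCA bases. Following the closed-form solution given in the paper (reference 2 above), $M^* = X_S^\top X_T$.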

In [None]:
M = torch.matmul(X_source_proj, X_target_proj.T) 

In [None]:
M.shape

### Project source data into target aligned source subspace

In [None]:
Xa = torch.matmul(X_source_proj.T,M)

In [None]:
Xa.shape

In [None]:
# To project a given feature

# feat(1,1024) x Xa (1024,100)

### Project target data into target subspace

In [None]:
# To project a given feature

# feat(1,1024) x X_target_proj.T (1024,100)

### Train adapted model

In [22]:
import torchvision.ops.boxes as bops
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from torch import nn

In [23]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [24]:
# Load because it takes time to generate the following matrices so they are saved
X_source_proj = torch.load('saved_data/X_source_proj_05_baseline.pt')
X_target_proj = torch.load('saved_data/X_target_proj_05_baseline.pt')

In [25]:
X_source_proj.device

device(type='cpu')

In [26]:
# Transformation matrix aligning the source PCA basis with the target one
M = X_source_proj @ X_target_proj.T
print(M.shape)

# Target-aligned source subspace
Xa = X_source_proj.T @ M
print(Xa.shape)

torch.Size([100, 100])
torch.Size([1024, 100])


In [27]:
# Displays a copy of M placed on `device`. Tensor.to() is not in-place and the
# result is not assigned, so the variable M itself is left unchanged.
M.to(device)

tensor([[-0.8191,  0.1589,  0.3587,  ..., -0.0177, -0.0084,  0.0016],
        [ 0.2805,  0.3420,  0.7292,  ..., -0.0213, -0.0011, -0.0182],
        [ 0.1912,  0.2901,  0.0763,  ..., -0.0103, -0.0129, -0.0254],
        ...,
        [-0.0141,  0.0063, -0.0036,  ...,  0.1932,  0.1920,  0.0402],
        [-0.0139,  0.0112,  0.0021,  ...,  0.1608, -0.0424, -0.0748],
        [ 0.0040,  0.0064,  0.0025,  ...,  0.0759, -0.0684,  0.0766]],
       device='cuda:0', dtype=torch.float64)

In [28]:
# Displays a copy of Xa placed on `device`. Tensor.to() is not in-place and the
# result is not assigned, so the variable Xa itself is left unchanged.
Xa.to(device)

tensor([[ 0.0262, -0.0093,  0.0367,  ...,  0.0366, -0.0087, -0.0144],
        [-0.0297,  0.0100,  0.0555,  ...,  0.0271,  0.0269, -0.0035],
        [ 0.0213, -0.0397, -0.0225,  ...,  0.0159,  0.0171,  0.0009],
        ...,
        [-0.0041,  0.0041,  0.0438,  ..., -0.0110, -0.0143,  0.0113],
        [-0.0591, -0.0518, -0.0071,  ..., -0.0045,  0.0154,  0.0073],
        [ 0.0120, -0.0118,  0.0672,  ..., -0.0021, -0.0170,  0.0016]],
       device='cuda:0', dtype=torch.float64)

In [None]:
# class FastRCNNPredictor_custom(nn.Module):
#     """
#     Standard classification + bounding box regression layers
#     for Fast R-CNN.

#     Args:
#         in_channels (int): number of input channels
#         num_classes (int): number of output classes (including background)
#     """

#     def __init__(self, in_channels, num_classes, m_transfo):
#         super(FastRCNNPredictor_custom, self).__init__()
#         self.cls_score = nn.Sequential(nn.Linear(in_features = 1024, out_features = 100, bias=False), nn.Linear(in_channels, num_classes))
#         self.bbox_pred = nn.Sequential(nn.Linear(in_features = 1024, out_features = 100, bias=False), nn.Linear(in_channels, num_classes * 4))
#         self.cls_score[0].weight= nn.Parameter(m_transfo, requires_grad = False)
#         self.bbox_pred[0].weight= nn.Parameter(m_transfo, requires_grad = False)

#     def forward(self, x):
#         if x.dim() == 4:
#             assert list(x.shape[2:]) == [1, 1]
#         x = x.flatten(start_dim=1)
#         scores = self.cls_score(x)
#         bbox_deltas = self.bbox_pred(x)

#         return scores, bbox_deltas


In [34]:
class FastRCNNPredictor_custom(nn.Module):
    """
    Classification + bounding box regression layers for Fast R-CNN, each
    preceded by a frozen linear projection of the input features.

    Both branches are ``nn.Sequential(projection, head)``: ``projection`` is a
    bias-free linear layer whose weight is ``m_transfo`` (never trained) and
    ``head`` is a regular trainable linear layer working on the projected
    features.

    Args:
        in_channels (int): size of the projected features, i.e. number of
            rows of ``m_transfo`` and input size of the trainable heads
        num_classes (int): number of output classes (including background)
        m_transfo (Tensor): projection matrix of shape
            ``(in_channels, in_features)`` where ``in_features`` is the size of
            the incoming box features. Its dtype is kept as is, so it must
            match the dtype of the features fed to ``forward``.

    Raises:
        ValueError: if ``m_transfo`` is not a 2-D tensor with ``in_channels`` rows.
    """

    def __init__(self, in_channels, num_classes, m_transfo):
        super(FastRCNNPredictor_custom, self).__init__()

        if m_transfo.dim() != 2 or m_transfo.shape[0] != in_channels:
            raise ValueError(
                "m_transfo must have shape (in_channels, in_features) = "
                f"({in_channels}, in_features), got {tuple(m_transfo.shape)}"
            )
        # Size of the incoming features is dictated by the projection matrix
        # rather than hard-coded.
        in_features = m_transfo.shape[1]

        self.cls_score = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=in_channels, bias=False),
            nn.Linear(in_channels, num_classes),
        )
        self.bbox_pred = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=in_channels, bias=False),
            nn.Linear(in_channels, num_classes * 4),
        )

        # Frozen projections. Each branch gets its own copy so the two
        # parameters (and the caller's tensor) do not share storage.
        self.cls_score[0].weight = nn.Parameter(m_transfo.detach().clone(), requires_grad=False)
        self.bbox_pred[0].weight = nn.Parameter(m_transfo.detach().clone(), requires_grad=False)

    def forward(self, x):
        """Return ``(scores, bbox_deltas)`` for a batch of box features.

        ``x`` is either ``(N, in_features)`` or ``(N, in_features, 1, 1)``;
        it is flattened to 2-D before going through both branches.
        """
        if x.dim() == 4:
            assert list(x.shape[2:]) == [1, 1]
        x = x.flatten(start_dim=1)
        scores = self.cls_score(x)
        bbox_deltas = self.bbox_pred(x)

        return scores, bbox_deltas

In [35]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# two classes only: background + one foreground class
num_classes = 2

# get the model using our helper function
model = get_model_from_pretrained(num_classes)

# load fine-tuned weights; map_location keeps the checkpoint loadable on a
# CPU-only machine (consistent with the CPU fallback of `device` above)
model.load_state_dict(torch.load('saved_models/25_base_model.pt', map_location=device))

# freeze all existing weights
for param in model.parameters():
    param.requires_grad = False

# Replace the box predictor: features are first projected with the frozen
# target-aligned source basis Xa (M.shape[0] dimensions after projection),
# then fed to fresh trainable heads.
model.roi_heads.box_predictor = FastRCNNPredictor_custom(M.shape[0], num_classes, Xa.T.float())

# move the whole model (including the new predictor) to the right device
model.to(device)

# construct an optimizer
# We will only retrain the trainable layers of model.roi_heads.box_predictor
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0003, momentum=0.9)

# learning rate is divided by 10 at epochs 5 and 10
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 10], gamma=0.1)

In [36]:
# weights to learn: every parameter left trainable, however many there are
# (no hard-coded count that could hide extra parameters or raise IndexError)
for trainable_param in params:
    print(trainable_param.shape)

torch.Size([2, 100])
torch.Size([2])
torch.Size([8, 100])
torch.Size([8])


In [37]:
# Shapes of the weights tracked by the optimizer's first parameter group
for tracked_param in optimizer.param_groups[0]['params']:
    print(tracked_param.shape)

torch.Size([2, 100])
torch.Size([2])
torch.Size([8, 100])
torch.Size([8])


### Before training

In [38]:
# PARAMETERS TO TUNE BEFORE TRAINING
num_epochs = 25

# CHECK DEVICE BEFORE TRAINING
# Shows the GPU name when CUDA is available; falls back to 'cpu' instead of
# raising on a CPU-only machine.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'

'NVIDIA GeForce GTX 1080 Ti'

### This next cell starts the training of the model

In [None]:
# TRAIN
all_train_logs, all_trans_valid_logs, all_cis_valid_logs = train(dataloader=train_dataloader, num_epochs=num_epochs)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [    0/12099]  eta: 3:40:48  lr: 0.000001  loss: 1.5123 (1.5123)  loss_classifier: 1.3656 (1.3656)  loss_box_reg: 0.0726 (0.0726)  loss_objectness: 0.0142 (0.0142)  loss_rpn_box_reg: 0.0599 (0.0599)  time: 1.0950  data: 0.0375  max mem: 520
Epoch: [0]  [  100/12099]  eta: 0:24:35  lr: 0.000031  loss: 0.4350 (0.7108)  loss_classifier: 0.2900 (0.5295)  loss_box_reg: 0.1152 (0.1385)  loss_objectness: 0.0188 (0.0343)  loss_rpn_box_reg: 0.0020 (0.0085)  time: 0.1111  data: 0.0298  max mem: 524
Epoch: [0]  [  200/12099]  eta: 0:23:40  lr: 0.000061  loss: 0.2547 (0.5305)  loss_classifier: 0.1008 (0.3466)  loss_box_reg: 0.1181 (0.1431)  loss_objectness: 0.0214 (0.0331)  loss_rpn_box_reg: 0.0031 (0.0077)  time: 0.1177  data: 0.0332  max mem: 524
Epoch: [0]  [  300/12099]  eta: 0:23:19  lr: 0.000091  loss: 0.3112 (0.4511)  loss_classifier: 0.0993 (0.2651)  loss_box_reg: 0.1632 (0.1452)  loss_objectness: 0.0043 (0.0339)  loss_rpn_box_reg: 0.0026 (0.0070)  time: 0.1179  data: 0.0329  m

Epoch: [0]  [ 3300/12099]  eta: 0:17:20  lr: 0.000300  loss: 0.1406 (0.2187)  loss_classifier: 0.0408 (0.0794)  loss_box_reg: 0.0731 (0.0987)  loss_objectness: 0.0062 (0.0328)  loss_rpn_box_reg: 0.0024 (0.0078)  time: 0.1116  data: 0.0288  max mem: 524
Epoch: [0]  [ 3400/12099]  eta: 0:17:06  lr: 0.000300  loss: 0.1357 (0.2167)  loss_classifier: 0.0390 (0.0785)  loss_box_reg: 0.0593 (0.0978)  loss_objectness: 0.0067 (0.0326)  loss_rpn_box_reg: 0.0023 (0.0078)  time: 0.1129  data: 0.0303  max mem: 524
Epoch: [0]  [ 3500/12099]  eta: 0:16:53  lr: 0.000300  loss: 0.1503 (0.2156)  loss_classifier: 0.0451 (0.0779)  loss_box_reg: 0.0602 (0.0969)  loss_objectness: 0.0071 (0.0329)  loss_rpn_box_reg: 0.0061 (0.0079)  time: 0.1134  data: 0.0306  max mem: 524
Epoch: [0]  [ 3600/12099]  eta: 0:16:42  lr: 0.000300  loss: 0.1161 (0.2136)  loss_classifier: 0.0415 (0.0771)  loss_box_reg: 0.0629 (0.0961)  loss_objectness: 0.0033 (0.0326)  loss_rpn_box_reg: 0.0015 (0.0079)  time: 0.1202  data: 0.0327  m

Epoch: [0]  [ 6600/12099]  eta: 0:10:39  lr: 0.000300  loss: 0.1257 (0.1882)  loss_classifier: 0.0422 (0.0658)  loss_box_reg: 0.0523 (0.0811)  loss_objectness: 0.0097 (0.0333)  loss_rpn_box_reg: 0.0022 (0.0080)  time: 0.1112  data: 0.0290  max mem: 524
Epoch: [0]  [ 6700/12099]  eta: 0:10:27  lr: 0.000300  loss: 0.1422 (0.1880)  loss_classifier: 0.0427 (0.0657)  loss_box_reg: 0.0639 (0.0808)  loss_objectness: 0.0095 (0.0335)  loss_rpn_box_reg: 0.0038 (0.0080)  time: 0.1111  data: 0.0295  max mem: 524
Epoch: [0]  [ 6800/12099]  eta: 0:10:15  lr: 0.000300  loss: 0.1105 (0.1872)  loss_classifier: 0.0434 (0.0654)  loss_box_reg: 0.0532 (0.0804)  loss_objectness: 0.0081 (0.0334)  loss_rpn_box_reg: 0.0016 (0.0081)  time: 0.1102  data: 0.0283  max mem: 524
Epoch: [0]  [ 6900/12099]  eta: 0:10:03  lr: 0.000300  loss: 0.1567 (0.1870)  loss_classifier: 0.0488 (0.0652)  loss_box_reg: 0.0475 (0.0801)  loss_objectness: 0.0479 (0.0336)  loss_rpn_box_reg: 0.0028 (0.0081)  time: 0.1101  data: 0.0279  m

Epoch: [0]  [ 9900/12099]  eta: 0:04:11  lr: 0.000300  loss: 0.1269 (0.1763)  loss_classifier: 0.0321 (0.0608)  loss_box_reg: 0.0561 (0.0741)  loss_objectness: 0.0132 (0.0335)  loss_rpn_box_reg: 0.0042 (0.0079)  time: 0.1124  data: 0.0298  max mem: 524
Epoch: [0]  [10000/12099]  eta: 0:04:00  lr: 0.000300  loss: 0.1500 (0.1761)  loss_classifier: 0.0382 (0.0607)  loss_box_reg: 0.0477 (0.0739)  loss_objectness: 0.0353 (0.0335)  loss_rpn_box_reg: 0.0038 (0.0079)  time: 0.1104  data: 0.0282  max mem: 524
Epoch: [0]  [10100/12099]  eta: 0:03:48  lr: 0.000300  loss: 0.1148 (0.1758)  loss_classifier: 0.0434 (0.0606)  loss_box_reg: 0.0351 (0.0738)  loss_objectness: 0.0057 (0.0335)  loss_rpn_box_reg: 0.0025 (0.0079)  time: 0.1113  data: 0.0282  max mem: 524
Epoch: [0]  [10200/12099]  eta: 0:03:37  lr: 0.000300  loss: 0.1236 (0.1755)  loss_classifier: 0.0437 (0.0604)  loss_box_reg: 0.0487 (0.0736)  loss_objectness: 0.0166 (0.0335)  loss_rpn_box_reg: 0.0022 (0.0079)  time: 0.1109  data: 0.0285  m

Epoch: [1]  [ 1000/12099]  eta: 0:20:40  lr: 0.000300  loss: 0.1211 (0.1493)  loss_classifier: 0.0396 (0.0501)  loss_box_reg: 0.0410 (0.0573)  loss_objectness: 0.0047 (0.0328)  loss_rpn_box_reg: 0.0020 (0.0091)  time: 0.1114  data: 0.0292  max mem: 3754
Epoch: [1]  [ 1100/12099]  eta: 0:20:28  lr: 0.000300  loss: 0.1360 (0.1491)  loss_classifier: 0.0341 (0.0499)  loss_box_reg: 0.0532 (0.0574)  loss_objectness: 0.0106 (0.0329)  loss_rpn_box_reg: 0.0017 (0.0089)  time: 0.1087  data: 0.0272  max mem: 3754
Epoch: [1]  [ 1200/12099]  eta: 0:20:17  lr: 0.000300  loss: 0.1490 (0.1504)  loss_classifier: 0.0459 (0.0500)  loss_box_reg: 0.0554 (0.0582)  loss_objectness: 0.0083 (0.0334)  loss_rpn_box_reg: 0.0017 (0.0089)  time: 0.1121  data: 0.0293  max mem: 3754
Epoch: [1]  [ 1300/12099]  eta: 0:20:05  lr: 0.000300  loss: 0.1570 (0.1499)  loss_classifier: 0.0467 (0.0500)  loss_box_reg: 0.0736 (0.0584)  loss_objectness: 0.0093 (0.0329)  loss_rpn_box_reg: 0.0049 (0.0086)  time: 0.1117  data: 0.0291

Epoch: [1]  [ 4300/12099]  eta: 0:14:28  lr: 0.000300  loss: 0.1626 (0.1501)  loss_classifier: 0.0600 (0.0497)  loss_box_reg: 0.0405 (0.0587)  loss_objectness: 0.0119 (0.0335)  loss_rpn_box_reg: 0.0027 (0.0082)  time: 0.1123  data: 0.0293  max mem: 3754
Epoch: [1]  [ 4400/12099]  eta: 0:14:17  lr: 0.000300  loss: 0.1294 (0.1499)  loss_classifier: 0.0343 (0.0496)  loss_box_reg: 0.0608 (0.0587)  loss_objectness: 0.0056 (0.0334)  loss_rpn_box_reg: 0.0028 (0.0082)  time: 0.1121  data: 0.0297  max mem: 3754
Epoch: [1]  [ 4500/12099]  eta: 0:14:06  lr: 0.000300  loss: 0.0841 (0.1497)  loss_classifier: 0.0272 (0.0495)  loss_box_reg: 0.0524 (0.0586)  loss_objectness: 0.0039 (0.0335)  loss_rpn_box_reg: 0.0020 (0.0082)  time: 0.1102  data: 0.0280  max mem: 3754
Epoch: [1]  [ 4600/12099]  eta: 0:13:54  lr: 0.000300  loss: 0.1390 (0.1496)  loss_classifier: 0.0367 (0.0495)  loss_box_reg: 0.0592 (0.0586)  loss_objectness: 0.0179 (0.0334)  loss_rpn_box_reg: 0.0034 (0.0081)  time: 0.1120  data: 0.0289

Epoch: [1]  [ 7600/12099]  eta: 0:08:21  lr: 0.000300  loss: 0.1247 (0.1483)  loss_classifier: 0.0319 (0.0491)  loss_box_reg: 0.0548 (0.0581)  loss_objectness: 0.0105 (0.0332)  loss_rpn_box_reg: 0.0031 (0.0080)  time: 0.1111  data: 0.0284  max mem: 3754
Epoch: [1]  [ 7700/12099]  eta: 0:08:10  lr: 0.000300  loss: 0.1193 (0.1484)  loss_classifier: 0.0429 (0.0491)  loss_box_reg: 0.0578 (0.0581)  loss_objectness: 0.0041 (0.0331)  loss_rpn_box_reg: 0.0022 (0.0080)  time: 0.1118  data: 0.0288  max mem: 3754
Epoch: [1]  [ 7800/12099]  eta: 0:07:59  lr: 0.000300  loss: 0.1468 (0.1483)  loss_classifier: 0.0446 (0.0491)  loss_box_reg: 0.0413 (0.0580)  loss_objectness: 0.0210 (0.0331)  loss_rpn_box_reg: 0.0036 (0.0080)  time: 0.1117  data: 0.0294  max mem: 3754
Epoch: [1]  [ 7900/12099]  eta: 0:07:48  lr: 0.000300  loss: 0.1074 (0.1482)  loss_classifier: 0.0359 (0.0491)  loss_box_reg: 0.0550 (0.0580)  loss_objectness: 0.0043 (0.0331)  loss_rpn_box_reg: 0.0018 (0.0080)  time: 0.1148  data: 0.0314

Epoch: [1]  [10900/12099]  eta: 0:02:13  lr: 0.000300  loss: 0.1583 (0.1481)  loss_classifier: 0.0376 (0.0489)  loss_box_reg: 0.0429 (0.0579)  loss_objectness: 0.0061 (0.0334)  loss_rpn_box_reg: 0.0019 (0.0079)  time: 0.1117  data: 0.0294  max mem: 3754
Epoch: [1]  [11000/12099]  eta: 0:02:02  lr: 0.000300  loss: 0.1227 (0.1480)  loss_classifier: 0.0438 (0.0489)  loss_box_reg: 0.0615 (0.0579)  loss_objectness: 0.0085 (0.0334)  loss_rpn_box_reg: 0.0028 (0.0079)  time: 0.1104  data: 0.0285  max mem: 3754
Epoch: [1]  [11100/12099]  eta: 0:01:51  lr: 0.000300  loss: 0.1639 (0.1481)  loss_classifier: 0.0401 (0.0489)  loss_box_reg: 0.0641 (0.0579)  loss_objectness: 0.0197 (0.0335)  loss_rpn_box_reg: 0.0034 (0.0079)  time: 0.1109  data: 0.0287  max mem: 3754
Epoch: [1]  [11200/12099]  eta: 0:01:40  lr: 0.000300  loss: 0.1367 (0.1481)  loss_classifier: 0.0446 (0.0489)  loss_box_reg: 0.0529 (0.0579)  loss_objectness: 0.0127 (0.0334)  loss_rpn_box_reg: 0.0037 (0.0079)  time: 0.1104  data: 0.0272

Epoch: [2]  [ 2000/12099]  eta: 0:18:41  lr: 0.000300  loss: 0.1168 (0.1462)  loss_classifier: 0.0392 (0.0478)  loss_box_reg: 0.0612 (0.0575)  loss_objectness: 0.0024 (0.0334)  loss_rpn_box_reg: 0.0027 (0.0076)  time: 0.1109  data: 0.0285  max mem: 3754
Epoch: [2]  [ 2100/12099]  eta: 0:18:30  lr: 0.000300  loss: 0.1407 (0.1459)  loss_classifier: 0.0372 (0.0475)  loss_box_reg: 0.0390 (0.0575)  loss_objectness: 0.0186 (0.0334)  loss_rpn_box_reg: 0.0037 (0.0076)  time: 0.1135  data: 0.0301  max mem: 3754
Epoch: [2]  [ 2200/12099]  eta: 0:18:19  lr: 0.000300  loss: 0.1294 (0.1455)  loss_classifier: 0.0331 (0.0474)  loss_box_reg: 0.0441 (0.0574)  loss_objectness: 0.0085 (0.0332)  loss_rpn_box_reg: 0.0019 (0.0076)  time: 0.1109  data: 0.0291  max mem: 3754
Epoch: [2]  [ 2300/12099]  eta: 0:18:08  lr: 0.000300  loss: 0.1034 (0.1452)  loss_classifier: 0.0317 (0.0473)  loss_box_reg: 0.0531 (0.0574)  loss_objectness: 0.0019 (0.0329)  loss_rpn_box_reg: 0.0019 (0.0076)  time: 0.1131  data: 0.0306

Epoch: [2]  [ 5300/12099]  eta: 0:12:35  lr: 0.000300  loss: 0.1194 (0.1447)  loss_classifier: 0.0322 (0.0476)  loss_box_reg: 0.0538 (0.0568)  loss_objectness: 0.0053 (0.0328)  loss_rpn_box_reg: 0.0037 (0.0075)  time: 0.1117  data: 0.0290  max mem: 3754
Epoch: [2]  [ 5400/12099]  eta: 0:12:24  lr: 0.000300  loss: 0.1178 (0.1449)  loss_classifier: 0.0484 (0.0476)  loss_box_reg: 0.0502 (0.0568)  loss_objectness: 0.0111 (0.0329)  loss_rpn_box_reg: 0.0030 (0.0075)  time: 0.1117  data: 0.0292  max mem: 3754
Epoch: [2]  [ 5500/12099]  eta: 0:12:13  lr: 0.000300  loss: 0.0914 (0.1447)  loss_classifier: 0.0357 (0.0476)  loss_box_reg: 0.0496 (0.0569)  loss_objectness: 0.0029 (0.0327)  loss_rpn_box_reg: 0.0026 (0.0075)  time: 0.1105  data: 0.0278  max mem: 3754
Epoch: [2]  [ 5600/12099]  eta: 0:12:02  lr: 0.000300  loss: 0.1085 (0.1449)  loss_classifier: 0.0376 (0.0476)  loss_box_reg: 0.0477 (0.0568)  loss_objectness: 0.0057 (0.0328)  loss_rpn_box_reg: 0.0029 (0.0075)  time: 0.1109  data: 0.0281

Epoch: [2]  [ 8600/12099]  eta: 0:06:29  lr: 0.000300  loss: 0.1472 (0.1452)  loss_classifier: 0.0447 (0.0478)  loss_box_reg: 0.0480 (0.0568)  loss_objectness: 0.0201 (0.0329)  loss_rpn_box_reg: 0.0031 (0.0077)  time: 0.1105  data: 0.0286  max mem: 3754
Epoch: [2]  [ 8700/12099]  eta: 0:06:18  lr: 0.000300  loss: 0.1039 (0.1452)  loss_classifier: 0.0392 (0.0478)  loss_box_reg: 0.0572 (0.0568)  loss_objectness: 0.0054 (0.0329)  loss_rpn_box_reg: 0.0024 (0.0077)  time: 0.1103  data: 0.0285  max mem: 3754
Epoch: [2]  [ 8800/12099]  eta: 0:06:07  lr: 0.000300  loss: 0.1279 (0.1450)  loss_classifier: 0.0434 (0.0477)  loss_box_reg: 0.0600 (0.0567)  loss_objectness: 0.0056 (0.0328)  loss_rpn_box_reg: 0.0028 (0.0077)  time: 0.1118  data: 0.0296  max mem: 3754
Epoch: [2]  [ 8900/12099]  eta: 0:05:56  lr: 0.000300  loss: 0.1212 (0.1449)  loss_classifier: 0.0362 (0.0477)  loss_box_reg: 0.0538 (0.0567)  loss_objectness: 0.0211 (0.0328)  loss_rpn_box_reg: 0.0023 (0.0077)  time: 0.1095  data: 0.0272

Epoch: [2]  [11900/12099]  eta: 0:00:22  lr: 0.000300  loss: 0.1381 (0.1454)  loss_classifier: 0.0509 (0.0477)  loss_box_reg: 0.0529 (0.0565)  loss_objectness: 0.0269 (0.0333)  loss_rpn_box_reg: 0.0031 (0.0079)  time: 0.1105  data: 0.0284  max mem: 3754
Epoch: [2]  [12000/12099]  eta: 0:00:11  lr: 0.000300  loss: 0.1406 (0.1455)  loss_classifier: 0.0377 (0.0477)  loss_box_reg: 0.0581 (0.0566)  loss_objectness: 0.0131 (0.0333)  loss_rpn_box_reg: 0.0058 (0.0078)  time: 0.1098  data: 0.0283  max mem: 3754
Epoch: [2]  [12098/12099]  eta: 0:00:00  lr: 0.000300  loss: 0.1212 (0.1455)  loss_classifier: 0.0386 (0.0477)  loss_box_reg: 0.0387 (0.0566)  loss_objectness: 0.0124 (0.0334)  loss_rpn_box_reg: 0.0032 (0.0079)  time: 0.1112  data: 0.0283  max mem: 3754
Epoch: [2] Total time: 0:22:27 (0.1114 s / it)
Epoch: [3]  [    0/12099]  eta: 0:22:41  lr: 0.000300  loss: 0.0521 (0.0521)  loss_classifier: 0.0249 (0.0249)  loss_box_reg: 0.0242 (0.0242)  loss_objectness: 0.0015 (0.0015)  loss_rpn_box_r

Epoch: [3]  [ 3000/12099]  eta: 0:16:46  lr: 0.000300  loss: 0.1334 (0.1434)  loss_classifier: 0.0491 (0.0461)  loss_box_reg: 0.0437 (0.0552)  loss_objectness: 0.0060 (0.0337)  loss_rpn_box_reg: 0.0021 (0.0083)  time: 0.1100  data: 0.0283  max mem: 3754
Epoch: [3]  [ 3100/12099]  eta: 0:16:35  lr: 0.000300  loss: 0.1447 (0.1432)  loss_classifier: 0.0425 (0.0461)  loss_box_reg: 0.0492 (0.0552)  loss_objectness: 0.0177 (0.0335)  loss_rpn_box_reg: 0.0024 (0.0084)  time: 0.1120  data: 0.0300  max mem: 3754
Epoch: [3]  [ 3200/12099]  eta: 0:16:24  lr: 0.000300  loss: 0.1224 (0.1431)  loss_classifier: 0.0339 (0.0460)  loss_box_reg: 0.0622 (0.0553)  loss_objectness: 0.0016 (0.0334)  loss_rpn_box_reg: 0.0022 (0.0083)  time: 0.1115  data: 0.0290  max mem: 3754
Epoch: [3]  [ 3300/12099]  eta: 0:16:13  lr: 0.000300  loss: 0.1291 (0.1431)  loss_classifier: 0.0317 (0.0461)  loss_box_reg: 0.0470 (0.0552)  loss_objectness: 0.0206 (0.0334)  loss_rpn_box_reg: 0.0022 (0.0083)  time: 0.1122  data: 0.0295

Epoch: [3]  [ 6300/12099]  eta: 0:10:42  lr: 0.000300  loss: 0.1148 (0.1437)  loss_classifier: 0.0430 (0.0469)  loss_box_reg: 0.0557 (0.0562)  loss_objectness: 0.0097 (0.0327)  loss_rpn_box_reg: 0.0024 (0.0079)  time: 0.1127  data: 0.0292  max mem: 3754
Epoch: [3]  [ 6400/12099]  eta: 0:10:31  lr: 0.000300  loss: 0.1247 (0.1439)  loss_classifier: 0.0405 (0.0469)  loss_box_reg: 0.0523 (0.0561)  loss_objectness: 0.0168 (0.0329)  loss_rpn_box_reg: 0.0042 (0.0079)  time: 0.1120  data: 0.0290  max mem: 3754
Epoch: [3]  [ 6500/12099]  eta: 0:10:20  lr: 0.000300  loss: 0.1188 (0.1438)  loss_classifier: 0.0314 (0.0469)  loss_box_reg: 0.0457 (0.0562)  loss_objectness: 0.0061 (0.0328)  loss_rpn_box_reg: 0.0023 (0.0079)  time: 0.1096  data: 0.0283  max mem: 3754
Epoch: [3]  [ 6600/12099]  eta: 0:10:09  lr: 0.000300  loss: 0.1407 (0.1438)  loss_classifier: 0.0367 (0.0469)  loss_box_reg: 0.0537 (0.0562)  loss_objectness: 0.0080 (0.0328)  loss_rpn_box_reg: 0.0026 (0.0079)  time: 0.1125  data: 0.0296

Epoch: [3]  [ 9600/12099]  eta: 0:04:37  lr: 0.000300  loss: 0.1284 (0.1443)  loss_classifier: 0.0357 (0.0470)  loss_box_reg: 0.0467 (0.0559)  loss_objectness: 0.0093 (0.0334)  loss_rpn_box_reg: 0.0033 (0.0080)  time: 0.1101  data: 0.0283  max mem: 3754
Epoch: [3]  [ 9700/12099]  eta: 0:04:25  lr: 0.000300  loss: 0.0883 (0.1443)  loss_classifier: 0.0329 (0.0469)  loss_box_reg: 0.0476 (0.0559)  loss_objectness: 0.0030 (0.0334)  loss_rpn_box_reg: 0.0023 (0.0080)  time: 0.1090  data: 0.0271  max mem: 3754
Epoch: [3]  [ 9800/12099]  eta: 0:04:14  lr: 0.000300  loss: 0.1324 (0.1444)  loss_classifier: 0.0436 (0.0470)  loss_box_reg: 0.0583 (0.0559)  loss_objectness: 0.0108 (0.0334)  loss_rpn_box_reg: 0.0019 (0.0080)  time: 0.1098  data: 0.0275  max mem: 3754
Epoch: [3]  [ 9900/12099]  eta: 0:04:03  lr: 0.000300  loss: 0.0992 (0.1442)  loss_classifier: 0.0319 (0.0470)  loss_box_reg: 0.0474 (0.0559)  loss_objectness: 0.0105 (0.0334)  loss_rpn_box_reg: 0.0022 (0.0080)  time: 0.1115  data: 0.0299

Epoch: [4]  [  700/12099]  eta: 0:21:05  lr: 0.000300  loss: 0.1133 (0.1501)  loss_classifier: 0.0477 (0.0478)  loss_box_reg: 0.0502 (0.0567)  loss_objectness: 0.0074 (0.0377)  loss_rpn_box_reg: 0.0026 (0.0079)  time: 0.1130  data: 0.0304  max mem: 3754
Epoch: [4]  [  800/12099]  eta: 0:20:54  lr: 0.000300  loss: 0.1276 (0.1494)  loss_classifier: 0.0412 (0.0471)  loss_box_reg: 0.0450 (0.0562)  loss_objectness: 0.0171 (0.0382)  loss_rpn_box_reg: 0.0015 (0.0079)  time: 0.1102  data: 0.0275  max mem: 3754
Epoch: [4]  [  900/12099]  eta: 0:20:44  lr: 0.000300  loss: 0.1358 (0.1482)  loss_classifier: 0.0427 (0.0472)  loss_box_reg: 0.0391 (0.0559)  loss_objectness: 0.0201 (0.0372)  loss_rpn_box_reg: 0.0025 (0.0079)  time: 0.1109  data: 0.0295  max mem: 3754
Epoch: [4]  [ 1000/12099]  eta: 0:20:34  lr: 0.000300  loss: 0.1368 (0.1476)  loss_classifier: 0.0452 (0.0470)  loss_box_reg: 0.0562 (0.0557)  loss_objectness: 0.0075 (0.0373)  loss_rpn_box_reg: 0.0017 (0.0077)  time: 0.1105  data: 0.0286

Epoch: [4]  [ 4000/12099]  eta: 0:15:01  lr: 0.000300  loss: 0.0979 (0.1438)  loss_classifier: 0.0324 (0.0469)  loss_box_reg: 0.0426 (0.0556)  loss_objectness: 0.0121 (0.0337)  loss_rpn_box_reg: 0.0028 (0.0077)  time: 0.1101  data: 0.0277  max mem: 3754
Epoch: [4]  [ 4100/12099]  eta: 0:14:50  lr: 0.000300  loss: 0.1015 (0.1435)  loss_classifier: 0.0236 (0.0467)  loss_box_reg: 0.0383 (0.0554)  loss_objectness: 0.0082 (0.0336)  loss_rpn_box_reg: 0.0039 (0.0077)  time: 0.1119  data: 0.0294  max mem: 3754
Epoch: [4]  [ 4200/12099]  eta: 0:14:38  lr: 0.000300  loss: 0.0977 (0.1434)  loss_classifier: 0.0328 (0.0467)  loss_box_reg: 0.0410 (0.0555)  loss_objectness: 0.0010 (0.0335)  loss_rpn_box_reg: 0.0043 (0.0077)  time: 0.1108  data: 0.0282  max mem: 3754
Epoch: [4]  [ 4300/12099]  eta: 0:14:27  lr: 0.000300  loss: 0.0801 (0.1429)  loss_classifier: 0.0295 (0.0466)  loss_box_reg: 0.0333 (0.0554)  loss_objectness: 0.0030 (0.0332)  loss_rpn_box_reg: 0.0030 (0.0077)  time: 0.1095  data: 0.0277

Epoch: [4]  [ 7300/12099]  eta: 0:08:53  lr: 0.000300  loss: 0.1030 (0.1428)  loss_classifier: 0.0341 (0.0463)  loss_box_reg: 0.0458 (0.0556)  loss_objectness: 0.0060 (0.0330)  loss_rpn_box_reg: 0.0026 (0.0079)  time: 0.1143  data: 0.0299  max mem: 3754
Epoch: [4]  [ 7400/12099]  eta: 0:08:42  lr: 0.000300  loss: 0.1337 (0.1428)  loss_classifier: 0.0367 (0.0463)  loss_box_reg: 0.0576 (0.0556)  loss_objectness: 0.0056 (0.0330)  loss_rpn_box_reg: 0.0023 (0.0079)  time: 0.1117  data: 0.0290  max mem: 3754
Epoch: [4]  [ 7500/12099]  eta: 0:08:31  lr: 0.000300  loss: 0.1215 (0.1428)  loss_classifier: 0.0412 (0.0462)  loss_box_reg: 0.0509 (0.0556)  loss_objectness: 0.0209 (0.0331)  loss_rpn_box_reg: 0.0021 (0.0079)  time: 0.1112  data: 0.0291  max mem: 3754
Epoch: [4]  [ 7600/12099]  eta: 0:08:20  lr: 0.000300  loss: 0.1419 (0.1428)  loss_classifier: 0.0467 (0.0463)  loss_box_reg: 0.0579 (0.0556)  loss_objectness: 0.0130 (0.0331)  loss_rpn_box_reg: 0.0014 (0.0079)  time: 0.1114  data: 0.0284

Epoch: [4]  [10600/12099]  eta: 0:02:47  lr: 0.000300  loss: 0.1191 (0.1431)  loss_classifier: 0.0426 (0.0464)  loss_box_reg: 0.0527 (0.0556)  loss_objectness: 0.0041 (0.0333)  loss_rpn_box_reg: 0.0028 (0.0078)  time: 0.1102  data: 0.0275  max mem: 3754
Epoch: [4]  [10700/12099]  eta: 0:02:35  lr: 0.000300  loss: 0.1038 (0.1431)  loss_classifier: 0.0386 (0.0464)  loss_box_reg: 0.0436 (0.0556)  loss_objectness: 0.0051 (0.0333)  loss_rpn_box_reg: 0.0017 (0.0079)  time: 0.1115  data: 0.0286  max mem: 3754
Epoch: [4]  [10800/12099]  eta: 0:02:24  lr: 0.000300  loss: 0.1218 (0.1431)  loss_classifier: 0.0325 (0.0464)  loss_box_reg: 0.0536 (0.0556)  loss_objectness: 0.0066 (0.0333)  loss_rpn_box_reg: 0.0025 (0.0078)  time: 0.1121  data: 0.0294  max mem: 3754
Epoch: [4]  [10900/12099]  eta: 0:02:13  lr: 0.000300  loss: 0.1025 (0.1430)  loss_classifier: 0.0297 (0.0464)  loss_box_reg: 0.0556 (0.0556)  loss_objectness: 0.0022 (0.0332)  loss_rpn_box_reg: 0.0036 (0.0078)  time: 0.1138  data: 0.0312

Epoch: [5]  [ 1700/12099]  eta: 0:19:14  lr: 0.000030  loss: 0.1210 (0.1457)  loss_classifier: 0.0326 (0.0472)  loss_box_reg: 0.0500 (0.0564)  loss_objectness: 0.0189 (0.0340)  loss_rpn_box_reg: 0.0035 (0.0082)  time: 0.1130  data: 0.0295  max mem: 3754
Epoch: [5]  [ 1800/12099]  eta: 0:19:03  lr: 0.000030  loss: 0.0943 (0.1446)  loss_classifier: 0.0389 (0.0470)  loss_box_reg: 0.0431 (0.0561)  loss_objectness: 0.0013 (0.0335)  loss_rpn_box_reg: 0.0019 (0.0081)  time: 0.1085  data: 0.0273  max mem: 3754
Epoch: [5]  [ 1900/12099]  eta: 0:18:52  lr: 0.000030  loss: 0.1157 (0.1442)  loss_classifier: 0.0349 (0.0468)  loss_box_reg: 0.0587 (0.0560)  loss_objectness: 0.0072 (0.0332)  loss_rpn_box_reg: 0.0027 (0.0081)  time: 0.1114  data: 0.0288  max mem: 3754
Epoch: [5]  [ 2000/12099]  eta: 0:18:41  lr: 0.000030  loss: 0.1296 (0.1442)  loss_classifier: 0.0441 (0.0467)  loss_box_reg: 0.0531 (0.0560)  loss_objectness: 0.0086 (0.0334)  loss_rpn_box_reg: 0.0021 (0.0081)  time: 0.1103  data: 0.0287

Epoch: [5]  [ 5000/12099]  eta: 0:13:08  lr: 0.000030  loss: 0.1074 (0.1422)  loss_classifier: 0.0348 (0.0464)  loss_box_reg: 0.0440 (0.0553)  loss_objectness: 0.0087 (0.0328)  loss_rpn_box_reg: 0.0024 (0.0077)  time: 0.1113  data: 0.0288  max mem: 3754
Epoch: [5]  [ 5100/12099]  eta: 0:12:57  lr: 0.000030  loss: 0.1017 (0.1423)  loss_classifier: 0.0330 (0.0463)  loss_box_reg: 0.0475 (0.0553)  loss_objectness: 0.0047 (0.0329)  loss_rpn_box_reg: 0.0025 (0.0077)  time: 0.1109  data: 0.0277  max mem: 3754
Epoch: [5]  [ 5200/12099]  eta: 0:12:46  lr: 0.000030  loss: 0.1285 (0.1423)  loss_classifier: 0.0414 (0.0463)  loss_box_reg: 0.0379 (0.0553)  loss_objectness: 0.0201 (0.0329)  loss_rpn_box_reg: 0.0030 (0.0078)  time: 0.1124  data: 0.0297  max mem: 3754
Epoch: [5]  [ 5300/12099]  eta: 0:12:35  lr: 0.000030  loss: 0.1495 (0.1424)  loss_classifier: 0.0463 (0.0463)  loss_box_reg: 0.0675 (0.0554)  loss_objectness: 0.0198 (0.0329)  loss_rpn_box_reg: 0.0046 (0.0078)  time: 0.1137  data: 0.0314

Epoch: [5]  [ 8300/12099]  eta: 0:07:02  lr: 0.000030  loss: 0.1145 (0.1436)  loss_classifier: 0.0328 (0.0468)  loss_box_reg: 0.0508 (0.0556)  loss_objectness: 0.0221 (0.0335)  loss_rpn_box_reg: 0.0040 (0.0077)  time: 0.1119  data: 0.0297  max mem: 3754
Epoch: [5]  [ 8400/12099]  eta: 0:06:51  lr: 0.000030  loss: 0.1074 (0.1436)  loss_classifier: 0.0364 (0.0468)  loss_box_reg: 0.0499 (0.0556)  loss_objectness: 0.0052 (0.0335)  loss_rpn_box_reg: 0.0017 (0.0078)  time: 0.1115  data: 0.0291  max mem: 3754
Epoch: [5]  [ 8500/12099]  eta: 0:06:40  lr: 0.000030  loss: 0.1001 (0.1435)  loss_classifier: 0.0330 (0.0468)  loss_box_reg: 0.0396 (0.0556)  loss_objectness: 0.0071 (0.0334)  loss_rpn_box_reg: 0.0017 (0.0078)  time: 0.1084  data: 0.0264  max mem: 3754
Epoch: [5]  [ 8600/12099]  eta: 0:06:28  lr: 0.000030  loss: 0.1157 (0.1434)  loss_classifier: 0.0448 (0.0468)  loss_box_reg: 0.0525 (0.0556)  loss_objectness: 0.0102 (0.0333)  loss_rpn_box_reg: 0.0034 (0.0078)  time: 0.1104  data: 0.0284

Epoch: [5]  [11600/12099]  eta: 0:00:55  lr: 0.000030  loss: 0.1099 (0.1431)  loss_classifier: 0.0333 (0.0466)  loss_box_reg: 0.0389 (0.0553)  loss_objectness: 0.0070 (0.0333)  loss_rpn_box_reg: 0.0023 (0.0079)  time: 0.1107  data: 0.0287  max mem: 3754
Epoch: [5]  [11700/12099]  eta: 0:00:44  lr: 0.000030  loss: 0.1326 (0.1431)  loss_classifier: 0.0297 (0.0466)  loss_box_reg: 0.0419 (0.0554)  loss_objectness: 0.0095 (0.0333)  loss_rpn_box_reg: 0.0031 (0.0079)  time: 0.1098  data: 0.0279  max mem: 3754
Epoch: [5]  [11800/12099]  eta: 0:00:33  lr: 0.000030  loss: 0.1230 (0.1431)  loss_classifier: 0.0296 (0.0466)  loss_box_reg: 0.0598 (0.0553)  loss_objectness: 0.0075 (0.0333)  loss_rpn_box_reg: 0.0017 (0.0079)  time: 0.1117  data: 0.0289  max mem: 3754
Epoch: [5]  [11900/12099]  eta: 0:00:22  lr: 0.000030  loss: 0.1094 (0.1430)  loss_classifier: 0.0389 (0.0465)  loss_box_reg: 0.0441 (0.0553)  loss_objectness: 0.0040 (0.0333)  loss_rpn_box_reg: 0.0014 (0.0079)  time: 0.1114  data: 0.0290

Epoch: [6]  [ 2700/12099]  eta: 0:17:25  lr: 0.000030  loss: 0.1340 (0.1414)  loss_classifier: 0.0343 (0.0462)  loss_box_reg: 0.0471 (0.0551)  loss_objectness: 0.0106 (0.0324)  loss_rpn_box_reg: 0.0059 (0.0077)  time: 0.1117  data: 0.0280  max mem: 3754
Epoch: [6]  [ 2800/12099]  eta: 0:17:14  lr: 0.000030  loss: 0.1202 (0.1416)  loss_classifier: 0.0362 (0.0463)  loss_box_reg: 0.0485 (0.0552)  loss_objectness: 0.0143 (0.0323)  loss_rpn_box_reg: 0.0032 (0.0077)  time: 0.1104  data: 0.0282  max mem: 3754
Epoch: [6]  [ 2900/12099]  eta: 0:17:03  lr: 0.000030  loss: 0.1078 (0.1412)  loss_classifier: 0.0378 (0.0463)  loss_box_reg: 0.0438 (0.0551)  loss_objectness: 0.0068 (0.0322)  loss_rpn_box_reg: 0.0033 (0.0077)  time: 0.1105  data: 0.0275  max mem: 3754
Epoch: [6]  [ 3000/12099]  eta: 0:16:52  lr: 0.000030  loss: 0.1070 (0.1414)  loss_classifier: 0.0303 (0.0461)  loss_box_reg: 0.0534 (0.0551)  loss_objectness: 0.0095 (0.0323)  loss_rpn_box_reg: 0.0031 (0.0079)  time: 0.1095  data: 0.0274

Epoch: [6]  [ 6000/12099]  eta: 0:11:18  lr: 0.000030  loss: 0.1147 (0.1420)  loss_classifier: 0.0364 (0.0463)  loss_box_reg: 0.0424 (0.0553)  loss_objectness: 0.0037 (0.0327)  loss_rpn_box_reg: 0.0055 (0.0077)  time: 0.1110  data: 0.0287  max mem: 3754
Epoch: [6]  [ 6100/12099]  eta: 0:11:07  lr: 0.000030  loss: 0.1488 (0.1421)  loss_classifier: 0.0326 (0.0463)  loss_box_reg: 0.0492 (0.0553)  loss_objectness: 0.0182 (0.0328)  loss_rpn_box_reg: 0.0053 (0.0077)  time: 0.1113  data: 0.0288  max mem: 3754
Epoch: [6]  [ 6200/12099]  eta: 0:10:56  lr: 0.000030  loss: 0.1483 (0.1422)  loss_classifier: 0.0366 (0.0463)  loss_box_reg: 0.0367 (0.0553)  loss_objectness: 0.0158 (0.0328)  loss_rpn_box_reg: 0.0022 (0.0078)  time: 0.1094  data: 0.0271  max mem: 3754
Epoch: [6]  [ 6300/12099]  eta: 0:10:44  lr: 0.000030  loss: 0.1219 (0.1422)  loss_classifier: 0.0345 (0.0463)  loss_box_reg: 0.0567 (0.0553)  loss_objectness: 0.0072 (0.0329)  loss_rpn_box_reg: 0.0035 (0.0078)  time: 0.1100  data: 0.0281

Epoch: [6]  [ 9300/12099]  eta: 0:05:11  lr: 0.000030  loss: 0.1257 (0.1428)  loss_classifier: 0.0514 (0.0464)  loss_box_reg: 0.0581 (0.0554)  loss_objectness: 0.0135 (0.0332)  loss_rpn_box_reg: 0.0027 (0.0078)  time: 0.1104  data: 0.0283  max mem: 3754
Epoch: [6]  [ 9400/12099]  eta: 0:05:00  lr: 0.000030  loss: 0.1123 (0.1427)  loss_classifier: 0.0377 (0.0464)  loss_box_reg: 0.0621 (0.0553)  loss_objectness: 0.0033 (0.0332)  loss_rpn_box_reg: 0.0025 (0.0078)  time: 0.1128  data: 0.0306  max mem: 3754
Epoch: [6]  [ 9500/12099]  eta: 0:04:49  lr: 0.000030  loss: 0.1224 (0.1428)  loss_classifier: 0.0371 (0.0464)  loss_box_reg: 0.0477 (0.0553)  loss_objectness: 0.0067 (0.0333)  loss_rpn_box_reg: 0.0035 (0.0078)  time: 0.1111  data: 0.0293  max mem: 3754
Epoch: [6]  [ 9600/12099]  eta: 0:04:38  lr: 0.000030  loss: 0.1034 (0.1428)  loss_classifier: 0.0344 (0.0464)  loss_box_reg: 0.0387 (0.0553)  loss_objectness: 0.0063 (0.0333)  loss_rpn_box_reg: 0.0025 (0.0078)  time: 0.1093  data: 0.0275

Epoch: [7]  [  400/12099]  eta: 0:21:36  lr: 0.000030  loss: 0.1160 (0.1455)  loss_classifier: 0.0314 (0.0454)  loss_box_reg: 0.0578 (0.0554)  loss_objectness: 0.0136 (0.0368)  loss_rpn_box_reg: 0.0031 (0.0078)  time: 0.1132  data: 0.0309  max mem: 3754
Epoch: [7]  [  500/12099]  eta: 0:21:29  lr: 0.000030  loss: 0.1323 (0.1448)  loss_classifier: 0.0434 (0.0462)  loss_box_reg: 0.0550 (0.0549)  loss_objectness: 0.0111 (0.0358)  loss_rpn_box_reg: 0.0030 (0.0079)  time: 0.1129  data: 0.0305  max mem: 3754
Epoch: [7]  [  600/12099]  eta: 0:21:21  lr: 0.000030  loss: 0.1088 (0.1445)  loss_classifier: 0.0308 (0.0468)  loss_box_reg: 0.0465 (0.0556)  loss_objectness: 0.0011 (0.0345)  loss_rpn_box_reg: 0.0027 (0.0076)  time: 0.1143  data: 0.0309  max mem: 3754
Epoch: [7]  [  700/12099]  eta: 0:21:11  lr: 0.000030  loss: 0.1297 (0.1464)  loss_classifier: 0.0565 (0.0474)  loss_box_reg: 0.0538 (0.0562)  loss_objectness: 0.0107 (0.0348)  loss_rpn_box_reg: 0.0034 (0.0080)  time: 0.1120  data: 0.0293

Epoch: [7]  [ 3700/12099]  eta: 0:15:40  lr: 0.000030  loss: 0.1265 (0.1435)  loss_classifier: 0.0386 (0.0466)  loss_box_reg: 0.0650 (0.0557)  loss_objectness: 0.0091 (0.0333)  loss_rpn_box_reg: 0.0023 (0.0080)  time: 0.1129  data: 0.0305  max mem: 3754
Epoch: [7]  [ 3800/12099]  eta: 0:15:28  lr: 0.000030  loss: 0.1046 (0.1436)  loss_classifier: 0.0361 (0.0467)  loss_box_reg: 0.0561 (0.0558)  loss_objectness: 0.0063 (0.0331)  loss_rpn_box_reg: 0.0047 (0.0080)  time: 0.1112  data: 0.0291  max mem: 3754
Epoch: [7]  [ 3900/12099]  eta: 0:15:17  lr: 0.000030  loss: 0.1120 (0.1436)  loss_classifier: 0.0351 (0.0467)  loss_box_reg: 0.0522 (0.0558)  loss_objectness: 0.0109 (0.0332)  loss_rpn_box_reg: 0.0035 (0.0080)  time: 0.1121  data: 0.0286  max mem: 3754
Epoch: [7]  [ 4000/12099]  eta: 0:15:06  lr: 0.000030  loss: 0.1204 (0.1435)  loss_classifier: 0.0303 (0.0466)  loss_box_reg: 0.0408 (0.0558)  loss_objectness: 0.0041 (0.0332)  loss_rpn_box_reg: 0.0013 (0.0080)  time: 0.1115  data: 0.0291

Epoch: [7]  [ 7000/12099]  eta: 0:09:29  lr: 0.000030  loss: 0.1005 (0.1440)  loss_classifier: 0.0332 (0.0466)  loss_box_reg: 0.0410 (0.0554)  loss_objectness: 0.0013 (0.0340)  loss_rpn_box_reg: 0.0020 (0.0080)  time: 0.1100  data: 0.0280  max mem: 3754
Epoch: [7]  [ 7100/12099]  eta: 0:09:17  lr: 0.000030  loss: 0.1520 (0.1441)  loss_classifier: 0.0515 (0.0467)  loss_box_reg: 0.0523 (0.0555)  loss_objectness: 0.0107 (0.0340)  loss_rpn_box_reg: 0.0025 (0.0080)  time: 0.1114  data: 0.0281  max mem: 3754
Epoch: [7]  [ 7200/12099]  eta: 0:09:06  lr: 0.000030  loss: 0.1230 (0.1441)  loss_classifier: 0.0321 (0.0467)  loss_box_reg: 0.0565 (0.0555)  loss_objectness: 0.0106 (0.0340)  loss_rpn_box_reg: 0.0044 (0.0080)  time: 0.1119  data: 0.0295  max mem: 3754
Epoch: [7]  [ 7300/12099]  eta: 0:08:55  lr: 0.000030  loss: 0.1185 (0.1441)  loss_classifier: 0.0350 (0.0467)  loss_box_reg: 0.0485 (0.0555)  loss_objectness: 0.0026 (0.0339)  loss_rpn_box_reg: 0.0033 (0.0080)  time: 0.1120  data: 0.0293

Epoch: [7]  [10300/12099]  eta: 0:03:20  lr: 0.000030  loss: 0.1137 (0.1430)  loss_classifier: 0.0336 (0.0464)  loss_box_reg: 0.0421 (0.0553)  loss_objectness: 0.0056 (0.0334)  loss_rpn_box_reg: 0.0034 (0.0079)  time: 0.1111  data: 0.0290  max mem: 3754
Epoch: [7]  [10400/12099]  eta: 0:03:09  lr: 0.000030  loss: 0.1062 (0.1431)  loss_classifier: 0.0360 (0.0465)  loss_box_reg: 0.0587 (0.0554)  loss_objectness: 0.0038 (0.0334)  loss_rpn_box_reg: 0.0031 (0.0079)  time: 0.1112  data: 0.0295  max mem: 3754
Epoch: [7]  [10500/12099]  eta: 0:02:58  lr: 0.000030  loss: 0.1100 (0.1430)  loss_classifier: 0.0288 (0.0464)  loss_box_reg: 0.0451 (0.0553)  loss_objectness: 0.0051 (0.0333)  loss_rpn_box_reg: 0.0035 (0.0079)  time: 0.1098  data: 0.0281  max mem: 3754
Epoch: [7]  [10600/12099]  eta: 0:02:46  lr: 0.000030  loss: 0.1124 (0.1429)  loss_classifier: 0.0417 (0.0464)  loss_box_reg: 0.0517 (0.0553)  loss_objectness: 0.0059 (0.0333)  loss_rpn_box_reg: 0.0019 (0.0079)  time: 0.1108  data: 0.0294

Epoch: [8]  [ 1400/12099]  eta: 0:19:53  lr: 0.000030  loss: 0.1331 (0.1421)  loss_classifier: 0.0493 (0.0467)  loss_box_reg: 0.0572 (0.0560)  loss_objectness: 0.0046 (0.0317)  loss_rpn_box_reg: 0.0028 (0.0076)  time: 0.1128  data: 0.0303  max mem: 3754
Epoch: [8]  [ 1500/12099]  eta: 0:19:42  lr: 0.000030  loss: 0.1223 (0.1426)  loss_classifier: 0.0407 (0.0467)  loss_box_reg: 0.0463 (0.0560)  loss_objectness: 0.0020 (0.0323)  loss_rpn_box_reg: 0.0027 (0.0077)  time: 0.1128  data: 0.0306  max mem: 3754
Epoch: [8]  [ 1600/12099]  eta: 0:19:31  lr: 0.000030  loss: 0.1048 (0.1426)  loss_classifier: 0.0412 (0.0468)  loss_box_reg: 0.0522 (0.0561)  loss_objectness: 0.0060 (0.0321)  loss_rpn_box_reg: 0.0037 (0.0077)  time: 0.1144  data: 0.0311  max mem: 3754
Epoch: [8]  [ 1700/12099]  eta: 0:19:20  lr: 0.000030  loss: 0.1133 (0.1422)  loss_classifier: 0.0360 (0.0468)  loss_box_reg: 0.0369 (0.0558)  loss_objectness: 0.0129 (0.0319)  loss_rpn_box_reg: 0.0015 (0.0077)  time: 0.1117  data: 0.0289

Epoch: [8]  [ 4700/12099]  eta: 0:13:45  lr: 0.000030  loss: 0.0847 (0.1418)  loss_classifier: 0.0280 (0.0463)  loss_box_reg: 0.0396 (0.0560)  loss_objectness: 0.0031 (0.0320)  loss_rpn_box_reg: 0.0021 (0.0075)  time: 0.1118  data: 0.0290  max mem: 3754
Epoch: [8]  [ 4800/12099]  eta: 0:13:34  lr: 0.000030  loss: 0.1585 (0.1417)  loss_classifier: 0.0450 (0.0462)  loss_box_reg: 0.0633 (0.0560)  loss_objectness: 0.0138 (0.0319)  loss_rpn_box_reg: 0.0020 (0.0076)  time: 0.1118  data: 0.0281  max mem: 3754
Epoch: [8]  [ 4900/12099]  eta: 0:13:23  lr: 0.000030  loss: 0.1169 (0.1416)  loss_classifier: 0.0422 (0.0462)  loss_box_reg: 0.0484 (0.0559)  loss_objectness: 0.0079 (0.0319)  loss_rpn_box_reg: 0.0035 (0.0076)  time: 0.1104  data: 0.0279  max mem: 3754
Epoch: [8]  [ 5000/12099]  eta: 0:13:11  lr: 0.000030  loss: 0.1201 (0.1418)  loss_classifier: 0.0319 (0.0462)  loss_box_reg: 0.0576 (0.0560)  loss_objectness: 0.0036 (0.0319)  loss_rpn_box_reg: 0.0020 (0.0076)  time: 0.1118  data: 0.0292

Epoch: [8]  [ 8000/12099]  eta: 0:07:36  lr: 0.000030  loss: 0.1227 (0.1425)  loss_classifier: 0.0409 (0.0461)  loss_box_reg: 0.0473 (0.0555)  loss_objectness: 0.0163 (0.0332)  loss_rpn_box_reg: 0.0033 (0.0077)  time: 0.1116  data: 0.0292  max mem: 3754
Epoch: [8]  [ 8100/12099]  eta: 0:07:25  lr: 0.000030  loss: 0.1252 (0.1424)  loss_classifier: 0.0364 (0.0461)  loss_box_reg: 0.0466 (0.0555)  loss_objectness: 0.0064 (0.0331)  loss_rpn_box_reg: 0.0035 (0.0077)  time: 0.1105  data: 0.0279  max mem: 3754
Epoch: [8]  [ 8200/12099]  eta: 0:07:14  lr: 0.000030  loss: 0.1073 (0.1425)  loss_classifier: 0.0296 (0.0462)  loss_box_reg: 0.0397 (0.0555)  loss_objectness: 0.0129 (0.0332)  loss_rpn_box_reg: 0.0026 (0.0077)  time: 0.1125  data: 0.0297  max mem: 3754
Epoch: [8]  [ 8300/12099]  eta: 0:07:03  lr: 0.000030  loss: 0.1252 (0.1425)  loss_classifier: 0.0326 (0.0462)  loss_box_reg: 0.0461 (0.0555)  loss_objectness: 0.0128 (0.0332)  loss_rpn_box_reg: 0.0020 (0.0077)  time: 0.1114  data: 0.0284

Epoch: [8]  [11300/12099]  eta: 0:01:29  lr: 0.000030  loss: 0.0995 (0.1425)  loss_classifier: 0.0339 (0.0462)  loss_box_reg: 0.0468 (0.0551)  loss_objectness: 0.0043 (0.0332)  loss_rpn_box_reg: 0.0017 (0.0079)  time: 0.1124  data: 0.0291  max mem: 3754
Epoch: [8]  [11400/12099]  eta: 0:01:17  lr: 0.000030  loss: 0.1212 (0.1426)  loss_classifier: 0.0322 (0.0463)  loss_box_reg: 0.0503 (0.0551)  loss_objectness: 0.0120 (0.0333)  loss_rpn_box_reg: 0.0020 (0.0079)  time: 0.1169  data: 0.0333  max mem: 3754
Epoch: [8]  [11500/12099]  eta: 0:01:06  lr: 0.000030  loss: 0.1375 (0.1427)  loss_classifier: 0.0376 (0.0463)  loss_box_reg: 0.0579 (0.0551)  loss_objectness: 0.0167 (0.0334)  loss_rpn_box_reg: 0.0056 (0.0079)  time: 0.1096  data: 0.0276  max mem: 3754
Epoch: [8]  [11600/12099]  eta: 0:00:55  lr: 0.000030  loss: 0.1153 (0.1428)  loss_classifier: 0.0387 (0.0463)  loss_box_reg: 0.0456 (0.0552)  loss_objectness: 0.0093 (0.0334)  loss_rpn_box_reg: 0.0027 (0.0079)  time: 0.1103  data: 0.0276

Epoch: [9]  [ 2400/12099]  eta: 0:18:03  lr: 0.000030  loss: 0.1186 (0.1421)  loss_classifier: 0.0297 (0.0462)  loss_box_reg: 0.0366 (0.0554)  loss_objectness: 0.0076 (0.0329)  loss_rpn_box_reg: 0.0032 (0.0076)  time: 0.1114  data: 0.0284  max mem: 3754
Epoch: [9]  [ 2500/12099]  eta: 0:17:52  lr: 0.000030  loss: 0.1381 (0.1420)  loss_classifier: 0.0358 (0.0462)  loss_box_reg: 0.0614 (0.0555)  loss_objectness: 0.0083 (0.0327)  loss_rpn_box_reg: 0.0038 (0.0077)  time: 0.1124  data: 0.0294  max mem: 3754
Epoch: [9]  [ 2600/12099]  eta: 0:17:41  lr: 0.000030  loss: 0.1247 (0.1425)  loss_classifier: 0.0523 (0.0463)  loss_box_reg: 0.0497 (0.0556)  loss_objectness: 0.0120 (0.0330)  loss_rpn_box_reg: 0.0024 (0.0076)  time: 0.1119  data: 0.0288  max mem: 3754
Epoch: [9]  [ 2700/12099]  eta: 0:17:30  lr: 0.000030  loss: 0.1286 (0.1419)  loss_classifier: 0.0393 (0.0462)  loss_box_reg: 0.0527 (0.0555)  loss_objectness: 0.0153 (0.0326)  loss_rpn_box_reg: 0.0050 (0.0076)  time: 0.1113  data: 0.0293

Epoch: [9]  [ 5700/12099]  eta: 0:11:54  lr: 0.000030  loss: 0.1248 (0.1421)  loss_classifier: 0.0419 (0.0462)  loss_box_reg: 0.0629 (0.0551)  loss_objectness: 0.0133 (0.0330)  loss_rpn_box_reg: 0.0053 (0.0079)  time: 0.1100  data: 0.0275  max mem: 3754
Epoch: [9]  [ 5800/12099]  eta: 0:11:43  lr: 0.000030  loss: 0.0924 (0.1421)  loss_classifier: 0.0282 (0.0462)  loss_box_reg: 0.0509 (0.0551)  loss_objectness: 0.0034 (0.0329)  loss_rpn_box_reg: 0.0028 (0.0079)  time: 0.1114  data: 0.0286  max mem: 3754
Epoch: [9]  [ 5900/12099]  eta: 0:11:32  lr: 0.000030  loss: 0.1150 (0.1419)  loss_classifier: 0.0367 (0.0462)  loss_box_reg: 0.0452 (0.0551)  loss_objectness: 0.0269 (0.0327)  loss_rpn_box_reg: 0.0040 (0.0079)  time: 0.1133  data: 0.0307  max mem: 3754
Epoch: [9]  [ 6000/12099]  eta: 0:11:21  lr: 0.000030  loss: 0.1068 (0.1417)  loss_classifier: 0.0337 (0.0462)  loss_box_reg: 0.0460 (0.0552)  loss_objectness: 0.0022 (0.0325)  loss_rpn_box_reg: 0.0022 (0.0079)  time: 0.1121  data: 0.0299

Epoch: [9]  [ 9000/12099]  eta: 0:05:45  lr: 0.000030  loss: 0.1172 (0.1429)  loss_classifier: 0.0361 (0.0465)  loss_box_reg: 0.0551 (0.0554)  loss_objectness: 0.0090 (0.0332)  loss_rpn_box_reg: 0.0034 (0.0079)  time: 0.1122  data: 0.0286  max mem: 3754
Epoch: [9]  [ 9100/12099]  eta: 0:05:34  lr: 0.000030  loss: 0.1919 (0.1431)  loss_classifier: 0.0579 (0.0465)  loss_box_reg: 0.0589 (0.0554)  loss_objectness: 0.0180 (0.0333)  loss_rpn_box_reg: 0.0040 (0.0079)  time: 0.1113  data: 0.0293  max mem: 3754
Epoch: [9]  [ 9200/12099]  eta: 0:05:23  lr: 0.000030  loss: 0.1315 (0.1432)  loss_classifier: 0.0433 (0.0465)  loss_box_reg: 0.0548 (0.0554)  loss_objectness: 0.0252 (0.0334)  loss_rpn_box_reg: 0.0024 (0.0079)  time: 0.1124  data: 0.0300  max mem: 3754
Epoch: [9]  [ 9300/12099]  eta: 0:05:12  lr: 0.000030  loss: 0.1009 (0.1431)  loss_classifier: 0.0305 (0.0465)  loss_box_reg: 0.0459 (0.0554)  loss_objectness: 0.0039 (0.0334)  loss_rpn_box_reg: 0.0024 (0.0079)  time: 0.1084  data: 0.0259

Epoch: [10]  [  100/12099]  eta: 0:22:25  lr: 0.000003  loss: 0.1132 (0.1548)  loss_classifier: 0.0353 (0.0510)  loss_box_reg: 0.0595 (0.0587)  loss_objectness: 0.0103 (0.0377)  loss_rpn_box_reg: 0.0027 (0.0074)  time: 0.1110  data: 0.0281  max mem: 3754
Epoch: [10]  [  200/12099]  eta: 0:22:07  lr: 0.000003  loss: 0.1351 (0.1468)  loss_classifier: 0.0350 (0.0485)  loss_box_reg: 0.0623 (0.0570)  loss_objectness: 0.0080 (0.0339)  loss_rpn_box_reg: 0.0033 (0.0075)  time: 0.1105  data: 0.0274  max mem: 3754
Epoch: [10]  [  300/12099]  eta: 0:21:53  lr: 0.000003  loss: 0.1116 (0.1460)  loss_classifier: 0.0390 (0.0485)  loss_box_reg: 0.0476 (0.0576)  loss_objectness: 0.0024 (0.0325)  loss_rpn_box_reg: 0.0033 (0.0074)  time: 0.1106  data: 0.0281  max mem: 3754
Epoch: [10]  [  400/12099]  eta: 0:21:40  lr: 0.000003  loss: 0.1538 (0.1479)  loss_classifier: 0.0487 (0.0484)  loss_box_reg: 0.0539 (0.0578)  loss_objectness: 0.0134 (0.0338)  loss_rpn_box_reg: 0.0040 (0.0079)  time: 0.1116  data: 0.

Epoch: [10]  [ 3400/12099]  eta: 0:16:07  lr: 0.000003  loss: 0.1177 (0.1437)  loss_classifier: 0.0383 (0.0465)  loss_box_reg: 0.0427 (0.0557)  loss_objectness: 0.0201 (0.0336)  loss_rpn_box_reg: 0.0026 (0.0078)  time: 0.1123  data: 0.0294  max mem: 3754
Epoch: [10]  [ 3500/12099]  eta: 0:15:56  lr: 0.000003  loss: 0.1433 (0.1437)  loss_classifier: 0.0449 (0.0465)  loss_box_reg: 0.0462 (0.0558)  loss_objectness: 0.0258 (0.0335)  loss_rpn_box_reg: 0.0040 (0.0079)  time: 0.1123  data: 0.0294  max mem: 3754
Epoch: [10]  [ 3600/12099]  eta: 0:15:45  lr: 0.000003  loss: 0.1027 (0.1433)  loss_classifier: 0.0313 (0.0464)  loss_box_reg: 0.0466 (0.0558)  loss_objectness: 0.0111 (0.0333)  loss_rpn_box_reg: 0.0025 (0.0080)  time: 0.1136  data: 0.0307  max mem: 3754
Epoch: [10]  [ 3700/12099]  eta: 0:15:34  lr: 0.000003  loss: 0.1054 (0.1434)  loss_classifier: 0.0327 (0.0463)  loss_box_reg: 0.0342 (0.0557)  loss_objectness: 0.0104 (0.0334)  loss_rpn_box_reg: 0.0043 (0.0079)  time: 0.1119  data: 0.

Epoch: [10]  [ 6700/12099]  eta: 0:10:01  lr: 0.000003  loss: 0.1315 (0.1431)  loss_classifier: 0.0386 (0.0464)  loss_box_reg: 0.0384 (0.0552)  loss_objectness: 0.0065 (0.0336)  loss_rpn_box_reg: 0.0026 (0.0080)  time: 0.1120  data: 0.0297  max mem: 3754
Epoch: [10]  [ 6800/12099]  eta: 0:09:50  lr: 0.000003  loss: 0.1143 (0.1431)  loss_classifier: 0.0399 (0.0463)  loss_box_reg: 0.0557 (0.0553)  loss_objectness: 0.0025 (0.0335)  loss_rpn_box_reg: 0.0025 (0.0079)  time: 0.1127  data: 0.0294  max mem: 3754
Epoch: [10]  [ 6900/12099]  eta: 0:09:39  lr: 0.000003  loss: 0.1370 (0.1430)  loss_classifier: 0.0463 (0.0464)  loss_box_reg: 0.0450 (0.0552)  loss_objectness: 0.0105 (0.0335)  loss_rpn_box_reg: 0.0033 (0.0079)  time: 0.1123  data: 0.0294  max mem: 3754
Epoch: [10]  [ 7000/12099]  eta: 0:09:28  lr: 0.000003  loss: 0.1208 (0.1433)  loss_classifier: 0.0387 (0.0464)  loss_box_reg: 0.0476 (0.0553)  loss_objectness: 0.0217 (0.0336)  loss_rpn_box_reg: 0.0027 (0.0079)  time: 0.1106  data: 0.

Epoch: [10]  [10000/12099]  eta: 0:03:54  lr: 0.000003  loss: 0.1288 (0.1433)  loss_classifier: 0.0417 (0.0466)  loss_box_reg: 0.0556 (0.0553)  loss_objectness: 0.0163 (0.0335)  loss_rpn_box_reg: 0.0030 (0.0079)  time: 0.1129  data: 0.0302  max mem: 3754
Epoch: [10]  [10100/12099]  eta: 0:03:43  lr: 0.000003  loss: 0.0996 (0.1433)  loss_classifier: 0.0352 (0.0466)  loss_box_reg: 0.0358 (0.0553)  loss_objectness: 0.0077 (0.0335)  loss_rpn_box_reg: 0.0021 (0.0079)  time: 0.1124  data: 0.0300  max mem: 3754
Epoch: [10]  [10200/12099]  eta: 0:03:31  lr: 0.000003  loss: 0.1479 (0.1433)  loss_classifier: 0.0397 (0.0466)  loss_box_reg: 0.0525 (0.0554)  loss_objectness: 0.0304 (0.0335)  loss_rpn_box_reg: 0.0040 (0.0079)  time: 0.1114  data: 0.0289  max mem: 3754
Epoch: [10]  [10300/12099]  eta: 0:03:20  lr: 0.000003  loss: 0.0996 (0.1433)  loss_classifier: 0.0318 (0.0465)  loss_box_reg: 0.0466 (0.0553)  loss_objectness: 0.0090 (0.0336)  loss_rpn_box_reg: 0.0027 (0.0079)  time: 0.1120  data: 0.

Epoch: [11]  [ 1000/12099]  eta: 0:20:43  lr: 0.000003  loss: 0.1372 (0.1452)  loss_classifier: 0.0441 (0.0472)  loss_box_reg: 0.0450 (0.0539)  loss_objectness: 0.0089 (0.0370)  loss_rpn_box_reg: 0.0028 (0.0071)  time: 0.1133  data: 0.0301  max mem: 3754
Epoch: [11]  [ 1100/12099]  eta: 0:20:32  lr: 0.000003  loss: 0.1464 (0.1458)  loss_classifier: 0.0544 (0.0477)  loss_box_reg: 0.0521 (0.0543)  loss_objectness: 0.0085 (0.0368)  loss_rpn_box_reg: 0.0043 (0.0071)  time: 0.1123  data: 0.0300  max mem: 3754
Epoch: [11]  [ 1200/12099]  eta: 0:20:20  lr: 0.000003  loss: 0.1049 (0.1449)  loss_classifier: 0.0296 (0.0474)  loss_box_reg: 0.0466 (0.0543)  loss_objectness: 0.0062 (0.0363)  loss_rpn_box_reg: 0.0024 (0.0070)  time: 0.1118  data: 0.0290  max mem: 3754
Epoch: [11]  [ 1300/12099]  eta: 0:20:09  lr: 0.000003  loss: 0.0987 (0.1443)  loss_classifier: 0.0367 (0.0473)  loss_box_reg: 0.0451 (0.0544)  loss_objectness: 0.0033 (0.0356)  loss_rpn_box_reg: 0.0020 (0.0071)  time: 0.1129  data: 0.

Epoch: [11]  [ 4300/12099]  eta: 0:14:33  lr: 0.000003  loss: 0.1059 (0.1417)  loss_classifier: 0.0275 (0.0461)  loss_box_reg: 0.0405 (0.0548)  loss_objectness: 0.0072 (0.0330)  loss_rpn_box_reg: 0.0047 (0.0077)  time: 0.1116  data: 0.0281  max mem: 3754
Epoch: [11]  [ 4400/12099]  eta: 0:14:21  lr: 0.000003  loss: 0.1156 (0.1420)  loss_classifier: 0.0352 (0.0461)  loss_box_reg: 0.0463 (0.0549)  loss_objectness: 0.0072 (0.0332)  loss_rpn_box_reg: 0.0018 (0.0077)  time: 0.1119  data: 0.0285  max mem: 3754
Epoch: [11]  [ 4500/12099]  eta: 0:14:10  lr: 0.000003  loss: 0.1380 (0.1421)  loss_classifier: 0.0345 (0.0462)  loss_box_reg: 0.0550 (0.0549)  loss_objectness: 0.0119 (0.0332)  loss_rpn_box_reg: 0.0024 (0.0078)  time: 0.1113  data: 0.0286  max mem: 3754
Epoch: [11]  [ 4600/12099]  eta: 0:13:59  lr: 0.000003  loss: 0.0959 (0.1419)  loss_classifier: 0.0392 (0.0461)  loss_box_reg: 0.0509 (0.0550)  loss_objectness: 0.0023 (0.0331)  loss_rpn_box_reg: 0.0017 (0.0077)  time: 0.1116  data: 0.

Epoch: [11]  [ 7600/12099]  eta: 0:08:23  lr: 0.000003  loss: 0.1256 (0.1425)  loss_classifier: 0.0387 (0.0460)  loss_box_reg: 0.0555 (0.0552)  loss_objectness: 0.0083 (0.0334)  loss_rpn_box_reg: 0.0043 (0.0080)  time: 0.1141  data: 0.0306  max mem: 3754
Epoch: [11]  [ 7700/12099]  eta: 0:08:12  lr: 0.000003  loss: 0.1074 (0.1426)  loss_classifier: 0.0361 (0.0460)  loss_box_reg: 0.0414 (0.0552)  loss_objectness: 0.0068 (0.0334)  loss_rpn_box_reg: 0.0013 (0.0080)  time: 0.1138  data: 0.0307  max mem: 3754
Epoch: [11]  [ 7800/12099]  eta: 0:08:00  lr: 0.000003  loss: 0.1380 (0.1426)  loss_classifier: 0.0386 (0.0460)  loss_box_reg: 0.0343 (0.0552)  loss_objectness: 0.0043 (0.0334)  loss_rpn_box_reg: 0.0022 (0.0080)  time: 0.1117  data: 0.0279  max mem: 3754
Epoch: [11]  [ 7900/12099]  eta: 0:07:49  lr: 0.000003  loss: 0.1449 (0.1426)  loss_classifier: 0.0368 (0.0460)  loss_box_reg: 0.0489 (0.0552)  loss_objectness: 0.0167 (0.0334)  loss_rpn_box_reg: 0.0047 (0.0080)  time: 0.1124  data: 0.

Epoch: [11]  [10900/12099]  eta: 0:02:14  lr: 0.000003  loss: 0.1257 (0.1424)  loss_classifier: 0.0464 (0.0462)  loss_box_reg: 0.0486 (0.0551)  loss_objectness: 0.0103 (0.0332)  loss_rpn_box_reg: 0.0021 (0.0079)  time: 0.1246  data: 0.0357  max mem: 3754
Epoch: [11]  [11000/12099]  eta: 0:02:03  lr: 0.000003  loss: 0.1167 (0.1424)  loss_classifier: 0.0425 (0.0462)  loss_box_reg: 0.0510 (0.0551)  loss_objectness: 0.0036 (0.0332)  loss_rpn_box_reg: 0.0029 (0.0079)  time: 0.1172  data: 0.0324  max mem: 3754
Epoch: [11]  [11100/12099]  eta: 0:01:52  lr: 0.000003  loss: 0.1256 (0.1424)  loss_classifier: 0.0417 (0.0462)  loss_box_reg: 0.0535 (0.0551)  loss_objectness: 0.0074 (0.0332)  loss_rpn_box_reg: 0.0028 (0.0079)  time: 0.1239  data: 0.0345  max mem: 3754
Epoch: [11]  [11200/12099]  eta: 0:01:40  lr: 0.000003  loss: 0.1480 (0.1425)  loss_classifier: 0.0495 (0.0462)  loss_box_reg: 0.0681 (0.0551)  loss_objectness: 0.0130 (0.0332)  loss_rpn_box_reg: 0.0016 (0.0079)  time: 0.1204  data: 0.

Epoch: [12]  [ 1900/12099]  eta: 0:20:12  lr: 0.000003  loss: 0.1093 (0.1470)  loss_classifier: 0.0350 (0.0469)  loss_box_reg: 0.0370 (0.0548)  loss_objectness: 0.0260 (0.0365)  loss_rpn_box_reg: 0.0032 (0.0087)  time: 0.1268  data: 0.0372  max mem: 3754
Epoch: [12]  [ 2000/12099]  eta: 0:20:03  lr: 0.000003  loss: 0.1339 (0.1471)  loss_classifier: 0.0399 (0.0471)  loss_box_reg: 0.0741 (0.0551)  loss_objectness: 0.0071 (0.0362)  loss_rpn_box_reg: 0.0024 (0.0086)  time: 0.1264  data: 0.0369  max mem: 3754
Epoch: [12]  [ 2100/12099]  eta: 0:19:52  lr: 0.000003  loss: 0.1027 (0.1464)  loss_classifier: 0.0370 (0.0468)  loss_box_reg: 0.0476 (0.0550)  loss_objectness: 0.0066 (0.0360)  loss_rpn_box_reg: 0.0027 (0.0086)  time: 0.1213  data: 0.0348  max mem: 3754
Epoch: [12]  [ 2200/12099]  eta: 0:19:41  lr: 0.000003  loss: 0.1247 (0.1463)  loss_classifier: 0.0424 (0.0469)  loss_box_reg: 0.0460 (0.0551)  loss_objectness: 0.0061 (0.0357)  loss_rpn_box_reg: 0.0024 (0.0086)  time: 0.1164  data: 0.

Epoch: [12]  [ 5200/12099]  eta: 0:13:53  lr: 0.000003  loss: 0.1360 (0.1439)  loss_classifier: 0.0422 (0.0468)  loss_box_reg: 0.0499 (0.0556)  loss_objectness: 0.0102 (0.0336)  loss_rpn_box_reg: 0.0029 (0.0078)  time: 0.1126  data: 0.0306  max mem: 3754
Epoch: [12]  [ 5300/12099]  eta: 0:13:40  lr: 0.000003  loss: 0.1409 (0.1440)  loss_classifier: 0.0422 (0.0468)  loss_box_reg: 0.0371 (0.0556)  loss_objectness: 0.0395 (0.0337)  loss_rpn_box_reg: 0.0056 (0.0078)  time: 0.1123  data: 0.0301  max mem: 3754
Epoch: [12]  [ 5400/12099]  eta: 0:13:27  lr: 0.000003  loss: 0.1172 (0.1437)  loss_classifier: 0.0372 (0.0467)  loss_box_reg: 0.0404 (0.0555)  loss_objectness: 0.0070 (0.0337)  loss_rpn_box_reg: 0.0026 (0.0079)  time: 0.1134  data: 0.0310  max mem: 3754
Epoch: [12]  [ 5500/12099]  eta: 0:13:14  lr: 0.000003  loss: 0.1217 (0.1439)  loss_classifier: 0.0383 (0.0467)  loss_box_reg: 0.0519 (0.0555)  loss_objectness: 0.0092 (0.0338)  loss_rpn_box_reg: 0.0043 (0.0079)  time: 0.1133  data: 0.

In [None]:
# Display the CUDA version string stored in the installed PyTorch build's
# version module (the bare last expression is rendered as the cell output).
# NOTE(review): presumably kept to record the environment for the training run — confirm.
torch.version.cuda

In [None]:
# Display the git revision identifier stored in the installed PyTorch build's
# version module, to pin the exact torch build in use.
torch.version.git_version

In [None]:
# Display the git revision identifier stored in the installed torchvision
# build's version module, to pin the exact torchvision build in use.
torchvision.version.git_version