In [1]:
!pip install -q pycocotools

In [2]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.transforms import ToTensor, Compose, Resize, Normalize
from torchvision.transforms import functional as TF
from torchvision.ops import box_iou

import numpy as np
import math
from PIL import Image, ImageDraw
import os
import json
import random
from tqdm import tqdm

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import pycocotools.mask as mask_util

In [3]:
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [4]:
!rm -rf /kaggle/working/cityscapes
!mkdir -p /kaggle/working/cityscapes/leftImg8bit/train
!mkdir -p /kaggle/working/cityscapes/gtFine/train
!mkdir -p /kaggle/working/cityscapes/leftImg8bit/val
!mkdir -p /kaggle/working/cityscapes/gtFine/val


In [5]:
!cp -r /kaggle/input/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/train/aachen /kaggle/working/cityscapes/leftImg8bit/train/
!cp -r /kaggle/input/cityscapes/gtFine_trainvaltest/gtFine/train/aachen /kaggle/working/cityscapes/gtFine/train/
!cp -r /kaggle/input/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/train/bremen /kaggle/working/cityscapes/leftImg8bit/train/
!cp -r /kaggle/input/cityscapes/gtFine_trainvaltest/gtFine/train/bremen /kaggle/working/cityscapes/gtFine/train/
!cp -r /kaggle/input/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/train/hamburg /kaggle/working/cityscapes/leftImg8bit/train/
!cp -r /kaggle/input/cityscapes/gtFine_trainvaltest/gtFine/train/hamburg /kaggle/working/cityscapes/gtFine/train/

# Sao chép toàn bộ tập validation
!cp -r /kaggle/input/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/val/* /kaggle/working/cityscapes/leftImg8bit/val/
!cp -r /kaggle/input/cityscapes/gtFine_trainvaltest/gtFine/val/* /kaggle/working/cityscapes/gtFine/val/

In [6]:
CITYSCAPES_CLASSES = [
    '__background__', 'person', 'rider', 'car', 'truck', 'bus', 'train', 
    'motorcycle', 'bicycle'
]

In [7]:
def class_name_to_id(class_name):
    return CITYSCAPES_CLASSES.index(class_name) if class_name in CITYSCAPES_CLASSES else -1

In [8]:
class CityscapesDataset(Dataset):
    def __init__(self, root_dir, set_type='train', image_size=(512, 1024)):
        self.root_dir = root_dir
        self.set_type = set_type
        self.image_size = image_size
        self.img_dir = os.path.join(root_dir, 'leftImg8bit', set_type)
        self.ann_dir = os.path.join(root_dir, 'gtFine', set_type)
        
        self.img_paths = []
        for city in sorted(os.listdir(self.img_dir)):
            city_path = os.path.join(self.img_dir, city)
            for img_file in sorted(os.listdir(city_path)):
                if img_file.endswith('_leftImg8bit.png'):
                    self.img_paths.append(os.path.join(city_path, img_file))
        
        print(f"Loaded {len(self.img_paths)} {set_type} images.")

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        img_path = self.img_paths[index]
        ann_path = img_path.replace('_leftImg8bit.png', '_gtFine_polygons.json').replace('leftImg8bit', 'gtFine')
        
        img = Image.open(img_path).convert("RGB")
        original_w, original_h = img.size
        
        with open(ann_path, 'r') as f:
            ann = json.load(f)

        boxes, labels, masks = [], [], []
        for obj in ann.get('objects', []):
            label_id = class_name_to_id(obj.get('label'))
            if label_id == -1: continue

            poly = obj.get('polygon')
            if not poly or len(poly) < 3: continue

            poly_flat = [coord for point in poly for coord in point]
            x_coords, y_coords = poly_flat[0::2], poly_flat[1::2]
            x_min, y_min = min(x_coords), min(y_coords)
            x_max, y_max = max(x_coords), max(y_coords)

            if x_max <= x_min or y_max <= y_min: continue
            
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(label_id)
            
            mask = Image.new('L', (original_w, original_h), 0)
            ImageDraw.Draw(mask).polygon([tuple(p) for p in poly], outline=1, fill=1)
            masks.append(np.array(mask))

        if not boxes:
            return self.__getitem__(random.randint(0, len(self) - 1))

        # --- Transforms ---
        img = TF.to_tensor(img)
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        masks = torch.as_tensor(np.array(masks), dtype=torch.uint8)

        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks,
            'image_id': torch.tensor([index + 1]) # Khớp với COCO format (1-based)
        }
        
        return img, target

def collate_fn(batch):
    return tuple(zip(*batch))

In [9]:
def create_cityscapes_coco_json(dataset, output_file):
    coco_output = {
        "images": [],
        "annotations": [],
        "categories": [{"id": i, "name": name} for i, name in enumerate(CITYSCAPES_CLASSES) if name != '__background__']
    }
    
    ann_id_counter = 1
    for img_idx in tqdm(range(len(dataset)), desc="Creating COCO GT file"):
        img_path = dataset.img_paths[img_idx]
        with Image.open(img_path) as img:
            width, height = img.size
        
        coco_output["images"].append({
            "id": img_idx + 1,
            "width": width,
            "height": height,
            "file_name": os.path.basename(img_path)
        })
        
        _, target = dataset[img_idx]
        
        for i in range(len(target['boxes'])):
            box = target['boxes'][i].tolist()
            x1, y1, x2, y2 = box
            bbox = [x1, y1, x2 - x1, y2 - y1]
            
            annotation_info = {
                "id": ann_id_counter,
                "image_id": img_idx + 1,
                "category_id": target['labels'][i].item(),
                "bbox": bbox,
                "area": bbox[2] * bbox[3],
                "iscrowd": 0,
                "segmentation": mask_util.encode(np.asfortranarray(target['masks'][i].numpy()))
            }
            annotation_info['segmentation']['counts'] = annotation_info['segmentation']['counts'].decode('utf-8')

            coco_output["annotations"].append(annotation_info)
            ann_id_counter += 1
            
    with open(output_file, 'w') as f:
        json.dump(coco_output, f)
    print(f"COCO GT file created at {output_file}")

In [10]:
def get_model(num_classes):
    # pre-train trên COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights="MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT")

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    
    return model

In [11]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50):
    model.train()
    
    lr_scheduler = None
    if epoch == 0:
        # Warmup scheduler
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=warmup_factor, total_iters=warmup_iters)

    pbar = tqdm(data_loader, desc=f"Training Epoch {epoch+1}")
    for i, (images, targets) in enumerate(pbar):
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        if not math.isfinite(losses.item()):
            print(f"Loss is {losses.item()}, stopping training")
            return

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()
        
        loss_value = losses.item()
        pbar.set_postfix(loss=f"{loss_value:.4f}")

In [12]:
def evaluate_map(model, data_loader, device, coco_gt):
    model.eval()
    coco_results = []
    
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating for mAP"):
            images = list(img.to(device) for img in images)
            image_ids = [t['image_id'].item() for t in targets]

            outputs = model(images)
            
            for i, output in enumerate(outputs):
                boxes = output["boxes"].to("cpu")
                labels = output["labels"].to("cpu")
                scores = output["scores"].to("cpu")
                masks = output["masks"].to("cpu")

                for j in range(boxes.shape[0]):
                    box = boxes[j].tolist()
                    x1, y1, x2, y2 = box
                    
                    mask_pred = masks[j, 0] > 0.5
                    rle = mask_util.encode(np.asfortranarray(mask_pred.numpy()))
                    rle['counts'] = rle['counts'].decode('utf-8')
                    
                    result = {
                        "image_id": image_ids[i],
                        "category_id": labels[j].item(),
                        "bbox": [x1, y1, x2 - x1, y2 - y1],
                        "score": scores[j].item(),
                        "segmentation": rle,
                    }
                    coco_results.append(result)

    if not coco_results:
        return {"bbox_mAP": 0.0, "segm_mAP": 0.0}

    pred_file_path = "/kaggle/working/predictions.json"
    with open(pred_file_path, "w") as f:
        json.dump(coco_results, f)
    
    coco_dt = coco_gt.loadRes(pred_file_path)

    metrics = {}
    for iou_type in ["bbox", "segm"]:
        print(f"\n--- Evaluating {iou_type.upper()} mAP ---")
        coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        metrics[f"{iou_type}_mAP"] = coco_eval.stats[0]
        
    return metrics

In [13]:
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # --- Config ---
    NUM_CLASSES = len(CITYSCAPES_CLASSES)
    DATA_ROOT = "/kaggle/working/cityscapes"
    BATCH_SIZE = 2
    NUM_EPOCHS = 10
    
    # --- Dataset & DataLoader ---
    dataset = CityscapesDataset(root_dir=DATA_ROOT, set_type='train')
    dataset_val = CityscapesDataset(root_dir=DATA_ROOT, set_type='val')
    
    data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn, num_workers=2)
    data_loader_val = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, num_workers=2)

    # --- Chuẩn bị file GT cho evaluation ---
    gt_file_path = "/kaggle/working/cityscapes_val_coco.json"
    if not os.path.exists(gt_file_path):
        create_cityscapes_coco_json(dataset_val, gt_file_path)
    coco_gt = COCO(gt_file_path)
    
    # --- Model, Optimizer, Scheduler ---
    model = get_model(NUM_CLASSES)
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # --- Vòng lặp Training & Evaluation ---
    best_map = 0.0
    for epoch in range(NUM_EPOCHS):
        train_one_epoch(model, optimizer, data_loader, device, epoch)
        lr_scheduler.step()
        
        metrics = evaluate_map(model, data_loader_val, device, coco_gt)
        bbox_map = metrics.get("bbox_mAP", 0.0)
        
        print(f"\n[Validation] Epoch {epoch+1}: Bbox mAP: {bbox_map:.4f}")
        
        if bbox_map > best_map:
            best_map = bbox_map
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"Saved new best model with Bbox mAP: {best_map:.4f}")

if __name__ == "__main__":
    main()

Using device: cuda
Loaded 738 train images.
Loaded 500 val images.


Creating COCO GT file: 100%|██████████| 500/500 [02:35<00:00,  3.21it/s]


COCO GT file created at /kaggle/working/cityscapes_val_coco.json
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!


Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_v2_coco-73cbd019.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_v2_coco-73cbd019.pth
100%|██████████| 177M/177M [00:01<00:00, 179MB/s]
Training Epoch 1: 100%|██████████| 369/369 [05:54<00:00,  1.04it/s, loss=0.9968]
Evaluating for mAP: 100%|██████████| 250/250 [13:02<00:00,  3.13s/it]


Loading and preparing results...
DONE (t=0.40s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=6.32s).
Accumulating evaluation results...
DONE (t=0.54s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.280
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.532
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.257
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.222
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.416
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.211
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.379
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.409
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 2: 100%|██████████| 369/369 [05:54<00:00,  1.04it/s, loss=0.9453]
Evaluating for mAP: 100%|██████████| 250/250 [12:37<00:00,  3.03s/it]


Loading and preparing results...
DONE (t=0.40s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=5.74s).
Accumulating evaluation results...
DONE (t=0.47s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.292
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.539
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.280
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.252
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.438
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.215
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.397
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.430
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 3: 100%|██████████| 369/369 [05:54<00:00,  1.04it/s, loss=1.0002]
Evaluating for mAP: 100%|██████████| 250/250 [10:55<00:00,  2.62s/it]


Loading and preparing results...
DONE (t=0.37s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=5.30s).
Accumulating evaluation results...
DONE (t=0.43s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.316
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.556
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.311
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.089
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.294
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.456
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.229
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.410
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.442
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 4: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=0.4258]
Evaluating for mAP: 100%|██████████| 250/250 [09:31<00:00,  2.29s/it]


Loading and preparing results...
DONE (t=0.35s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=5.02s).
Accumulating evaluation results...
DONE (t=0.38s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.341
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.578
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.348
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.299
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.495
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.245
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.427
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.457
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 5: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=1.0316]
Evaluating for mAP: 100%|██████████| 250/250 [09:09<00:00,  2.20s/it]


Loading and preparing results...
DONE (t=0.30s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.63s).
Accumulating evaluation results...
DONE (t=0.38s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.349
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.594
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.354
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.302
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.503
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.253
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.433
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.463
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 6: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=0.8052]
Evaluating for mAP: 100%|██████████| 250/250 [08:58<00:00,  2.15s/it]


Loading and preparing results...
DONE (t=0.33s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.66s).
Accumulating evaluation results...
DONE (t=0.39s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.347
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.585
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.348
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.090
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.288
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.505
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.253
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.434
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.464
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 7: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=0.5552]
Evaluating for mAP: 100%|██████████| 250/250 [08:55<00:00,  2.14s/it]


Loading and preparing results...
DONE (t=0.37s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.70s).
Accumulating evaluation results...
DONE (t=0.38s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.351
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.593
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.356
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.092
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.289
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.514
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.257
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.441
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.470
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 8: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=0.5556]
Evaluating for mAP: 100%|██████████| 250/250 [08:39<00:00,  2.08s/it]


Loading and preparing results...
DONE (t=0.33s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.57s).
Accumulating evaluation results...
DONE (t=0.37s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.347
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.589
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.349
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.300
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.505
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.249
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.432
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.460
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 9: 100%|██████████| 369/369 [05:55<00:00,  1.04it/s, loss=0.8996]
Evaluating for mAP: 100%|██████████| 250/250 [08:33<00:00,  2.05s/it]


Loading and preparing results...
DONE (t=0.33s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.59s).
Accumulating evaluation results...
DONE (t=0.37s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.350
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.589
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.355
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.298
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.513
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.253
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.431
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.460
 Average Recall     (AR) @[ IoU=0.50:0.

Training Epoch 10: 100%|██████████| 369/369 [05:56<00:00,  1.04it/s, loss=0.5981]
Evaluating for mAP: 100%|██████████| 250/250 [08:47<00:00,  2.11s/it]


Loading and preparing results...
DONE (t=0.29s)
creating index...
index created!

--- Evaluating BBOX mAP ---
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=4.65s).
Accumulating evaluation results...
DONE (t=0.38s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.351
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.592
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.355
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.089
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.291
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.515
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.257
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.435
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.464
 Average Recall     (AR) @[ IoU=0.50:0.