In [2]:
!pip install ultralytics
!pip install torch torchvision
!pip install numpy pandas matplotlib
!pip install pycocotools

Collecting ultralytics
  Downloading ultralytics-8.3.197-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.197-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.197 ultralytics-thop-2.0.17


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
from ultralytics import YOLO
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import pandas as pd


#Benchmarking YOLO Models (YOLOv8, RT-DETR, YOLOv11n)


def benchmark_yolo_models():
    
    models ={
        "YOLOv8": "yolov8n.pt",
        "RT-DETR": "rtdetr-l.pt",
        "YOLOv11n": "yolo11n.pt"
    }

    results = {}

    for model_name, model_weights in models.items():
        print(f"--- Benchmarking {model_name} ---")

        model = YOLO(model_weights)

        model.train(data="brick-kilns.yaml", epochs=30, imgsz=640, batch=16, name=f"{model_name}_training")

        metrics = model.val()

        results[model_name] = {
            "Precision": metrics.box.map50,
            "Recall": metrics.box.r,
            "F1-Score": 2 * (metrics.box.map50 * metrics.box.r) / (metrics.box.map50 + metrics.box.r),
            "mAP-50": metrics.box.map50,
            "mAP50-95": metrics.box.map
        }

    return results


yolo_results = benchmark_yolo_models()


all_results ={}
all_results.update(yolo_results)

df = pd.DataFrame.from_dict(all_results, orient='index')
print("\n--- Benchmark Results ---")
df

--- Benchmarking YOLOv8 ---
Ultralytics 8.3.196 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=brick-kilns.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=YOLOv8_training3, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       1/30      12.4G      1.404      13.05     0.5925         13        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.9s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.806      0.228      0.198     0.0819

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       2/30      12.8G     0.7155      0.686     0.1778         23        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.9s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.6it/s 1.2s
                   all         61        125       0.93      0.302      0.304       0.15

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       3/30      12.5G     0.6974     0.6137     0.1694          8        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.885      0.217      0.205     0.0765

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       4/30      13.1G     0.6029     0.5736     0.1421         11        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.935      0.278      0.276      0.146

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       5/30      12.7G     0.6128     0.5611      0.142          8        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.908      0.284       0.27       0.14

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       6/30      12.5G     0.5721     0.5694     0.1355          6        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.251      0.296      0.278      0.144

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       7/30      12.7G     0.5982      0.567     0.1396         11        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.588       0.12      0.125     0.0672

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       8/30      12.8G     0.5666      0.556     0.1317          7        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.928      0.301      0.293      0.157

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K       9/30      12.9G     0.5697     0.5325     0.1353         12        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.934       0.31      0.305      0.167

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      10/30      12.6G     0.5503      0.532     0.1251         12        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.924      0.316      0.292      0.175

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      11/30      12.8G     0.5064     0.5127      0.112          5        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.927      0.309      0.288      0.171

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      12/30      12.8G     0.4913     0.5377     0.1076         10        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.4s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.601      0.306      0.295      0.177

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      13/30      12.9G     0.4966     0.5236     0.1123          7        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.5s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.934      0.309        0.3      0.173

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      14/30      12.5G      0.492     0.4978     0.1132         11        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.2s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.937      0.307      0.305      0.176

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      15/30      12.6G     0.4495      0.489    0.09948         11        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.6s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.1s
                   all         61        125      0.942      0.312      0.301      0.188

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      16/30      12.8G     0.4422     0.5051    0.09901          9        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.932      0.309      0.298      0.189

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      17/30      12.7G      0.479     0.5193     0.1087         23        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.605      0.343      0.299      0.186

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      18/30      12.8G     0.4477     0.4778    0.09913         12        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.933      0.293      0.296      0.174

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      19/30      12.5G     0.4523     0.4766    0.09507         16        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.938      0.302      0.301      0.191

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      20/30        13G     0.4558     0.4852     0.1049          8        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.649      0.341      0.314      0.198
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      21/30        13G     0.4242     0.4772     0.1083          6        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 35.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.663      0.357      0.311      0.183

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      22/30      12.5G     0.3939     0.4673      0.099          6        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.9s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.689       0.35      0.316      0.211

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      23/30      12.8G     0.4002     0.4646      0.102          9        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.809      0.359      0.347      0.218

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      24/30      12.8G      0.367      0.458    0.09174          5        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.1s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.669      0.379      0.315      0.216

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      25/30      12.8G     0.3744      0.449    0.09265          8        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 33.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.732       0.41      0.382       0.25

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      26/30      12.8G     0.3824     0.4386     0.1003          7        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.2s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125       0.68       0.42      0.353      0.244

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      27/30      12.8G     0.3487      0.433    0.08827          9        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.3s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.716      0.451      0.367      0.248

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      28/30      12.9G     0.3569     0.4219    0.08583          5        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.2s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.8it/s 1.1s
                   all         61        125      0.642      0.359      0.308       0.21

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      29/30      12.8G     0.3517     0.4279    0.09102          8        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.647      0.394      0.307      0.211

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[K      30/30      12.8G     0.3233     0.4137    0.08148         10        640: 100% ━━━━━━━━━━━━ 31/31 0.9it/s 34.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.7it/s 1.2s
                   all         61        125      0.689       0.41      0.346      0.233

30 epochs completed in 0.350 hours.
Optimizer stripped from /content/runs/detect/RT-DETR_training3/weights/last.pt, 66.1MB
Optimizer stripped from /content/runs/detect/RT-DETR_training3/weights/best.pt, 66.1MB

Validating /content/runs/detect/RT-DETR_training3/weights/best.pt...
Ultralytics 8.3.196 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
rt-detr-l summary: 302 layers, 31,989,905 parameters, 0 gradients, 103.4 GFLOPs
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 1.3it/s 1.6s
                   all         61        125      0.732       0.41      0.382       0.25
  

Unnamed: 0,Precision,Recall,F1-Score,mAP-50,mAP50-95
YOLOv8,0.387609,"[0.0, 0.2, 0.9814814814814815]","[0.0, 0.2638550587633776, 0.5557428059580319]",0.387609,0.252687
RT-DETR,0.382352,"[0.0, 0.26666666666666666, 0.9629629629629629]","[0.0, 0.3141992008406139, 0.5473678294545244]",0.382352,0.248357
YOLOv11n,0.367236,"[0.0, 0.2, 0.9537037037037037]","[0.0, 0.25896529035398386, 0.5302808377606685]",0.367236,0.242785


In [None]:
df

Unnamed: 0,Precision,Recall,F1-Score,mAP-50,mAP50-95
YOLOv8,0.387609,"[0.0, 0.2, 0.9814814814814815]","[0.0, 0.2638550587633776, 0.5557428059580319]",0.387609,0.252687
RT-DETR,0.382352,"[0.0, 0.26666666666666666, 0.9629629629629629]","[0.0, 0.3141992008406139, 0.5473678294545244]",0.382352,0.248357
YOLOv11n,0.367236,"[0.0, 0.2, 0.9537037037037037]","[0.0, 0.25896529035398386, 0.5302808377606685]",0.367236,0.242785


In [None]:
%%writefile yolo_to_coco.py

import os
import json
import argparse
from PIL import Image
import datetime
from tqdm import tqdm

def yolo_to_coco(image_dir, label_dir, output_json_path, class_map):
    coco_output = {
        "info": {
            "description": "Brick Kiln Dataset",
            "version": "1.0",
            "year": datetime.date.today().year,
            "date_created": datetime.datetime.utcnow().isoformat(' ')
        },
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": []
    }

    for yolo_id, coco_id in class_map.items():
        coco_output["categories"].append({
            "id": coco_id,
            "name": "brick_kiln",
            "supercategory": "none"
        })

    image_id_counter =1
    annotation_id_counter =1

    print(f"Reading images from: {image_dir}")
    print(f"Reading labels from: {label_dir}")

    for filename in tqdm(os.listdir(image_dir)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        image_path = os.path.join(image_dir, filename)

        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except Exception as e:
            print(f"Warning: Could not open image {image_path}. Skipping. Error: {e}")
            continue

        image_info = {
            "id": image_id_counter,
            "file_name": filename,
            "width": width,
            "height": height
        }
        coco_output["images"].append(image_info)

        label_filename = os.path.splitext(filename)[0] + ".txt"
        label_path = os.path.join(label_dir, label_filename)

        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 5: continue

                    yolo_class_id = int(parts[0])

                    if yolo_class_id not in class_map:
                        continue

                    coco_category_id = class_map[yolo_class_id]

                    x_center_norm, y_center_norm, w_norm, h_norm = map(float, parts[1:])

                    box_w = w_norm * width
                    box_h = h_norm * height
                    x_min = (x_center_norm * width) - (box_w / 2)
                    y_min = (y_center_norm * height) - (box_h / 2)

                    annotation_info = {
                        "id": annotation_id_counter,
                        "image_id": image_id_counter,
                        "category_id": coco_category_id,
                        "bbox": [x_min, y_min, box_w, box_h],
                        "area": box_w * box_h,
                        "iscrowd": 0
                    }
                    coco_output["annotations"].append(annotation_info)
                    annotation_id_counter += 1

        image_id_counter += 1

    with open(output_json_path, 'w') as f:
        json.dump(coco_output, f, indent=4)

    print(f"\nSuccessfully created COCO annotation file at: {output_json_path}")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Convert YOLO format annotations to COCO format.")
    parser.add_argument('--image-dir', type=str, required=True)
    parser.add_argument('--label-dir', type=str, required=True)
    parser.add_argument('--output-json', type=str, required=True)
    parser.add_argument('--class-map', type=str, required=True)

    args = parser.parse_args()

    try:
        yolo_id, coco_id = map(int, args.class_map.split(':'))
        class_map = {yolo_id: coco_id}
    except Exception as e:
        print(f"Error parsing class map: {e}")
    else:
        yolo_to_coco(args.image_dir, args.label_dir, args.output_json, class_map)

Overwriting yolo_to_coco.py


In [None]:
# Generate train.json
!python yolo_to_coco.py \
    --image-dir "/content/drive/MyDrive/BrickKiln_Split/images/train" \
    --label-dir "/content/drive/MyDrive/BrickKiln_Split/labels/train" \
    --output-json "train.json" \
    --class-map "2:1"

# Generate val.json
!python yolo_to_coco.py \
    --image-dir "/content/drive/MyDrive/BrickKiln_Split/images/val" \
    --label-dir "/content/drive/MyDrive/BrickKiln_Split/labels/val" \
    --output-json "val.json" \
    --class-map "2:1"

  "date_created": datetime.datetime.utcnow().isoformat(' ')
Reading images from: /content/drive/MyDrive/BrickKiln_Split/images/train
Reading labels from: /content/drive/MyDrive/BrickKiln_Split/labels/train
100% 484/484 [00:03<00:00, 152.32it/s]

Successfully created COCO annotation file at: train.json
  "date_created": datetime.datetime.utcnow().isoformat(' ')
Reading images from: /content/drive/MyDrive/BrickKiln_Split/images/val
Reading labels from: /content/drive/MyDrive/BrickKiln_Split/labels/val
100% 61/61 [00:00<00:00, 184.88it/s]

Successfully created COCO annotation file at: val.json


In [None]:
import torchvision
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from pycocotools.coco import COCO
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader

def get_faster_rcnn_model(num_classes):

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

def collate_fn(batch):
    return tuple(zip(*batch))

class BrickKilnDataset(CocoDetection):
    def __init__(self, root, annFile, transforms=None):
        super(BrickKilnDataset, self).__init__(root, annFile)
        self.transforms = transforms

    def __getitem__(self, idx):
       
        img, target = super(BrickKilnDataset, self).__getitem__(idx)

        img =F.to_tensor(img) #convert image to tensor

        image_id = self.ids[idx]

        if not isinstance(target, list):
            target = [target]

        boxes = [t['bbox'] for t in target if 'bbox' in t]
        # Convert bbox from [x, y, w, h] to [x1, y1, x2, y2]
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]

        labels = [t['category_id'] for t in target if 'category_id' in t]
        labels = torch.as_tensor(labels, dtype=torch.int64)

        new_target = {}
        new_target["boxes"] = boxes
        new_target["labels"] = labels
        new_target["image_id"] = torch.tensor([image_id])

        if self.transforms is not None:
            img, new_target = self.transforms(img, new_target)

        return img, new_target



train_dataset = BrickKilnDataset(root="/content/drive/MyDrive/BrickKiln_Split/images/train", annFile="train.json")
val_dataset = BrickKilnDataset(root="/content/drive/MyDrive/BrickKiln_Split/images/val", annFile="val.json")

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 3
model = get_faster_rcnn_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    lr_scheduler.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {losses.item()}")




loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!




Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 189MB/s]


Epoch 1/30, Loss: 0.1619870811700821
Epoch 2/30, Loss: 0.23687754571437836
Epoch 3/30, Loss: 0.04576046019792557
Epoch 4/30, Loss: 0.2223426252603531
Epoch 5/30, Loss: 0.07925695180892944
Epoch 6/30, Loss: 0.13878802955150604
Epoch 7/30, Loss: 0.18031279742717743
Epoch 8/30, Loss: 0.06794573366641998
Epoch 9/30, Loss: 0.12117931246757507
Epoch 10/30, Loss: 0.08949847519397736
Epoch 11/30, Loss: 0.12521207332611084
Epoch 12/30, Loss: 0.05492835491895676
Epoch 13/30, Loss: 0.06803999841213226
Epoch 14/30, Loss: 0.050578661262989044
Epoch 15/30, Loss: 0.13178890943527222
Epoch 16/30, Loss: 0.12966273725032806
Epoch 17/30, Loss: 0.11661406606435776
Epoch 18/30, Loss: 0.12917467951774597
Epoch 19/30, Loss: 0.015752725303173065
Epoch 20/30, Loss: 0.08131719380617142
Epoch 21/30, Loss: 0.03717049956321716
Epoch 22/30, Loss: 0.03873332589864731
Epoch 23/30, Loss: 0.12875840067863464
Epoch 24/30, Loss: 0.073199562728405
Epoch 25/30, Loss: 0.0751006156206131
Epoch 26/30, Loss: 0.0558291822671890

In [None]:
from pycocotools.cocoeval import COCOeval
import pandas as pd

# Evaluation
model.eval()
coco_gt = COCO("val.json")
coco_results = []

with torch.no_grad():
  for images, targets in val_loader:
      images = list(img.to(device) for img in images)
      outputs = model(images)

      for i, output in enumerate(outputs):
          image_id = targets[i]["image_id"].item()
          for box, label, score in zip(output["boxes"], output["labels"], output["scores"]):
              if score > 0.5:
                  coco_results.append({
                      "image_id": image_id,
                      "category_id": label.item(),
                      "bbox": [box[0].item(), box[1].item(), (box[2] - box[0]).item(), (box[3] - box[1]).item()],
                      "score": score.item(),
                  })

coco_dt = coco_gt.loadRes(coco_results)
coco_eval =COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

stats = coco_eval.stats

faster_rcnn_results = {
  "Faster R-CNN": {
      "Precision": stats[1],
      "Recall": stats[8],
      "F1-Score": 2 * (stats[1] * stats[8]) / (stats[1] + stats[8]) if (stats[1] + stats[8]) > 0 else 0,
      "mAP-50": stats[1],
      "mAP50-95": stats[0]
  }
}


fdf =pd.DataFrame.from_dict(faster_rcnn_results, orient='index')
fdf

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.885
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.661
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.567
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.335
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.680
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

Unnamed: 0,Precision,Recall,F1-Score,mAP-50,mAP50-95
Faster R-CNN,0.884713,0.67963,0.768728,0.884713,0.566682


In [14]:
fdf

Unnamed: 0,Precision,Recall,F1-Score,mAP-50,mAP50-95
Faster R-CNN,0.884713,0.67963,0.768728,0.884713,0.566682
