In [2]:
#Training
import detectron2
import torch
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
import os

# Setup logger for better visibility
setup_logger()

# Step 1: Register the Dataset (COCO Format)
def register_datasets():
    # Replace these paths with your dataset paths
    train_json = "dataset/train/annotations.json"
    train_images = "dataset/train/images"
    val_json = "dataset/val/annotations.json"
    val_images = "dataset/val/images"

    register_coco_instances("my_train_dataset", {}, train_json, train_images)
    register_coco_instances("my_val_dataset", {}, val_json, val_images)

    # Print metadata for debugging
    print("Registered Datasets:")
    print(MetadataCatalog.get("my_train_dataset"))
    print(MetadataCatalog.get("my_val_dataset"))

# Step 2: Define the Training Function
def train_model():
    cfg = get_cfg()
    
    # Load a pre-defined configuration file
    cfg.merge_from_file("config/pothole_detection_config.yaml")

    # Step 3: Dataset configuration
    cfg.DATASETS.TRAIN = ("my_train_dataset",)
    cfg.DATASETS.TEST = ("my_val_dataset",)
    cfg.DATALOADER.NUM_WORKERS = 2

    # Step 4: Training parameters
    #cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849600/model_final_280758.pkl"
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.0001
    cfg.SOLVER.MAX_ITER = 1800  # Adjust for quick tests
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
    
    cfg.SOLVER.STEPS = (1000, 2000)
    cfg.SOLVER.GAMMA = 0.1

    # Enable gradient clipping
    cfg.SOLVER.CLIP_GRADIENTS.ENABLE = True
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
    
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Set according to your dataset

    # Step 5: Output Directory
    cfg.OUTPUT_DIR = "./output"

    # Step 6: Force CPU usage
    cfg.MODEL.DEVICE = "cuda"

    # Create the output directory if it doesn't exist
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # Step 7: Initialize the Trainer
    trainer = DefaultTrainer(cfg)

    # Resume training from a checkpoint if available
    trainer.resume_or_load(resume=False)

    # Start training
    trainer.train()

# Main Execution
if __name__ == "__main__":
    # Step 1: Register datasets
    register_datasets()

    # Step 2: Train the model
    train_model()



Registered Datasets:
Metadata(name='my_train_dataset', json_file='dataset/train/annotations.json', image_root='dataset/train/images', evaluator_type='coco', thing_classes=['pothole'], thing_dataset_id_to_contiguous_id={1: 0})
Metadata(name='my_val_dataset', json_file='dataset/val/annotations.json', image_root='dataset/val/images', evaluator_type='coco')
[32m[01/18 11:54:24 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): ResNet(
    (stem): BasicStem(
      (conv1): Conv2d(
        3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
      )
    )
    (res2): Sequential(
      (0): BottleneckBlock(
        (shortcut): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
        (conv1): Conv2d(
          64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_feat

[32m[01/18 11:54:24 d2.data.datasets.coco]: [0mLoaded 80 images in COCO format from dataset/train/annotations.json
[32m[01/18 11:54:24 d2.data.build]: [0mRemoved 1 images with no usable annotations. 79 images left.
[32m[01/18 11:54:24 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(800,), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[01/18 11:54:24 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[01/18 11:54:24 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[01/18 11:54:24 d2.data.common]: [0mSerializing 79 elements to byte tensors and concatenating them all ...
[32m[01/18 11:54:24 d2.data.common]: [0mSerialized dataset takes 0.02 MiB
[32m[01/18 11:54:24 d2.data.build]: [0mMaking batched data loader with batch_size=4
[32m[01/18 11:54:24 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from  ..

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[01/18 11:54:41 d2.utils.events]: [0m eta: 0:17:10  iter: 19  total_loss: 86.47  loss_cls: 55.12  loss_box_reg: 10.26  loss_rpn_cls: 12.12  loss_rpn_loc: 18.22    time: 0.5666  last_time: 0.6951  data_time: 0.2092  last_data_time: 0.0035   lr: 2.881e-07  max_mem: 2409M

[32m[01/18 11:55:10 d2.utils.events]: [0m eta: 0:20:26  iter: 39  total_loss: 20.53  loss_cls: 11.24  loss_box_reg: 1.34  loss_rpn_cls: 3.399  loss_rpn_loc: 5.146    time: 0.6441  last_time: 0.7008  data_time: 0.0033  last_data_time: 0.0036   lr: 4.861e-07  max_mem: 2409M
[32m[01/18 11:55:24 d2.utils.events]: [0m eta: 0:20:18  iter: 59  total_loss: 12.9  loss_cls: 7.65  loss_box_reg: 0.6316  loss_rpn_cls: 1.397  loss_rpn_loc: 2.95    time: 0.6648  last_time: 0.7081  data_time: 0.0033  last_data_time: 0.0033   lr: 6.841e-07  max_mem: 2409M
[32m[01/18 11:55:39 d2.utils.events]: [0m eta: 0:20:07  iter: 79  total_loss: 10.25  loss_cls: 6.466  loss_box_reg: 0.3871  loss_rpn_cls: 1.355  loss_rpn_loc: 2.446    time

[32m[01/18 12:01:55 d2.utils.events]: [0m eta: 0:14:24  iter: 599  total_loss: 1.952  loss_cls: 0.2933  loss_box_reg: 0.4659  loss_rpn_cls: 0.3032  loss_rpn_loc: 0.5419    time: 0.7163  last_time: 0.7240  data_time: 0.0036  last_data_time: 0.0033   lr: 6.0301e-06  max_mem: 2409M
[32m[01/18 12:02:09 d2.utils.events]: [0m eta: 0:14:10  iter: 619  total_loss: 1.485  loss_cls: 0.4414  loss_box_reg: 0.3399  loss_rpn_cls: 0.2948  loss_rpn_loc: 0.479    time: 0.7168  last_time: 0.7251  data_time: 0.0033  last_data_time: 0.0072   lr: 6.2281e-06  max_mem: 2409M
[32m[01/18 12:02:24 d2.utils.events]: [0m eta: 0:13:56  iter: 639  total_loss: 1.273  loss_cls: 0.3103  loss_box_reg: 0.2652  loss_rpn_cls: 0.2755  loss_rpn_loc: 0.3933    time: 0.7173  last_time: 0.7286  data_time: 0.0034  last_data_time: 0.0032   lr: 6.4261e-06  max_mem: 2409M
[32m[01/18 12:02:39 d2.utils.events]: [0m eta: 0:13:42  iter: 659  total_loss: 1.799  loss_cls: 0.4556  loss_box_reg: 0.3472  loss_rpn_cls: 0.3007  loss_

[32m[01/18 12:09:15 d2.utils.events]: [0m eta: 0:07:17  iter: 1199  total_loss: 1.088  loss_cls: 0.2575  loss_box_reg: 0.3777  loss_rpn_cls: 0.2053  loss_rpn_loc: 0.2448    time: 0.7252  last_time: 0.7351  data_time: 0.0036  last_data_time: 0.0039   lr: 1e-05  max_mem: 2409M
[32m[01/18 12:09:30 d2.utils.events]: [0m eta: 0:07:03  iter: 1219  total_loss: 0.9042  loss_cls: 0.1732  loss_box_reg: 0.3464  loss_rpn_cls: 0.1952  loss_rpn_loc: 0.2144    time: 0.7253  last_time: 0.7293  data_time: 0.0035  last_data_time: 0.0034   lr: 1e-05  max_mem: 2409M
[32m[01/18 12:09:45 d2.utils.events]: [0m eta: 0:06:49  iter: 1239  total_loss: 1.416  loss_cls: 0.2223  loss_box_reg: 0.4248  loss_rpn_cls: 0.2631  loss_rpn_loc: 0.4314    time: 0.7256  last_time: 0.7354  data_time: 0.0032  last_data_time: 0.0033   lr: 1e-05  max_mem: 2409M
[32m[01/18 12:10:00 d2.utils.events]: [0m eta: 0:06:34  iter: 1259  total_loss: 1.157  loss_cls: 0.1866  loss_box_reg: 0.4705  loss_rpn_cls: 0.1873  loss_rpn_loc: 

[32m[01/18 12:16:38 d2.utils.events]: [0m eta: 0:00:00  iter: 1799  total_loss: 1.174  loss_cls: 0.1911  loss_box_reg: 0.4861  loss_rpn_cls: 0.1945  loss_rpn_loc: 0.209    time: 0.7293  last_time: 0.7275  data_time: 0.0034  last_data_time: 0.0027   lr: 1e-05  max_mem: 2409M
[32m[01/18 12:16:38 d2.engine.hooks]: [0mOverall training speed: 1798 iterations in 0:21:51 (0.7293 s / it)
[32m[01/18 12:16:38 d2.engine.hooks]: [0mTotal training time: 0:22:08 (0:00:16 on hooks)
[32m[01/18 12:16:38 d2.data.datasets.coco]: [0mLoaded 5 images in COCO format from dataset/val/annotations.json
[32m[01/18 12:16:38 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|  pothole   | 5            |
|            |              |[0m
[32m[01/18 12:16:38 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
