In [3]:
import torch
import detectron2
from detectron2.utils.logger import setup_logger

# Sanity-check the environment before doing any real work.
# torch.version.cuda is the CUDA version this PyTorch build was compiled
# against (it is None on a CPU-only build).
print(torch.version.cuda)
# Install detectron2's logger so that later training/evaluation cells emit
# their progress messages.
setup_logger()
# Reaching this line only proves the imports above succeeded.
print("Detectron2 is working!")


12.8
Detectron2 is working!


In [4]:
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Dataset name -> (COCO annotation JSON, image directory).
# NOTE: the file names are intentionally left as they are on disk
# ("train_annotation.json" is singular, "valid_annotations.json" is plural).
FOOD_DATASETS = {
    "food_train": (r"dataset/annotation/train_annotation.json", "dataset/train"),
    "food_val": (r"dataset/annotation/valid_annotations.json", "dataset/valid"),
}

for name, (annotation_json, image_root) in FOOD_DATASETS.items():
    # Make the cell safely re-runnable. The two catalogs are cleaned up
    # independently because they can get out of sync:
    #   * MetadataCatalog.get(name) creates an entry lazily, so metadata may
    #     exist without a DatasetCatalog entry; stale metadata would make
    #     re-registration with different paths fail.
    #   * MetadataCatalog.remove(name) raises KeyError if no entry exists.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)

    # Register (again) with the current paths; no extra metadata is supplied.
    register_coco_instances(name, {}, annotation_json, image_root)


In [5]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
import os

# The same model-zoo YAML name selects both the architecture config and the
# matching pretrained checkpoint, so it is spelled out exactly once.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)

# --- Data: the dataset names registered in the previous cell ---
cfg.DATASETS.TRAIN = ("food_train",)
cfg.DATASETS.TEST = ("food_val",)
cfg.DATALOADER.NUM_WORKERS = 4

# --- ROI heads ---
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 15  # number of food classes in the dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

# --- Solver ---
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3000

# --- Output location (created up front if it does not exist yet) ---
cfg.OUTPUT_DIR = "./output_R50_DC5"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


In [6]:
from detectron2.engine import DefaultTrainer

# Build the trainer from the configuration assembled in the config cell.
# `trainer` is kept as a notebook-level name because the evaluation cell
# reads the trained network from `trainer.model`.
trainer = DefaultTrainer(cfg)
# resume=False: per detectron2's DefaultTrainer docs this loads
# cfg.MODEL.WEIGHTS rather than continuing from a checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)
# Runs the training loop (cfg.SOLVER.MAX_ITER is set to 3000 in the config cell).
trainer.train()


[32m[05/14 10:37:40 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): ResNet(
    (stem): BasicStem(
      (conv1): Conv2d(
        3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
      )
    )
    (res2): Sequential(
      (0): BottleneckBlock(
        (shortcut): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
        (conv1): Conv2d(
          64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (60, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (60,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (15, 256, 1, 

[32m[05/14 10:37:41 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[05/14 10:37:57 d2.utils.events]: [0m eta: 0:22:25  iter: 19  total_loss: 4.325  loss_cls: 2.683  loss_box_reg: 0.7886  loss_mask: 0.6918  loss_rpn_cls: 0.1082  loss_rpn_loc: 0.1195    time: 0.4836  last_time: 0.4172  data_time: 0.2573  last_data_time: 0.0010   lr: 4.9953e-06  max_mem: 4110M
[32m[05/14 10:38:07 d2.utils.events]: [0m eta: 0:22:15  iter: 39  total_loss: 4.081  loss_cls: 2.543  loss_box_reg: 0.6535  loss_mask: 0.6907  loss_rpn_cls: 0.06802  loss_rpn_loc: 0.1042    time: 0.4823  last_time: 0.4145  data_time: 0.0012  last_data_time: 0.0014   lr: 9.9902e-06  max_mem: 4110M
[32m[05/14 10:38:16 d2.utils.events]: [0m eta: 0:22:05  iter: 59  total_loss: 4.107  loss_cls: 2.263  loss_box_reg: 0.7486  loss_mask: 0.6883  loss_rpn_cls: 0.08375  loss_rpn_loc: 0.106    time: 0.4779  last_time: 0.3780  data_time: 0.0011  last_data_time: 0.0009   lr: 1.4985e-05  max_mem: 4110M
[32m[05/14 10:38:25 d2.utils.events]: [0m eta: 0:21:44  iter: 79  total_loss: 3.478  loss_cls: 1.799

In [7]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Evaluate the in-memory model held by `trainer` on the validation split.
#
# COCOEvaluator is built with explicit keyword arguments: handing the whole
# cfg in as the second positional argument (the `tasks` slot) is deprecated
# in current detectron2 and only triggers a warning. Leaving `tasks` unset
# lets the evaluator infer the tasks from the predictions, which is what the
# deprecated cfg form did as well.
evaluator = COCOEvaluator("food_val", distributed=False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "food_val")

# Keep the metrics in a variable so later cells can reuse them without
# re-running inference, then print them as before.
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
print(eval_results)


[32m[05/14 11:03:39 d2.data.datasets.coco]: [0mLoaded 300 images in COCO format from dataset/annotation/valid_annotations.json
[32m[05/14 11:03:39 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[05/14 11:03:39 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[05/14 11:03:39 d2.data.common]: [0mSerializing 300 elements to byte tensors and concatenating them all ...
[32m[05/14 11:03:39 d2.data.common]: [0mSerialized dataset takes 0.36 MiB
[32m[05/14 11:03:39 d2.evaluation.evaluator]: [0mStart inference on 300 batches
[32m[05/14 11:03:49 d2.evaluation.evaluator]: [0mInference done 11/300. Dataloading: 0.0007 s/iter. Inference: 0.1341 s/iter. Eval: 0.0107 s/iter. Total: 0.1455 s/iter. ETA=0:00:42
[32m[05/14 11:03:54 d2.evaluation.evaluator]: [0mInference done 46/300. Dataloading: 0.0008 s/iter.