In [1]:
import torch
import detectron2
from detectron2.utils.logger import setup_logger

# Show the CUDA version reported by this PyTorch build (None on CPU-only builds).
cuda_build_version = torch.version.cuda
print(cuda_build_version)

# Set up detectron2's logger, then confirm the imports above succeeded.
setup_logger()
print("Detectron2 is working!")


12.8
Detectron2 is working!


In [2]:
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Dataset name -> (COCO annotation JSON, image root directory).
FOOD_DATASETS = {
    "food_train": ("dataset/annotation/train_annotation.json", "dataset/train"),
    "food_val": ("dataset/annotation/valid_annotations.json", "dataset/valid"),
}

for name, (json_file, image_root) in FOOD_DATASETS.items():
    # Drop stale entries so this cell can be re-run (e.g. after fixing a path).
    # The two catalogs are checked independently: a metadata entry can exist
    # without a dataset entry (MetadataCatalog.get(name) creates one on first
    # access), and a leftover entry holding old paths would make the
    # re-registration below fail when it tries to overwrite those attributes.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)

    # Register again with the current paths; no extra metadata is supplied.
    register_coco_instances(name, {}, json_file, image_root)


In [3]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
import os

# A single model-zoo entry supplies both the architecture config and the
# pretrained checkpoint, so it is named once and reused for both lookups.
zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"  # change here

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))

# --- Model ---
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 15  # your number of food classes
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

# --- Data ---
cfg.DATASETS.TRAIN = ("food_train",)
cfg.DATASETS.TEST = ("food_val",)
cfg.DATALOADER.NUM_WORKERS = 4

# --- Solver ---
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3000

# --- Output ---
cfg.OUTPUT_DIR = "./output_R101"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


In [4]:
from detectron2.engine import DefaultTrainer

# Build detectron2's default trainer from the config assembled in the previous cell.
trainer = DefaultTrainer(cfg)
# resume=False: do not continue from an earlier run; the logged output shows the
# model-zoo checkpoint being downloaded and training starting from iteration 0.
# The "Skip loading parameter ..." messages for the box/mask predictor layers are
# shape mismatches between the checkpoint and this model (e.g. 81 vs 16 outputs),
# so those layers are not initialised from the checkpoint.
trainer.resume_or_load(resume=False)
# Run the training loop; the `trainer` object is reused by the evaluation cell.
trainer.train()


[32m[05/14 09:42:31 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

model_final_a3ec72.pkl: 254MB [00:10, 24.0MB/s]                              
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (60, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (60,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model d

[32m[05/14 09:42:43 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[05/14 09:42:58 d2.utils.events]: [0m eta: 0:18:37  iter: 19  total_loss: 4.143  loss_cls: 2.744  loss_box_reg: 0.6613  loss_mask: 0.6933  loss_rpn_cls: 0.012  loss_rpn_loc: 0.01149    time: 0.3724  last_time: 0.3099  data_time: 0.2764  last_data_time: 0.0016   lr: 4.9953e-06  max_mem: 2254M
[32m[05/14 09:43:06 d2.utils.events]: [0m eta: 0:18:38  iter: 39  total_loss: 3.889  loss_cls: 2.591  loss_box_reg: 0.5992  loss_mask: 0.6944  loss_rpn_cls: 0.009766  loss_rpn_loc: 0.009509    time: 0.3791  last_time: 0.3348  data_time: 0.0013  last_data_time: 0.0011   lr: 9.9902e-06  max_mem: 2310M
[32m[05/14 09:43:14 d2.utils.events]: [0m eta: 0:18:15  iter: 59  total_loss: 3.728  loss_cls: 2.308  loss_box_reg: 0.6941  loss_mask: 0.6895  loss_rpn_cls: 0.01111  loss_rpn_loc: 0.01469    time: 0.3709  last_time: 0.3607  data_time: 0.0012  last_data_time: 0.0012   lr: 1.4985e-05  max_mem: 2310M
[32m[05/14 09:43:21 d2.utils.events]: [0m eta: 0:17:50  iter: 79  total_loss: 3.194  loss_cls: 

In [5]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Evaluate the trained model on the validation split with COCO metrics.
# The evaluator is configured through explicit keyword arguments: passing the
# config object as the second positional argument is the deprecated constructor
# form (that slot is `tasks`) and only triggers a deprecation warning. With
# `tasks` left unset, the tasks to score are inferred from the predictions.
# distributed=False because evaluation runs in this single notebook process.
evaluator = COCOEvaluator("food_val", distributed=False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "food_val")
print(inference_on_dataset(trainer.model, val_loader, evaluator))


[32m[05/14 10:03:55 d2.data.datasets.coco]: [0mLoaded 300 images in COCO format from dataset/annotation/valid_annotations.json
[32m[05/14 10:03:55 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[05/14 10:03:55 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[05/14 10:03:55 d2.data.common]: [0mSerializing 300 elements to byte tensors and concatenating them all ...
[32m[05/14 10:03:55 d2.data.common]: [0mSerialized dataset takes 0.36 MiB
[32m[05/14 10:03:55 d2.evaluation.evaluator]: [0mStart inference on 300 batches
[32m[05/14 10:04:01 d2.evaluation.evaluator]: [0mInference done 11/300. Dataloading: 0.0005 s/iter. Inference: 0.0947 s/iter. Eval: 0.0042 s/iter. Total: 0.0995 s/iter. ETA=0:00:28
[32m[05/14 10:04:06 d2.evaluation.evaluator]: [0mInference done 51/300. Dataloading: 0.0009 s/iter.