In [1]:
# Install detectron2!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

Cloning into 'detectron2'...
remote: Enumerating objects: 15900, done.[K
remote: Counting objects: 100% (113/113), done.[K
remote: Compressing objects: 100% (84/84), done.[K
remote: Total 15900 (delta 69), reused 29 (delta 29), pack-reused 15787 (from 2)[K
Receiving objects: 100% (15900/15900), 6.45 MiB | 10.90 MiB/s, done.
Resolving deltas: 100% (11568/11568), done.
Ignoring dataclasses: markers 'python_version < "3.7"' don't match your environment
Collecting yacs>=0.1.8
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting fvcore<0.1.6,>=0.1.5
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting iopath<0.1.10,>=0.1.7
  Downloading iopath-0.1.9-py3-none-any.whl.metadata (370 bytes)
Collecting hydra-core>=1.1
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)

In [2]:
import sys
import os
import cv2
import torch
import matplotlib.pyplot as plt

# Make sure detectron2 cloned repo is in sys.path
sys.path.insert(0, os.path.abspath('./detectron2'))

# Now import Detectron2 modules from the cloned source
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.utils.visualizer import Visualizer, ColorMode

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from detectron2.data.datasets import register_coco_instances

# Adjust paths below to your actual folder locations
dataset_root = "/content/drive/MyDrive/zerowaste dataset" # Set your path here

# Ensure datasets are not registered multiple times by checking if they exist
if "zerowaste_train" not in DatasetCatalog.list():
    register_coco_instances(
        "zerowaste_train",
        {},
        os.path.join(dataset_root, "splits_final_deblurred/train/labels.json"),
        os.path.join(dataset_root, "splits_final_deblurred/train/data")
    )

if "zerowaste_val" not in DatasetCatalog.list():
    register_coco_instances(
        "zerowaste_val",
        {},
        os.path.join(dataset_root, "splits_final_deblurred/val/labels.json"),
        os.path.join(dataset_root, "splits_final_deblurred/val/data")
    )

if "zerowaste_test" not in DatasetCatalog.list():
    register_coco_instances(
        "zerowaste_test",
        {},
        os.path.join(dataset_root, "splits_final_deblurred/test/labels.json"),
        os.path.join(dataset_root, "splits_final_deblurred/test/data")
    )

In [5]:
import os
train_path = "/content/drive/MyDrive/zerowaste dataset/splits_final_deblurred/train"

print("Files in train folder:", os.listdir(train_path))
print("Files in train/data folder:", os.listdir(os.path.join(train_path, "data")))

Files in train folder: ['labels.json', '.DS_Store', 'data', 'sem_seg']
Files in train/data folder: ['09_frame_027200.PNG', '09_frame_027400.PNG', '09_frame_028800.PNG', '09_frame_028000.PNG', '09_frame_033000.PNG', '09_frame_031200.PNG', '09_frame_030600.PNG', '09_frame_032800.PNG', '09_frame_032100.PNG', '09_frame_030300.PNG', '09_frame_032500.PNG', '09_frame_031900.PNG', '09_frame_032200.PNG', '09_frame_032400.PNG', '09_frame_030900.PNG', '09_frame_029900.PNG', '09_frame_030700.PNG', '09_frame_030400.PNG', '09_frame_032700.PNG', '09_frame_031100.PNG', '09_frame_030200.PNG', '09_frame_032000.PNG', '09_frame_030500.PNG', '09_frame_031400.PNG', '09_frame_031500.PNG', '09_frame_032900.PNG', '09_frame_030800.PNG', '09_frame_033100.PNG', '09_frame_030100.PNG', '09_frame_031800.PNG', '09_frame_030000.PNG', '09_frame_029800.PNG', '09_frame_031000.PNG', '09_frame_031300.PNG', '09_frame_032300.PNG', '09_frame_032600.PNG', '09_frame_031600.PNG', '09_frame_031700.PNG', '09_frame_033800.PNG', '09

In [6]:
from detectron2.data import DatasetCatalog, MetadataCatalog

# Check the dataset length
print(f"Train dataset size: {len(DatasetCatalog.get('zerowaste_train'))}")
print(f"Val dataset size: {len(DatasetCatalog.get('zerowaste_val'))}")

# Load metadata (class names, etc.)
metadata = MetadataCatalog.get("zerowaste_train")
print(metadata.get("thing_classes"))  # Should print your class names if included in JSON

Train dataset size: 3002
Val dataset size: 572
['rigid_plastic', 'cardboard', 'metal', 'soft_plastic']


In [7]:
# 4. Setup config for fully supervised Mask R-CNN training
def setup_cfg_fully_supervised():
    cfg = get_cfg()
    cfg.merge_from_file("detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.DATASETS.TRAIN = ("zerowaste_train",)
    cfg.DATASETS.TEST = ("zerowaste_val",)
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"  # COCO pretrain
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.SOLVER.BASE_LR = 0.0025  # Adjust learning rate
    cfg.SOLVER.MAX_ITER = 1000  # Reduced iterations to decrease training time
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # Cardboard, soft plastic, rigid plastic, metal
    cfg.OUTPUT_DIR = "./output/zerowaste_f"
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    return cfg

In [8]:
# 5. Fully supervised training
cfg_fully = setup_cfg_fully_supervised()
trainer = DefaultTrainer(cfg_fully)
trainer.resume_or_load(resume=False)
trainer.train()

[08/06 06:35:07 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f10217.pkl: 178MB [00:03, 44.5MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[08/06 06:35:12 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[08/06 06:38:40 d2.utils.events]:  eta: 0:41:53  iter: 19  total_loss: 2.742  loss_cls: 1.595  loss_box_reg: 0.3071  loss_mask: 0.691  loss_rpn_cls: 0.1146  loss_rpn_loc: 0.03509    time: 10.9223  last_time: 2.5121  data_time: 7.5728  last_data_time: 0.0444   lr: 4.9952e-05  max_mem: 10593M
[08/06 06:39:41 d2.utils.events]:  eta: 0:41:27  iter: 39  total_loss: 1.853  loss_cls: 0.7572  loss_box_reg: 0.3291  loss_mask: 0.668  loss_rpn_cls: 0.09702  loss_rpn_loc: 0.03358    time: 6.5860  last_time: 2.5918  data_time: 0.0412  last_data_time: 0.0512   lr: 9.9902e-05  max_mem: 10593M
[08/06 06:40:35 d2.utils.events]:  eta: 0:40:35  iter: 59  total_loss: 1.588  loss_cls: 0.4705  loss_box_reg: 0.3732  loss_mask: 0.6257  loss_rpn_cls: 0.08792  loss_rpn_loc: 0.03714    time: 5.2447  last_time: 2.8536  data_time: 0.0649  last_data_time: 0.3758   lr: 0.00014985  max_mem: 10593M
[08/06 06:41:30 d2.utils.events]:  eta: 0:39:50  iter: 79  total_loss: 1.582  loss_cls: 0.4723  loss_box_reg: 0.4349  los



In [9]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Create evaluator for your registered validation dataset
evaluator = COCOEvaluator("zerowaste_val", cfg_fully, False, output_dir="./output/zerowaste_val_eval")

# Build validation data loader
val_loader = build_detection_test_loader(cfg_fully, "zerowaste_val")

# Run inference and print results
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
print(eval_results)

[08/06 07:25:38 d2.evaluation.coco_evaluation]: Fast COCO eval is not built. Falling back to official COCO eval.
[08/06 07:25:39 d2.data.datasets.coco]: Loaded 572 images in COCO format from /content/drive/MyDrive/zerowaste dataset/splits_final_deblurred/val/labels.json
[08/06 07:25:39 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[08/06 07:25:39 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[08/06 07:25:39 d2.data.common]: Serializing 572 elements to byte tensors and concatenating them all ...
[08/06 07:25:39 d2.data.common]: Serialized dataset takes 2.09 MiB
[08/06 07:25:39 d2.evaluation.evaluator]: Start inference on 572 batches
[08/06 07:25:45 d2.evaluation.evaluator]: Inference done 11/572. Dataloading: 0.0022 s/iter. Inference: 0.1472 s/iter. Eval: 0.1488 s/iter. Total: 0.2982 s/iter. ETA=0:02:47
[08/06 07:25: