In [3]:
# This code configures a Detectron2 Mask R-CNN model with the mask head disabled (so it acts as a box detector),
# runs inference on every .jpg/.png/.jpeg/.bmp image in a specified folder, prints the detected classes,
# visualizes the predictions with bounding boxes, and saves each output image with a "predicted_" prefix
# in the same folder.

import cv2
import os
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import torch

# Model-zoo entry that supplies both the architecture config and the pretrained weights.
zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"

# Start from Detectron2's defaults, then layer the zoo config and checkpoint on top.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    cfg.MODEL.DEVICE = 'cuda'
else:
    cfg.MODEL.DEVICE = 'cpu'

# Test-time overrides.
# MASK_ON = False builds the model without a mask head, so the checkpoint's
# roi_heads.mask_head.* weights are reported as unused when the predictor loads.
cfg.MODEL.MASK_ON = False
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
cfg.TEST.DETECTIONS_PER_IMAGE = 100
cfg.INPUT.MIN_SIZE_TEST = 1333

# Building the predictor instantiates the model and loads cfg.MODEL.WEIGHTS.
predictor = DefaultPredictor(cfg)


# Folder that is scanned for input images and that also receives the annotated outputs.
folder_path = 'C:/Users/alham/Desktop/test_images/'

# Outputs are written next to the inputs with this prefix. Files that already carry
# it are skipped as inputs, otherwise re-running the cell would feed the previous
# results back in and produce "predicted_predicted_..." images.
output_prefix = "predicted_"
image_extensions = (".jpg", ".png", ".jpeg", ".bmp")

# Loop-invariant: the metadata (and its class-name list) is the same for every image.
coco_metadata = MetadataCatalog.get("coco_2017_train")
coco_classes = coco_metadata.thing_classes

# sorted() gives a deterministic processing order; os.listdir() order is arbitrary.
for filename in sorted(os.listdir(folder_path)):
    # Case-insensitive extension match so that e.g. "IMG_001.JPG" is not silently ignored.
    if not filename.lower().endswith(image_extensions):
        continue
    if filename.startswith(output_prefix):
        continue

    image_path = os.path.join(folder_path, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None instead of
        # raising; passing that on would crash inside the predictor.
        print(f"Skipping unreadable image: {image_path}")
        continue

    # The image is passed to the predictor exactly as OpenCV loaded it (BGR order).
    outputs = predictor(img)
    instances = outputs["instances"].to("cpu")

    # tolist() yields plain Python ints, usable directly as list indices.
    for class_id in instances.pred_classes.tolist():
        print(f"Predicted Class: {coco_classes[class_id]}")

    # The visualizer is given an RGB image, so convert from OpenCV's BGR first and
    # convert the rendered result back to BGR before handing it to cv2.imwrite.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    v = Visualizer(img_rgb, coco_metadata, scale=1.2)
    out = v.draw_instance_predictions(instances)
    result_img_bgr = cv2.cvtColor(out.get_image(), cv2.COLOR_RGB2BGR)

    output_path = os.path.join(folder_path, f"{output_prefix}{filename}")
    # cv2.imwrite reports failure through its return value rather than an exception,
    # so only claim success when it actually wrote the file.
    if cv2.imwrite(output_path, result_img_bgr):
        print(f"Prediction result saved to {output_path}")
    else:
        print(f"Failed to save prediction result to {output_path}")


The checkpoint state_dict contains keys that are not used by the model:
  roi_heads.mask_head.mask_fcn1.{bias, weight}
  roi_heads.mask_head.mask_fcn2.{bias, weight}
  roi_heads.mask_head.mask_fcn3.{bias, weight}
  roi_heads.mask_head.mask_fcn4.{bias, weight}
  roi_heads.mask_head.deconv.{bias, weight}
  roi_heads.mask_head.predictor.{bias, weight}
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Predicted Class: vase
Predicted Class: potted plant
Predicted Class: bowl
Predicted Class: handbag
Predicted Class: cup
Predicted Class: potted plant
Predicted Class: vase
Predicted Class: toaster
Predicted Class: bowl
Predicted Class: suitcase
Predicted Class: potted plant
Predicted Class: vase
Predicted Class: vase
Prediction result saved to C:/Users/alham/Desktop/test_images/predicted_annie-spratt-PcS2mgh_hUA-unsplash.jpg
Predicted Class: chair
Predicted Class: bed
Predicted Class: bed
Predicted Class: bed
Prediction result saved to C:/Users/alham/Desktop/test_images/predicted_clay-banks-Fr62lAJFnII-unsplash.jpg
Predicted Class: vase
Predicted Class: book
Prediction result saved to C:/Users/alham/Desktop/test_images/predicted_priscilla-du-preez-i__4ck2ml4M-unsplash.jpg
