In [1]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
import os
import xml.etree.ElementTree as ET
from detectron2.structures import BoxMode
import json
from PIL import Image

# Load the configuration and model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # Update this to match the number of classes in PRImA
cfg.MODEL.WEIGHTS = "C:/Users/Spawtan/Pictures/Lamdba/outputbackup/model_final.pth"  # Update this path
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Define category mapping
CATEGORY_MAPPING = {
    "TextRegion": 0,
    "ImageRegion": 1,
    "TableRegion": 2,
    "SeparatorRegion": 3
}

# Function to load PRImA dataset
def get_prima_dicts(dataset_dir):
    dataset_dicts = []
    img_dir = os.path.join(dataset_dir, "Images")
    xml_dir = os.path.join(dataset_dir, "XML")
    
    print(f"Processing XML files in: {xml_dir}")
    
    # Define the namespace
    namespace = {'pc': 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19'}
    
    for xml_file in os.listdir(xml_dir):
        if xml_file.endswith('.xml'):
            print(f"Processing {xml_file}")
            try:
                record = {}
                
                # Parse XML
                tree = ET.parse(os.path.join(xml_dir, xml_file))
                root = tree.getroot()
                
                # Get image file path
                page = root.find('.//pc:Page', namespace)
                if page is None:
                    print(f"Warning: No 'Page' element found in {xml_file}")
                    continue
                
                img_file_name = page.get('imageFilename')
                img_file = os.path.join(img_dir, img_file_name)
                
                if not os.path.exists(img_file):
                    print(f"Warning: Image file not found: {img_file}")
                    continue
                
                # Get actual image dimensions
                with Image.open(img_file) as img:
                    width, height = img.size
                
                record["file_name"] = img_file
                record["image_id"] = xml_file
                record["height"] = height
                record["width"] = width
                
                objs = []
                for region in root.findall('.//pc:TextRegion', namespace) + root.findall('.//pc:ImageRegion', namespace) + root.findall('.//pc:TableRegion', namespace) + root.findall('.//pc:SeparatorRegion', namespace):
                    category = region.tag.split('}')[-1]  # Get the tag name without namespace
                    
                    coords = region.find('.//pc:Coords', namespace)
                    if coords is None:
                        print(f"Warning: No 'Coords' element found for a region in {xml_file}")
                        continue
                    
                    points = []
                    for point in coords.findall('.//pc:Point', namespace):
                        x = int(point.get('x'))
                        y = int(point.get('y'))
                        # Ensure coordinates are within image boundaries
                        x = max(0, min(x, width - 1))
                        y = max(0, min(y, height - 1))
                        points.append((x, y))
                    
                    if len(points) < 3:
                        print(f"Warning: Less than 3 points for a region in {xml_file}")
                        continue
                    
                    x_coords, y_coords = zip(*points)
                    
                    obj_dict = {
                        "bbox": [min(x_coords), min(y_coords), max(x_coords), max(y_coords)],
                        "bbox_mode": BoxMode.XYXY_ABS,
                        "category_id": CATEGORY_MAPPING[category],
                        "segmentation": [list(sum(points, ()))],
                        "iscrowd": 0
                    }
                    objs.append(obj_dict)
                
                if not objs:
                    print(f"Warning: No valid regions found in {xml_file}")
                    continue
                
                record["annotations"] = objs
                dataset_dicts.append(record)
            except Exception as e:
                print(f"Error processing {xml_file}: {str(e)}")
    
    print(f"Total records processed: {len(dataset_dicts)}")
    return dataset_dicts

# Register the PRImA dataset
prima_dataset_dir = "C:/Users/Spawtan/Pictures/Lamdba/Prima/PRImA_LayoutAnalysisDataset/PRImA Layout Analysis Dataset"  # Update this path
DatasetCatalog.register("prima_test", lambda: get_prima_dicts(prima_dataset_dir))
MetadataCatalog.get("prima_test").set(thing_classes=list(CATEGORY_MAPPING.keys()))

# Try to get the dataset to see if it's loading correctly
prima_dataset = DatasetCatalog.get("prima_test")
print(f"Number of items in prima_test dataset: {len(prima_dataset)}")

# If the dataset is not empty, print some information about the first item
if prima_dataset:
    print("\nFirst item in the dataset:")
    print(json.dumps(prima_dataset[0], indent=2))
else:
    print("Dataset is empty. Check the warnings and errors above.")

# Prepare the evaluator and data loader
evaluator = COCOEvaluator("prima_test", cfg, False, output_dir="C:/Users/Spawtan/Pictures/Lamdba/outputbackup")
val_loader = build_detection_test_loader(cfg, "prima_test")

# Load the model
predictor = DefaultPredictor(cfg)

# Run evaluation
results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(results)

Processing XML files in: C:/Users/Spawtan/Pictures/Lamdba/Prima/PRImA_LayoutAnalysisDataset/PRImA Layout Analysis Dataset\XML
Processing 00000086.xml
Processing 00000087.xml
Processing 00000089.xml
Processing 00000090.xml
Processing 00000122.xml
Processing 00000124.xml
Processing 00000125.xml
Processing 00000126.xml
Processing 00000127.xml
Processing 00000128.xml
Processing 00000129.xml
Processing 00000130.xml
Processing 00000269.xml
Processing 00000272.xml
Processing 00000273.xml
Processing 00000352.xml
Processing 00000356.xml
Processing 00000394.xml
Processing 00000401.xml
Processing 00000402.xml
Processing 00000403.xml
Processing 00000405.xml
Processing 00000406.xml
Processing 00000407.xml
Processing 00000408.xml
Processing 00000421.xml
Processing 00000625.xml
Processing 00000636.xml
Processing 00000657.xml
Processing 00000659.xml
Processing 00000661.xml
Processing 00000662.xml
Processing 00000663.xml
Processing 00000664.xml
Processing 00000671.xml
Processing 00000672.xml
Processing

COCO Evaluator instantiated using config, this is deprecated behavior. Please pass in explicit arguments instead.
Using previously cached COCO format annotations at 'C:/Users/Spawtan/Pictures/Lamdba/outputbackup\prima_test_coco_format.json'. You need to clear the cache file if your dataset has been modified.


Processing pc-00001298.xml
Processing pc-00001308.xml
Processing pc-00001317.xml
Total records processed: 447
Number of items in prima_test dataset: 447

First item in the dataset:
{
  "file_name": "C:/Users/Spawtan/Pictures/Lamdba/Prima/PRImA_LayoutAnalysisDataset/PRImA Layout Analysis Dataset\\Images\\00000086.tif",
  "image_id": "00000086.xml",
  "height": 3275,
  "width": 2442,
  "annotations": [
    {
      "bbox": [
        2140,
        3184,
        2182,
        3209
      ],
      "bbox_mode": 0,
      "category_id": 0,
      "segmentation": [
        [
          2182,
          3184,
          2182,
          3209,
          2140,
          3209,
          2140,
          3184
        ]
      ],
      "iscrowd": 0
    },
    {
      "bbox": [
        1375,
        3191,
        2061,
        3214
      ],
      "bbox_mode": 0,
      "category_id": 0,
      "segmentation": [
        [
          2061,
          3191,
          2061,
          3214,
          1375,
          32

  return torch.load(f, map_location=torch.device("cpu"))
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (6, 1024) in the checkpoint but (5, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (6,) in the checkpoint but (5,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (20, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (20,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: 

Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.

In [7]:
import logging
logging.basicConfig(level=logging.INFO)

# Replace your existing inference_on_dataset call with this:
results = inference_on_dataset(
    predictor.model, 
    val_loader, 
    evaluator,
)
print("Raw results:", results)

INFO:detectron2.evaluation.evaluator:Start inference on 447 batches
INFO:detectron2.evaluation.evaluator:Inference done 11/447. Dataloading: 0.0006 s/iter. Inference: 0.0949 s/iter. Eval: 0.0602 s/iter. Total: 0.1557 s/iter. ETA=0:01:07
INFO:detectron2.evaluation.evaluator:Inference done 42/447. Dataloading: 0.0009 s/iter. Inference: 0.0940 s/iter. Eval: 0.0693 s/iter. Total: 0.1642 s/iter. ETA=0:01:06
INFO:detectron2.evaluation.evaluator:Inference done 73/447. Dataloading: 0.0009 s/iter. Inference: 0.0929 s/iter. Eval: 0.0705 s/iter. Total: 0.1644 s/iter. ETA=0:01:01
INFO:detectron2.evaluation.evaluator:Inference done 105/447. Dataloading: 0.0009 s/iter. Inference: 0.0915 s/iter. Eval: 0.0708 s/iter. Total: 0.1633 s/iter. ETA=0:00:55
INFO:detectron2.evaluation.evaluator:Inference done 136/447. Dataloading: 0.0009 s/iter. Inference: 0.0916 s/iter. Eval: 0.0702 s/iter. Total: 0.1628 s/iter. ETA=0:00:50
INFO:detectron2.evaluation.evaluator:Inference done 169/447. Dataloading: 0.0009 s/it

Raw results: OrderedDict()


In [2]:
# After running the evaluation
results = inference_on_dataset(predictor.model, val_loader, evaluator)
print("Evaluation Results:")
for task, result in results.items():
    print(f"\nTask: {task}")
    for metric, value in result.items():
        print(f"{metric}: {value}")

Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.

In [3]:
import torch
from detectron2.structures import Instances

def calculate_basic_metrics(model, data_loader):
    total_correct = 0
    total_predictions = 0
    for batch in data_loader:
        with torch.no_grad():
            outputs = model(batch)
        for i, output in enumerate(outputs):
            if "instances" not in output:
                print(f"Warning: 'instances' not found in output for item {i}")
                continue
            pred_instances = output["instances"]
            if not isinstance(pred_instances, Instances):
                print(f"Warning: output['instances'] is not an Instances object for item {i}")
                continue
            pred_classes = pred_instances.pred_classes

            # Check if ground truth data is available
            if "instances" not in batch[i]:
                print(f"Warning: 'instances' not found in ground truth for item {i}")
                print(f"Available keys in ground truth: {batch[i].keys()}")
                continue

            gt_instances = batch[i]["instances"]
            if not isinstance(gt_instances, Instances):
                print(f"Warning: ground truth 'instances' is not an Instances object for item {i}")
                continue

            if not hasattr(gt_instances, "gt_classes"):
                print(f"Warning: 'gt_classes' not found in ground truth instances for item {i}")
                print(f"Available attributes in ground truth instances: {gt_instances.__dict__.keys()}")
                continue

            gt_classes = gt_instances.gt_classes
            correct = (pred_classes == gt_classes).sum().item()
            total_correct += correct
            total_predictions += len(pred_classes)

    accuracy = total_correct / total_predictions if total_predictions > 0 else 0
    print(f"Basic Accuracy: {accuracy:.4f}")
    print(f"Total correct predictions: {total_correct}")
    print(f"Total predictions: {total_predictions}")

# Run the function
calculate_basic_metrics(predictor.model, val_loader)

Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth: dict_keys(['file_name', 'image_id', 'height', 'width', 'image'])
Available keys in ground truth

In [15]:
import json

# After running the evaluation
with open('evaluation_results.json', 'w') as f:
    json.dump(results, f, indent=2)
print("\nDetailed results saved to 'evaluation_results.json'")


Detailed results saved to 'evaluation_results.json'


In [4]:
import cv2
import numpy as np
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

def test_on_single_image(image_path, predictor, metadata):
    # Read the image
    img = cv2.imread(image_path)
    
    # Make prediction
    outputs = predictor(img)
    
    # Visualize the predictions
    v = Visualizer(img[:, :, ::-1], metadata=metadata, scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    
    # Convert the image back to BGR for displaying with OpenCV
    result_image = out.get_image()[:, :, ::-1]
    
    # Display the result
    cv2.imshow("Prediction", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Optionally, save the result
    cv2.imwrite("prediction_result.jpg", result_image)
    print("Prediction result saved as 'prediction_result.jpg'")
    
    # Print detection results
    classes = outputs["instances"].pred_classes.cpu().numpy()
    scores = outputs["instances"].scores.cpu().numpy()
    boxes = outputs["instances"].pred_boxes.tensor.cpu().numpy()
    
    for cls, score, box in zip(classes, scores, boxes):
        print(f"Class: {metadata.thing_classes[cls]}, Score: {score:.3f}")
        print(f"Bounding Box: {box}")
        print("---")

# After your model evaluation, add this code to test on a single image
print("\nTesting model on a single image...")
test_image_path = "/home/leo/Downloads/00000088.jpg"  # Replace with the path to your test image
metadata = MetadataCatalog.get("prima_test")
test_on_single_image(test_image_path, predictor, metadata)


Testing model on a single image...


AttributeError: 'NoneType' object has no attribute 'shape'

In [5]:
import random
from detectron2.data import DatasetCatalog, MetadataCatalog

def split_prima_dataset(dataset_dicts, train_ratio=0.8):
    random.shuffle(dataset_dicts)
    split_index = int(len(dataset_dicts) * train_ratio)
    return dataset_dicts[:split_index], dataset_dicts[split_index:]

# Get the full PRImA dataset
prima_dataset = DatasetCatalog.get("prima_test")

# Split the dataset
prima_train, prima_val = split_prima_dataset(prima_dataset)

# Register the split datasets
DatasetCatalog.register("prima_train", lambda: prima_train)
DatasetCatalog.register("prima_val", lambda: prima_val)

# Set metadata for both splits
for d in ["train", "val"]:
    MetadataCatalog.get(f"prima_{d}").set(thing_classes=["TextRegion", "ImageRegion", "TableRegion", "SeparatorRegion"])

print(f"Train set size: {len(prima_train)}")
print(f"Validation set size: {len(prima_val)}")

Processing XML files in: C:/Users/Spawtan/Pictures/Lamdba/Prima/PRImA_LayoutAnalysisDataset/PRImA Layout Analysis Dataset\XML
Processing 00000086.xml
Processing 00000087.xml
Processing 00000089.xml
Processing 00000090.xml
Processing 00000122.xml
Processing 00000124.xml
Processing 00000125.xml
Processing 00000126.xml
Processing 00000127.xml
Processing 00000128.xml
Processing 00000129.xml
Processing 00000130.xml
Processing 00000269.xml
Processing 00000272.xml
Processing 00000273.xml
Processing 00000352.xml
Processing 00000356.xml
Processing 00000394.xml
Processing 00000401.xml
Processing 00000402.xml
Processing 00000403.xml
Processing 00000405.xml
Processing 00000406.xml
Processing 00000407.xml
Processing 00000408.xml
Processing 00000421.xml
Processing 00000625.xml
Processing 00000636.xml
Processing 00000657.xml
Processing 00000659.xml
Processing 00000661.xml
Processing 00000662.xml
Processing 00000663.xml
Processing 00000664.xml
Processing 00000671.xml
Processing 00000672.xml
Processing

In [6]:
from detectron2.config import get_cfg
from detectron2 import model_zoo

def setup_cfg(train_dataset, val_dataset, num_classes, weights_path):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = (train_dataset,)
    cfg.DATASETS.TEST = (val_dataset,)
    cfg.MODEL.WEIGHTS = weights_path
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000  # adjust as needed
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.TEST.EVAL_PERIOD = 100  # Evaluate every 100 iterations
    return cfg

cfg = setup_cfg("prima_train", "prima_val", num_classes=4, weights_path="C:/Users/Spawtan/Pictures/Lamdba/outputbackup/model_final.pth")

In [7]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = cfg.OUTPUT_DIR
        return COCOEvaluator(dataset_name, cfg, False, output_folder)

# Train the model
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[09/23 15:28:59 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

  return torch.load(f, map_location=torch.device("cpu"))
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (6, 1024) in the checkpoint but (5, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (6,) in the checkpoint but (5,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (20, 1024) in the checkpoint but (16, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (20,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: 

[32m[09/23 15:28:59 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[09/23 15:29:08 d2.utils.events]: [0m eta: 0:03:27  iter: 19  total_loss: 5.903  loss_cls: 1.594  loss_box_reg: 0.7977  loss_mask: 0.6852  loss_rpn_cls: 1.947  loss_rpn_loc: 0.8562    time: 0.2151  last_time: 0.1836  data_time: 0.1916  last_data_time: 0.0016   lr: 4.9953e-06  max_mem: 2754M
[32m[09/23 15:29:19 d2.utils.events]: [0m eta: 0:02:59  iter: 39  total_loss: 3.56  loss_cls: 0.8855  loss_box_reg: 0.7643  loss_mask: 0.4991  loss_rpn_cls: 0.5193  loss_rpn_loc: 0.5771    time: 0.1940  last_time: 0.1898  data_time: 0.0020  last_data_time: 0.0041   lr: 9.9902e-06  max_mem: 2754M
[32m[09/23 15:29:22 d2.utils.events]: [0m eta: 0:02:51  iter: 59  total_loss: 2.915  loss_cls: 0.6102  loss_box_reg: 0.7131  loss_mask: 0.3994  loss_rpn_cls: 0.3092  loss_rpn_loc: 0.9045    time: 0.1900  last_time: 0.2036  data_time: 0.0021  last_data_time: 0.0023   lr: 1.4985e-05  max_mem: 2754M
[32m[09/23 15:29:26

In [12]:
from detectron2.engine import DefaultPredictor
import cv2

def test_on_single_image(image_path, predictor, metadata):
    img = cv2.imread(image_path)
    outputs = predictor(img)
    
    v = Visualizer(img[:, :, ::-1], metadata=metadata, scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    
    
    cv2.imshow("Prediction", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    cv2.imwrite("prediction_result.jpg", out.get_image()[:, :, ::-1])
    print("Prediction result saved as 'prediction_result.jpg'")
    
    for i, (cls, score, box) in enumerate(zip(outputs["instances"].pred_classes, 
                                              outputs["instances"].scores, 
                                              outputs["instances"].pred_boxes)):
        print(f"Detection {i+1}:")
        print(f"  Class: {metadata.thing_classes[cls]}")
        print(f"  Score: {score:.3f}")
        print(f"  Bounding Box: {box.tolist()}")

# Load the fine-tuned model
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
predictor = DefaultPredictor(cfg)

# Test on a single image
test_image_path = "C:/Users/Spawtan/Pictures/Lamdba/00001286.jpg"  # Replace with an actual image path
metadata = MetadataCatalog.get("prima_val")
test_on_single_image(test_image_path, predictor, metadata)

[32m[09/23 16:32:43 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./output\model_final.pth ...
Prediction result saved as 'prediction_result.jpg'
Detection 1:
  Class: TextRegion
  Score: 0.988
  Bounding Box: [123.53228759765625, 656.5155029296875, 698.7664794921875, 2098.555908203125]
Detection 2:
  Class: TextRegion
  Score: 0.987
  Bounding Box: [789.2470092773438, 2296.748291015625, 1606.8306884765625, 3007.339599609375]
Detection 3:
  Class: TextRegion
  Score: 0.985
  Bounding Box: [1644.395263671875, 2583.0390625, 2448.86865234375, 3006.533447265625]
Detection 4:
  Class: TextRegion
  Score: 0.961
  Bounding Box: [1917.158447265625, 3073.066162109375, 2472.582763671875, 3099.791015625]
Detection 5:
  Class: TextRegion
  Score: 0.960
  Bounding Box: [1642.6744384765625, 2291.7939453125, 2456.261474609375, 2569.520751953125]
Detection 6:
  Class: TextRegion
  Score: 0.954
  Bounding Box: [140.7581024169922, 121.69854736328125, 2263.4208984375, 496