In [1]:
# Cell 1: Imports and Dataset Registration Function

import os
import json
import torch
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def register_dataset_with_debug(name, json_file, image_folders, subset_size=None):
    def load_coco_json():
        print(f"Loading JSON file: {json_file}")
        with open(json_file, 'r') as f:
            data = json.load(f)
        
        images = data['images']
        annotations = data['annotations']
        categories = data['categories']
        
        print(f"Total images in JSON: {len(images)}")
        print(f"Total annotations in JSON: {len(annotations)}")
        
        if subset_size:
            images = images[:subset_size]
            image_ids = set(img['id'] for img in images)
            annotations = [ann for ann in annotations if ann['image_id'] in image_ids]
        
        dataset_dicts = []
        images_not_found = 0
        for img in images:
            record = {}
            file_name = img["file_name"]
            
            # Check all image folders for the file
            file_path = None
            for folder in image_folders:
                potential_path = os.path.join(folder, file_name)
                if os.path.exists(potential_path):
                    file_path = potential_path
                    break
            
            if file_path is None:
                images_not_found += 1
                if images_not_found <= 5:  # Print only the first 5 not found images
                    print(f"Warning: Image file not found: {file_name}")
                continue  # Skip this image
            
            record["file_name"] = file_path
            record["height"] = img["height"]
            record["width"] = img["width"]
            record["image_id"] = img["id"]
            
            anns = [ann for ann in annotations if ann["image_id"] == img["id"]]
            objs = []
            for ann in anns:
                obj = {
                    "bbox": ann["bbox"],
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "category_id": ann["category_id"] - 1,  # Convert 1-indexed to 0-indexed
                    "segmentation": ann["segmentation"],
                }
                objs.append(obj)
            record["annotations"] = objs
            dataset_dicts.append(record)
        
        print(f"Processed images: {len(dataset_dicts)}")
        print(f"Images not found: {images_not_found}")
        print(f"Processed annotations: {sum(len(record['annotations']) for record in dataset_dicts)}")
        return dataset_dicts, categories
    
    dataset_dicts, categories = load_coco_json()
    DatasetCatalog.register(name, lambda: dataset_dicts)
    MetadataCatalog.get(name).set(thing_classes=[cat["name"] for cat in categories])
    print(f"Dataset '{name}' registered successfully!")
    print(f"Number of images: {len(dataset_dicts)}")
    print(f"Number of annotations: {sum(len(record['annotations']) for record in dataset_dicts)}")
    print(f"Categories: {[cat['name'] for cat in categories]}")

In [13]:
# Cell 2: Dataset Setup and Registration

# Define your dataset paths here
train_json = "C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/train.json"
val_json = "C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/val.json"
train_image_folders = "C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/train"
val_image_folders = "C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/val"
# Option to use full dataset or subset
use_subset_train = True
use_subset_val = False  # Changed to False to try loading the full validation set
subset_size_train = 11000
subset_size_val = 9000  # Increased for validation, adjust as needed

# Register datasets
print("Registering training dataset:")
register_dataset_with_debug("publaynet_train", train_json, train_image_folders, subset_size_train if use_subset_train else None)

print("\nRegistering validation dataset:")
register_dataset_with_debug("publaynet_val", val_json, val_image_folders, subset_size_val if use_subset_val else None)

print("\nDataset setup complete!")

Registering training dataset:
Loading JSON file: C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/train.json
Total images in JSON: 335703
Total annotations in JSON: 3263046
Processed images: 0
Images not found: 11000
Processed annotations: 0


AssertionError: Dataset 'publaynet_train' is already registered!

In [5]:
# Cell: Investigate Validation JSON

import json

def check_json_file(json_file):
    with open(json_file, 'r') as f:
        data = json.load(f)
    
    print(f"JSON file: {json_file}")
    print(f"Keys in JSON: {list(data.keys())}")
    print(f"Number of images: {len(data.get('images', []))}")
    print(f"Number of annotations: {len(data.get('annotations', []))}")
    print(f"Number of categories: {len(data.get('categories', []))}")
    
    if 'images' in data and len(data['images']) > 0:
        print("\nSample image entry:")
        print(json.dumps(data['images'][0], indent=2))
    
    if 'annotations' in data and len(data['annotations']) > 0:
        print("\nSample annotation entry:")
        print(json.dumps(data['annotations'][0], indent=2))

print("Checking training JSON file:")
check_json_file(train_json)

print("\nChecking validation JSON file:")
check_json_file(val_json)



Checking training JSON file:
JSON file: C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/train.json
Keys in JSON: ['images', 'annotations', 'categories']
Number of images: 335703
Number of annotations: 3263046
Number of categories: 5

Sample image entry:
{
  "file_name": "PMC3866684_00003.jpg",
  "height": 811,
  "id": 0,
  "width": 613
}

Sample annotation entry:
{
  "segmentation": [
    [
      52.38,
      444.87,
      291.97,
      444.87,
      291.97,
      456.42,
      291.97,
      456.42,
      291.97,
      465.8,
      291.97,
      465.8,
      291.97,
      476.27,
      291.97,
      476.27,
      291.97,
      487.8,
      291.97,
      487.8,
      291.97,
      498.26,
      86.41,
      498.26,
      86.41,
      508.73,
      40.42,
      508.73,
      40.42,
      497.17,
      40.42,
      497.17,
      40.42,
      486.71,
      40.42,
      486.71,
      40.42,
      476.27,
      40.42,
      476.27,
      40.42,
      466.88,
      40.42,
      466.88,
     

In [9]:
# Cell: Verify Image Folders

import os

def list_image_files(folder):
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    image_files = [f for f in os.listdir(folder) if os.path.splitext(f.lower())[1] in image_extensions]
    return image_files

print("Checking image folders:")
for folder in image_folders:
    files = list_image_files(folder)
    print(f"\nFolder: {folder}")
    print(f"Number of image files: {len(files)}")
    if files:
        print("Sample files:")
        for file in files[:5]:  # Print first 5 files
            print(f"  {file}")
    else:
        print("No image files found in this folder.")



Checking image folders:

Folder: C:\Users\Spawtan\Pictures\Lamdba\train-0.tar\publaynet\train
Number of image files: 47958
Sample files:
  PMC1064093_00000.jpg
  PMC1064098_00008.jpg
  PMC1064100_00000.jpg
  PMC1064108_00002.jpg
  PMC1064108_00006.jpg

Folder: C:\Users\Spawtan\Pictures\Lamdba\train-1.tar\publaynet\train
Number of image files: 47958
Sample files:
  PMC1064108_00003.jpg
  PMC1064108_00007.jpg
  PMC1064139_00006.jpg
  PMC1079883_00000.jpg
  PMC1079885_00002.jpg

Folder: C:\Users\Spawtan\Pictures\Lamdba\train-2.tar\publaynet\train
Number of image files: 47958
Sample files:
  PMC1064098_00000.jpg
  PMC1064103_00000.jpg
  PMC1064139_00007.jpg
  PMC1064139_00008.jpg
  PMC1064866_00001.jpg

Folder: C:\Users\Spawtan\Pictures\Lamdba\train-3.tar\publaynet\train
Number of image files: 47958
Sample files:
  PMC1064093_00003.jpg
  PMC1064132_00000.jpg
  PMC1064866_00000.jpg
  PMC1079789_00004.jpg
  PMC1079792_00000.jpg

Folder: C:\Users\Spawtan\Pictures\Lamdba\train-4.tar\publaynet\

In [7]:
# Cell 3: Model Configuration

from detectron2.config import get_cfg
from detectron2 import model_zoo

def setup_configuration():
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("publaynet_train",)
    cfg.DATASETS.TEST = ("publaynet_val",)
    cfg.OUTPUT_DIR = "./output"
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # PubLayNet has 5 classes
    cfg.SOLVER.IMS_PER_BATCH = 64 # Adjust based on your GPU memory
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 3000  # Adjust based on dataset size and training time
    cfg.SOLVER.STEPS = (2100, 2500)  # Adjust based on MAX_ITER
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    return cfg

cfg = setup_configuration()
print("Model configuration complete. Configuration details:")
print(cfg)



Model configuration complete. Configuration details:
CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4
  REPEAT_SQRT: True
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('publaynet_val',)
  TRAIN: ('publaynet_train',)
FLOAT32_PRECISION: 
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: 

In [8]:
# Cell 4: Model Training

from detectron2.engine import DefaultTrainer
import os

def train_model(cfg):
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    print("Starting training...")
    trainer.train()


train_model(cfg)

print("Training code is ready. Uncomment the last line in this cell to start training.")
print("Note: Training may take a long time depending on your dataset size and hardware.")

[32m[09/22 16:30:49 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (6, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (6,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (5, 256, 1, 1) 

Starting training...
[32m[09/22 16:30:49 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[09/22 16:33:05 d2.utils.events]: [0m eta: 7:07:44  iter: 19  total_loss: 7.542  loss_cls: 1.684  loss_box_reg: 0.7344  loss_mask: 0.684  loss_rpn_cls: 3.946  loss_rpn_loc: 0.4988    time: 7.1448  last_time: 12.7252  data_time: 0.2661  last_data_time: 0.0626   lr: 4.9953e-06  max_mem: 14712M
[32m[09/22 16:35:51 d2.utils.events]: [0m eta: 8:08:01  iter: 39  total_loss: 4.56  loss_cls: 1.559  loss_box_reg: 0.7404  loss_mask: 0.6729  loss_rpn_cls: 1.184  loss_rpn_loc: 0.4124    time: 7.7704  last_time: 1.0902  data_time: 0.0737  last_data_time: 0.0708   lr: 9.9902e-06  max_mem: 15434M
[32m[09/22 16:39:10 d2.utils.events]: [0m eta: 7:54:54  iter: 59  total_loss: 3.365  loss_cls: 1.33  loss_box_reg: 0.7338  loss_mask: 0.667  loss_rpn_cls: 0.2291  loss_rpn_loc: 0.3837    time: 8.5163  last_time: 22.9894  data_time: 0.0750  last_data_time: 0.0644   lr: 1.4985e-05  max_mem: 15434M


AssertionError: Dataset 'publaynet_val' is empty!

In [19]:
import cv2
import torch
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

def test_on_single_image(image_path, predictor):
    # Read the image
    img = cv2.imread(image_path)
    
    # Make prediction
    outputs = predictor(img)
    
    # Get metadata
    metadata = MetadataCatalog.get("publaynet_val")
    
    # Visualize the predictions
    v = Visualizer(img[:, :, ::-1], metadata=metadata, scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    
    # Convert the image back to BGR for displaying with OpenCV
    result_image = out.get_image()[:, :, ::-1]
    
    # Display the result
    cv2.imshow("Prediction", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Optionally, save the result
    cv2.imwrite("prediction_result.jpg", result_image)
    print("Prediction result saved as 'prediction_result.jpg'")
    
    # Print detection results
    classes = outputs["instances"].pred_classes.cpu().numpy()
    scores = outputs["instances"].scores.cpu().numpy()
    boxes = outputs["instances"].pred_boxes.tensor.cpu().numpy()
    
    for cls, score, box in zip(classes, scores, boxes):
        print(f"Class: {metadata.thing_classes[cls]}, Score: {score:.3f}")
        print(f"Bounding Box: {box}")
        print("---")

# Set up the configuration
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("publaynet_train",)
cfg.DATASETS.TEST = ("publaynet_val",)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # PubLayNet has 5 classes
cfg.MODEL.WEIGHTS = "C:/Users/Spawtan/Pictures/Lamdba/output/model_final.pth"  # Update this path to your trained model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Create predictor
predictor = DefaultPredictor(cfg)

# Set up metadata for PubLayNet
MetadataCatalog.get("publaynet_val").set(thing_classes=["text", "title", "list", "table", "figure"])

# Test on a single image
test_image_path = "C:/Users/Spawtan/Pictures/Lamdba/enter/publaynet/val/00001286.jpg"  # Replace with the path to your test image
test_on_single_image(test_image_path, predictor)

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (5, 1024) in the checkpoint but (6, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (5,) in the checkpoint but (6,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (16, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (16,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (4, 256, 1, 1) in the checkpoint but (5, 256, 1, 1) in th

AttributeError: 'NoneType' object has no attribute 'shape'