In [7]:
import os
import numpy as np
import json
from PIL import Image

In [8]:
# Dataset locations. The defaults are the original absolute paths; set the
# PID_RAW_DATA_DIR / PID_PROCESSED_ANN_DIR environment variables to point the
# notebook at another machine's data without editing this cell.
RAW_DATA_DIR = os.environ.get(
    "PID_RAW_DATA_DIR",
    r"/Users/mokshdutt/developer/P&ID/data/raw/paliwal_dataset",
)
PROCESSED_ANN_DIR = os.environ.get(
    "PID_PROCESSED_ANN_DIR",
    r"/Users/mokshdutt/developer/P&ID/data/processed/annotations",
)
# File name (inside PROCESSED_ANN_DIR) of the generated COCO annotation JSON.
COCO_ANN_FILE = "paliwal_coco.json"

In [9]:
# Verify paths: the raw dataset must already exist as a directory (a regular
# file at that location is rejected too), while the output directory is
# created on demand.
if not os.path.isdir(RAW_DATA_DIR):
    raise FileNotFoundError(f"RAW_DATA_DIR not found: {RAW_DATA_DIR}")
# exist_ok=True removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(PROCESSED_ANN_DIR, exist_ok=True)


In [10]:
def get_categories():
    """Return the COCO ``categories`` list, one ``{"id", "name"}`` dict per symbol class.

    The names below are example placeholders; ids are assigned in listing
    order, starting at 1.
    """
    symbol_names = (
        "valve",
        "pump",
        "tank",
        "instrument",
        # Add all classes present in your dataset
    )
    return [
        {"id": category_id, "name": symbol_name}
        for category_id, symbol_name in enumerate(symbol_names, start=1)
    ]

In [11]:
def convert_to_coco():
    """Convert the per-image ``<id>_symbols.npy`` files into one COCO JSON file.

    Every all-digit entry of ``RAW_DATA_DIR`` is treated as an image id. For an
    id to be converted, both ``RAW_DATA_DIR/Images/<id>.jpg`` and
    ``RAW_DATA_DIR/<id>/<id>_symbols.npy`` must exist; ids with no image (or no
    folder) are skipped silently, ids with an image but no symbols file are
    skipped and reported in a warning.

    Each row of a symbols array is unpacked as ``(symbol_id, bbox, class_id)``
    with ``bbox = (x1, y1, x2, y2)``; the box is written in COCO
    ``[x, y, width, height]`` form and ``class_id`` becomes ``category_id``.

    The result is written to ``PROCESSED_ANN_DIR/COCO_ANN_FILE`` and a summary
    line is printed. Returns ``None``.
    """
    categories = get_categories()
    known_category_ids = {category["id"] for category in categories}
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": categories
    }

    # os.listdir() order is arbitrary, so sort numerically to make the output
    # reproducible; the set collapses names such as "7" and "07" that would
    # otherwise yield the same image id twice.
    image_ids = sorted({int(d) for d in os.listdir(RAW_DATA_DIR) if d.isdigit()})

    annotation_id = 1
    unknown_category_ids = set()
    images_without_symbols = []

    for img_id in image_ids:
        img_folder = os.path.join(RAW_DATA_DIR, str(img_id))
        img_path = os.path.join(RAW_DATA_DIR, "Images", f"{img_id}.jpg")
        symbols_path = os.path.join(img_folder, f"{img_id}_symbols.npy")

        # Skip missing images/annotations
        if not os.path.exists(img_path) or not os.path.exists(img_folder):
            continue
        # Check the symbols file *before* registering the image, so a missing
        # file neither crashes the run nor leaves an image without annotations.
        if not os.path.exists(symbols_path):
            images_without_symbols.append(img_id)
            continue

        # Load image dimensions
        with Image.open(img_path) as img:
            width, height = img.size

        # Add image entry
        coco_data["images"].append({
            "id": img_id,
            "file_name": f"{img_id}.jpg",
            "width": width,
            "height": height
        })

        # Process symbols.npy
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code; only load trusted dataset files.
        symbols = np.load(symbols_path, allow_pickle=True)
        for _symbol_id, bbox, class_id in symbols:
            # Convert to float before subtracting so unsigned integer
            # coordinates cannot wrap around on a negative difference.
            x1, y1, x2, y2 = (float(coord) for coord in bbox)
            width_box = x2 - x1
            height_box = y2 - y1

            category_id = int(class_id)
            if category_id not in known_category_ids:
                unknown_category_ids.add(category_id)

            coco_data["annotations"].append({
                "id": annotation_id,
                "image_id": img_id,
                "category_id": category_id,
                "bbox": [x1, y1, width_box, height_box],
                "area": width_box * height_box,
                "iscrowd": 0
            })
            annotation_id += 1

    # Save COCO JSON (create the output directory if an earlier cell did not)
    os.makedirs(PROCESSED_ANN_DIR, exist_ok=True)
    output_path = os.path.join(PROCESSED_ANN_DIR, COCO_ANN_FILE)
    with open(output_path, "w") as f:
        json.dump(coco_data, f, indent=2)
    print(f"COCO annotations saved to {output_path} (total: {len(coco_data['annotations'])} objects)")

    # Surface data problems instead of silently writing an inconsistent file.
    if images_without_symbols:
        print(f"WARNING: skipped {len(images_without_symbols)} image(s) with no symbols file: {images_without_symbols}")
    if unknown_category_ids:
        print(f"WARNING: annotations use category ids missing from get_categories(): {sorted(unknown_category_ids)}")

In [12]:
# Run the conversion when this code is executed directly (as it is in this
# notebook cell); nothing runs if the code is imported as a module instead.
if __name__ == "__main__":
    convert_to_coco()

COCO annotations saved to /Users/mokshdutt/developer/P&ID/data/processed/annotations/paliwal_coco.json (total: 59498 objects)
