In [1]:
import os
import json
import cv2
import numpy as np
from tqdm import tqdm

In [2]:
# Root folder that holds one sub-folder per dataset category.
DATASET_PATH = "dataset"
# File the assembled COCO dictionary is written to.
OUTPUT_JSON = "annotations.json"

# COCO category table; ids are assigned in order, starting at 1.
CATEGORIES = [
    {"id": category_id, "name": category_name}
    for category_id, category_name in enumerate(("tumor", "deepfake"), start=1)
]

# Accumulator for the COCO document; "categories" shares the list above.
coco_data = dict(images=[], annotations=[], categories=CATEGORIES)

# Running counters used as the next image / annotation id.
image_id = annotation_id = 0

In [3]:
def create_full_mask(width, height):
    """Build a segmentation that covers the entire image (used for deepfake images).

    Args:
        width: Image width.
        height: Image height.

    Returns:
        A list holding one flat polygon ``[x1, y1, x2, y2, ...]`` that visits
        the four image corners: (0, 0), (width, 0), (width, height), (0, height).
    """
    corners = ((0, 0), (width, 0), (width, height), (0, height))
    outline = [coordinate for corner in corners for coordinate in corner]
    return [outline]

def detect_tumor_mask(image_path, threshold=150, min_area=500):
    """Auto-detect bright tumor regions (basic thresholding).

    The image is loaded as grayscale, binarised with a fixed threshold, and the
    outer contour of every sufficiently large bright region is returned as a
    flat polygon.

    Args:
        image_path: Path of the image file to analyse.
        threshold: Pixels brighter than this value become foreground.
        min_area: Contours whose ``cv2.contourArea`` is not greater than this
            value are discarded.

    Returns:
        A list of polygons, each a flat ``[x1, y1, x2, y2, ...]`` list, or
        ``None`` when the image cannot be read or no contour passes the area
        filter.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports a missing/corrupt file by returning None instead
        # of raising; stop here rather than crash inside cv2.threshold.
        return None

    _, mask = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; contours is second-to-last in both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    polygons = [
        cnt.flatten().tolist()
        for cnt in contours
        if cv2.contourArea(cnt) > min_area
    ]

    return polygons if polygons else None

def _polygon_bbox(polygon):
    """Return the COCO ``[x, y, width, height]`` box enclosing a flat polygon."""
    xs, ys = polygon[::2], polygon[1::2]
    x_min, y_min = min(xs), min(ys)
    return [x_min, y_min, max(xs) - x_min, max(ys) - y_min]


def _polygon_area(polygon):
    """Return the area enclosed by a flat ``[x1, y1, x2, y2, ...]`` polygon (shoelace formula)."""
    xs, ys = polygon[::2], polygon[1::2]
    # Index -1 at i == 0 wraps around to close the polygon.
    twice_area = sum(xs[i] * ys[i - 1] - xs[i - 1] * ys[i] for i in range(len(xs)))
    return abs(twice_area) / 2


def _make_annotation(ann_id, img_id, category_id, polygon):
    """Assemble one COCO annotation record for a single flat polygon."""
    return {
        "id": ann_id,
        "image_id": img_id,
        "category_id": category_id,
        "segmentation": [polygon],
        "bbox": _polygon_bbox(polygon),
        # Consumers that bucket objects by size (e.g. COCO evaluation) read this field.
        "area": _polygon_area(polygon),
        "iscrowd": 0
    }


# Start from a clean slate so re-running this cell rebuilds the dataset
# instead of appending a second copy of every image and annotation.
coco_data["images"].clear()
coco_data["annotations"].clear()
image_id = 0
annotation_id = 0

# Category ids are looked up by name so they cannot drift from CATEGORIES.
category_ids = {entry["name"]: entry["id"] for entry in CATEGORIES}
IMAGE_EXTENSIONS = (".jpg", ".png")
unreadable_images = []

for category in ["real_tumor", "real_no_tumor", "deepfake_tumor", "deepfake_no_tumor"]:
    folder_path = os.path.join(DATASET_PATH, category)

    # os.listdir order is arbitrary; sorting makes the assigned ids reproducible.
    for filename in tqdm(sorted(os.listdir(folder_path)), desc=f"Processing {category}"):
        # Case-insensitive match so files such as "scan.JPG" are not silently dropped.
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue

        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for corrupt/unreadable files; skip them
            # (and report below) instead of crashing on image.shape.
            unreadable_images.append(image_path)
            continue
        height, width = image.shape[:2]

        # Add image info
        # NOTE(review): file_name is the bare name, so identically named files
        # in different category folders are indistinguishable — confirm the
        # downstream loader resolves paths the way this expects.
        coco_data["images"].append({
            "id": image_id,
            "file_name": filename,
            "width": width,
            "height": height
        })

        # Handle annotation logic
        if category == "real_tumor":
            # One annotation per detected bright region; None means "nothing found".
            polygons = detect_tumor_mask(image_path) or []
            category_id = category_ids["tumor"]
        elif category in ("deepfake_tumor", "deepfake_no_tumor"):
            # Deepfake images are labelled with a single full-image polygon.
            polygons = create_full_mask(width, height)
            category_id = category_ids["deepfake"]
        else:
            # real_no_tumor: the image is listed but carries no annotation.
            polygons = []
            category_id = None

        for polygon in polygons:
            coco_data["annotations"].append(
                _make_annotation(annotation_id, image_id, category_id, polygon)
            )
            annotation_id += 1

        image_id += 1

if unreadable_images:
    print(f"⚠️ Skipped {len(unreadable_images)} unreadable image(s): {unreadable_images}")

# Save annotations
with open(OUTPUT_JSON, "w") as f:
    json.dump(coco_data, f, indent=4)

print(f"✅ Annotations saved to {OUTPUT_JSON}")

Processing real_tumor: 100%|█████████████████████████████████████████████████████████| 155/155 [00:03<00:00, 41.68it/s]
Processing real_no_tumor: 100%|████████████████████████████████████████████████████████| 98/98 [00:02<00:00, 46.05it/s]
Processing deepfake_tumor: 100%|█████████████████████████████████████████████████████| 500/500 [00:14<00:00, 33.34it/s]
Processing deepfake_no_tumor: 100%|██████████████████████████████████████████████████| 500/500 [00:14<00:00, 34.47it/s]


✅ Annotations saved to annotations.json
