In [13]:
import os
import json
import base64
import numpy as np
from io import BytesIO
from PIL import Image


In [14]:

# Paths
input_folder = "../Data/Glioma_MDC_2025_training/Glioma_MDC_2025_training"  # Folder with JSON files
output_folder = "../Data/Cropped/training"  # Folder for cropped images

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Iterate over all files in the input folder
for file_name in os.listdir(input_folder):
    if file_name.endswith(".json"):  # Process only JSON files
        json_path = os.path.join(input_folder, file_name)

        # Read the JSON file
        with open(json_path, 'r') as file:
            data = json.load(file)

        # Extract Base64 image data
        image_data_base64 = data.get("imageData")
        if not image_data_base64:
            print(f"No imageData found in {file_name}")
            continue

        # Decode the image data
        image_data = base64.b64decode(image_data_base64)
        image = Image.open(BytesIO(image_data))

        # Process each shape in the JSON file
        for shape in data.get("shapes", []):
            label = shape.get("label", "Unknown")
            points = np.array(shape.get("points", []), dtype=np.float32)

            if points.size == 0:
                print(f"No points found in shape for {file_name}")
                continue

            # Calculate bounding box
            x_min = int(np.min(points[:, 0]))
            y_min = int(np.min(points[:, 1]))
            x_max = int(np.max(points[:, 0]))
            y_max = int(np.max(points[:, 1]))

            # Enlarge the bounding box by 25%
            width = x_max - x_min
            height = y_max - y_min

            x_min = max(0, x_min - int(0.25 * width))
            y_min = max(0, y_min - int(0.25 * height))
            x_max = min(image.width, x_max + int(0.25 * width))
            y_max = min(image.height, y_max + int(0.25 * height))

            # Crop the image
            cropped_image = image.crop((x_min, y_min, x_max, y_max))

            # Create a folder for the label if it doesn't exist
            label_folder = os.path.join(output_folder, label)
            os.makedirs(label_folder, exist_ok=True)

            # Save the cropped image
            output_image_path = os.path.join(label_folder, f"{os.path.splitext(file_name)[0]}_{label}.jpg")
            cropped_image.save(output_image_path)
            print(f"Saved: {output_image_path}")


Saved: ../Data/Cropped/training/Non-mitosis/training0001_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0002_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0003_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0003_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0004_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0004_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Mitosis/training0005_Mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0005_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0006_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0006_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0007_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0007_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0008_Non-mitosis.jpg
Saved: ../Data/Cropped/training/Non-mitosis/training0009_No