In [2]:
import os
import json
from PIL import Image

def _inset_span(extent, padding):
    """Return ``(offset, length)`` of ``extent`` shrunk by ``padding`` at both ends.

    When the inset would leave no positive length, the full span
    ``(0, extent)`` is returned instead, so the result never starts outside
    the image and never has zero length for a non-empty image.
    """
    length = extent - 2 * padding
    if length > 0:
        return padding, length
    return 0, extent


def create_coco_annotation(dataset_path, split, padding=10):
    """Build a COCO-style annotation dict for one split of a folder-per-class dataset.

    The expected layout is ``<dataset_path>/<split>/<class_name>/<image file>``.
    Every sub-directory of the split becomes a category, and every ``.png``,
    ``.jpg`` or ``.jpeg`` file (case-insensitive) inside it becomes one image
    entry with exactly one annotation: a box covering the image minus
    ``padding`` pixels on each side.

    Args:
        dataset_path: Root directory of the dataset.
        split: Name of the split sub-directory (e.g. ``'train'``).
        padding: Pixels removed from each side of the image to form the
            bounding box. Must be non-negative. Defaults to 10.

    Returns:
        A dict with the keys ``"images"``, ``"annotations"`` and
        ``"categories"``. Ids in all three lists start at 1. Each ``"bbox"``
        is ``[x, y, w, h]`` and ``"area"`` is ``w * h``.

    Raises:
        ValueError: If ``padding`` is negative.
    """
    if padding < 0:
        raise ValueError("padding must be non-negative")

    split_dir = os.path.join(dataset_path, split)

    # os.listdir returns entries in arbitrary order. Sorting makes the
    # name -> category id mapping depend only on the folder names, so splits
    # that contain the same class folders get identical category ids.
    class_folders = sorted(
        entry for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    )
    categories = [
        {"id": category_id, "name": class_folder}
        for category_id, class_folder in enumerate(class_folders, start=1)
    ]

    images = []
    annotations = []
    image_extensions = ('.png', '.jpg', '.jpeg')

    for category in categories:
        class_folder = category["name"]
        class_folder_path = os.path.join(split_dir, class_folder)

        # Sorted for the same reason as above: reproducible image ids.
        for image_filename in sorted(os.listdir(class_folder_path)):
            if not image_filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(class_folder_path, image_filename)
            with Image.open(image_path) as img:
                width, height = img.size

            image_id = len(images) + 1
            images.append({
                "id": image_id,
                # Always '/'-separated, independent of the host OS separator.
                "file_name": f"{dataset_path}/{split}/{class_folder}/{image_filename}",
                "width": width,
                "height": height,
            })

            # Images too small for the inset fall back to a full-extent box
            # on the affected axis instead of a zero-area box.
            x, w = _inset_span(width, padding)
            y, h = _inset_span(height, padding)

            annotations.append({
                "id": len(annotations) + 1,
                "image_id": image_id,
                "category_id": category["id"],
                "bbox": [x, y, w, h],
                "area": w * h,
                "iscrowd": 0,
            })

    return {
        "images": images,
        "annotations": annotations,
        "categories": categories,
    }

def save_coco_json(coco_format, output_file):
    """Write ``coco_format`` to ``output_file`` as JSON indented by 4 spaces.

    The target is opened in text write mode, so an existing file at that
    path is overwritten.
    """
    with open(output_file, mode='w') as json_handle:
        json.dump(coco_format, fp=json_handle, indent=4)

# Dataset root and the folder that receives the generated annotation files
dataset_path = 'drive/MyDrive/plant_disease_detection/dataset'
output_folder = os.path.join(dataset_path, 'detr_annotation')

# Destination JSON file for each split
train_output_path = os.path.join(output_folder, 'train_annotations.json')
valid_output_path = os.path.join(output_folder, 'valid_annotations.json')

# The output folder may already exist from an earlier run
os.makedirs(output_folder, exist_ok=True)

# Build the annotations for both splits before anything is written to disk
train_coco = create_coco_annotation(dataset_path, 'train')
valid_coco = create_coco_annotation(dataset_path, 'valid')

# Persist each split's annotations as JSON
save_coco_json(train_coco, train_output_path)
save_coco_json(valid_coco, valid_output_path)