Transform data into COCO JSON file format


In [3]:
import os
import json
import numpy as np
import cv2
from pycocotools import mask as maskUtils
from tqdm import tqdm

In [24]:
# Input folders for the conversion.
# Source images: the .jpg files listed and read by create_coco_json.
original_dir = r'C:\Users\t.andriamihajasoa\AgaThon2025-Crop_residue_coverage_challenge\AgaThon2025-Crop_residue_coverage_challenge\data\raw\original'
# Masks: one .tif per image, looked up by create_coco_annotation under the image's name.
masks_dir = r'C:\Users\t.andriamihajasoa\AgaThon2025-Crop_residue_coverage_challenge\AgaThon2025-Crop_residue_coverage_challenge\data\raw\masks'

In [25]:
def create_coco_annotation(image_id, image_filename, mask_dir, start_id=1):
    """Build COCO annotations for every connected component of an image's mask.

    The mask is looked up in ``mask_dir`` under the image's base name with a
    ``.tif`` extension and read as a single-channel image. Each connected
    non-zero region becomes one annotation of category 1.

    Args:
        image_id: Value stored in each annotation's ``image_id`` field.
        image_filename: File name of the image (e.g. ``"field_01.jpg"``).
        mask_dir: Directory containing the ``.tif`` masks.
        start_id: ``id`` given to the first annotation returned; following
            annotations count up from it. Defaults to 1. Callers that merge
            annotations from several images must pass distinct ranges (or
            renumber afterwards) to keep ids unique.

    Returns:
        A list of annotation dicts with the keys ``id``, ``image_id``,
        ``category_id``, ``segmentation``, ``area``, ``bbox`` and ``iscrowd``.
        ``segmentation`` is a list of flat ``[x1, y1, x2, y2, ...]`` polygons
        tracing the outer boundary of the component; ``bbox`` is
        ``[x, y, width, height]``; ``area`` is the component's pixel count.
        The list is empty when the mask cannot be read or has no foreground.
        Components whose outline has fewer than three vertices (single pixels,
        one-pixel-wide straight lines) cannot form a polygon and are skipped.
    """
    annotations = []

    # Swap only the real extension; str.replace would also rewrite a '.jpg'
    # occurring earlier in the file name.
    mask_name = os.path.splitext(image_filename)[0] + '.tif'
    mask = cv2.imread(os.path.join(mask_dir, mask_name), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        return annotations

    # One pass labels the components and yields each one's bounding box and
    # pixel count, so no per-component boundingRect / RLE encoding is needed.
    num_labels, labels_im, stats, _ = cv2.connectedComponentsWithStats(mask)

    for label in range(1, num_labels):  # label 0 is the background
        # Binary mask of the current component only
        component_mask = (labels_im == label).astype(np.uint8) * 255

        # Outer boundary as (x, y) vertices. Indexing with [-2] picks the
        # contour list under both the OpenCV 3 (3 return values) and OpenCV 4
        # (2 return values) signatures.
        contours = cv2.findContours(
            component_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]
        segmentation = [
            contour.reshape(-1).tolist()
            for contour in contours
            if len(contour) >= 3  # a polygon needs at least three vertices
        ]
        if not segmentation:
            continue

        # Cast to plain ints: the stats array holds NumPy integers, which
        # json.dump cannot serialize.
        bbox = [
            int(stats[label, cv2.CC_STAT_LEFT]),
            int(stats[label, cv2.CC_STAT_TOP]),
            int(stats[label, cv2.CC_STAT_WIDTH]),
            int(stats[label, cv2.CC_STAT_HEIGHT]),
        ]
        area = int(stats[label, cv2.CC_STAT_AREA])

        annotations.append({
            "id": start_id + len(annotations),
            "image_id": image_id,
            "category_id": 1,
            "segmentation": segmentation,
            "area": area,
            "bbox": bbox,
            "iscrowd": 0
        })
    return annotations

def create_coco_json(image_dir, mask_dir, output_json):
    """Write a COCO-style JSON file describing the images and their mask annotations.

    Every ``.jpg`` file in ``image_dir`` is read to obtain its size and is
    given a 1-based image id; its annotations come from
    ``create_coco_annotation``. Images that cannot be read are reported and
    skipped without consuming an id.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        mask_dir: Directory passed through to ``create_coco_annotation``.
        output_json: Path of the JSON file to write.

    The output holds three keys: ``images``, ``annotations`` and a single
    category ``{"id": 1, "name": "object"}``.
    """
    images = []
    annotations = []
    image_id = 1
    next_annotation_id = 1

    # Sorted so image ids are reproducible; os.listdir returns entries in
    # arbitrary order.
    image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

    for image_filename in tqdm(image_files, desc="Processing images"):
        image_path = os.path.join(image_dir, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping {image_filename} as it could not be read.")
            continue

        # Only the first two dimensions are needed, whatever the channel count.
        height, width = image.shape[:2]

        images.append({
            "id": image_id,
            "file_name": image_filename,
            "height": height,
            "width": width
        })

        image_annotations = create_coco_annotation(image_id, image_filename, mask_dir)
        # Annotation ids must be unique across the whole file, but the helper
        # numbers each image's annotations independently, so renumber here.
        for annotation in image_annotations:
            annotation["id"] = next_annotation_id
            next_annotation_id += 1
        annotations.extend(image_annotations)

        image_id += 1

    coco_json = {
        "images": images,
        "annotations": annotations,
        "categories": [{"id": 1, "name": "object"}]
    }

    with open(output_json, 'w') as f:
        json.dump(coco_json, f, indent=4)

def display_image_with_annotations(image_path, annotations):
    """Show an image in an OpenCV window with its annotations outlined in green.

    For each annotation, ``bbox`` (x, y, width, height) is drawn as a rectangle
    and every entry of ``segmentation``, read as a flat list of coordinate
    pairs, is drawn as a closed polyline. The call blocks until a key is
    pressed and then closes the window. If the image cannot be read, a message
    is printed and nothing is shown.
    """
    canvas = cv2.imread(image_path)
    if canvas is None:
        print(f"Could not read {image_path}")
        return

    outline_colour = (0, 255, 0)
    line_width = 2

    for ann in annotations:
        left, top, box_w, box_h = ann['bbox']
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(canvas, top_left, bottom_right, outline_colour, line_width)

        for flat_coords in ann['segmentation']:
            # Regroup the flat list into an (N, 2) array of integer points
            vertices = np.array(flat_coords).reshape((-1, 2)).astype(np.int32)
            cv2.polylines(canvas, [vertices], isClosed=True, color=outline_colour, thickness=line_width)

    cv2.imshow('Image with Annotations', canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [26]:
# Generate the COCO annotation file for the whole dataset
instances_json_path = r'C:\Users\t.andriamihajasoa\AgaThon2025-Crop_residue_coverage_challenge\AgaThon2025-Crop_residue_coverage_challenge\data\raw\instances.json'
create_coco_json(original_dir, masks_dir, instances_json_path)

Processing images: 100%|██████████| 568/568 [05:38<00:00,  1.68it/s]


In [31]:
# Reload the annotation file that create_coco_json wrote above. It sits next to
# the image and mask folders, i.e. in the parent of original_dir. Reading the
# generated file (rather than a copy under data\processed that no cell in this
# notebook creates) keeps this check working after Restart & Run All.
generated_json_path = os.path.join(os.path.dirname(original_dir), 'instances.json')
with open(generated_json_path, 'r') as f:
    coco_data = json.load(f)

# Display the first image with annotations, loading the picture from the same
# folder the annotations were generated from.
first_image_info = coco_data['images'][0]
first_image_annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] == first_image_info['id']]
display_image_with_annotations(os.path.join(original_dir, first_image_info['file_name']), first_image_annotations)