In [37]:
import os
import json
import csv

In [38]:
# Image size recorded in the "height"/"width" fields of every image entry
# built by convert_to_coco_format.
# NOTE(review): one fixed size is applied to all images — confirm every
# source image really has these dimensions.
IMG_WIDTH = 2_400
IMG_HEIGHT = 1_350

In [39]:
def load_csv_data(csv_data):
    """Read a CSV file into a list of row dictionaries.

    Args:
        csv_data: Path of the CSV file to read. The first row is treated as
            the header and supplies the keys of every returned dictionary.

    Returns:
        A list with one dict per data row, in file order, as produced by
        ``csv.DictReader``.
    """
    # newline="" leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields come back unchanged instead of being rewritten
    # by the file object's universal-newline translation.
    with open(csv_data, "r", newline="") as csv_file:
        return list(csv.DictReader(csv_file))

In [40]:
def convert_to_coco_format(json_data, csv_data_list, img_height=None, img_width=None):
    """Build a COCO-style dictionary from detection JSON plus per-image CSV metadata.

    Args:
        json_data: Dict with a ``"detection_categories"`` mapping (keys are
            converted with ``int``) and an ``"images"`` list. Each image entry
            has a ``"file"`` path and, optionally, a ``"detections"`` list whose
            items carry ``"category"`` and ``"bbox"``.
        csv_data_list: List of row dicts. The row whose ``"path"`` equals an
            image's ``"file"`` supplies that image's ``rev``, ``station``,
            ``cam``, ``season``, ``day_part``, ``date_time`` (stored as
            ``date_captured``) and ``seq_id``. If several rows share a path,
            the first one is used. Images without a matching row keep empty
            strings for those fields.
        img_height: Height stored on every image entry. ``None`` (default)
            uses the module-level ``IMG_HEIGHT``.
        img_width: Width stored on every image entry. ``None`` (default)
            uses the module-level ``IMG_WIDTH``.

    Returns:
        A dict with three lists: ``"images"``, ``"annotations"`` and
        ``"categories"``. Image and annotation ids are 1-based and sequential.
        Categories contain every entry of ``detection_categories`` plus a
        ``category_<id>`` placeholder for any id seen only in detections.

    Raises:
        KeyError: If ``json_data`` lacks ``"images"`` or
            ``"detection_categories"``, an image lacks ``"file"``, a CSV row
            lacks ``"path"``, a matched CSV row lacks a metadata column, or a
            detection lacks ``"category"`` or ``"bbox"``.
    """
    # Resolve the defaults lazily so callers may override the image size
    # without touching the module constants.
    if img_height is None:
        img_height = IMG_HEIGHT
    if img_width is None:
        img_width = IMG_WIDTH

    categories_mapping = {
        int(key): value for key, value in json_data["detection_categories"].items()
    }

    # Index the CSV rows by path once instead of scanning the whole list for
    # every image; setdefault keeps the first row when a path is duplicated.
    csv_rows_by_path = {}
    for row in csv_data_list:
        csv_rows_by_path.setdefault(row["path"], row)

    # (image_info key, CSV column) pairs copied from a matching CSV row.
    csv_fields = (
        ("rev", "rev"),
        ("station", "station"),
        ("cam", "cam"),
        ("season", "season"),
        ("day_part", "day_part"),
        ("date_captured", "date_time"),
        ("seq_id", "seq_id"),
    )

    coco_data = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    for image_id, image_data in enumerate(json_data["images"], start=1):
        image_info = {
            "id": image_id,
            "dir_name": os.path.dirname(image_data["file"]),
            "file_name": os.path.basename(image_data["file"]),
            "height": img_height,
            "width": img_width,
            "rev": "",
            "station": "",
            "cam": "",
            "season": "",
            "day_part": "",
            "date_captured": "",
            "seq_id": ""
        }

        # Look up the CSV metadata for the current image.
        csv_row = csv_rows_by_path.get(image_data["file"])
        if csv_row is not None:
            for info_key, csv_column in csv_fields:
                image_info[info_key] = csv_row[csv_column]

        coco_data["images"].append(image_info)

        # An image entry may have no "detections" key or a null value;
        # treat both as "no detections" rather than crashing.
        for detection in image_data.get("detections") or []:
            category_id = int(detection["category"])
            # Register a placeholder name for ids missing from detection_categories
            # so every annotation's category_id appears in "categories".
            categories_mapping.setdefault(category_id, f"category_{category_id}")

            bbox = detection["bbox"]
            # NOTE(review): bbox values are copied unchanged and area is
            # bbox[2] * bbox[3] in the same units. If the input boxes are
            # normalized rather than pixel coordinates they are NOT rescaled
            # here — confirm what downstream consumers expect.
            annotation = {
                "id": len(coco_data["annotations"]) + 1,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [bbox[0], bbox[1], bbox[2], bbox[3]],
                "area": bbox[2] * bbox[3],
                "iscrowd": 0
            }
            coco_data["annotations"].append(annotation)

    for category_id, category_name in categories_mapping.items():
        coco_data["categories"].append({
            "id": category_id,
            "name": category_name,
            "supercategory": "animal"
        })

    return coco_data

In [41]:
# Input locations, relative to the notebook's working directory.
# CSV with the per-image metadata rows (passed to load_csv_data).
csv_data_path = "../Data/CSVs/dataset_ampliado_caltech.csv"
# JSON with the images, detections and detection categories to convert.
json_data_path = "../Data/JSONs/output_actualizado_con_categorias.json"

In [42]:
# Step 1: parse the detection JSON from disk.
with open(json_data_path, mode="r") as json_file:
    input_json_data = json.load(fp=json_file)

# Step 2: read the CSV metadata into a list of row dictionaries.
csv_data_list = load_csv_data(csv_data=csv_data_path)

# Step 3: merge detections and CSV metadata into a COCO-style dictionary.
coco_data = convert_to_coco_format(
    json_data=input_json_data,
    csv_data_list=csv_data_list,
)

# Step 4: write the result as indented JSON.
with open("../Data/JSONs/output_coco.json", mode="w") as output_file:
    json.dump(obj=coco_data, fp=output_file, indent=4)