In [2]:
import os
import json
import glob 
import numpy as np
import pandas as pd
import cv2
from PIL import Image

In [3]:
# Root folder that holds one sub-folder per annotation dataset.
datasets_dir = os.path.abspath("../data/tator/grid/datasets")

# Keep directories only: the CSV files this notebook writes further down are
# saved into this same folder, so a bare "*" glob would pick them up as
# "dataset folders" on a re-run. Sorting makes the processing order (and so
# the row order of the exported CSVs) reproducible across runs and platforms.
datasets_folders = sorted(
    path
    for path in glob.glob(os.path.join(datasets_dir, "*"))
    if os.path.isdir(path)
)
datasets_folders

['c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\dreiss-annotations-wscnms-dropcam',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\dreiss-annotations-wscnms-usgs-auv',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\hard-mud-annotations-wscnms-dropcam',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\hard-mud-annotations-wscnms-usgs-auv',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\sand-annotations-wscnms-dropcam',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\sand-annotations-wscnms-usgs-auv',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\sav-annotations-wscnms-dropcam',
 'c:\\Users\\jordan\\Documents\\GitHub\\CoralNet-Toolbox\\data\\tator\\grid\\datasets\\sav-annotations-wscnms-usgs-auv',
 'c:\\Users\\jorda

In [9]:
# Build patch-annotation CSVs from the grid JSON files in every dataset folder.
#
# For each area factor, every labelled grid cell becomes one row:
#   [image name, first label, center_x, center_y, patch size]
# Rows are routed to the AUV or dropcam table according to the dataset folder
# name, de-duplicated, and written to `datasets_dir` as three CSVs
# (auv / dropcam / combined) whose suffix is the fractional digits of the
# area factor (e.g. 0.005 -> "005").

# Cells whose first label is one of these are skipped.
ignore_labels = ["Substrate 1", "Other benthic communities", "Unknown", "UnknownFineSediment"]

csv_columns = ["Name", "Label", "Column", "Row", "Patch Size"]

for area_factor in [0.001, 0.005, 0.01, 0.02]:  # 0.1%, 0.5%, 1%, 2%
    cell_data_auv = []
    cell_data_dropcam = []

    for dataset_folder in datasets_folders:
        # Decide the destination from the folder's own name rather than the
        # full path, so an unrelated parent directory (or user name) that
        # happens to contain "auv"/"dropcam" cannot misroute a dataset.
        folder_name = os.path.basename(os.path.normpath(dataset_folder)).lower()
        if "auv" in folder_name:
            target_rows = cell_data_auv
        elif "dropcam" in folder_name:
            target_rows = cell_data_dropcam
        else:
            # Neither platform: nothing from this folder would be recorded.
            continue

        for json_file in glob.glob(os.path.join(dataset_folder, "*.json")):
            with open(json_file, 'r') as f:
                annotations = json.load(f)

            # Split off the "_version*.json" part of the filename
            image_name = os.path.basename(json_file).split("_version")[0] + ".png"
            image_path = os.path.join(dataset_folder, image_name)

            # Only the image dimensions are needed; PIL reads them lazily.
            with Image.open(image_path) as img:
                image_width, image_height = img.size

            # Side of a square patch covering `area_factor` of the image area.
            patch_area = image_width * image_height * area_factor
            patch_size = min(int(np.sqrt(patch_area)), 336)  # Cap patch size at 336 pixels

            # 'grid' is a two-level mapping: outer key -> inner key -> cell dict.
            for cell_group in annotations['grid'].values():
                for cell in cell_group.values():
                    if not len(cell['labels']):
                        continue

                    label = cell['labels'][0]
                    if label in ignore_labels:
                        continue

                    col = cell['center_x']
                    row = cell['center_y']
                    target_rows.append([image_name, label, col, row, patch_size])

    # Write once per area factor, after ALL dataset folders have been read
    # (previously this ran inside the folder loop and rewrote the same three
    # files on every iteration).
    suffix = str(area_factor).split('.')[1]

    df_auv = pd.DataFrame(cell_data_auv, columns=csv_columns)
    df_auv = df_auv.drop_duplicates()
    df_auv.to_csv(os.path.join(datasets_dir, f"auv_grid_annotations_{suffix}.csv"), index=False)

    df_dropcam = pd.DataFrame(cell_data_dropcam, columns=csv_columns)
    df_dropcam = df_dropcam.drop_duplicates()
    df_dropcam.to_csv(os.path.join(datasets_dir, f"dropcam_grid_annotations_{suffix}.csv"), index=False)

    df_combined = pd.concat([df_auv, df_dropcam], ignore_index=True)
    df_combined.to_csv(os.path.join(datasets_dir, f"combined_grid_annotations_{suffix}.csv"), index=False)