# Examining the GINI Dataset
The GINI dataset (https://github.com/spotgarbage/spotgarbage-GINI) contains labeled garbage images used for training models in waste detection. For this project, adding more data will help improve classification accuracy when transforming trash data into the Spare-it format.

In [None]:
import pandas as pd
import json
import os
from collections import defaultdict
import datetime


csv_file = 'garbage-queried-images.csv'  # Path to your CSV file
# Load the whole CSV into a DataFrame; it is passed to transform_and_save below.
data = pd.read_csv(csv_file)

# Directory that receives the generated JSON annotation files.
output_directory = 'transformed_spotgarbage_annotations'
# exist_ok=True creates the directory when it is missing and is a no-op when it
# already exists, which avoids the check-then-create race of os.path.exists().
os.makedirs(output_directory, exist_ok=True)

# CSV columns holding the bounding-box corners, in the order they are stored in 'bbox'.
_BBOX_COLUMNS = ('startX', 'startY', 'endX', 'endY')


def _to_native(value):
    """Return *value* as a built-in Python scalar when it is a NumPy scalar."""
    # NumPy scalars expose .item(); the json module cannot serialize NumPy
    # integer types, so they are unwrapped before being written out.
    return value.item() if hasattr(value, 'item') else value


def _build_annotation(row, annotation_id):
    """Build the annotation dict for one CSV row.

    Args:
        row: A row yielded by ``DataFrame.iterrows()``; must provide 'label'
            and the four columns named in ``_BBOX_COLUMNS``.
        annotation_id: ID to store under 'id' (unique within one image).

    Returns:
        A dict with the keys 'category_id', 'bbox', 'area', 'iscrowd',
        'segmentation' and 'id'.
    """
    corners = [_to_native(row[column]) for column in _BBOX_COLUMNS]
    # A box is only usable when all four corners are present; a single missing
    # corner would otherwise put NaN into 'bbox' and 'area' (invalid JSON).
    bbox = [] if any(pd.isna(corner) for corner in corners) else corners

    if bbox:
        # bbox is [startX, startY, endX, endY], so area is width * height.
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
    else:
        area = 0

    return {
        # Any label other than 1 is mapped to category 0.
        'category_id': 1 if row['label'] == 1 else 0,
        'bbox': bbox,
        'area': area,
        'iscrowd': 0,
        'segmentation': [],
        'id': annotation_id,
    }


def transform_and_save(data, output_dir=None):
    """Group CSV rows by image and write one JSON annotation file per image.

    Each output file is named ``<image>.json`` and contains the 'info',
    'licenses', 'images', 'annotations' and 'categories' sections.

    Args:
        data: DataFrame with the columns 'image', 'label', 'startX',
            'startY', 'endX' and 'endY'.
        output_dir: Directory to write the JSON files into. Defaults to the
            module-level ``output_directory``. It is created if missing.
    """
    if output_dir is None:
        output_dir = output_directory
    os.makedirs(output_dir, exist_ok=True)

    # Map each image to the list of annotations collected for it.
    image_id_to_annotations = defaultdict(list)
    for _, row in data.iterrows():
        image_id = _to_native(row['image'])
        annotations = image_id_to_annotations[image_id]
        # The running length gives IDs 0, 1, 2, ... that are unique per image.
        annotations.append(_build_annotation(row, len(annotations)))

    # Save the data in Spare-it format
    for image_id, annotations in image_id_to_annotations.items():
        spareit_json = {
            'info': {
                'description': 'Transformed SpotGarbage GINI annotation',
                'version': '1.0',
                'year': 2024,
                'contributor': 'User',
                'date_created': datetime.datetime.now().isoformat(),
            },
            'licenses': [{
                'url': '',
                'id': 0,
                'name': '',
            }],
            'images': [{
                'license': 0,
                'url': '',
                'file_name': image_id,
                # Image dimensions are not available in the CSV, so they are
                # written as 0 placeholders.
                'height': 0,
                'width': 0,
                'date_captured': '',
            }],
            'annotations': annotations,
            'categories': [{
                'id': 1,
                'name': 'garbage',
                'supercategory': 'waste',
            }],
        }

        # Save each image's annotations as a separate JSON file
        file_name = os.path.join(output_dir, f'{image_id}.json')
        with open(file_name, 'w', encoding='utf-8') as json_file:
            json.dump(spareit_json, json_file, indent=4)

# Transform the rows loaded from the CSV and write one JSON annotation file
# per image into the output directory.
transform_and_save(data)
