In [1]:
import json
import csv
from pathlib import Path  # added

# Load the COCO annotation export. The encoding is given explicitly because
# open() otherwise falls back to the platform locale (e.g. cp1252 on Windows),
# which can mis-decode or reject non-ASCII text in a UTF-8 JSON file.
with open("Graffitti/train/_annotations.coco.json", encoding="utf-8") as f:
    coco = json.load(f)

# Directory that each image's COCO "file_name" is joined onto.
base_image_dir = Path("Graffitti/train")  # adjust if file_name already contains subfolders

# Source category name -> main/sub category labels written to the CSV.
# All three source names currently share the same label pair; each name gets
# its own dict so an entry can later be edited independently.
# NOTE(review): "Spitting" is mapped to the "Vandalism" sub-category - confirm this is intended.
category_mapping = {
    source_name: {
        "main_category": "Public Cleanliness & Public Property Damage",
        "sub_category": "Vandalism",
    }
    for source_name in ("Spitting", "-", "vandalism - v2 2024-08-07 9-32pm")
}

# Lookup table: COCO category id -> category name.
id_to_name = dict(
    (category["id"], category["name"]) for category in coco["categories"]
)

# Group the category ids seen on each image in a single pass over the
# annotations, so the per-image loop does a dict lookup instead of rescanning
# every annotation for every image.
category_ids_by_image = {}
for ann in coco["annotations"]:
    category_ids_by_image.setdefault(ann["image_id"], set()).add(ann["category_id"])

# Write one row per image. UTF-8 is requested explicitly so the file does not
# depend on the platform locale and matches the default encoding that
# pandas.read_csv uses when the file is read back.
with open("image_labels.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["image_file", "main_categories", "sub_categories"])

    for img in coco["images"]:
        # Absolute path of the image, resolved against the current working directory.
        abs_path = (base_image_dir / img["file_name"]).resolve()

        # Distinct category names annotated on this image (empty if it has no annotations).
        category_names = {
            id_to_name[category_id]
            for category_id in category_ids_by_image.get(img["id"], ())
        }

        main_cats = set()
        sub_cats = set()
        for cname in category_names:
            labels = category_mapping.get(cname)
            # Names without a mapping entry, or with an empty main category, are skipped.
            if labels is not None and labels["main_category"]:
                main_cats.add(labels["main_category"])
                sub_cats.add(labels["sub_category"])

        # Sort before joining: iteration order of a set of strings can change
        # between interpreter runs, which would make multi-label cells differ
        # from run to run. Images with no mapped category get empty strings.
        main_cat_str = "; ".join(sorted(main_cats))
        sub_cat_str = "; ".join(sorted(sub_cats))

        writer.writerow([str(abs_path), main_cat_str, sub_cat_str])

print("Done labeling images with main and sub categories.")


Done labeling images with main and sub categories.


In [2]:
import pandas as pd
# Read the label CSV back into a DataFrame for inspection.
graffitti = pd.read_csv(filepath_or_buffer="image_labels.csv")

In [3]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
graffitti

Unnamed: 0,image_file,main_categories,sub_categories
0,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
1,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
2,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
3,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
4,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
...,...,...,...
1699,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
1700,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
1701,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
1702,C:\Users\lchat\One Drive-UoM\OneDrive - Univer...,Public Cleanliness & Public Property Damage,Vandalism
