In [None]:
import os
import json
import shutil
from tqdm import tqdm

# Per-split locations of the source COCO annotation files. All three dicts
# below share the same keys ("train", "val", "test"); note that the "val"
# split is stored in a directory named "valid".
json_paths = {
    "train": "../dataset/Trash Detection.v14i.coco/train/_annotations.coco.json",
    "val": "../dataset/Trash Detection.v14i.coco/valid/_annotations.coco.json",
    "test": "../dataset/Trash Detection.v14i.coco/test/_annotations.coco.json"
}

# Directories the source images are read from, one per split.
input_folders = {
    "train": "../dataset/Trash Detection.v14i.coco/train/",
    "val": "../dataset/Trash Detection.v14i.coco/valid/",
    "test": "../dataset/Trash Detection.v14i.coco/test/"
}

# Directories that receive the filtered annotation file and the copied images.
output_folders = {
    "train": "../dataset/Filtered/train/",
    "val": "../dataset/Filtered/valid/",
    "test": "../dataset/Filtered/test/"
}

# Name of the category to remove; it is compared against the "name" field of
# each entry in the annotation file's "categories" list.
category_to_remove = "Waste"

def process_folder(json_path, input_folder, output_folder, category_to_remove):
    """Write a copy of one COCO dataset split with a single category removed.

    Loads the COCO annotation file at ``json_path``, drops the category named
    ``category_to_remove`` together with every annotation that refers to it,
    and keeps only the images that still have at least one remaining
    annotation. The filtered annotation file is written to ``output_folder``
    as ``_annotations.coco.json`` and the kept images are copied there from
    ``input_folder``.

    Args:
        json_path: Path to the source COCO annotation JSON file. It must
            contain the "categories", "annotations" and "images" lists.
        input_folder: Directory that the images' "file_name" values are
            resolved against.
        output_folder: Directory that receives the filtered JSON file and the
            copied images; created if it does not exist.
        category_to_remove: Value of the "name" field of the category to drop.

    Returns:
        None. If no category has the requested name, a message is printed and
        nothing is written or copied.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_path, 'r', encoding="utf-8") as file:
        data = json.load(file)

    # Id of the first category whose name matches, or None when absent.
    category_to_remove_id = next(
        (
            category["id"]
            for category in data["categories"]
            if category["name"] == category_to_remove
        ),
        None,
    )

    if category_to_remove_id is None:
        print(f"Kategori '{category_to_remove}' tidak ditemukan di {json_path}.")
        return

    # Drop every annotation that belongs to the removed category.
    filtered_annotations = [
        ann for ann in data["annotations"] if ann["category_id"] != category_to_remove_id
    ]

    # Only images that still have at least one annotation are kept.
    valid_image_ids = {ann["image_id"] for ann in filtered_annotations}
    filtered_images = [
        img for img in data["images"] if img["id"] in valid_image_ids
    ]

    # Remove the category entry itself.
    filtered_categories = [
        cat for cat in data["categories"] if cat["id"] != category_to_remove_id
    ]

    # All other top-level keys of the loaded document are written back unchanged.
    data["annotations"] = filtered_annotations
    data["images"] = filtered_images
    data["categories"] = filtered_categories

    # Save the filtered annotation file.
    os.makedirs(output_folder, exist_ok=True)
    output_json_path = os.path.join(output_folder, "_annotations.coco.json")
    with open(output_json_path, 'w', encoding="utf-8") as file:
        json.dump(data, file, indent=4)

    # os.path.basename() returns "" for a path that ends with a separator, which
    # left the progress-bar label blank; normalise the path first so the label
    # shows the folder name.
    folder_label = os.path.basename(os.path.normpath(input_folder))

    # Copy the kept images into the output folder.
    for image in tqdm(filtered_images, desc=f"Copying images for {folder_label}"):
        source_path = os.path.join(input_folder, image["file_name"])
        destination_path = os.path.join(output_folder, image["file_name"])
        # "file_name" may contain sub-directories; make sure they exist.
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copy(source_path, destination_path)

    print(f"Proses selesai untuk {input_folder}. Hasil disimpan di {output_folder}.")

# Run the category filter over every dataset split, in a fixed order.
for split in ("train", "val", "test"):
    process_folder(
        json_path=json_paths[split],
        input_folder=input_folders[split],
        output_folder=output_folders[split],
        category_to_remove=category_to_remove,
    )


Copying images for : 100%|██████████| 110/110 [00:00<00:00, 1777.45it/s]

Proses selesai untuk ../dataset/Trash Detection.v14i.coco/test/. Hasil disimpan di ../dataset/Filtered/test/.



