In [None]:
# Load the FoodSeg103 dataset and export each split as JPEG images plus a labels.csv

import os
import csv
from datasets import load_dataset
from tqdm import tqdm
from PIL import Image

def parse_category_file(category_file_path):
    """Parse a tab-separated category file into an id -> name mapping.

    Each useful line has the form ``<integer id><TAB><class name>``. Lines
    that contain no tab after stripping (blank lines, lines with an id but no
    name) are skipped.

    Args:
        category_file_path: Path to the category text file.

    Returns:
        dict: Integer class id mapped to its whitespace-stripped class name.
        If an id appears more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the text before the first tab is not an integer, or
            the file is not valid UTF-8 (``UnicodeDecodeError``).
    """
    category_map = {}
    # Pin the decoding instead of relying on the platform default (which
    # differs between Windows and Linux). "utf-8-sig" also drops a leading
    # byte-order mark, which would otherwise be glued to the first id and
    # make int() fail.
    with open(category_file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            # Split only on the first tab so class names may contain tabs.
            parts = line.strip().split('\t', 1)
            if len(parts) == 2:
                class_id = int(parts[0])
                class_name = parts[1].strip()
                category_map[class_id] = class_name
    return category_map

def export_multilabel_dataset(dataset_name, category_file_path, output_root):
    """Export every split of a dataset as JPEG files plus a labels CSV.

    The dataset is loaded with ``load_dataset(dataset_name)``. For each split
    a directory ``<output_root>/<split>`` is created; every example's image is
    saved there as ``<id>.jpg`` and a ``labels.csv`` file is written with the
    header ``filename,labels``. The ``labels`` column holds the class names of
    the example joined by commas in a single CSV field.

    Args:
        dataset_name: Name passed to ``load_dataset``.
        category_file_path: Path to the category file read by
            ``parse_category_file`` (class id -> class name).
        output_root: Directory under which one sub-directory per split is
            created.

    Returns:
        None. Progress messages are printed and files are written to disk.

    Notes:
        Each example is expected to provide an ``'image'`` entry (a PIL image,
        per the original code comment) and a ``'classes_on_image'`` entry that
        iterates over class ids. Class ids missing from the category file are
        written as ``'unknown'``.
    """
    ds = load_dataset(dataset_name)
    category_map = parse_category_file(category_file_path)

    for split in ds.keys():
        print(f"Exporting {split} split...")

        split_data = ds[split]
        split_dir = os.path.join(output_root, split)
        os.makedirs(split_dir, exist_ok=True)

        labels_csv_path = os.path.join(split_dir, "labels.csv")
        with open(labels_csv_path, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(['filename', 'labels'])

            progress = tqdm(split_data, total=len(split_data))
            for index, example in enumerate(progress):
                image = example['image']  # PIL Image

                # Prefer an explicit identifier; try 'image_id', then 'id'.
                img_id = example.get('image_id')
                if img_id is None:
                    img_id = example.get('id')
                if img_id is None:
                    # No identifier at all: use the position within the split
                    # so every image still gets a unique, stable filename.
                    # (The previous fallback called tqdm.tqdm.format_meter,
                    # which raises AttributeError because `tqdm` here is the
                    # class itself, and would not have been unique anyway.)
                    img_id = f"{index:06d}"

                image_filename = f"{img_id}.jpg"
                image_path = os.path.join(split_dir, image_filename)

                # JPEG cannot store alpha or palette modes; saving such an
                # image raises OSError, so convert it to RGB first.
                if image.mode not in ("L", "RGB", "CMYK"):
                    image = image.convert("RGB")
                image.save(image_path)

                class_ids = example['classes_on_image']
                labels = [category_map.get(cid, 'unknown') for cid in class_ids]

                # csv.writer quotes the joined field, so the embedded commas
                # do not create extra columns.
                csvwriter.writerow([image_filename, ",".join(labels)])

    print("Multi-label dataset export complete.")

# Machine-specific absolute path to the category id file; adjust it when
# running this cell on another computer.
category_file = r"C:\Users\Hithesh\Downloads\Kaush Stuff\FoodSeg103 Stuff\category_id.txt"

# Export destination, relative to the current working directory.
output_folder = './FoodSeg103_export'

# Run the export for every split of the dataset.
export_multilabel_dataset(
    dataset_name="EduardoPacheco/FoodSeg103",
    category_file_path=category_file,
    output_root=output_folder,
)


Exporting train split...


100%|██████████| 4983/4983 [01:03<00:00, 78.94it/s] 


Exporting validation split...


100%|██████████| 2135/2135 [00:10<00:00, 198.35it/s]


Multi-label dataset export complete.
