# Multi-dataset Download & Preprocessing for Food Panoptic Dataset
**Goal:** Download (when possible) and convert the datasets below, unify their labels, and export the result as a single merged COCO Panoptic dataset. Sources:
- FoodSeg103 (panoptic preferred)
- UECFOOD256 (instance / bbox)
- Food-101 (classification)
- Recipe1M+ (optional, used for image enrichment)

Outputs:
- `merged_panoptic/annotations/*.json` (COCO Panoptic)
- `merged_panoptic/panoptic_train2017/*.png` (panoptic segmentation PNGs)
- `merged_panoptic/images/*`

---

In [None]:
import os
import json
import shutil
from pathlib import Path
from tqdm import tqdm
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

from pycocotools import mask as maskUtils

# panoptic API helper
from panopticapi.utils import rgb2id, id2rgb

# Paths - configure these to point to where you've downloaded/extracted datasets.
BASE_DIR = Path.cwd()
OUTPUT_DIR = BASE_DIR / "merged_panoptic"
OUTPUT_DIR.mkdir(exist_ok=True)

# Put local dataset root directories here (edit these paths)
DATA_ROOTS = {
    "foodseg103": "/path/to/FoodSeg103",   # expected: has segmentation masks or COCO-like annotations
    "uecfood256": "/path/to/UECFOOD256",   # expected: images + annotation (bboxes / masks)
    "food101": "/path/to/food-101",        # expected: images + labels (image-level)
    "recipe1m": "/path/to/Recipe1M",       # optional
}

# Output subfolders
IMAGES_OUT = OUTPUT_DIR / "images"
PANOPTIC_OUT = OUTPUT_DIR / "panoptic_train2017"
ANNOTATIONS_OUT = OUTPUT_DIR / "annotations"
for p in (IMAGES_OUT, PANOPTIC_OUT, ANNOTATIONS_OUT):
    p.mkdir(parents=True, exist_ok=True)

# useful counters
NEXT_IMAGE_ID = 1
NEXT_ANNOTATION_ID = 1
NEXT_SEGMENT_ID = 1
