In [None]:
# Mount Google Drive so the following cells can read and write under
# /content/drive (Colab-only; prompts for authorization on first run).
from google.colab import drive

drive.mount(mountpoint="/content/drive")

In [None]:
%%bash
# Clone the FoodVision repository into Google Drive.
# Abort on the first failing command so a failed `cd` can never cause the
# clone to land in an unexpected directory.
set -euo pipefail

cd /content/drive/MyDrive/
mkdir -p vp-project-branch
cd vp-project-branch

# `git clone` fails when the target directory already exists, which would
# break a full re-run of the notebook; only clone when the repo is absent.
if [ ! -d FoodVision/.git ]; then
    git clone https://github.com/SJones339/FoodVision
fi

In [None]:
!pip install ultralytics
!pip install datasets
!pip install roboflow
import os
import cv2
import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from roboflow import Roboflow


In [None]:

# Root folder of the exported YOLO dataset on Google Drive.
TARGET_DIR = "/content/drive/MyDrive/vp-project-branch/FoodVision/data/foodseg_pp"

# Output layout: {images,labels}/{train,val} underneath TARGET_DIR.
train_img_dir = f"{TARGET_DIR}/images/train"
train_lbl_dir = f"{TARGET_DIR}/labels/train"
val_img_dir = f"{TARGET_DIR}/images/val"
val_lbl_dir = f"{TARGET_DIR}/labels/val"

# Create every output folder up front; a no-op when they already exist.
for out_dir in (train_img_dir, train_lbl_dir, val_img_dir, val_lbl_dir):
    os.makedirs(out_dir, exist_ok=True)

# Download (or load from cache) the FoodSeg103 dataset.
# NOTE(review): trust_remote_code=True allows code shipped with the dataset
# repository to run locally - confirm it is still required for this dataset.
ds = load_dataset("EduardoPacheco/FoodSeg103", trust_remote_code=True)

# The dataset exposes its own "validation" split, so no manual split is made.
train_ds, val_ds = ds["train"], ds["validation"]

#Label -> YOLO polygons 
def _mask_to_yolo_lines(mask):
    """Turn a 2-D class-id mask into YOLO segmentation label lines.

    Each returned line is ``"<class_id> x1 y1 x2 y2 ...\\n"`` for one external
    contour, with x divided by the mask width and y by the mask height.
    Class id 0 is treated as background and produces no lines.
    """
    H, W = mask.shape
    lines = []

    # Class ids present in this mask, background (0) excluded.
    classes = np.unique(mask)
    classes = classes[classes != 0]

    for cls in classes:
        # findContours needs a single-channel 8-bit image.
        binmask = (mask == cls).astype(np.uint8)
        contours, _ = cv2.findContours(
            binmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for cnt in contours:
            # A polygon needs at least three vertices.
            if len(cnt) < 3:
                continue

            poly = cnt.reshape(-1, 2).astype(float)
            poly[:, 0] /= W
            poly[:, 1] /= H

            coords = " ".join(f"{p:.6f}" for p in poly.flatten().tolist())
            lines.append(f"{int(cls)} {coords}\n")

    return lines


def convert_split(split, img_dir, lbl_dir):
    """Export one dataset split as YOLO-segmentation images and label files.

    For the i-th item the image is written to ``{img_dir}/{i}.jpg`` and the
    polygons derived from its mask to ``{lbl_dir}/{i}.txt``. A label file is
    always created, and is empty when the mask contains only background.

    Args:
        split: Iterable of items providing ``item["image"]`` and
            ``item["label"]``, both convertible with ``np.array``. The label
            must convert to a 2-D array of integer class ids. Colour images
            are assumed to be in RGB(A) channel order.
        img_dir: Existing directory that receives the JPEG images.
        lbl_dir: Existing directory that receives the ``.txt`` label files.

    Raises:
        IOError: If OpenCV could not write an image. ``cv2.imwrite`` signals
            failure by returning ``False`` instead of raising, so the result
            is checked explicitly to avoid labels without a matching image.
    """
    print("Detected mask key: label")

    for i, item in enumerate(tqdm(split)):
        img = np.array(item["image"])
        mask = np.array(item["label"])

        # OpenCV writes BGR; choose the conversion that matches the channel
        # layout so grayscale or RGBA inputs do not crash cvtColor.
        if img.ndim == 2:
            bgr = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            bgr = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
        else:
            bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        img_path = f"{img_dir}/{i}.jpg"
        if not cv2.imwrite(img_path, bgr):
            raise IOError(f"Failed to write image: {img_path}")

        label_path = f"{lbl_dir}/{i}.txt"
        with open(label_path, "w") as f:
            f.writelines(_mask_to_yolo_lines(mask))

import yaml

# Convert both splits, announcing each one before it starts.
for banner, split_ds, images_out, labels_out in (
    ("Converting training...", train_ds, train_img_dir, train_lbl_dir),
    ("Converting validation...", val_ds, val_img_dir, val_lbl_dir),
):
    print(banner)
    convert_split(split_ds, images_out, labels_out)

print("Saved YOLO dataset to:", TARGET_DIR)

# Dataset description file, stored next to the images/ and labels/ folders.
yaml_path = f"{TARGET_DIR}/data.yaml"

# 104 class slots: ids 0..103 with placeholder names class_0 .. class_103.
class_names = [f"class_{idx}" for idx in range(104)]

# NOTE(review): "images/test" is referenced here but this notebook only
# creates images/train and images/val - confirm a test split exists elsewhere.
data = {
    "path": TARGET_DIR,
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",
    "nc": len(class_names),
    "names": class_names,
}

# sort_keys=False keeps the keys in the order declared above.
with open(yaml_path, "w") as yaml_file:
    yaml.dump(data, yaml_file, sort_keys=False)

print("Wrote corrected data.yaml")

In [None]:
# Destination on Google Drive for the local copy of Food-101.
save_path = "/content/drive/MyDrive/vp-project-branch/FoodVision/datasets/food101"

# Fetch Food-101 from the Hugging Face Hub, then persist it to Drive.
# Note: this rebinds `ds`, which earlier held the FoodSeg103 dataset.
ds = load_dataset("ethz/food101")
ds.save_to_disk(save_path)

print("Saved to:", save_path)


In [None]:
# Download version 4 of the "foodx-251" Roboflow project to Google Drive.
import os
from getpass import getpass

# Never hard-code the API key in the notebook: read it from the
# ROBOFLOW_API_KEY environment variable, or prompt for it without echoing.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("foodx251").project("foodx-251")
version = project.version(4)

dataset = version.download(
    model_format="folder",  # classification dataset -> use "folder" format
    location="/content/drive/MyDrive/vp-project-branch/FoodVision/datasets/foodx251"
)

print("Downloaded to:", dataset.location)