In [28]:
# !pip install pandas

In [27]:
import os
import json
import glob
from collections import defaultdict
import pandas as pd
import yaml

In [8]:
nutri_csv = "/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/nutritionverse_dish_metadata3.csv"
df = pd.read_csv(nutri_csv)

In [9]:
# Merge Colum
food_cols = [f"food_item_type_{i}" for i in range(1, 8)]
all_food_items = pd.concat([df[col] for col in food_cols])

In [21]:
#Drop Nan 
unique_food_items = sorted(all_food_items.dropna().unique())

In [25]:
print(f"Jumlah food item unik: {len(unique_food_items)}")
print(unique_food_items[:45])  

Jumlah food item unik: 45
['asian-pear', 'captain-crunch-granola-bar', 'carrot', 'chicken-breast', 'chicken-leg', 'chicken-sandwich', 'chicken-wing', 'chocolate-granola-bar', 'corn', 'costco-california-sushi-roll-1', 'costco-cucumber-sushi-roll-1', 'costco-egg', 'costco-salad-sushi-roll-1', 'costco-shrimp-sushi-roll-1', 'costco-tuna-rice-ball', 'crispy-pork-rib', 'cucumber-piece', 'french-fry', 'half-bread-loaf', 'half-minced-shrimp', 'half-shrimp-salad-roll', 'hamburger', 'lamb-shank', 'lasagna', 'lobster', 'meatball', 'meatloaf', 'nature-valley-granola-bar', 'near-whole-chicken', 'plain-toast', 'pork-feet', 'red-apple', 'red-yellow-apple', 'rib', 'salad-beef-strip', 'salad-chicken-strip', 'salmon-nigiri', 'shrimp-nigiri', 'shrimp-salad-roll', 'stack-of-tofu-4pc', 'steak', 'steak-piece', 'toast-with-strawberry-jam', 'tuna-nigiri', 'veal-kebab-piece']


In [26]:
YOLO_TO_FOOD_GROUP = {
    # Fruits
    "apple": [f for f in unique_food_items if "apple" in f.lower() or "pear" in f.lower()],
    "banana": [f for f in unique_food_items if "banana" in f.lower()],

    # Sandwich / Bread-based
    "sandwich": [f for f in unique_food_items if "sandwich" in f.lower() or "hamburger" in f.lower()],
    "bread": [f for f in unique_food_items if "bread" in f.lower() or "toast" in f.lower()],

    # Vegetables
    "broccoli": [f for f in unique_food_items if "broccoli" in f.lower() or "capcay" in f.lower() or "salad" in f.lower()],
    "carrot": [f for f in unique_food_items if "carrot" in f.lower() or "cucumber" in f.lower()],

    # Protein / Meat
    "chicken": [f for f in unique_food_items if "chicken" in f.lower() or "chicken-breast" in f.lower() or "chicken-leg" in f.lower() or "near-whole-chicken" in f.lower()],
    "beef": [f for f in unique_food_items if "steak" in f.lower() or "steak-piece" in f.lower() or "meatball" in f.lower() or "meatloaf" in f.lower() or "rib" in f.lower() or "veal" in f.lower() or "lamb-shank" in f.lower()],
    "pork": [f for f in unique_food_items if "pork" in f.lower() or "crispy-pork-rib" in f.lower()],

    # Seafood
    "seafood": [f for f in unique_food_items if "shrimp" in f.lower() or "lobster" in f.lower() or "salmon" in f.lower() or "tuna" in f.lower()],

    # Pizza / Baked
    "pizza": [f for f in unique_food_items if "pizza" in f.lower() or "lasagna" in f.lower()],
    "cake": [f for f in unique_food_items if "cake" in f.lower() or "donut" in f.lower() or "granola-bar" in f.lower()],

    # Others / Snacks
    "hotdog": [f for f in unique_food_items if "hot" in f.lower() or "sausage" in f.lower()],
    "tofu": [f for f in unique_food_items if "tofu" in f.lower()],

    # Others misc
    "egg": [f for f in unique_food_items if "egg" in f.lower()],
    "sushi": [f for f in unique_food_items if "sushi" in f.lower() or "nigiri" in f.lower()],
}

# Preview mapping
for k, v in YOLO_TO_FOOD_GROUP.items():
    print(k, "=>", v)


apple => ['asian-pear', 'red-apple', 'red-yellow-apple']
banana => []
sandwich => ['chicken-sandwich', 'hamburger']
bread => ['half-bread-loaf', 'plain-toast', 'toast-with-strawberry-jam']
broccoli => ['costco-salad-sushi-roll-1', 'half-shrimp-salad-roll', 'salad-beef-strip', 'salad-chicken-strip', 'shrimp-salad-roll']
carrot => ['carrot', 'costco-cucumber-sushi-roll-1', 'cucumber-piece']
chicken => ['chicken-breast', 'chicken-leg', 'chicken-sandwich', 'chicken-wing', 'near-whole-chicken', 'salad-chicken-strip']
beef => ['crispy-pork-rib', 'lamb-shank', 'meatball', 'meatloaf', 'rib', 'steak', 'steak-piece', 'veal-kebab-piece']
pork => ['crispy-pork-rib', 'pork-feet']
seafood => ['costco-shrimp-sushi-roll-1', 'costco-tuna-rice-ball', 'half-minced-shrimp', 'half-shrimp-salad-roll', 'lobster', 'salmon-nigiri', 'shrimp-nigiri', 'shrimp-salad-roll', 'tuna-nigiri']
pizza => ['lasagna']
cake => ['captain-crunch-granola-bar', 'chocolate-granola-bar', 'nature-valley-granola-bar']
hotdog => []
t

In [29]:
filtered_mapping = {k: v for k, v in YOLO_TO_FOOD_GROUP.items() if v}

DATASET_DIR = "/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse" 
TRAIN_DIR = f"{DATASET_DIR}/train/images"
VAL_DIR = f"{DATASET_DIR}/valid/images"
TEST_DIR = f"{DATASET_DIR}/test/images"

# Buat dictionary data.yaml
data_yaml = {
    "train": TRAIN_DIR,
    "val": VAL_DIR,
    "test": TEST_DIR,
    "nc": len(filtered_mapping),
    "names": list(filtered_mapping.keys())
}

yaml_file = f"{DATASET_DIR}/data.yaml"
with open(yaml_file, "w") as f:
    yaml.dump(data_yaml, f, sort_keys=False)

print(f"✅ data.yaml berhasil dibuat di {yaml_file}")
print(data_yaml)

✅ data.yaml berhasil dibuat di /home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/data.yaml
{'train': '/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/train/images', 'val': '/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/valid/images', 'test': '/home/muliaandiki/project/NutriPlate/preprocessing/data/Nutritionverse/test/images', 'nc': 14, 'names': ['apple', 'sandwich', 'bread', 'broccoli', 'carrot', 'chicken', 'beef', 'pork', 'seafood', 'pizza', 'cake', 'tofu', 'egg', 'sushi']}
