In [2]:
import os

import cv2
import pandas as pd

In [3]:
# --- Ingredient-level files ---
# Ingredient metadata CSV under the dataset's metadata/ folder
# (not read by any of the cells visible in this part of the notebook).
ingredients_path = "../data/nutrition5k_dataset_nosides/metadata/ingredients_metadata.csv"
# Output: per-dish ingredient rows are written here with DataFrame.to_csv.
# NOTE: the variable name is misspelled ("ingrediants"); it is referenced by
# the export cell under this exact name, so it must be renamed in both places at once.
proc_ingrediants_path = "../data/nutrition5k_dataset_nosides/processed/ingredients_metadata.csv"
# Output: dish-level rows (one per dish) are written here with DataFrame.to_csv.
dishes_info_path = "../data/nutrition5k_dataset_nosides/processed/dishes_info.csv"

# --- Dish-level inputs ---
# Two raw dish metadata files; both are loaded and concatenated into one frame.
dish_metadata_path = "../data/nutrition5k_dataset_nosides/metadata/dish_metadata_cafe1.csv"
dish_metadata2_path = "../data/nutrition5k_dataset_nosides/metadata/dish_metadata_cafe2.csv"
# List of dish ids (not read by any of the cells visible in this part of the notebook).
dish_ids_path = "../data/nutrition5k_dataset_nosides/dish_ids/dish_ids_all.txt"
# Image root: a dish is kept only if "<img_dir>/<dish_id>" exists on disk.
img_dir = "../data/nutrition5k_dataset_nosides/imagery/realsense_overhead"

### Load the data


In [4]:
# The metadata rows have a varying number of comma-separated fields (the
# parsing loop later derives the ingredient count from each row's length),
# so each line is first read whole into a single 'raw' column by using "|"
# as the separator.
# NOTE(review): this assumes no line contains a "|" character - confirm.
cafe1_df = pd.read_csv(dish_metadata_path, sep="|", header=None, names=['raw'])
df2 = pd.read_csv(dish_metadata2_path, sep="|", header=None, names=['raw'])

# Stack both files (original per-file row indices are kept as-is), then
# split every raw line on commas; shorter rows are padded with missing values.
df = pd.concat([cafe1_df, df2])
raw_df = df['raw'].str.split(pat=',', expand=True)

### Split the raw rows into two DataFrames: dish-level info and per-dish ingredients


In [5]:
def parse_ingredient_id(name: str) -> int:
    """Return the numeric id embedded in an underscore-separated identifier.

    The identifier is split on ``'_'`` and the second token is converted to
    an integer, so leading zeros are dropped (``"x_0000000508"`` -> ``508``).

    Args:
        name: Identifier containing at least one underscore, with a numeric
            second token.

    Returns:
        The second underscore-separated token as an ``int``.

    Raises:
        IndexError: If ``name`` contains no underscore.
        ValueError: If the second token is not a valid integer.
    """
    tokens = name.split('_')
    numeric_token = tokens[1]
    return int(numeric_token)

In [6]:
# Column names for the two output tables. Every raw row is laid out as the
# dish-level fields followed by one fixed-size group of fields per ingredient.
ingredients_col_names = ['dish_id', 'ingredient_id', 'name', 'grams', 'calories', 'fat', 'carbohydrates', 'protein']
dishes_col_names = ['dish_id', 'calories', 'mass', 'fat', 'carbohydrates', 'protein']

# Field counts are derived from the column lists so the layout is defined once.
# 'dish_id' is prepended to each ingredient group by this cell, hence the -1.
num_dish_fields = len(dishes_col_names)              # 6
num_ingr_fields = len(ingredients_col_names) - 1     # 7

ingredient_infos = []
dish_infos = []

# Iterate over each raw row (one dish per row); the row index is not needed.
for _, row in raw_df.iterrows():
    dish_id = row.iloc[0]
    dish_img_path = os.path.join(img_dir, dish_id)

    # Dishes without an image folder are skipped entirely.
    if not os.path.exists(dish_img_path):
        print(f"not found ground truth: {dish_img_path}")
        continue

    # Leading fields describe the dish as a whole.
    dish_info = row.iloc[:num_dish_fields].values
    dish_infos.append(dish_info)

    # Rows are padded with missing values up to the widest row; count() only
    # counts the populated cells, so this is the number of ingredient values.
    # A trailing incomplete group (fewer than num_ingr_fields values) is ignored.
    num_ingredient_values = row.count() - num_dish_fields
    for i in range(num_ingredient_values // num_ingr_fields):
        ingr_start_col = num_dish_fields + i * num_ingr_fields
        ingr_end_col = ingr_start_col + num_ingr_fields

        # Work on a fresh list so the row's underlying data is never mutated.
        ingr_data = row.iloc[ingr_start_col:ingr_end_col].tolist()
        ingr_data[0] = parse_ingredient_id(ingr_data[0])

        # Prepend the dish_id so each ingredient row can be joined back to its dish.
        ingredient_infos.append([dish_id] + ingr_data)

# Convert the collected rows into DataFrames.
dish_ingredients_df = pd.DataFrame(ingredient_infos, columns=ingredients_col_names)
dish_info_df = pd.DataFrame(dish_infos, columns=dishes_col_names)

# Make sure the output folders exist; to_csv raises OSError otherwise, which
# breaks a Restart & Run All on a fresh checkout of the dataset.
for out_path in (proc_ingrediants_path, dishes_info_path):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

# Save both tables as CSV files.
dish_ingredients_df.to_csv(proc_ingrediants_path, index=False)
dish_info_df.to_csv(dishes_info_path, index=False)
dish_info_df

not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1562688426
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1563379132
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1550795690
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1550876012
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1551565034
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1550860747
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1566245398
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1563381680
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/realsense_overhead/dish_1550778583
not found ground truth: ../data/nutrition5k_dataset_nosides/imagery/reals

Unnamed: 0,dish_id,calories,mass,fat,carbohydrates,protein
0,dish_1561662216,300.794281,193.000000,12.387489,28.218290,18.633970
1,dish_1561662054,419.438782,292.000000,23.838249,26.351543,25.910593
2,dish_1562008979,382.936646,290.000000,22.224644,10.173570,35.345387
3,dish_1560455030,20.590000,103.000000,0.148000,4.625000,0.956000
4,dish_1558372433,74.360001,143.000000,0.286000,0.429000,20.020000
...,...,...,...,...,...,...
3488,dish_1574455483,0.000000,168.000000,0.000000,0.000000,0.000000
3489,dish_1571934465,0.000000,232.000000,0.000000,0.000000,0.000000
3490,dish_1573073666,0.000000,15.000000,0.000000,0.000000,0.000000
3491,dish_1574359199,0.000000,329.000000,0.000000,0.000000,0.000000
