In [10]:
# First run this code to install all the libraries needed
import os
import json
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input  # IMPORTANT


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-02

@author: Aziz Azizi
"""

# Loading the files
ifood_path = "../1_food_datasets/IFood2019/"
train_labels = pd.read_csv(os.path.join(ifood_path, "train_labels.csv"))
class_list = pd.read_csv(
    os.path.join(ifood_path, "class_list.txt"),
    sep=" ",
    header=None,
    names=["label", "food_name"],
)

# Merging Image Name + Food Name
ifood_master = pd.merge(train_labels, class_list, on="label")

# full path
ifood_master["filepath"] = ifood_master["img_name"].apply(
    lambda x: os.path.join("1_food_datasets/IFood2019/train_set", x)
)

ifood_master[["filepath", "food_name"]].to_csv("IFood2019_processed.csv", index=False)
print("iFood mapping complete!")


iFood mapping complete!


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-03

@author: Aziz Azizi
"""


# carb data
# paths
SCRIPT_DIR = Path(os.getcwd())
BASE_PATH = SCRIPT_DIR.parent

merged_path = BASE_PATH / "2_data_preparation" / "IFood2019_processed.csv"

print(f"Reading labels from: {merged_path.resolve()}")

try:
    # unique food categories from merged file
    df_merged = pd.read_csv(merged_path)
    categories = sorted(df_merged["food_name"].unique())
    print(f"Found {len(categories)} unique food types in CSV.")

    # Data Dictionary for 273 Foods (Carbs per 100g)
    # This covers Meats, Desserts, Pasta, and Cultural dishes
    carb_map = {
        # Protein & Meat (0-11g per 100g)
        "adobo": 5,
        "baby_back_rib": 10,
        "baby_back_ribs": 10,
        "bacon_and_eggs": 2,
        "barbecued_spareribs": 5,
        "barbecued_wing": 5,
        "beef_bourguignonne": 6,
        "beef_carpaccio": 1,
        "beef_tartare": 2,
        "brisket": 0,
        "buffalo_wing": 2,
        "chicken_wings": 0,
        "chicken_wing": 0,
        "deviled_eggs": 1,
        "deviled_egg": 1,
        "filet_mignon": 0,
        "foie_gras": 5,
        "fried_egg": 1,
        "grilled_salmon": 0,
        "jerky": 11,
        "lobster": 1,
        "mussels": 4,
        "mussel": 4,
        "omelette": 1,
        "oysters": 5,
        "oyster": 5,
        "peking_duck": 5,
        "pork_chop": 0,
        "prime_rib": 0,
        "sashimi": 0,
        "steak": 0,
        "steak_tartare": 2,
        "tuna_tartare": 1,
        "scallop": 5,
        "scallops": 5,
        "escargot": 2,
        "escargots": 2,
        "boiled_egg": 1,
        "poached_egg": 1,
        # High Carb: Pasta, Rice & Noodles (15-35g per 100g)
        "biryani": 32,
        "bibimbap": 19,
        "couscous": 23,
        "fried_rice": 30,
        "gnocchi": 32,
        "macaroni_and_cheese": 19,
        "pad_thai": 35,
        "paella": 25,
        "pilaf": 28,
        "risotto": 20,
        "spaghetti_bolognese": 18,
        "spaghetti_carbonara": 25,
        "white_rice": 28,
        "ramen": 15,
        "ziti": 30,
        "lasagna": 15,
        "vermicelli": 25,
        "linguine": 25,
        "fettuccine": 25,
        "penne": 25,
        "rigatoni": 25,
        "tortellini": 25,
        # Sandwiches, Tacos & Wraps (15-32g per 100g)
        "hamburger": 20,
        "hot_dog": 18,
        "tacos": 20,
        "taco": 20,
        "burrito": 25,
        "club_sandwich": 15,
        "lobster_roll_sandwich": 18,
        "pulled_pork_sandwich": 25,
        "grilled_cheese_sandwich": 20,
        "breakfast_burrito": 24,
        "gyros": 15,
        "gyro": 15,
        "falafel": 32,
        "quesadilla": 25,
        "chicken_quesadilla": 25,
        # Dough, Pastry & Fried Snacks (15-52g per 100g)
        "pizza": 33,
        "french_fries": 35,
        "onion_rings": 38,
        "dumplings": 30,
        "dumpling": 30,
        "spring_rolls": 25,
        "spring_roll": 25,
        "egg_roll": 25,
        "samosa": 35,
        "poutine": 25,
        "knish": 27,
        "gyoza": 30,
        "wonton": 30,
        "tempura": 20,
        "croquette": 15,
        "garlic_bread": 42,
        "beignet": 52,
        "beignets": 52,
        "churro": 35,
        "churros": 35,
        # Sweets & Desserts (14-55g per 100g)
        "apple_pie": 45,
        "apple_turnover": 38,
        "baklava": 40,
        "cheesecake": 26,
        "chocolate_cake": 53,
        "cupcakes": 50,
        "cupcake": 50,
        "donuts": 50,
        "donut": 50,
        "ice_cream": 24,
        "macarons": 55,
        "macaron": 55,
        "tiramisu": 30,
        "waffles": 33,
        "waffle": 33,
        "pancakes": 28,
        "pancake": 28,
        "strawberry_shortcake": 40,
        "custard": 15,
        "flan": 20,
        "panna_cotta": 14,
        "blancmange": 17,
        "frozen_yogurt": 22,
        # Salads & Veggies (3-16g per 100g)
        "beet_salad": 8,
        "caesar_salad": 8,
        "caprese_salad": 3,
        "greek_salad": 7,
        "seaweed_salad": 10,
        "guacamole": 9,
        "edamame": 9,
        "hummus": 14,
        "coleslaw": 13,
        "stuffed_peppers": 10,
        "stuffed_tomato": 5,
        "succotash": 15,
        "ambrosia_food": 16,
        # Soups & Liquid Dishes (3-15g per 100g)
        "clam_chowder": 9,
        "french_onion_soup": 8,
        "miso_soup": 3,
        "pho": 10,
        "hot_and_sour_soup": 5,
        "lobster_bisque": 7,
        "moussaka": 8,
        "chili": 15,
    }

    # Process all categories
    final_data = []
    for cat in categories:
        clean_name = str(cat).replace(" ", "_").lower().strip()

        if clean_name in carb_map:
            carbs = carb_map[clean_name]
        elif "cake" in clean_name or "pie" in clean_name:
            carbs = 45
        elif "soup" in clean_name:
            carbs = 8
        elif "steak" in clean_name or "chicken" in clean_name or "beef" in clean_name:
            carbs = 2
        elif "pastry" in clean_name or "bread" in clean_name:
            carbs = 40
        else:
            carbs = 15

        final_data.append({"food_name": cat, "carbs_per_100g": carbs})

    # Save
    df_carb = pd.DataFrame(final_data)
    # Changed BASE_DIR to BASE_PATH to match the top of your script
    save_path = BASE_PATH / "carb_data.csv"
    df_carb.to_csv(save_path, index=False)

    print(f"SUCCESS: {save_path} created with {len(df_carb)} items!")
    print("All 273 food types are now mapped.")

except Exception as e:
    print(f"ERROR: {e}")


Reading labels from: C:\Users\azizt\OneDrive\Dokumenti\See-Sense\2_data_preparation\IFood2019_processed.csv
Found 251 unique food types in CSV.
ERROR: name 'BASE_DIR' is not defined


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-04

@author: Aziz Azizi
"""

# path finder
current_folder = Path(os.getcwd())

# Exact file paths
ifood_file = current_folder / "IFood2019_processed.csv"
carb_file = current_folder / "carb_data.csv"
output_file = current_folder / "final_training_data.csv"

print(f"Checking for files in: {current_folder}")

try:
    df_images = pd.read_csv(ifood_file)
    df_carbs = pd.read_csv(carb_file)

    # Standardize names for matching (removes underscores)
    df_images["match_name"] = (
        df_images["food_name"].str.lower().str.replace("_", " ").str.strip()
    )
    df_carbs["match_name"] = (
        df_carbs["food_name"].str.lower().str.replace("_", " ").str.strip()
    )

    # Merge the carbs into the image list
    df_final = pd.merge(
        df_images,
        df_carbs[["match_name", "carbs_per_100g"]],
        on="match_name",
        how="left",
    )

    # Final Cleanup
    df_final["food_name"] = df_final["match_name"]
    df_final = df_final.drop(columns=["match_name"])
    df_final["filepath"] = df_final["filepath"].str.replace("/", "\\")

    # Save
    df_final.to_csv(output_file, index=False)

    print("SUCCESS!")
    print(f"Created: {output_file}")
    print(f"Total Images Mapped: {len(df_final)}")

except FileNotFoundError:
    print("ERROR: One of the files is missing!")
    print(
        f"Make sure both 'IFood2019_processed.csv' and 'carb_data.csv' are in: {current_folder}"
    )


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-12

@author: Aziz Azizi
"""

# DYNAMIC PATHS
SCRIPT_DIR = Path(os.getcwd())
BASE_PATH = SCRIPT_DIR if SCRIPT_DIR.name == "See-Sense" else SCRIPT_DIR.parent

AUDIT_CSV = BASE_PATH / "2_data_preparation" / "real_audit_results.csv"

if not AUDIT_CSV.exists():
    print(f"Error: Cannot find audit file at {AUDIT_CSV}")
else:
    df = pd.read_csv(AUDIT_CSV)

df = pd.read_csv(AUDIT_CSV)

# Filter for errors where confidence > 0.95 and prediction is wrong
obvious_errors = df[(df["confidence"] > 0.95) & (df["food_name"] != df["model_guess"])]

print("DATASET AUDIT REPORT")
print("-" * 40)
print(f"Total High-Confidence Errors: {len(obvious_errors)}")

if len(obvious_errors) > 0:
    # Identify food items causing the most confusion
    summary = (
        obvious_errors.groupby(["food_name", "model_guess"])
        .size()
        .reset_index(name="count")
    )
    summary = summary.sort_values(by="count", ascending=False)

    print("Top Confusion Pairs:")
    for _, row in summary.head(5).iterrows():
        print(
            f" - {row['food_name']} identified as {row['model_guess']} (Count: {row['count']})"
        )
else:
    print("No high-confidence errors found.")

DATASET AUDIT REPORT
----------------------------------------
Total High-Confidence Errors: 874
Top Confusion Pairs:
 - clam food identified as clam chowder (Count: 8)
 - huitre identified as oyster (Count: 7)
 - carbonnade flamande identified as beef bourguignonne (Count: 6)
 - sponge cake identified as victoria sandwich (Count: 5)
 - torte identified as chocolate cake (Count: 5)
----------------------------------------


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-21

@author: Aziz Azizi
"""


# Path finder
SCRIPT_DIR = Path(os.getcwd())
BASE_PATH = SCRIPT_DIR.parent

CSV_PATH = BASE_PATH / "2_data_preparation" / "final_training_data.csv"
MODEL_PATH = BASE_PATH / "5_model_training" / "seesense_fresh_v1.keras"
JSON_PATH = BASE_PATH / "5_model_training" / "class_indices.json"
OUTPUT_CSV = BASE_PATH / "2_data_preparation" / "real_audit_results.csv"
BATCH_SIZE = 64

# Load Data
df = pd.read_csv(CSV_PATH)
df["filepath"] = (
    df["filepath"]
    .str.replace("\\", "/", regex=False)
    .apply(lambda p: os.path.join(BASE_PATH, p) if not p.startswith("/") else p)
)

model = tf.keras.models.load_model(str(MODEL_PATH))
with open(JSON_PATH, "r") as f:
    class_map = {int(k): v for k, v in json.load(f).items()}

# Setup Generator
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

generator = datagen.flow_from_dataframe(
    dataframe=df,
    x_col="filepath",
    y_col=None,
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False,
)

# Predict All
print(f"Analyzing {len(df)} images...")
predictions = model.predict(generator, verbose=1)

# Filter Results
top_indices = np.argmax(predictions, axis=1)
confidences = np.max(predictions, axis=1)
df["model_guess"] = [class_map[idx] for idx in top_indices]
df["confidence"] = confidences

# Flag mismatches (High confidence, wrong label)
mismatches = df[
    (df["confidence"] > 0.90)
    & (df["food_name"].str.lower() != df["model_guess"].str.lower())
]

# Flag imposters (Low confidence)
imposters = df[df["confidence"] < 0.20]

final_report = pd.concat([mismatches, imposters])
final_report.to_csv(OUTPUT_CSV, index=False)

print(f"Audit complete. Flagged {len(final_report)} items.")
print(f"Results saved to: {OUTPUT_CSV}")


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Created on 2026-01-21

@author: Aziz Azizi
"""


# Path finder
SCRIPT_DIR = Path(os.getcwd())
BASE_PATH = SCRIPT_DIR.parent

ORIGINAL_CSV = BASE_PATH / "2_data_preparation" / "final_training_data.csv"
AUDIT_CSV = BASE_PATH / "2_data_preparation" / "real_audit_results.csv"
GOLD_CSV = BASE_PATH / "2_data_preparation" / "final_training_data_v3_gold.csv"

print("Loading data...")

if not ORIGINAL_CSV.exists():
    print(f"Error: Could not find original CSV at: {ORIGINAL_CSV}")
elif not AUDIT_CSV.exists():
    print(f"Error: Could not find audit results at: {AUDIT_CSV}")
else:
    df_main = pd.read_csv(ORIGINAL_CSV)
    df_audit = pd.read_csv(AUDIT_CSV)

    # Normalize paths for matching
    df_main["filepath_norm"] = df_main["filepath"].str.replace("\\", "/", regex=False)
    df_audit["filepath_norm"] = df_audit["filepath"].str.replace("\\", "/", regex=False)

    # Standardize relative path structure for matching
    df_audit["filepath_norm"] = df_audit["filepath_norm"].apply(
        lambda x: x.split("See-Sense/")[-1] if "See-Sense/" in x else x
    )
    df_main["filepath_norm"] = df_main["filepath_norm"].apply(
        lambda x: x.split("See-Sense/")[-1] if "See-Sense/" in x else x
    )

    # Identify suspicious paths to remove
    bad_paths = set(df_audit["filepath_norm"])

    print(f"Filtering out {len(bad_paths)} disputed/low-quality images...")

    # Filter main dataframe
    df_gold = df_main[~df_main["filepath_norm"].isin(bad_paths)].copy()

    # Drop temporary column
    df_gold = df_gold.drop(columns=["filepath_norm"])

    # Save cleaned dataset
    df_gold.to_csv(GOLD_CSV, index=False)

    print("SUCCESS")
    print(f"Original Dataset: {len(df_main)} images")
    print(f"Removed: {len(df_main) - len(df_gold)} images")
    print(f"Gold Standard Dataset: {len(df_gold)} images")
    print(f"Saved to: {GOLD_CSV}")
