In [1]:
import os
import json
from sklearn.metrics import mean_absolute_error
import scipy.stats as st
import numpy as np

In [2]:
# Step up one directory so the relative paths used below ("data/...",
# "results/...") are resolved from the parent folder.
# NOTE: the move is relative to the current working directory, so re-running
# this cell without restarting the kernel climbs one more level.
os.chdir(os.pardir)

## Importing the metadata

In [3]:
METADATA_PATH = "data/silver/metadata"


def _read_json(path):
    """Parse the file at ``path`` as JSON and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


# Every entry found in METADATA_PATH is parsed as JSON and stored under the
# value of its own "dish_id" field (a later file with the same id overwrites
# an earlier one).
metadata = {}
for entry_name in os.listdir(METADATA_PATH):
    dish_metadata = _read_json(f"{METADATA_PATH}/{entry_name}")
    metadata[dish_metadata["dish_id"]] = dish_metadata

## Importing the results

In [4]:
RESULTS_PATH = "results"

# One sub-directory of RESULTS_PATH is read per model name listed here.
models_names = ["gemini", "gemini_split"]


def _load_model_results(results_dir):
    """Parse every file in ``results_dir`` as JSON, indexed by its "dishId"."""
    by_dish = {}
    for filename in os.listdir(results_dir):
        with open(f"{results_dir}/{filename}") as handle:
            payload = json.load(handle)
        by_dish[payload["dishId"]] = payload
    return by_dish


# Mapping: model name -> {dishId -> decoded result file}.
models_results = {
    name: _load_model_results(f"{RESULTS_PATH}/{name}") for name in models_names
}

## Evaluating results

In [5]:
def confidence_interval(data, confidence=0.9, decimals=1):
    """Return a normal-approximation confidence interval for the mean of ``data``.

    NaN entries are ignored for both the mean and the standard error, so the
    two statistics are always computed over the same set of values.

    Parameters
    ----------
    data : array-like of numbers
        Sample values; may contain NaN.
    confidence : float, default 0.9
        Two-sided confidence level, strictly between 0 and 1.
    decimals : int, default 1
        Number of decimals the bounds are rounded to.

    Returns
    -------
    tuple
        ``(lower_bound, upper_bound)``, each rounded to ``decimals``.
    """
    values = np.asarray(data, dtype=float)

    # nanmean mirrors nan_policy="omit" below; a plain mean would return NaN
    # as soon as one value is missing while the standard error stayed finite.
    mean = np.nanmean(values)
    standard_error = st.sem(values, nan_policy="omit")

    # Two-sided critical value of the standard normal distribution.
    z_score = st.norm.ppf((1 + confidence) / 2)
    margin_of_error = standard_error * z_score

    lower_bound = round(mean - margin_of_error, decimals)
    upper_bound = round(mean + margin_of_error, decimals)

    return lower_bound, upper_bound


In [6]:
# Confidence levels (as fractions of 1) for which intervals are reported.
CONFIDENCE_LEVELS = [
    0.85,
    0.9,
    0.95,
    0.99,
]

In [7]:
# For every model, compare its predictions with the reference metadata and
# print a report: MAE, SEM and confidence intervals of the absolute error for
# carbohydrates and calories, plus the mean processing time.
for model_name, model_results in models_results.items():
    Y_test_carb = []
    Y_pred_carb = []

    Y_test_calories = []
    Y_pred_calories = []

    processing_times = []

    # Only dishes present in both the metadata and this model's results are
    # evaluated; reference and prediction lists stay aligned by position.
    for dish_id, dish_metadata in metadata.items():
        if dish_id not in model_results:
            continue
        result = model_results[dish_id]

        Y_test_carb.append(dish_metadata["total_carb"])
        Y_pred_carb.append(result["totalCarbohydrates"])

        Y_test_calories.append(dish_metadata["total_calories"])
        Y_pred_calories.append(result["totalCalories"])

        processing_times.append(result["elapsedTime"])

    print(f"{model_name.capitalize()}")

    if not processing_times:
        # With no dish in common every metric below is undefined (and
        # mean_absolute_error raises on empty input), so report it and move on.
        print("-> No dishes in common with the metadata; nothing to evaluate")
        print()
        continue

    # Absolute errors per dish: their mean is the MAE, and their standard
    # error / confidence intervals quantify its uncertainty.
    mae_carb = mean_absolute_error(Y_test_carb, Y_pred_carb)
    residuals_carb = np.abs(np.array(Y_pred_carb) - np.array(Y_test_carb))
    sem_carb = st.sem(residuals_carb)

    mae_calories = mean_absolute_error(Y_test_calories, Y_pred_calories)
    residuals_calories = np.abs(np.array(Y_pred_calories) - np.array(Y_test_calories))
    sem_calories = st.sem(residuals_calories)

    avg_time = np.mean(processing_times)

    print(f"-> MAE carbohydrates = {mae_carb:.1f} g (SEM = {sem_carb:.1f} g)")
    for confidence_level in CONFIDENCE_LEVELS:
        interval = confidence_interval(residuals_carb, confidence_level)
        # :.1f keeps the percentage free of float artefacts (e.g. 56.99999999999999)
        print(f"-> CHO error CI {confidence_level * 100:.1f}%: {interval[0]} - {interval[1]} (g)")
    print(f"-> MAE calories = {mae_calories:.1f} kcal (SEM = {sem_calories:.1f} kcal)")
    for confidence_level in CONFIDENCE_LEVELS:
        interval = confidence_interval(residuals_calories, confidence_level)
        print(f"-> Calories error CI {confidence_level * 100:.1f}%: {interval[0]} - {interval[1]} (kcal)")
    print(f"-> Average processing time = {avg_time:.2f} s")
    print()

Gemini
-> MAE carbohydrates = 17.2 g (SEM = 0.8 g)
-> CHO error CI 85.0%: 16.0 - 18.4 (g)
-> CHO error CI 90.0%: 15.8 - 18.5 (g)
-> CHO error CI 95.0%: 15.6 - 18.8 (g)
-> CHO error CI 99.0%: 15.1 - 19.3 (g)
-> MAE calories = 113.6 kcal (SEM = 5.3 kcal)
-> Calories error CI 85.0%: 105.9 - 121.3 (kcal)
-> Calories error CI 90.0%: 104.8 - 122.4 (kcal)
-> Calories error CI 95.0%: 103.1 - 124.1 (kcal)
-> Calories error CI 99.0%: 99.8 - 127.4 (kcal)
-> Average processing time = 2.76 s

Gemini_split
-> MAE carbohydrates = 14.9 g (SEM = 1.2 g)
-> CHO error CI 85.0%: 13.1 - 16.7 (g)
-> CHO error CI 90.0%: 12.9 - 17.0 (g)
-> CHO error CI 95.0%: 12.5 - 17.4 (g)
-> CHO error CI 99.0%: 11.7 - 18.1 (g)
-> MAE calories = 113.5 kcal (SEM = 6.6 kcal)
-> Calories error CI 85.0%: 104.1 - 123.0 (kcal)
-> Calories error CI 90.0%: 102.7 - 124.3 (kcal)
-> Calories error CI 95.0%: 100.7 - 126.4 (kcal)
-> Calories error CI 99.0%: 96.6 - 130.4 (kcal)
-> Average processing time = 3.74 s

