In [1]:
# =========================================================
# Cell 1: Setup environment and load dataset
# =========================================================
import pandas as pd
import numpy as np
from difflib import get_close_matches

# Location of the meal-plan workbook.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
file_path = "/Users/kshitizsikriwal/Kshitiz/evaluation/GPT/meal_to_yoga_top1_contrastive_Asthma.xlsx"

# Columns kept for the analysis: five meal descriptors followed by the ten
# nutrient measurements. The list order is also the column order of `df`.
cols = [
    "meal_plan",
    "meal_day",
    "meal_time",
    "meal_name",
    "meal_description",
    "Calories (kcal)",
    "Total_Fat (g)",
    "Protein (g)",
    "Dietary_Fiber (g)",
    "Added_Sugar (g)",
    "Vitamin_D (µg)",
    "Magnesium (mg)",
    "Selenium (µg)",
    "Omega-3 (ALA, g)",
    "EPA + DHA (mg)",
]

# Read the sheet and immediately narrow it to the selected columns
# (a column missing from the workbook raises KeyError here).
df = pd.read_excel(file_path)[cols]

print("✅ Dataset loaded successfully — shape:", df.shape)
df.head()


✅ Dataset loaded successfully — shape: (42, 15)


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,Calories (kcal),Total_Fat (g),Protein (g),Dietary_Fiber (g),Added_Sugar (g),Vitamin_D (µg),Magnesium (mg),Selenium (µg),"Omega-3 (ALA, g)",EPA + DHA (mg)
0,Plan A,Monday,Morning,Oatmeal with chia and berries,"Cooked oats with chia seeds and mixed berries,...",608.3,17.6,26.3,14.7,1.4,2.7,101.0,15.3,0.44,148.8
1,Plan A,Monday,Afternoon,Grilled salmon with quinoa and spinach,"Salmon fillet with quinoa, spinach, and a sque...",530.9,18.5,27.3,12.2,2.8,1.7,147.7,23.5,0.37,133.3
2,Plan A,Monday,Evening,Baked cod with roasted vegetables,"Oven-baked cod with roasted carrots, zucchini,...",553.9,22.7,38.5,13.4,1.7,1.8,145.5,21.8,0.36,174.8
3,Plan A,Tuesday,Morning,Oatmeal with chia and berries,"Cooked oats with chia seeds and mixed berries,...",641.9,15.2,24.4,9.2,1.7,3.0,133.4,20.4,0.38,154.8
4,Plan A,Tuesday,Afternoon,Chickpea salad with olive oil dressing,"Chickpeas, cucumbers, tomatoes, and olive oil ...",581.0,15.9,38.6,8.7,0.6,2.1,148.1,16.7,0.31,114.6


In [2]:
# =========================================================
# Cell 2: Standardize column names and day ordering
# =========================================================
# Normalize the column headers: trim, lower-case, then apply the literal
# substitutions below in order. Commas are intentionally left untouched, so
# "Omega-3 (ALA, g)" becomes "omega_3_ala,_g".
#
# regex=False is passed explicitly because "(", ")" and "+" are regex
# metacharacters and the default value of `regex` is not the same across
# pandas versions; every substitution here is meant to be a plain-text one.
literal_replacements = {
    " ": "_",
    "(": "",
    ")": "",
    "+": "plus",
    "µ": "u",
    "-": "_",
}
normalized_columns = df.columns.str.strip().str.lower()
for old, new in literal_replacements.items():
    normalized_columns = normalized_columns.str.replace(old, new, regex=False)
df.columns = normalized_columns

# Make meal_day an ordered categorical so grouping/sorting follows the week
# rather than the alphabet.
weekday_order = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
day_labels = df["meal_day"].str.strip().str.lower()

# Labels that are not in weekday_order turn into NaN inside the Categorical;
# surface them instead of letting those rows go missing without notice.
unknown_days = sorted(set(day_labels.dropna()) - set(weekday_order))
if unknown_days:
    print(f"⚠️ Unrecognized meal_day values will become NaN: {unknown_days}")

df["meal_day"] = pd.Categorical(day_labels, categories=weekday_order, ordered=True)

print("✅ Columns standardized and weekdays ordered.")
df.head(3)


✅ Columns standardized and weekdays ordered.


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,vitamin_d_ug,magnesium_mg,selenium_ug,"omega_3_ala,_g",epa_plus_dha_mg
0,Plan A,monday,Morning,Oatmeal with chia and berries,"Cooked oats with chia seeds and mixed berries,...",608.3,17.6,26.3,14.7,1.4,2.7,101.0,15.3,0.44,148.8
1,Plan A,monday,Afternoon,Grilled salmon with quinoa and spinach,"Salmon fillet with quinoa, spinach, and a sque...",530.9,18.5,27.3,12.2,2.8,1.7,147.7,23.5,0.37,133.3
2,Plan A,monday,Evening,Baked cod with roasted vegetables,"Oven-baked cod with roasted carrots, zucchini,...",553.9,22.7,38.5,13.4,1.7,1.8,145.5,21.8,0.36,174.8


In [4]:
# =========================================================
# Cell 3: Define nutrient target ranges (from WHO / NIH / DRI)
# =========================================================
# Daily target window per nutrient as (minimum, maximum), keyed by the
# nutrient identifier used throughout the rest of the notebook.
nutrient_ranges = {
    "calories_kcal":   (1900, 2100),
    "total_fat_g":     (44, 78),
    "protein_g":       (50, 175),
    "dietary_fiber_g": (26, 30),
    "added_sugar_g":   (0, 50),
    "omega_3_alag":    (1.1, 1.6),
    "epa_plus_dha_mg": (250, 500),
    "selenium_ug":     (52, 58),
    "magnesium_mg":    (310, 420),
    "vitamin_d_ug":    (5, 15),
}

# Tabular view of the same ranges, one row per nutrient, for display only.
ranges_df = pd.DataFrame(
    [(name, lower, upper) for name, (lower, upper) in nutrient_ranges.items()],
    columns=["Nutrient", "Min_Recommended", "Max_Recommended"],
)

print("✅ Real-world nutrient ranges used for compliance:")
display(ranges_df)


✅ Real-world nutrient ranges used for compliance:


Unnamed: 0,Nutrient,Min_Recommended,Max_Recommended
0,calories_kcal,1900.0,2100.0
1,total_fat_g,44.0,78.0
2,protein_g,50.0,175.0
3,dietary_fiber_g,26.0,30.0
4,added_sugar_g,0.0,50.0
5,omega_3_alag,1.1,1.6
6,epa_plus_dha_mg,250.0,500.0
7,selenium_ug,52.0,58.0
8,magnesium_mg,310.0,420.0
9,vitamin_d_ug,5.0,15.0


In [5]:
# =========================================================
# Cell 4: Map dataset columns to nutrient keys dynamically
# =========================================================
# Resolve every nutrient key to exactly one dataset column.
# An identical column name wins outright; otherwise the closest name by
# difflib similarity (ratio >= 0.5) is used, which is how "omega_3_alag"
# finds "omega_3_ala,_g".
available_cols = list(df.columns)
col_map = {}
unmatched_keys = []
for key in nutrient_ranges:
    if key in available_cols:
        col_map[key] = key
        continue
    best = get_close_matches(key, available_cols, n=1, cutoff=0.5)
    if best:
        col_map[key] = best[0]
    else:
        unmatched_keys.append(key)

# Fail here with a readable message: later cells index col_map[nutrient] for
# every key of nutrient_ranges and would otherwise stop with a bare KeyError.
if unmatched_keys:
    raise KeyError(f"No dataset column found for nutrient key(s): {unmatched_keys}")

# Fuzzy matching can send two keys to the same column, which would yield
# duplicate column labels in the aggregation; reject that explicitly.
mapped_cols = list(col_map.values())
shared_cols = sorted({c for c in mapped_cols if mapped_cols.count(c) > 1})
if shared_cols:
    raise ValueError(f"Multiple nutrient keys map to the same column(s): {shared_cols}")

print("✅ Column mapping established:")
for k, v in col_map.items():
    print(f"  {k:20} --> {v}")

# Convert to numeric; unparseable entries become NaN, so report how many
# values were lost per column instead of coercing silently.
for col in col_map.values():
    numeric = pd.to_numeric(df[col], errors="coerce")
    n_coerced = int(numeric.isna().sum() - df[col].isna().sum())
    if n_coerced:
        print(f"⚠️ {n_coerced} non-numeric value(s) in '{col}' were coerced to NaN")
    df[col] = numeric

df.head(3)


✅ Column mapping established:
  calories_kcal        --> calories_kcal
  total_fat_g          --> total_fat_g
  protein_g            --> protein_g
  dietary_fiber_g      --> dietary_fiber_g
  added_sugar_g        --> added_sugar_g
  omega_3_alag         --> omega_3_ala,_g
  epa_plus_dha_mg      --> epa_plus_dha_mg
  selenium_ug          --> selenium_ug
  magnesium_mg         --> magnesium_mg
  vitamin_d_ug         --> vitamin_d_ug


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,vitamin_d_ug,magnesium_mg,selenium_ug,"omega_3_ala,_g",epa_plus_dha_mg
0,Plan A,monday,Morning,Oatmeal with chia and berries,"Cooked oats with chia seeds and mixed berries,...",608.3,17.6,26.3,14.7,1.4,2.7,101.0,15.3,0.44,148.8
1,Plan A,monday,Afternoon,Grilled salmon with quinoa and spinach,"Salmon fillet with quinoa, spinach, and a sque...",530.9,18.5,27.3,12.2,2.8,1.7,147.7,23.5,0.37,133.3
2,Plan A,monday,Evening,Baked cod with roasted vegetables,"Oven-baked cod with roasted carrots, zucchini,...",553.9,22.7,38.5,13.4,1.7,1.8,145.5,21.8,0.36,174.8


In [6]:
# =========================================================
# Cell 5: Aggregate nutrients by meal_plan and meal_day
# =========================================================
# Sum every mapped nutrient column over the meals of each (plan, day) pair.
agg_cols = list(col_map.values())

# meal_day is categorical, so `observed` must be stated explicitly:
#   * it silences the pandas FutureWarning about the changing default, and
#   * observed=True keeps only plan/day pairs that actually have meals, so a
#     day with no rows is not reported as an all-zero (non-compliant) day.
daily = (
    df.groupby(["meal_plan", "meal_day"], as_index=False, observed=True)[agg_cols]
    .sum()
)

print("✅ Aggregated nutrient totals per plan/day:")
display(daily.head())


✅ Aggregated nutrient totals per plan/day:


  daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()


Unnamed: 0,meal_plan,meal_day,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,"omega_3_ala,_g",epa_plus_dha_mg,selenium_ug,magnesium_mg,vitamin_d_ug
0,Plan A,monday,1693.1,58.8,92.1,40.3,5.9,1.17,456.9,60.6,394.2,6.2
1,Plan A,tuesday,1971.8,56.0,100.3,29.8,2.4,1.01,398.9,53.9,412.8,6.8
2,Plan A,wednesday,1842.7,65.9,105.2,34.5,6.7,1.42,270.5,58.9,346.7,7.9
3,Plan A,thursday,1950.2,59.3,87.6,38.3,4.5,1.46,336.3,63.6,389.5,7.3
4,Plan A,friday,2004.8,60.1,96.8,34.2,4.8,1.24,372.4,60.5,349.4,8.8


In [7]:
# =========================================================
# Cell 6: Compare generated values vs real-world target ranges
# =========================================================
# Use the first three aggregated days of Plan A as a worked example.
# A case-insensitive substring test for "a" also matches "Plan B" (the word
# "Plan" itself contains an "a"), so compare the normalized name exactly.
is_plan_a = daily["meal_plan"].str.strip().str.lower().eq("plan a")
plan_example = daily[is_plan_a].head(3)

# One row per (nutrient, example day): the generated daily total next to the
# target window it is judged against. Nutrient-major order is preserved.
compare_table = [
    {
        "Meal_Plan": r["meal_plan"],
        "Meal_Day": r["meal_day"],
        "Nutrient": nutrient,
        "Generated_Value": round(r[col_map[nutrient]], 2),
        "Target_Range": f"{low} - {high}",
    }
    for nutrient, (low, high) in nutrient_ranges.items()
    for _, r in plan_example.iterrows()
]

compare_df = pd.DataFrame(compare_table)
display(compare_df)


Unnamed: 0,Meal_Plan,Meal_Day,Nutrient,Generated_Value,Target_Range
0,Plan A,monday,calories_kcal,1693.1,1900 - 2100
1,Plan A,tuesday,calories_kcal,1971.8,1900 - 2100
2,Plan A,wednesday,calories_kcal,1842.7,1900 - 2100
3,Plan A,monday,total_fat_g,58.8,44 - 78
4,Plan A,tuesday,total_fat_g,56.0,44 - 78
5,Plan A,wednesday,total_fat_g,65.9,44 - 78
6,Plan A,monday,protein_g,92.1,50 - 175
7,Plan A,tuesday,protein_g,100.3,50 - 175
8,Plan A,wednesday,protein_g,105.2,50 - 175
9,Plan A,monday,dietary_fiber_g,40.3,26 - 30


In [8]:
# =========================================================
# Cell 7: Compliance flags for each nutrient
# =========================================================
def evaluate_compliance(value, low, high, tolerance=0.05):
    """Check one nutrient total against its target window.

    Parameters
    ----------
    value : number
        Observed total; a missing value (per ``pd.isna``) short-circuits.
    low, high : number
        Inclusive bounds of the recommended range.
    tolerance : float, default 0.05
        Relative slack: the lower bound is scaled by ``1 - tolerance`` and
        the upper bound by ``1 + tolerance`` for the partial check.

    Returns
    -------
    tuple
        ``(strict, partial)`` — whether ``value`` lies within ``[low, high]``
        and within the tolerance-widened window, respectively.
        ``(np.nan, np.nan)`` when ``value`` is missing.
    """
    if pd.isna(value):
        return np.nan, np.nan

    relaxed_low = low * (1 - tolerance)
    relaxed_high = high * (1 + tolerance)

    inside_strict = (low <= value) and (value <= high)
    inside_relaxed = (relaxed_low <= value) and (value <= relaxed_high)
    return inside_strict, inside_relaxed

def _compliance_record(day_row):
    """Build the flat compliance record for one aggregated plan/day row.

    For every nutrient the record holds, in this order, the rounded total
    (``<nutrient>_value``) and the strict / partial flags returned by
    ``evaluate_compliance`` (``<nutrient>_strict``, ``<nutrient>_partial``).
    """
    entry = {"meal_plan": day_row["meal_plan"], "meal_day": day_row["meal_day"]}
    for name, (lower, upper) in nutrient_ranges.items():
        amount = day_row[col_map[name]]
        is_strict, is_partial = evaluate_compliance(amount, lower, upper)
        entry.update({
            f"{name}_value": round(amount, 2),
            f"{name}_strict": is_strict,
            f"{name}_partial": is_partial,
        })
    return entry


# One record per row of `daily`, in the same order.
records = [_compliance_record(day_row) for _, day_row in daily.iterrows()]

compliance_df = pd.DataFrame(records)
display(compliance_df.head())


Unnamed: 0,meal_plan,meal_day,calories_kcal_value,calories_kcal_strict,calories_kcal_partial,total_fat_g_value,total_fat_g_strict,total_fat_g_partial,protein_g_value,protein_g_strict,...,epa_plus_dha_mg_partial,selenium_ug_value,selenium_ug_strict,selenium_ug_partial,magnesium_mg_value,magnesium_mg_strict,magnesium_mg_partial,vitamin_d_ug_value,vitamin_d_ug_strict,vitamin_d_ug_partial
0,Plan A,monday,1693.1,False,False,58.8,True,True,92.1,True,...,True,60.6,False,True,394.2,True,True,6.2,True,True
1,Plan A,tuesday,1971.8,True,True,56.0,True,True,100.3,True,...,True,53.9,True,True,412.8,True,True,6.8,True,True
2,Plan A,wednesday,1842.7,False,True,65.9,True,True,105.2,True,...,True,58.9,False,True,346.7,True,True,7.9,True,True
3,Plan A,thursday,1950.2,True,True,59.3,True,True,87.6,True,...,True,63.6,False,False,389.5,True,True,7.3,True,True
4,Plan A,friday,2004.8,True,True,60.1,True,True,96.8,True,...,True,60.5,False,True,349.4,True,True,8.8,True,True


In [9]:
# =========================================================
# Cell 8: Count nutrients within range per plan/day
# =========================================================
# Flag columns are recognised by the "_strict" / "_partial" marker in their name.
strict_cols = [name for name in compliance_df.columns if "_strict" in name]
partial_cols = [name for name in compliance_df.columns if "_partial" in name]

# Row-wise number of nutrients flagged True, per flag type.
for kind, flag_cols in (("strict", strict_cols), ("partial", partial_cols)):
    compliance_df[f"{kind}_count"] = compliance_df[flag_cols].sum(axis=1)

# Denominator: how many nutrients were evaluated (same for every row).
compliance_df["total_nutrients"] = len(strict_cols)

# Share of evaluated nutrients that passed, as a percentage with two decimals.
for kind in ("strict", "partial"):
    share = compliance_df[f"{kind}_count"] / compliance_df["total_nutrients"] * 100
    compliance_df[f"{kind}_compliance_%"] = round(share, 2)

report_columns = [
    "meal_plan", "meal_day", "strict_count", "partial_count",
    "total_nutrients", "strict_compliance_%", "partial_compliance_%",
]

print("✅ Nutrient compliance counts per plan/day:")
display(compliance_df[report_columns])


✅ Nutrient compliance counts per plan/day:


Unnamed: 0,meal_plan,meal_day,strict_count,partial_count,total_nutrients,strict_compliance_%,partial_compliance_%
0,Plan A,monday,7,8,10,70.0,80.0
1,Plan A,tuesday,9,9,10,90.0,90.0
2,Plan A,wednesday,7,9,10,70.0,90.0
3,Plan A,thursday,8,8,10,80.0,80.0
4,Plan A,friday,8,9,10,80.0,90.0
5,Plan A,saturday,7,9,10,70.0,90.0
6,Plan A,sunday,7,10,10,70.0,100.0
7,Plan B,monday,7,9,10,70.0,90.0
8,Plan B,tuesday,8,8,10,80.0,80.0
9,Plan B,wednesday,10,10,10,100.0,100.0


In [14]:
# =========================================================
# Cell 9: Average compliance per plan
# =========================================================
# Per-day percentage column -> label used in the per-plan summary table.
avg_labels = {
    "strict_compliance_%": "Avg_Strict_Compliance(%)",
    "partial_compliance_%": "Avg_Partial_Compliance(%)",
}

# Mean of the daily compliance percentages for each meal plan.
summary = (
    compliance_df
    .groupby("meal_plan")[list(avg_labels)]
    .mean()
    .reset_index()
    .rename(columns=avg_labels)
)

print("✅ Average compliance per plan:")
display(summary)


✅ Average compliance per plan:


Unnamed: 0,meal_plan,Avg_Strict_Compliance(%),Avg_Partial_Compliance(%)
0,Plan A,75.714286,88.571429
1,Plan B,81.428571,88.571429


In [11]:
# =========================================================
# Cell 10: Generate narrative story for publication
# =========================================================
# Number of nutrients behind the percentages; taken from nutrient_ranges so
# the sentence stays correct if nutrients are added or removed.
n_nutrients = len(nutrient_ranges)

# One publication-ready sentence pair per meal plan, built from the
# per-plan averages in `summary`.
stories = []
for _, row in summary.iterrows():
    plan = row["meal_plan"]
    s = row["Avg_Strict_Compliance(%)"]
    p = row["Avg_Partial_Compliance(%)"]

    text = (
        f"For {plan}, mean strict compliance was {s:.1f}% and "
        f"partial compliance {p:.1f}%. "
        f"This indicates that on average, {s:.1f}% of the {n_nutrients} evaluated nutrients "
        f"fell strictly within their recommended range, "
        f"and {p:.1f}% were within ±5% tolerance."
    )
    stories.append(text)

print("🧾 Narrative summary for results section:\n")
for story in stories:
    print(story)


🧾 Narrative summary for results section:

For Plan A, mean strict compliance was 75.7% and partial compliance 88.6%. This indicates that on average, 75.7% of the 10 evaluated nutrients fell strictly within their recommended range, and 88.6% were within ±5% tolerance.
For Plan B, mean strict compliance was 81.4% and partial compliance 88.6%. This indicates that on average, 81.4% of the 10 evaluated nutrients fell strictly within their recommended range, and 88.6% were within ±5% tolerance.
