In [1]:
# =========================================================
# Cell 1: Import libraries and load cholesterol dataset
# =========================================================
import pandas as pd
import numpy as np

# Load file
file_path = "/Users/kshitizsikriwal/Kshitiz/evaluation/Gemini/meal_to_yoga_top1_contrastive_Cholesterol (1).xlsx"
df = pd.read_excel(file_path)

# Keep relevant columns only
cols = [
    "meal_plan", "meal_day", "meal_time", "meal_name", "meal_description",
    "Calories (kcal)", "Zinc (mg)", "Copper (mg)", "Iron (mg)",
    "Chromium (µg)", "Potassium (mg)", "Phosphorus (mg)",
    "Vitamin_B1 (Thiamin, mg)", "Vitamin_B2 (Riboflavin, mg)",
    "Vitamin_B3 (Niacin, mg)", "Vitamin_B6 (mg)",
    "Vitamin_B9 (Folate, µg)", "Vitamin_B12 (µg)",
    "Selenium (µg)", "Magnesium (mg)"
]
df = df[cols]

print("✅ Dataset loaded successfully — shape:", df.shape)
df.head(3)


✅ Dataset loaded successfully — shape: (42, 20)


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,Calories (kcal),Zinc (mg),Copper (mg),Iron (mg),Chromium (µg),Potassium (mg),Phosphorus (mg),"Vitamin_B1 (Thiamin, mg)","Vitamin_B2 (Riboflavin, mg)","Vitamin_B3 (Niacin, mg)",Vitamin_B6 (mg),"Vitamin_B9 (Folate, µg)",Vitamin_B12 (µg),Selenium (µg),Magnesium (mg)
0,Plan A,Monday,Morning,Oatmeal with Walnuts,"Steel-cut oats with walnuts, flaxseed, and ban...",500.0,2.5,0.4,2.5,8.0,900,250,0.3,0.2,2.0,0.4,80.0,0.1,15,110
1,Plan A,Monday,Afternoon,Spinach & Chicken Salad,"Large spinach salad with grilled chicken, chic...",700.0,3.0,0.3,5.5,10.0,1300,450,0.4,0.4,18.0,0.8,150.0,1.2,20,120
2,Plan A,Monday,Evening,Baked Salmon & Sweet Potato,Baked salmon with steamed broccoli and a mediu...,800.0,3.5,0.4,4.0,12.0,1400,500,0.5,0.5,15.0,0.7,100.0,1.3,25,100


In [2]:
# =========================================================
# Cell 2: Standardize column names and meal_day order
# =========================================================
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("(", "")
    .str.replace(")", "")
    .str.replace("+", "plus")
    .str.replace("µ", "u")
    .str.replace("-", "_")
    .str.replace(",", "")
)

weekday_order = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
df["meal_day"] = df["meal_day"].astype(str).str.strip().str.lower()
df["meal_day"] = pd.Categorical(df["meal_day"], categories=weekday_order, ordered=True)

print("✅ Column names cleaned and weekdays standardized:")
print(df.columns.tolist())
df.head(3)


✅ Column names cleaned and weekdays standardized:
['meal_plan', 'meal_day', 'meal_time', 'meal_name', 'meal_description', 'calories_kcal', 'zinc_mg', 'copper_mg', 'iron_mg', 'chromium_ug', 'potassium_mg', 'phosphorus_mg', 'vitamin_b1_thiamin_mg', 'vitamin_b2_riboflavin_mg', 'vitamin_b3_niacin_mg', 'vitamin_b6_mg', 'vitamin_b9_folate_ug', 'vitamin_b12_ug', 'selenium_ug', 'magnesium_mg']


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,zinc_mg,copper_mg,iron_mg,chromium_ug,potassium_mg,phosphorus_mg,vitamin_b1_thiamin_mg,vitamin_b2_riboflavin_mg,vitamin_b3_niacin_mg,vitamin_b6_mg,vitamin_b9_folate_ug,vitamin_b12_ug,selenium_ug,magnesium_mg
0,Plan A,monday,Morning,Oatmeal with Walnuts,"Steel-cut oats with walnuts, flaxseed, and ban...",500.0,2.5,0.4,2.5,8.0,900,250,0.3,0.2,2.0,0.4,80.0,0.1,15,110
1,Plan A,monday,Afternoon,Spinach & Chicken Salad,"Large spinach salad with grilled chicken, chic...",700.0,3.0,0.3,5.5,10.0,1300,450,0.4,0.4,18.0,0.8,150.0,1.2,20,120
2,Plan A,monday,Evening,Baked Salmon & Sweet Potato,Baked salmon with steamed broccoli and a mediu...,800.0,3.5,0.4,4.0,12.0,1400,500,0.5,0.5,15.0,0.7,100.0,1.3,25,100


In [3]:
# =========================================================
# Cell 3: Define nutrient reference ranges (from WHO / NIH / DRI)
# =========================================================
nutrient_ranges = {
    "calories_kcal": (1800, 2200),
    "zinc_mg": (8, 11),
    "copper_mg": (0.9, 1.3),
    "iron_mg": (8, 18),
    "chromium_ug": (25, 35),
    "potassium_mg": (3400, 4700),
    "phosphorus_mg": (700, 1250),
    "vitamin_b1_thiamin_mg": (1.1, 1.2),
    "vitamin_b2_riboflavin_mg": (1.1, 1.3),
    "vitamin_b3_niacin_mg": (14, 35),
    "vitamin_b6_mg": (1.3, 2.0),
    "vitamin_b9_folate_ug": (300, 400),
    "vitamin_b12_ug": (2.4, 4.0),
    "selenium_ug": (55, 70),
    "magnesium_mg": (310, 420)
}

ranges_df = pd.DataFrame([
    {"Nutrient": k, "Min_Recommended": v[0], "Max_Recommended": v[1]}
    for k, v in nutrient_ranges.items()
])

print("✅ Real-world nutrient ranges for cholesterol plan:")
display(ranges_df)


✅ Real-world nutrient ranges for cholesterol plan:


Unnamed: 0,Nutrient,Min_Recommended,Max_Recommended
0,calories_kcal,1800.0,2200.0
1,zinc_mg,8.0,11.0
2,copper_mg,0.9,1.3
3,iron_mg,8.0,18.0
4,chromium_ug,25.0,35.0
5,potassium_mg,3400.0,4700.0
6,phosphorus_mg,700.0,1250.0
7,vitamin_b1_thiamin_mg,1.1,1.2
8,vitamin_b2_riboflavin_mg,1.1,1.3
9,vitamin_b3_niacin_mg,14.0,35.0


In [4]:
# =========================================================
# Cell 4: Map dataset columns to nutrient names manually (explicit match)
# =========================================================
col_map = {
    "calories_kcal": "calories_kcal",
    "zinc_mg": "zinc_mg",
    "copper_mg": "copper_mg",
    "iron_mg": "iron_mg",
    "chromium_ug": "chromium_ug",
    "potassium_mg": "potassium_mg",
    "phosphorus_mg": "phosphorus_mg",
    "vitamin_b1_thiamin_mg": "vitamin_b1_thiamin_mg",
    "vitamin_b2_riboflavin_mg": "vitamin_b2_riboflavin_mg",
    "vitamin_b3_niacin_mg": "vitamin_b3_niacin_mg",
    "vitamin_b6_mg": "vitamin_b6_mg",
    "vitamin_b9_folate_ug": "vitamin_b9_folate_ug",
    "vitamin_b12_ug": "vitamin_b12_ug",
    "selenium_ug": "selenium_ug",
    "magnesium_mg": "magnesium_mg"
}

# Convert to numeric
for col in col_map.values():
    df[col] = pd.to_numeric(df[col], errors="coerce")

print("✅ Nutrient column mapping validated:")
for k, v in col_map.items():
    print(f"{k:25} --> {v}")


✅ Nutrient column mapping validated:
calories_kcal             --> calories_kcal
zinc_mg                   --> zinc_mg
copper_mg                 --> copper_mg
iron_mg                   --> iron_mg
chromium_ug               --> chromium_ug
potassium_mg              --> potassium_mg
phosphorus_mg             --> phosphorus_mg
vitamin_b1_thiamin_mg     --> vitamin_b1_thiamin_mg
vitamin_b2_riboflavin_mg  --> vitamin_b2_riboflavin_mg
vitamin_b3_niacin_mg      --> vitamin_b3_niacin_mg
vitamin_b6_mg             --> vitamin_b6_mg
vitamin_b9_folate_ug      --> vitamin_b9_folate_ug
vitamin_b12_ug            --> vitamin_b12_ug
selenium_ug               --> selenium_ug
magnesium_mg              --> magnesium_mg


In [5]:
# =========================================================
# Cell 5: Aggregate nutrient totals per day and plan
# =========================================================
agg_cols = list(col_map.values())
daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()

print("✅ Aggregated daily nutrient totals:")
display(daily.head())


✅ Aggregated daily nutrient totals:


  daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()


Unnamed: 0,meal_plan,meal_day,calories_kcal,zinc_mg,copper_mg,iron_mg,chromium_ug,potassium_mg,phosphorus_mg,vitamin_b1_thiamin_mg,vitamin_b2_riboflavin_mg,vitamin_b3_niacin_mg,vitamin_b6_mg,vitamin_b9_folate_ug,vitamin_b12_ug,selenium_ug,magnesium_mg
0,Plan A,monday,2000.0,9.0,1.1,12.0,30.0,3600,1200,1.2,1.1,35.0,1.9,330.0,2.6,60,330
1,Plan A,tuesday,2000.0,9.5,1.1,15.0,27.0,3600,1100,1.2,1.1,24.0,1.8,340.0,2.5,60,330
2,Plan A,wednesday,2000.0,8.5,1.1,10.0,25.0,3500,1200,1.2,1.1,25.0,1.7,330.0,2.6,60,330
3,Plan A,thursday,2000.0,10.0,1.1,12.0,28.0,3500,1200,1.2,1.4,33.0,1.8,310.0,4.6,65,320
4,Plan A,friday,1950.0,8.5,1.1,12.5,26.0,3700,1150,1.2,1.1,24.0,1.8,350.0,3.9,60,350


In [6]:
# =========================================================
# Cell 6: Display generated vs recommended nutrient values
# =========================================================
plan_example = daily[daily["meal_plan"].str.contains("a", case=False)].head(3)
compare_table = []

for nutrient, (low, high) in nutrient_ranges.items():
    col = col_map[nutrient]
    for _, r in plan_example.iterrows():
        val = round(float(r[col]), 2)
        compare_table.append({
            "Meal_Plan": r["meal_plan"],
            "Meal_Day": r["meal_day"],
            "Nutrient": nutrient,
            "Generated_Value": val,
            "Target_Range": f"{low} - {high}"
        })

compare_df = pd.DataFrame(compare_table)
display(compare_df)


Unnamed: 0,Meal_Plan,Meal_Day,Nutrient,Generated_Value,Target_Range
0,Plan A,monday,calories_kcal,2000.0,1800 - 2200
1,Plan A,tuesday,calories_kcal,2000.0,1800 - 2200
2,Plan A,wednesday,calories_kcal,2000.0,1800 - 2200
3,Plan A,monday,zinc_mg,9.0,8 - 11
4,Plan A,tuesday,zinc_mg,9.5,8 - 11
5,Plan A,wednesday,zinc_mg,8.5,8 - 11
6,Plan A,monday,copper_mg,1.1,0.9 - 1.3
7,Plan A,tuesday,copper_mg,1.1,0.9 - 1.3
8,Plan A,wednesday,copper_mg,1.1,0.9 - 1.3
9,Plan A,monday,iron_mg,12.0,8 - 18


In [7]:
# =========================================================
# Cell 7: Compute strict and partial TRUE/FALSE for all nutrients
# =========================================================
def evaluate_compliance(value, low, high, tolerance=0.05):
    if pd.isna(value):
        return np.nan, np.nan
    strict = low <= value <= high
    partial = (low * (1 - tolerance)) <= value <= (high * (1 + tolerance))
    return strict, partial

records = []
for _, row in daily.iterrows():
    record = {"meal_plan": row["meal_plan"], "meal_day": row["meal_day"]}
    for nutrient, (low, high) in nutrient_ranges.items():
        col = col_map[nutrient]
        val = row[col]
        strict, partial = evaluate_compliance(val, low, high)
        record[f"{nutrient}_value"] = round(val, 2)
        record[f"{nutrient}_strict"] = strict
        record[f"{nutrient}_partial"] = partial
    records.append(record)

compliance_df = pd.DataFrame(records)
display(compliance_df.head())


Unnamed: 0,meal_plan,meal_day,calories_kcal_value,calories_kcal_strict,calories_kcal_partial,zinc_mg_value,zinc_mg_strict,zinc_mg_partial,copper_mg_value,copper_mg_strict,...,vitamin_b9_folate_ug_partial,vitamin_b12_ug_value,vitamin_b12_ug_strict,vitamin_b12_ug_partial,selenium_ug_value,selenium_ug_strict,selenium_ug_partial,magnesium_mg_value,magnesium_mg_strict,magnesium_mg_partial
0,Plan A,monday,2000.0,True,True,9.0,True,True,1.1,True,...,True,2.6,True,True,60,True,True,330,True,True
1,Plan A,tuesday,2000.0,True,True,9.5,True,True,1.1,True,...,True,2.5,True,True,60,True,True,330,True,True
2,Plan A,wednesday,2000.0,True,True,8.5,True,True,1.1,True,...,True,2.6,True,True,60,True,True,330,True,True
3,Plan A,thursday,2000.0,True,True,10.0,True,True,1.1,True,...,True,4.6,False,False,65,True,True,320,True,True
4,Plan A,friday,1950.0,True,True,8.5,True,True,1.1,True,...,True,3.9,True,True,60,True,True,350,True,True


In [8]:
# =========================================================
# Cell 8: Count nutrients within target range per day
# =========================================================
strict_cols = [c for c in compliance_df.columns if "_strict" in c]
partial_cols = [c for c in compliance_df.columns if "_partial" in c]

compliance_df["strict_count"] = compliance_df[strict_cols].sum(axis=1)
compliance_df["partial_count"] = compliance_df[partial_cols].sum(axis=1)
compliance_df["total_nutrients"] = len(strict_cols)

compliance_df["strict_compliance_%"] = round(
    compliance_df["strict_count"] / compliance_df["total_nutrients"] * 100, 2)
compliance_df["partial_compliance_%"] = round(
    compliance_df["partial_count"] / compliance_df["total_nutrients"] * 100, 2)

print("✅ Nutrient compliance count per plan/day:")
display(compliance_df[
    ["meal_plan", "meal_day", "strict_count", "partial_count",
     "total_nutrients", "strict_compliance_%", "partial_compliance_%"]
])


✅ Nutrient compliance count per plan/day:


Unnamed: 0,meal_plan,meal_day,strict_count,partial_count,total_nutrients,strict_compliance_%,partial_compliance_%
0,Plan A,monday,15,15,15,100.0,100.0
1,Plan A,tuesday,15,15,15,100.0,100.0
2,Plan A,wednesday,15,15,15,100.0,100.0
3,Plan A,thursday,13,13,15,86.67,86.67
4,Plan A,friday,15,15,15,100.0,100.0
5,Plan A,saturday,12,13,15,80.0,86.67
6,Plan A,sunday,4,4,15,26.67,26.67
7,Plan B,monday,5,5,15,33.33,33.33
8,Plan B,tuesday,14,14,15,93.33,93.33
9,Plan B,wednesday,13,13,15,86.67,86.67


In [9]:
# =========================================================
# Cell 9: Compute average compliance across days per plan
# =========================================================
summary = (
    compliance_df.groupby("meal_plan")[["strict_compliance_%", "partial_compliance_%"]]
    .mean()
    .reset_index()
)

summary.rename(columns={
    "strict_compliance_%": "Avg_Strict_Compliance(%)",
    "partial_compliance_%": "Avg_Partial_Compliance(%)"
}, inplace=True)

print("✅ Average compliance summary per plan:")
display(summary)


✅ Average compliance summary per plan:


Unnamed: 0,meal_plan,Avg_Strict_Compliance(%),Avg_Partial_Compliance(%)
0,Plan A,84.762857,85.715714
1,Plan B,81.904286,84.761429


In [10]:
# =========================================================
# Cell 10: Generate summary story for research writing
# =========================================================
stories = []
for _, row in summary.iterrows():
    plan = row["meal_plan"]
    s = row["Avg_Strict_Compliance(%)"]
    p = row["Avg_Partial_Compliance(%)"]

    story = (
        f"For {plan}, mean strict compliance was {s:.1f}% "
        f"and partial compliance {p:.1f}%. "
        f"This indicates that, on average, {s:.1f}% of the 14 evaluated nutrients "
        f"fell strictly within the reference range, "
        f"while {p:.1f}% were within ±5% of their target range."
    )
    stories.append(story)

print("🧾 Narrative Summary for Research Paper:\n")
for s in stories:
    print(s)


🧾 Narrative Summary for Research Paper:

For Plan A, mean strict compliance was 84.8% and partial compliance 85.7%. This indicates that, on average, 84.8% of the 14 evaluated nutrients fell strictly within the reference range, while 85.7% were within ±5% of their target range.
For Plan B, mean strict compliance was 81.9% and partial compliance 84.8%. This indicates that, on average, 81.9% of the 14 evaluated nutrients fell strictly within the reference range, while 84.8% were within ±5% of their target range.
