In [27]:
# =========================================================
# Cell 1: Import libraries and load cholesterol dataset
# =========================================================
import pandas as pd
import numpy as np

# Load file
file_path = "meal_to_yoga_top1_contrastive_Cholesterol.xlsx"
df = pd.read_excel(file_path)

# Keep relevant columns only
cols = [
    "meal_plan", "meal_day", "meal_time", "meal_name", "meal_description",
    "Calories (kcal)", "Zinc (mg)", "Copper (mg)", "Iron (mg)",
    "Chromium (µg)", "Potassium (mg)", "Phosphorus (mg)",
    "Vitamin_B1 (Thiamin, mg)", "Vitamin_B2 (Riboflavin, mg)",
    "Vitamin_B3 (Niacin, mg)", "Vitamin_B6 (mg)",
    "Vitamin_B9 (Folate, µg)", "Vitamin_B12 (µg)",
    "Selenium (µg)", "Magnesium (mg)"
]
df = df[cols]

print("✅ Dataset loaded successfully — shape:", df.shape)
df.head(3)


✅ Dataset loaded successfully — shape: (42, 20)


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,Calories (kcal),Zinc (mg),Copper (mg),Iron (mg),Chromium (µg),Potassium (mg),Phosphorus (mg),"Vitamin_B1 (Thiamin, mg)","Vitamin_B2 (Riboflavin, mg)","Vitamin_B3 (Niacin, mg)",Vitamin_B6 (mg),"Vitamin_B9 (Folate, µg)",Vitamin_B12 (µg),Selenium (µg),Magnesium (mg)
0,Plan A,Monday,Morning,Spinach-mushroom omelette,Egg omelette with spinach and mushrooms.,664,2.54,0.363,5.079,9.18,1116.27,241.131,0.388,0.365,4.794,0.575,124.452,0.764,19.779,126.709
1,Plan A,Monday,Afternoon,Tofu stir-fry with brown rice,"Tofu and veggies stir-fried, served with brown...",784,3.003,0.426,5.65,10.983,1409.389,312.913,0.432,0.418,5.865,0.663,144.389,0.907,23.126,150.93
2,Plan A,Monday,Evening,Baked cod with sautéed spinach,Baked cod with garlic sautéed spinach.,660,2.519,0.363,4.759,9.821,1166.577,254.892,0.357,0.351,5.196,0.542,126.497,0.735,19.778,121.749


In [28]:
# =========================================================
# Cell 2: Standardize column names and meal_day order
# =========================================================
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("(", "")
    .str.replace(")", "")
    .str.replace("+", "plus")
    .str.replace("µ", "u")
    .str.replace("-", "_")
    .str.replace(",", "")
)

weekday_order = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
df["meal_day"] = df["meal_day"].astype(str).str.strip().str.lower()
df["meal_day"] = pd.Categorical(df["meal_day"], categories=weekday_order, ordered=True)

print("✅ Column names cleaned and weekdays standardized:")
print(df.columns.tolist())
df.head(3)


✅ Column names cleaned and weekdays standardized:
['meal_plan', 'meal_day', 'meal_time', 'meal_name', 'meal_description', 'calories_kcal', 'zinc_mg', 'copper_mg', 'iron_mg', 'chromium_ug', 'potassium_mg', 'phosphorus_mg', 'vitamin_b1_thiamin_mg', 'vitamin_b2_riboflavin_mg', 'vitamin_b3_niacin_mg', 'vitamin_b6_mg', 'vitamin_b9_folate_ug', 'vitamin_b12_ug', 'selenium_ug', 'magnesium_mg']


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,zinc_mg,copper_mg,iron_mg,chromium_ug,potassium_mg,phosphorus_mg,vitamin_b1_thiamin_mg,vitamin_b2_riboflavin_mg,vitamin_b3_niacin_mg,vitamin_b6_mg,vitamin_b9_folate_ug,vitamin_b12_ug,selenium_ug,magnesium_mg
0,Plan A,monday,Morning,Spinach-mushroom omelette,Egg omelette with spinach and mushrooms.,664,2.54,0.363,5.079,9.18,1116.27,241.131,0.388,0.365,4.794,0.575,124.452,0.764,19.779,126.709
1,Plan A,monday,Afternoon,Tofu stir-fry with brown rice,"Tofu and veggies stir-fried, served with brown...",784,3.003,0.426,5.65,10.983,1409.389,312.913,0.432,0.418,5.865,0.663,144.389,0.907,23.126,150.93
2,Plan A,monday,Evening,Baked cod with sautéed spinach,Baked cod with garlic sautéed spinach.,660,2.519,0.363,4.759,9.821,1166.577,254.892,0.357,0.351,5.196,0.542,126.497,0.735,19.778,121.749


In [29]:
# =========================================================
# Cell 3: Define nutrient reference ranges (from WHO / NIH / DRI)
# =========================================================
nutrient_ranges = {
    "calories_kcal": (1800, 2200),
    "zinc_mg": (8, 11),
    "copper_mg": (0.9, 1.3),
    "iron_mg": (8, 18),
    "chromium_ug": (25, 35),
    "potassium_mg": (3400, 4700),
    "phosphorus_mg": (700, 1250),
    "vitamin_b1_thiamin_mg": (1.1, 1.2),
    "vitamin_b2_riboflavin_mg": (1.1, 1.3),
    "vitamin_b3_niacin_mg": (14, 35),
    "vitamin_b6_mg": (1.3, 2.0),
    "vitamin_b9_folate_ug": (300, 400),
    "vitamin_b12_ug": (2.4, 4.0),
    "selenium_ug": (55, 70),
    "magnesium_mg": (310, 420)
}

ranges_df = pd.DataFrame([
    {"Nutrient": k, "Min_Recommended": v[0], "Max_Recommended": v[1]}
    for k, v in nutrient_ranges.items()
])

print("✅ Real-world nutrient ranges for cholesterol plan:")
display(ranges_df)


✅ Real-world nutrient ranges for cholesterol plan:


Unnamed: 0,Nutrient,Min_Recommended,Max_Recommended
0,calories_kcal,1800.0,2200.0
1,zinc_mg,8.0,11.0
2,copper_mg,0.9,1.3
3,iron_mg,8.0,18.0
4,chromium_ug,25.0,35.0
5,potassium_mg,3400.0,4700.0
6,phosphorus_mg,700.0,1250.0
7,vitamin_b1_thiamin_mg,1.1,1.2
8,vitamin_b2_riboflavin_mg,1.1,1.3
9,vitamin_b3_niacin_mg,14.0,35.0


In [30]:
# =========================================================
# Cell 4: Map dataset columns to nutrient names manually (explicit match)
# =========================================================
col_map = {
    "calories_kcal": "calories_kcal",
    "zinc_mg": "zinc_mg",
    "copper_mg": "copper_mg",
    "iron_mg": "iron_mg",
    "chromium_ug": "chromium_ug",
    "potassium_mg": "potassium_mg",
    "phosphorus_mg": "phosphorus_mg",
    "vitamin_b1_thiamin_mg": "vitamin_b1_thiamin_mg",
    "vitamin_b2_riboflavin_mg": "vitamin_b2_riboflavin_mg",
    "vitamin_b3_niacin_mg": "vitamin_b3_niacin_mg",
    "vitamin_b6_mg": "vitamin_b6_mg",
    "vitamin_b9_folate_ug": "vitamin_b9_folate_ug",
    "vitamin_b12_ug": "vitamin_b12_ug",
    "selenium_ug": "selenium_ug",
    "magnesium_mg": "magnesium_mg"
}

# Convert to numeric
for col in col_map.values():
    df[col] = pd.to_numeric(df[col], errors="coerce")

print("✅ Nutrient column mapping validated:")
for k, v in col_map.items():
    print(f"{k:25} --> {v}")


✅ Nutrient column mapping validated:
calories_kcal             --> calories_kcal
zinc_mg                   --> zinc_mg
copper_mg                 --> copper_mg
iron_mg                   --> iron_mg
chromium_ug               --> chromium_ug
potassium_mg              --> potassium_mg
phosphorus_mg             --> phosphorus_mg
vitamin_b1_thiamin_mg     --> vitamin_b1_thiamin_mg
vitamin_b2_riboflavin_mg  --> vitamin_b2_riboflavin_mg
vitamin_b3_niacin_mg      --> vitamin_b3_niacin_mg
vitamin_b6_mg             --> vitamin_b6_mg
vitamin_b9_folate_ug      --> vitamin_b9_folate_ug
vitamin_b12_ug            --> vitamin_b12_ug
selenium_ug               --> selenium_ug
magnesium_mg              --> magnesium_mg


In [31]:
# =========================================================
# Cell 5: Aggregate nutrient totals per day and plan
# =========================================================
agg_cols = list(col_map.values())
daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()

print("✅ Aggregated daily nutrient totals:")
display(daily.head())


✅ Aggregated daily nutrient totals:


  daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()


Unnamed: 0,meal_plan,meal_day,calories_kcal,zinc_mg,copper_mg,iron_mg,chromium_ug,potassium_mg,phosphorus_mg,vitamin_b1_thiamin_mg,vitamin_b2_riboflavin_mg,vitamin_b3_niacin_mg,vitamin_b6_mg,vitamin_b9_folate_ug,vitamin_b12_ug,selenium_ug,magnesium_mg
0,Plan A,monday,2108,8.062,1.152,15.488,29.984,3692.236,808.936,1.177,1.134,15.855,1.78,395.338,2.406,62.683,399.388
1,Plan A,tuesday,2109,8.12,1.072,11.149,31.365,3850.251,723.704,1.188,1.252,32.441,1.591,360.558,3.222,63.968,338.844
2,Plan A,wednesday,2169,10.776,1.126,13.335,25.149,4671.269,1015.166,1.179,1.211,32.424,1.709,370.885,2.638,61.427,386.327
3,Plan A,thursday,2094,9.523,1.158,10.132,33.957,4655.73,874.352,1.186,1.162,14.531,1.333,318.463,2.509,58.861,410.494
4,Plan A,friday,2107,8.963,1.155,8.092,30.435,3839.081,1193.913,1.194,1.176,22.318,1.928,305.882,3.049,56.952,319.567


In [32]:
# =========================================================
# Cell 6: Display generated vs recommended nutrient values
# =========================================================
plan_example = daily[daily["meal_plan"].str.contains("a", case=False)].head(3)
compare_table = []

for nutrient, (low, high) in nutrient_ranges.items():
    col = col_map[nutrient]
    for _, r in plan_example.iterrows():
        val = round(float(r[col]), 2)
        compare_table.append({
            "Meal_Plan": r["meal_plan"],
            "Meal_Day": r["meal_day"],
            "Nutrient": nutrient,
            "Generated_Value": val,
            "Target_Range": f"{low} - {high}"
        })

compare_df = pd.DataFrame(compare_table)
display(compare_df)


Unnamed: 0,Meal_Plan,Meal_Day,Nutrient,Generated_Value,Target_Range
0,Plan A,monday,calories_kcal,2108.0,1800 - 2200
1,Plan A,tuesday,calories_kcal,2109.0,1800 - 2200
2,Plan A,wednesday,calories_kcal,2169.0,1800 - 2200
3,Plan A,monday,zinc_mg,8.06,8 - 11
4,Plan A,tuesday,zinc_mg,8.12,8 - 11
5,Plan A,wednesday,zinc_mg,10.78,8 - 11
6,Plan A,monday,copper_mg,1.15,0.9 - 1.3
7,Plan A,tuesday,copper_mg,1.07,0.9 - 1.3
8,Plan A,wednesday,copper_mg,1.13,0.9 - 1.3
9,Plan A,monday,iron_mg,15.49,8 - 18


In [33]:
# =========================================================
# Cell 7: Compute strict and partial TRUE/FALSE for all nutrients
# =========================================================
def evaluate_compliance(value, low, high, tolerance=0.05):
    if pd.isna(value):
        return np.nan, np.nan
    strict = low <= value <= high
    partial = (low * (1 - tolerance)) <= value <= (high * (1 + tolerance))
    return strict, partial

records = []
for _, row in daily.iterrows():
    record = {"meal_plan": row["meal_plan"], "meal_day": row["meal_day"]}
    for nutrient, (low, high) in nutrient_ranges.items():
        col = col_map[nutrient]
        val = row[col]
        strict, partial = evaluate_compliance(val, low, high)
        record[f"{nutrient}_value"] = round(val, 2)
        record[f"{nutrient}_strict"] = strict
        record[f"{nutrient}_partial"] = partial
    records.append(record)

compliance_df = pd.DataFrame(records)
display(compliance_df.head())


Unnamed: 0,meal_plan,meal_day,calories_kcal_value,calories_kcal_strict,calories_kcal_partial,zinc_mg_value,zinc_mg_strict,zinc_mg_partial,copper_mg_value,copper_mg_strict,...,vitamin_b9_folate_ug_partial,vitamin_b12_ug_value,vitamin_b12_ug_strict,vitamin_b12_ug_partial,selenium_ug_value,selenium_ug_strict,selenium_ug_partial,magnesium_mg_value,magnesium_mg_strict,magnesium_mg_partial
0,Plan A,monday,2108,True,True,8.06,True,True,1.15,True,...,True,2.41,True,True,62.68,True,True,399.39,True,True
1,Plan A,tuesday,2109,True,True,8.12,True,True,1.07,True,...,True,3.22,True,True,63.97,True,True,338.84,True,True
2,Plan A,wednesday,2169,True,True,10.78,True,True,1.13,True,...,True,2.64,True,True,61.43,True,True,386.33,True,True
3,Plan A,thursday,2094,True,True,9.52,True,True,1.16,True,...,True,2.51,True,True,58.86,True,True,410.49,True,True
4,Plan A,friday,2107,True,True,8.96,True,True,1.16,True,...,True,3.05,True,True,56.95,True,True,319.57,True,True


In [34]:
# =========================================================
# Cell 8: Count nutrients within target range per day
# =========================================================
strict_cols = [c for c in compliance_df.columns if "_strict" in c]
partial_cols = [c for c in compliance_df.columns if "_partial" in c]

compliance_df["strict_count"] = compliance_df[strict_cols].sum(axis=1)
compliance_df["partial_count"] = compliance_df[partial_cols].sum(axis=1)
compliance_df["total_nutrients"] = len(strict_cols)

compliance_df["strict_compliance_%"] = round(
    compliance_df["strict_count"] / compliance_df["total_nutrients"] * 100, 2)
compliance_df["partial_compliance_%"] = round(
    compliance_df["partial_count"] / compliance_df["total_nutrients"] * 100, 2)

print("✅ Nutrient compliance count per plan/day:")
display(compliance_df[
    ["meal_plan", "meal_day", "strict_count", "partial_count",
     "total_nutrients", "strict_compliance_%", "partial_compliance_%"]
])


✅ Nutrient compliance count per plan/day:


Unnamed: 0,meal_plan,meal_day,strict_count,partial_count,total_nutrients,strict_compliance_%,partial_compliance_%
0,Plan A,monday,15,15,15,100.0,100.0
1,Plan A,tuesday,15,15,15,100.0,100.0
2,Plan A,wednesday,15,15,15,100.0,100.0
3,Plan A,thursday,15,15,15,100.0,100.0
4,Plan A,friday,15,15,15,100.0,100.0
5,Plan A,saturday,15,15,15,100.0,100.0
6,Plan A,sunday,15,15,15,100.0,100.0
7,Plan B,monday,15,15,15,100.0,100.0
8,Plan B,tuesday,15,15,15,100.0,100.0
9,Plan B,wednesday,15,15,15,100.0,100.0


In [35]:
# =========================================================
# Cell 9: Compute average compliance across days per plan
# =========================================================
summary = (
    compliance_df.groupby("meal_plan")[["strict_compliance_%", "partial_compliance_%"]]
    .mean()
    .reset_index()
)

summary.rename(columns={
    "strict_compliance_%": "Avg_Strict_Compliance(%)",
    "partial_compliance_%": "Avg_Partial_Compliance(%)"
}, inplace=True)

print("✅ Average compliance summary per plan:")
display(summary)


✅ Average compliance summary per plan:


Unnamed: 0,meal_plan,Avg_Strict_Compliance(%),Avg_Partial_Compliance(%)
0,Plan A,100.0,100.0
1,Plan B,100.0,100.0


In [36]:
# =========================================================
# Cell 10: Generate summary story for research writing
# =========================================================
stories = []
for _, row in summary.iterrows():
    plan = row["meal_plan"]
    s = row["Avg_Strict_Compliance(%)"]
    p = row["Avg_Partial_Compliance(%)"]

    story = (
        f"For {plan}, mean strict compliance was {s:.1f}% "
        f"and partial compliance {p:.1f}%. "
        f"This indicates that, on average, {s:.1f}% of the 14 evaluated nutrients "
        f"fell strictly within the reference range, "
        f"while {p:.1f}% were within ±5% of their target range."
    )
    stories.append(story)

print("🧾 Narrative Summary for Research Paper:\n")
for s in stories:
    print(s)


🧾 Narrative Summary for Research Paper:

For Plan A, mean strict compliance was 100.0% and partial compliance 100.0%. This indicates that, on average, 100.0% of the 14 evaluated nutrients fell strictly within the reference range, while 100.0% were within ±5% of their target range.
For Plan B, mean strict compliance was 100.0% and partial compliance 100.0%. This indicates that, on average, 100.0% of the 14 evaluated nutrients fell strictly within the reference range, while 100.0% were within ±5% of their target range.
