In [31]:
# =========================================================
# Cell 1: Setup environment and load dataset
# =========================================================
import pandas as pd
import numpy as np
from difflib import get_close_matches

# Load file
file_path = "meal_to_yoga_top1_contrastive_Asthma.xlsx"
df = pd.read_excel(file_path)

# Select relevant columns
cols = [
    "meal_plan", "meal_day", "meal_time", "meal_name", "meal_description",
    "Calories (kcal)", "Total_Fat (g)", "Protein (g)",
    "Dietary_Fiber (g)", "Added_Sugar (g)", "Vitamin_D (µg)",
    "Magnesium (mg)", "Selenium (µg)", "Omega-3 (ALA, g)", "EPA + DHA (mg)"
]
df = df[cols]

print("✅ Dataset loaded successfully — shape:", df.shape)
df.head()


✅ Dataset loaded successfully — shape: (42, 15)


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,Calories (kcal),Total_Fat (g),Protein (g),Dietary_Fiber (g),Added_Sugar (g),Vitamin_D (µg),Magnesium (mg),Selenium (µg),"Omega-3 (ALA, g)",EPA + DHA (mg)
0,Plan A,Monday,Morning,Oatmeal & Berries,"Steel-cut oats with almonds, flaxseed, and mix...",450,15,15,10,5,0.0,100,10,0.8,0
1,Plan A,Monday,Afternoon,Chicken & Black Bean Salad,Grilled chicken and black bean salad with spin...,700,25,45,10,5,1.0,130,20,0.3,0
2,Plan A,Monday,Evening,Grilled Salmon & Quinoa,"Grilled salmon with lemon-dill, quinoa, and ro...",850,20,40,10,5,9.0,120,30,0.2,300
3,Plan A,Tuesday,Morning,Egg & Avocado Toast,"Scrambled eggs with spinach, served with avoca...",500,28,25,10,2,2.0,90,20,0.3,50
4,Plan A,Tuesday,Afternoon,Turkey & Bell Pepper Stir-fry,Lean turkey stir-fry with red and yellow bell ...,750,20,40,9,6,1.0,120,20,0.4,100


In [32]:
# =========================================================
# Cell 2: Standardize column names and day ordering
# =========================================================
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("(", "")
    .str.replace(")", "")
    .str.replace("+", "plus")
    .str.replace("µ", "u")
    .str.replace("-", "_")
)

weekday_order = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
df["meal_day"] = df["meal_day"].str.strip().str.lower()
df["meal_day"] = pd.Categorical(df["meal_day"], categories=weekday_order, ordered=True)

print("✅ Columns standardized and weekdays ordered.")
df.head(3)


✅ Columns standardized and weekdays ordered.


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,vitamin_d_ug,magnesium_mg,selenium_ug,"omega_3_ala,_g",epa_plus_dha_mg
0,Plan A,monday,Morning,Oatmeal & Berries,"Steel-cut oats with almonds, flaxseed, and mix...",450,15,15,10,5,0.0,100,10,0.8,0
1,Plan A,monday,Afternoon,Chicken & Black Bean Salad,Grilled chicken and black bean salad with spin...,700,25,45,10,5,1.0,130,20,0.3,0
2,Plan A,monday,Evening,Grilled Salmon & Quinoa,"Grilled salmon with lemon-dill, quinoa, and ro...",850,20,40,10,5,9.0,120,30,0.2,300


In [33]:
# =========================================================
# Cell 3: Define nutrient target ranges (from WHO / NIH / DRI)
# =========================================================
nutrient_ranges = {
    "calories_kcal": (1900, 2100),
    "total_fat_g": (44, 78),
    "protein_g": (50, 175),
    "dietary_fiber_g": (26, 30),
    "added_sugar_g": (0, 50),
    "omega_3_alag": (1.1, 1.6),
    "epa_plus_dha_mg": (250, 500),
    "selenium_ug": (52, 58),
    "magnesium_mg": (310, 420),
    "vitamin_d_ug": (5, 15)
}

ranges_df = pd.DataFrame([
    {"Nutrient": k, "Min_Recommended": v[0], "Max_Recommended": v[1]}
    for k, v in nutrient_ranges.items()
])

print("✅ Real-world nutrient ranges used for compliance:")
display(ranges_df)


✅ Real-world nutrient ranges used for compliance:


Unnamed: 0,Nutrient,Min_Recommended,Max_Recommended
0,calories_kcal,1900.0,2100.0
1,total_fat_g,44.0,78.0
2,protein_g,50.0,175.0
3,dietary_fiber_g,26.0,30.0
4,added_sugar_g,0.0,50.0
5,omega_3_alag,1.1,1.6
6,epa_plus_dha_mg,250.0,500.0
7,selenium_ug,52.0,58.0
8,magnesium_mg,310.0,420.0
9,vitamin_d_ug,5.0,15.0


In [34]:
# =========================================================
# Cell 4: Map dataset columns to nutrient keys dynamically
# =========================================================
col_map = {}
for key in nutrient_ranges.keys():
    best = get_close_matches(key, df.columns, n=1, cutoff=0.5)
    if best:
        col_map[key] = best[0]

print("✅ Column mapping established:")
for k, v in col_map.items():
    print(f"  {k:20} --> {v}")

# Convert to numeric
for col in col_map.values():
    df[col] = pd.to_numeric(df[col], errors="coerce")

df.head(3)


✅ Column mapping established:
  calories_kcal        --> calories_kcal
  total_fat_g          --> total_fat_g
  protein_g            --> protein_g
  dietary_fiber_g      --> dietary_fiber_g
  added_sugar_g        --> added_sugar_g
  omega_3_alag         --> omega_3_ala,_g
  epa_plus_dha_mg      --> epa_plus_dha_mg
  selenium_ug          --> selenium_ug
  magnesium_mg         --> magnesium_mg
  vitamin_d_ug         --> vitamin_d_ug


Unnamed: 0,meal_plan,meal_day,meal_time,meal_name,meal_description,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,vitamin_d_ug,magnesium_mg,selenium_ug,"omega_3_ala,_g",epa_plus_dha_mg
0,Plan A,monday,Morning,Oatmeal & Berries,"Steel-cut oats with almonds, flaxseed, and mix...",450,15,15,10,5,0.0,100,10,0.8,0
1,Plan A,monday,Afternoon,Chicken & Black Bean Salad,Grilled chicken and black bean salad with spin...,700,25,45,10,5,1.0,130,20,0.3,0
2,Plan A,monday,Evening,Grilled Salmon & Quinoa,"Grilled salmon with lemon-dill, quinoa, and ro...",850,20,40,10,5,9.0,120,30,0.2,300


In [35]:
# =========================================================
# Cell 5: Aggregate nutrients by meal_plan and meal_day
# =========================================================
agg_cols = list(col_map.values())
daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()

print("✅ Aggregated nutrient totals per plan/day:")
display(daily.head())


✅ Aggregated nutrient totals per plan/day:


  daily = df.groupby(["meal_plan", "meal_day"], as_index=False)[agg_cols].sum()


Unnamed: 0,meal_plan,meal_day,calories_kcal,total_fat_g,protein_g,dietary_fiber_g,added_sugar_g,"omega_3_ala,_g",epa_plus_dha_mg,selenium_ug,magnesium_mg,vitamin_d_ug
0,Plan A,monday,2000,60,100,30,15,1.3,300,60,350,10.0
1,Plan A,tuesday,2000,68,110,29,12,1.3,300,60,320,5.0
2,Plan A,wednesday,1950,75,100,30,18,1.4,0,55,340,7.0
3,Plan A,thursday,1950,65,100,31,11,1.3,260,65,350,10.0
4,Plan A,friday,1900,60,115,30,17,1.3,300,55,330,6.0


In [37]:
# =========================================================
# Cell 6: Compare generated values vs real-world target ranges
# =========================================================
plan_example = daily[daily["meal_plan"].str.contains("", case=False)].head(3)
compare_table = []

for nutrient, (low, high) in nutrient_ranges.items():
    col = col_map[nutrient]
    for _, r in plan_example.iterrows():
        compare_table.append({
            "Meal_Plan": r["meal_plan"],
            "Meal_Day": r["meal_day"],
            "Nutrient": nutrient,
            "Generated_Value": round(r[col], 2),
            "Target_Range": f"{low} - {high}"
        })

compare_df = pd.DataFrame(compare_table)
display(compare_df)


Unnamed: 0,Meal_Plan,Meal_Day,Nutrient,Generated_Value,Target_Range
0,Plan A,monday,calories_kcal,2000.0,1900 - 2100
1,Plan A,tuesday,calories_kcal,2000.0,1900 - 2100
2,Plan A,wednesday,calories_kcal,1950.0,1900 - 2100
3,Plan A,monday,total_fat_g,60.0,44 - 78
4,Plan A,tuesday,total_fat_g,68.0,44 - 78
5,Plan A,wednesday,total_fat_g,75.0,44 - 78
6,Plan A,monday,protein_g,100.0,50 - 175
7,Plan A,tuesday,protein_g,110.0,50 - 175
8,Plan A,wednesday,protein_g,100.0,50 - 175
9,Plan A,monday,dietary_fiber_g,30.0,26 - 30
