In [1]:
import joblib
import pandas as pd
import numpy as np

In [2]:
def bmi_class(weight,height):
    bmi = weight / ((height/100) ** 2)
    if bmi<18.5:
        bmi_class="Underweight"
    elif 18.5<=bmi<=24.9:
        bmi_class="Normal weight"
    elif 25<=bmi<=29.9:
        bmi_class="Overweight"
    elif bmi>=30:
        bmi_class="Obese"
    return bmi_class

In [3]:
# Remove non‑meal / junk foods – expanded blacklist
def remove_non_meal_foods(df):
    blacklist = [
        "salt", "pepper", "vinegar", "mustard", "spice", "seasoning", "extract",
        "essence", "vegetable oil", "coffee", "flavor", "stock", "sauce base",
        "burger", "pizza", "fries", "fried", "cake", "pastry", "chips", "cola",
        "soda", "chocolate", "ice cream", "candy", "biscuit", "donut", "noodle",
        "noodles", "instant noodles", "sausage", "salami", "nachos", "brownie",
        "milkshake", "muffin", "white bread", "cream", "mayonnaise", "sweet drink",
        "packaged juice", "cheese noodle ring", "bread sauce", "spinach burfi",
        "palak burfi", "sweet rice", "meethe chawal", "fruit punch","chutney","Fish in coconut milk (Nariyal ke doodh ke saath machli)"
    ]
    pattern = r'\b(?:' + '|'.join(blacklist) + r')\b'
    # Also remove very low‑calorie items (mostly drinks)
    df = df[~df["food"].str.lower().str.contains(pattern, na=False, regex=True)]
    df = df[df["Caloric Value"] > 20]
    return df


In [4]:
def classify_food(name):
    name = str(name).lower()
    # Fats / oils (should be limited)
    if any(x in name for x in ["oil", "butter", "ghee", "margarine"]):
        return "fat"
    # Lean proteins (prioritise for weight loss)
    if any(x in name for x in ["chicken breast", "fish", "egg white", "tofu", "paneer"]):
        return "protein_lean"
    # Fatty proteins (use in moderation)
    if any(x in name for x in ["chicken", "egg", "meat", "keema", "mutton"]):
        return "protein_fatty"
    # Complex carbohydrates (preferred)
    if any(x in name for x in ["brown rice", "roti", "chapati", "oat", "quinoa", "poha", "dalia", "whole wheat"]):
        return "carb_complex"
    # Simple carbohydrates (limit)
    if any(x in name for x in ["white rice", "bread", "pasta", "noodle", "maida", "sugar", "sweet"]):
        return "carb_simple"
    # Fruits
    if any(x in name for x in ["apple", "banana", "orange", "fruit", "berry", "mango", "grapes"]):
        return "fruit"
    # Vegetables
    if any(x in name for x in ["spinach", "broccoli", "carrot", "beans", "veg", "soup", "raita", "salad", "bhaji"]):
        return "vegetable"
    # Everything else (mixed dishes, etc.)
    return "other"



In [5]:
# Health score now depends on the user's goal (will be passed later)
def compute_health_score(row, goal="weight_loss"):
    protein = row.get("Protein", 0)
    fat = row.get("Fat", 0)
    sugar = row.get("Sugars", 0)
    fiber = row.get("Fiber", 0)
    
    if goal == "weight_loss":
        # Heavily penalise fat and sugar, reward protein and fiber
        return protein * 3 - fat * 2.5 - sugar * 1.5 + fiber * 1
    elif goal == "muscle_gain":
        # Reward protein, allow moderate fat, penalise excess sugar
        return protein * 4 - fat * 1.2 - sugar * 1 + fiber * 0.5
    else:  # maintenance
        return protein * 2 - fat * 1.5 - sugar * 1 + fiber * 0.8

# We'll compute a temporary health score for sorting later.
# The final health score will be recomputed inside diet_planner with the correct goal.
# For now, we use a default (maintenance) to have a column.


In [6]:
def calorie_split(total):
    """Breakfast 20%, Lunch 35%, Dinner 30%, Snacks 15%"""
    return {
        "breakfast": total * 0.20,
        "lunch":     total * 0.35,
        "dinner":    total * 0.30,
        "snacks":    total * 0.15
    }

def adjust_calories(base, goal):
    if goal == "weight_loss":
        return base * 0.8
    elif goal == "muscle_gain":
        return base * 1.15
    return base

In [7]:
def calculate_tdee(age, gender, weight_kg, height_cm, activity_level):
    """
    Mifflin-St Jeor equation.
    activity_level: 'sedentary', 'light', 'moderate', 'active', 'very_active'
    """
    if gender.lower() == 'male':
        bmr = 10 * weight_kg + 6.25 * height_cm - 5 * age + 5
    else:
        bmr = 10 * weight_kg + 6.25 * height_cm - 5 * age - 161

    multipliers = {
        'sedentary': 1.2,
        'light': 1.375,
        'moderate': 1.55,
        'active': 1.725,
        'very_active': 1.9
    }
    return bmr * multipliers.get(activity_level, 1.2)

In [8]:
def apply_diet_preference(df, preference):
    if preference.lower() == "vegetarian":
        nonveg = ["chicken", "fish", "egg", "meat", "beef", "pork"]
        pattern = '|'.join(nonveg)
        return df[~df["food"].str.contains(pattern, case=False, na=False)]
    return df

def medical_filter(df, conditions):
    if isinstance(conditions, str):
        conditions = [conditions]
    conditions = [c.lower() for c in conditions]
    df = df.copy()
    # Ensure all nutrient columns exist
    for col in ["Sugars", "Fat", "Carbohydrates", "Protein", "Fiber"]:
        if col not in df.columns:
            df[col] = 0
    if "diabetes" in conditions:
        df = df[(df["Sugars"] < 8) & (df["Carbohydrates"] < 35)]  # stricter limits
    if "heart" in conditions:
        df = df[df["Fat"] < 8]  # lower fat
    if "cholesterol" in conditions:
        df = df[df["Fat"] < 6]
    if "kidney" in conditions:
        df = df[df["Protein"] < 15]
    return df

def filter_diet(df, mode):
    # mode can be 'diabetic', 'weight_loss', 'high_protein', or 'normal'
    if mode == "diabetic":
        return df[df["Sugars"] < 8]
    elif mode == "high_protein":
        return df[df["Protein"] > 15]
    return df

In [9]:
# Allowed categories per meal (using the detailed categories from classify_food)
meal_allowed_categories = {
    "breakfast": ["protein_lean", "protein_fatty", "carb_complex", "carb_simple", "fruit", "vegetable"],
    "lunch":     ["protein_lean", "protein_fatty", "carb_complex", "carb_simple", "vegetable", "fruit"],
    "dinner":    ["protein_lean", "protein_fatty", "carb_complex", "carb_simple", "vegetable", "fruit"],
    "snacks":    ["fruit", "vegetable", "protein_lean", "carb_complex"]
}

def build_meal(df, target_cal, used_foods, meal_name, fat_cap_per_meal=None, goal="weight_loss"):
    """
    Builds one meal, aiming to hit the target calories while respecting the fat cap.
    Returns a DataFrame of selected foods.
    """
    meal_items = []
    total_cal = 0.0
    total_fat = 0.0
    total_protein = 0.0
    total_carbs = 0.0

    available = df[~df["food"].isin(used_foods)].copy()
    if available.empty:
        return pd.DataFrame()

    # Soft macro targets (used only for guidance, not hard limits)
    if goal == "weight_loss":
        protein_target = target_cal * 0.30 / 4
        fat_target     = target_cal * 0.25 / 9
        carb_target    = target_cal * 0.45 / 4
    elif goal == "muscle_gain":
        protein_target = target_cal * 0.35 / 4
        fat_target     = target_cal * 0.30 / 9
        carb_target    = target_cal * 0.35 / 4
    else:  # maintenance
        protein_target = target_cal * 0.20 / 4
        fat_target     = target_cal * 0.30 / 9
        carb_target    = target_cal * 0.50 / 4

    # Helper to check only the fat cap (strict)
    def would_exceed_fat_cap(food):
        if fat_cap_per_meal and (total_fat + food["Fat"] > fat_cap_per_meal):
            return True
        return False

    # ---- Category order based on meal type ----
    if meal_name == "breakfast":
        # Breakfast: carbs first, then fruits, then proteins
        category_order = ["carb_complex", "carb_simple", "fruit", "protein_lean", "protein_fatty", "vegetable", "other"]
    elif meal_name in ["lunch", "dinner"]:
        # Lunch/dinner: protein first, then carbs, then vegetables, then fruits
        category_order = ["protein_lean", "protein_fatty", "carb_complex", "carb_simple", "vegetable", "fruit", "other"]
    else:  # snacks
        category_order = ["fruit", "vegetable", "protein_lean", "carb_complex", "carb_simple", "other"]

    # ---- Phase 1: Pick one item from each priority category until we have a base ----
    for cat in category_order:
        if cat not in meal_allowed_categories[meal_name]:
            continue
        cat_df = available[available["category"] == cat]
        if cat_df.empty:
            continue

        # For the first item in a category, we want something reasonably sized
        # but not too huge (to leave room for variety)
        if cat.startswith("protein"):
            # Aim for ~30-40% of meal calories from protein
            ideal_cal = target_cal * 0.35
            cat_df = cat_df[(cat_df["Caloric Value"] > 50) & (cat_df["Caloric Value"] < 400)]
        elif cat.startswith("carb"):
            ideal_cal = target_cal * 0.4
            cat_df = cat_df[cat_df["Caloric Value"] < 400]
        elif cat == "vegetable":
            ideal_cal = target_cal * 0.15
            cat_df = cat_df[cat_df["Caloric Value"] < 200]
        elif cat == "fruit":
            ideal_cal = target_cal * 0.15
            cat_df = cat_df[cat_df["Caloric Value"] < 200]
        else:
            ideal_cal = target_cal * 0.2
            cat_df = cat_df[cat_df["Caloric Value"] < 300]

        if cat_df.empty:
            continue

        # Score: combination of closeness to ideal calories and health score
        cat_df = cat_df.copy()
        cat_df["cal_diff"] = abs(cat_df["Caloric Value"] - ideal_cal)
        # Normalise health score to 0-1 range (approx)
        max_hs = cat_df["health_score"].max()
        min_hs = cat_df["health_score"].min()
        if max_hs > min_hs:
            cat_df["hs_norm"] = (cat_df["health_score"] - min_hs) / (max_hs - min_hs)
        else:
            cat_df["hs_norm"] = 0.5
        # Combine (lower cal_diff is better, higher hs_norm is better)
        # We'll use a weighted rank: 70% weight on cal_diff, 30% on health score
        cat_df["combined_score"] = 0.7 * (cat_df["cal_diff"] / cat_df["cal_diff"].max()) - 0.3 * cat_df["hs_norm"]
        best = cat_df.nsmallest(3, "combined_score").sample(1).iloc[0]

        # Check fat cap
        if would_exceed_fat_cap(best):
            continue

        # Add the food
        meal_items.append(best)
        total_cal += best["Caloric Value"]
        total_fat += best["Fat"]
        total_protein += best["Protein"]
        total_carbs += best["Carbohydrates"]
        used_foods.add(best["food"])
        available = available[available["food"] != best["food"]]

        # If we already have a good base, we can break out of the priority loop
        # But we want at least 2 items for breakfast, 3 for lunch/dinner
        min_items = 2 if meal_name == "breakfast" else 3 if meal_name in ["lunch","dinner"] else 2
        if len(meal_items) >= min_items and total_cal >= target_cal * 0.5:
            break

    # ---- Phase 2: Ensure lunch/dinner have at least one protein and one carb ----
    if meal_name in ["lunch", "dinner"]:
        has_protein = any("protein" in item["category"] for item in meal_items)
        has_carb = any("carb" in item["category"] for item in meal_items)
        if not has_protein:
            # Force add a protein from remaining available
            protein_df = available[available["category"].str.contains("protein", na=False)]
            if not protein_df.empty:
                # Pick the one with highest health score
                best_protein = protein_df.nlargest(1, "health_score").iloc[0]
                if not would_exceed_fat_cap(best_protein):
                    meal_items.append(best_protein)
                    total_cal += best_protein["Caloric Value"]
                    total_fat += best_protein["Fat"]
                    total_protein += best_protein["Protein"]
                    total_carbs += best_protein["Carbohydrates"]
                    used_foods.add(best_protein["food"])
                    available = available[available["food"] != best_protein["food"]]
        if not has_carb:
            carb_df = available[available["category"].str.contains("carb", na=False)]
            if not carb_df.empty:
                best_carb = carb_df.nlargest(1, "health_score").iloc[0]
                if not would_exceed_fat_cap(best_carb):
                    meal_items.append(best_carb)
                    total_cal += best_carb["Caloric Value"]
                    total_fat += best_carb["Fat"]
                    total_protein += best_carb["Protein"]
                    total_carbs += best_carb["Carbohydrates"]
                    used_foods.add(best_carb["food"])
                    available = available[available["food"] != best_carb["food"]]

    # ---- Phase 3: Fill remaining calories aggressively ----
    # We'll consider all remaining foods (except those already used), sorted by health score.
    # But we'll also prefer foods that are not too large to allow variety.
    remaining_needed = target_cal - total_cal
    filler_candidates = available[~available["food"].isin(used_foods)].copy()
    # Exclude very large items (> 60% of remaining target) to avoid overshooting too much
    filler_candidates = filler_candidates[filler_candidates["Caloric Value"] <= remaining_needed * 1.2]
    # Sort by health score descending
    filler_candidates = filler_candidates.sort_values("health_score", ascending=False)

    for _, row in filler_candidates.iterrows():
        if total_cal >= target_cal * 0.95:
            break
        if row["food"] in used_foods:
            continue
        if would_exceed_fat_cap(row):
            continue
        # Add the food
        meal_items.append(row)
        total_cal += row["Caloric Value"]
        total_fat += row["Fat"]
        total_protein += row["Protein"]
        total_carbs += row["Carbohydrates"]
        used_foods.add(row["food"])

    # ---- Final validation ----
    if meal_name in ["lunch", "dinner"] and not any("protein" in item["category"] for item in meal_items):
        return pd.DataFrame()
    if total_cal < target_cal * 0.4:
        return pd.DataFrame()

    return pd.DataFrame(meal_items)

In [10]:
def calculate_daily_totals(plan):
    total_cal = 0.0
    total_prot = 0.0
    total_carb = 0.0
    total_fat = 0.0

    for meal, df_meal in plan.items():
        if df_meal.empty:
            continue
        total_cal += df_meal["Caloric Value"].sum()
        total_prot += df_meal["Protein"].sum()
        total_carb += df_meal["Carbohydrates"].sum()
        total_fat += df_meal["Fat"].sum()

    prot_cal = total_prot * 4
    carb_cal = total_carb * 4
    fat_cal = total_fat * 9
    total_cal_derived = prot_cal + carb_cal + fat_cal

    if total_cal_derived > 0:
        prot_pct = (prot_cal / total_cal_derived) * 100
        carb_pct = (carb_cal / total_cal_derived) * 100
        fat_pct = (fat_cal / total_cal_derived) * 100
    else:
        prot_pct = carb_pct = fat_pct = 0.0

    return {
        "calories": total_cal,
        "protein": total_prot,
        "carbs": total_carb,
        "fat": total_fat,
        "protein_pct": prot_pct,
        "carbs_pct": carb_pct,
        "fat_pct": fat_pct
    }

In [11]:
def diet_planner(df,daily_cal,activity_level=None,
                 mode="normal", conditions=None,
                 goal="maintenance",preference=None):
    """
    Generate a daily meal plan with improved macro tracking and goal‑aware selection.
    """
    

    # Work on a copy
    df = df.copy()

    # Recompute health score with the actual goal (overwrites the temporary column)
    df["health_score"] = df.apply(lambda row: compute_health_score(row, goal=goal), axis=1)

    # Apply dietary preference and medical filters
    if preference:
        df = apply_diet_preference(df, dietary_preference)
    if conditions:
        df = medical_filter(df, conditions)
    df = filter_diet(df, mode)

    # Fat cap: 30% of daily calories from fat (can be adjusted per goal)
    if goal == "weight_loss":
        fat_pct = 0.25
    elif goal == "muscle_gain":
        fat_pct = 0.30
    else:
        fat_pct = 0.30
    fat_cap_daily = (daily_cal * fat_pct) / 9

    splits = calorie_split(daily_cal)
    fat_cap_per_meal = {meal: (cal / daily_cal) * fat_cap_daily for meal, cal in splits.items()}

    used_foods = set()
    plan = {}

    for meal, cal in splits.items():
        meal_df = build_meal(df, cal, used_foods, meal, 
                             fat_cap_per_meal.get(meal), goal=goal)
        plan[meal] = meal_df

    return plan

In [12]:
def generate_exercise_plan(user_input):

    import joblib

    # Load models and scalers
    workout_model = joblib.load("workout_type_model.pkl")
    workout_scaler = joblib.load("scaler_workout.pkl")
    workout_encoder = joblib.load("label_encoder_workout.pkl")

    freq_model = joblib.load("frequency_model.pkl")
    freq_scaler = joblib.load("scaler_freq.pkl")
    freq_encoder = joblib.load("label_encoder_freq.pkl")   # encoder used for Workout_Type in freq model

    dur_model = joblib.load("duration_model.pkl")
    dur_scaler = joblib.load("scaler_freq.pkl")

    cal_model = joblib.load("calorie_model.pkl")
    cal_scaler = joblib.load("calories_scaler.pkl")

    # ------------------------------------------------------
    # 1) WORKOUT TYPE PREDICTION
    # ------------------------------------------------------
    workout_input = [user_input[:4]]   # [Age, Gender, BMI, Experience]
    workout_input_scaled = workout_scaler.transform(workout_input)

    workout_pred = workout_model.predict(workout_input_scaled)
    workout_type = workout_encoder.inverse_transform(workout_pred)[0]

    # ------------------------------------------------------
    # 2) FREQUENCY PREDICTION (needs encoded workout type)
    # ------------------------------------------------------

    # Encode predicted workout type for frequency model
    encoded_workout = freq_encoder.transform([workout_type])[0]

    # Frequency model expects:
    # [Age, Gender, Experience_Level, BMI, Workout_Type_encoded]
    freq_features = [
        user_input[0],   # Age
        user_input[1],   # Gender
        user_input[3],   # Experience_Level
        user_input[2],   # BMI
        encoded_workout  # Workout Type
    ]

    freq_input = freq_scaler.transform([freq_features])
    freq = int(freq_model.predict(freq_input)[0])

    # ------------------------------------------------------
    # 3) DURATION PREDICTION
    # ------------------------------------------------------
    # Build features for duration model
    # 3) DURATION PREDICTION
    dur_features = [
        user_input[0],
        user_input[1],
        user_input[3],
        user_input[2],
        encoded_workout
    ]
    
    dur_input = dur_scaler.transform([dur_features])
    duration_hours = float(dur_model.predict(dur_input)[0])



    # Convert to minutes and clamp
    duration_minutes = round(duration_hours * 60)
    duration_minutes = max(15, min(duration_minutes, 180))

    # ------------------------------------------------------
    # 4) CALORIE PREDICTION
    # ------------------------------------------------------
    calorie_features = [
        user_input[0],   # Age
        user_input[1],   # Gender
        user_input[3],   # Experience_Level
        user_input[2],   # BMI
        encoded_workout, # Workout Type
        duration_hours   # Session Duration
    ]

    calorie_scaled = cal_scaler.transform([calorie_features])


    calories = float(cal_model.predict(calorie_scaled)[0])
    calories = round(max(50, min(calories, 1200)))

    # ------------------------------------------------------
    # FINAL OUTPUT
    # ------------------------------------------------------
    return {
        "Workout Type": workout_type,
        "Workout Frequency (days/week)": freq,
        "Session Duration (minutes)": duration_minutes,
        "Estimated Calories Burned": calories
    }


In [13]:
def health_fitness_system(age, gender, weight, height, activity_level, goal,
                          preference=None, experience=1, mode="normal", conditions=None):
    """
    Integrated system that generates both exercise and diet plans.
    df: the food DataFrame (must be passed)
    """
    import pandas as pd
    df = pd.read_csv("final_ind.csv")
    df=remove_non_meal_foods(df)
    # Apply the function – this creates the 'category' column
    df["category"] = df["food"].apply(classify_food)
    df["health_score"] = df.apply(lambda row: compute_health_score(row, goal="maintenance"), axis=1)
    df = df.sort_values("health_score", ascending=False)
    
    # Convert gender to numeric (1 for male, 0 for female) for exercise model
    gender_num = 1 if gender.lower() == 'male' else 0

    # Calculate BMI value and category
    bmi_value = weight / ((height/100) ** 2)
    bmi_category = bmi_class(weight, height)

    # Generate exercise plan
    exercise_plan = generate_exercise_plan([age, gender_num, bmi_value, experience])

    # ----- CORRECT DIET CALORIE TARGET -----
    # Compute TDEE using the Mifflin-St Jeor equation (you have this function)
    tdee = calculate_tdee(age, gender, weight, height, activity_level)
    # Adjust for goal (e.g., 0.8 * TDEE for weight loss)
    daily_cal = adjust_calories(tdee, goal)

    # Generate diet plan – note the correct keyword arguments
    diet_plan = diet_planner(
        df=df,
        daily_cal=daily_cal,
        mode=mode,
        conditions=conditions,
        goal=goal,
        dietary_preference=preference   # parameter name is dietary_preference, not preference
    )

    return {
        "BMI Class": bmi_category,
        "Exercise Plan": exercise_plan,
        "Recommended Diet": diet_plan
    }

In [14]:
plan = health_fitness_system(                  
    age=39,
    gender='male',
    weight=65,
    height=165,
    activity_level='moderate',
    goal='weight_gain',
    preference='nonveg',
    experience=1
)


  df = df[~df["food"].str.lower().str.contains(pattern, na=False, regex=True)]


In [15]:
def print_full_report(result):
    print("\n========== PERSONALIZED HEALTH REPORT ==========\n")
    print("BMI CATEGORY:", result["BMI Class"])
    
    print("\n----- EXERCISE PLAN -----")
    for key, value in result["Exercise Plan"].items():
        print(f"{key}: {value}")
    
    print("\n----- DIET PLAN -----")
    diet_plan = result["Recommended Diet"]   # this is a dict: {meal_name: DataFrame}
    
    # Iterate over meals in a sensible order (lowercase keys)
    for meal in ["breakfast", "lunch", "dinner", "snacks"]:
        df_meal = diet_plan.get(meal)
        if df_meal is None or df_meal.empty:
            print(f"\n{meal.upper()}: No foods selected.")
            continue
        
        print(f"\n{meal.upper()}:")
        # Print each food in the meal
        for _, row in df_meal.iterrows():
            print(f"• {row['food']} – {row['Caloric Value']:.0f} kcal, \n"
                  f"   Protein:{row['Protein']:.1f}g\n   Carbohydrates:{row['Carbohydrates']:.1f}g\n   Fat:{row['Fat']:.1f}g")
        
        # Optionally show meal totals
        meal_cal = df_meal["Caloric Value"].sum()
        print(f"  Meal total: {meal_cal:.0f} kcal")
    
    print("\n===============================================\n")

In [16]:
print_full_report(plan)



BMI CATEGORY: Normal weight

----- EXERCISE PLAN -----
Workout Type: Strength
Workout Frequency (days/week): 3
Session Duration (minutes): 58
Estimated Calories Burned: 727

----- DIET PLAN -----

BREAKFAST:
• Chapati/Roti – 202 kcal, 
   Protein:5.9g
   Carbohydrates:35.6g
   Fat:3.6g
• Sweet couscous dessert – 196 kcal, 
   Protein:3.8g
   Carbohydrates:29.7g
   Fat:7.7g
• Tomato aspic – 50 kcal, 
   Protein:6.9g
   Carbohydrates:5.0g
   Fat:0.3g
  Meal total: 449 kcal

LUNCH:
• Paneer pea sandwich (toasted) (Paneer matar ka sandwich) – 250 kcal, 
   Protein:12.4g
   Carbohydrates:24.1g
   Fat:12.0g
• Cracked wheat and green gram dal premix (Dalia moong dal premix) – 362 kcal, 
   Protein:15.9g
   Carbohydrates:64.8g
   Fat:5.1g
• Split bengal gram dal and vegetable pulao (Channa dal and vegetable pulao) – 117 kcal, 
   Protein:3.6g
   Carbohydrates:19.7g
   Fat:2.3g
• Tandoori fish – 96 kcal, 
   Protein:11.2g
   Carbohydrates:2.4g
   Fat:4.6g
  Meal total: 824 kcal

DINNER:
• Mas