In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import joblib
import gradio as gr
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- 1. HELPER FUNCTIONS ---
def mifflin_st_jeor(age, sex, weight_kg, height_cm):
    """Return the Mifflin-St Jeor basal metabolic rate estimate.

    Args:
        age: Age in years.
        sex: Free-text sex label; anything starting with "m" (case-insensitive)
            is treated as male, everything else uses the female offset.
        weight_kg: Body weight in kilograms.
        height_cm: Height in centimetres.

    Returns:
        The BMR estimate as a number.
    """
    is_male = sex.lower().startswith('m')
    # The weight/height/age part is shared; only the constant offset differs by sex.
    shared_terms = 10*weight_kg + 6.25*height_cm - 5*age
    if is_male:
        return shared_terms + 5
    return shared_terms - 161

# Factor applied to BMR for each activity level. Key order is significant:
# the model encodes an activity as its position in this mapping.
activity_multipliers = dict(
    sedentary=1.2,
    light=1.375,
    moderate=1.55,
    active=1.725,
    very_active=1.9,
)

In [3]:
def load_and_fix_data():
    """Load the food and nutrition CSVs and build a clean nutrition table.

    Reads ``food.csv`` and ``nutrition_distriution.csv`` from the current
    working directory, picks a food-name column, locates the calorie and
    protein columns, and derives the metrics used by the recommender.

    Returns:
        tuple: ``(nutrition, message)``. On success ``nutrition`` is a
        DataFrame with the columns ``food``, ``calories``, ``protein``,
        ``kcal_per_100g`` and ``protein_per_kcal``. On failure (missing files,
        no usable name column, or no calorie/protein column) ``nutrition`` is
        ``None`` and ``message`` starts with ``"Error:"``.
    """
    print("Loading data...")

    # 1. Load BOTH files
    try:
        df_food = pd.read_csv("food.csv")
        df_nut = pd.read_csv("nutrition_distriution.csv")
    except FileNotFoundError:
        return None, "Error: CSV files not found. Check file paths."

    # 2. Find the food-name column.
    # 'object' matches classic text columns; 'string' additionally matches
    # pandas' dedicated string dtype so detection does not depend on how the
    # installed pandas version stores text.
    text_dtypes = ['object', 'string']
    nut_text_cols = df_nut.select_dtypes(include=text_dtypes).columns.tolist()

    if not nut_text_cols:
        # The nutrition file is numeric only, so names must come from food.csv.
        print("Nutrition file is numeric only. extracting names from food.csv...")
        food_text_cols = df_food.select_dtypes(include=text_dtypes).columns.tolist()
        if not food_text_cols:
            # Previously this fell through with no name source and crashed later.
            return None, "Error: No text column with food names found in either CSV file."
        # ASSUMPTION: the files are row-aligned (row i in food.csv describes
        # row i in the nutrition file); the assignment aligns on the index.
        df_nut['merged_name'] = df_food[food_text_cols[0]]
        final_name_col = 'merged_name'
    else:
        # Prefer a text column whose header looks like a food name.
        name_hints = ('food', 'item', 'name')
        candidates = [c for c in nut_text_cols if any(h in c.lower() for h in name_hints)]
        final_name_col = candidates[0] if candidates else nut_text_cols[0]

    print(f"Selected Name Column: '{final_name_col}'")

    # 3. Find nutrient columns (in df_nut) by header keyword.
    def find_col(df, keywords):
        """Return the first non-name column whose header contains a keyword."""
        for col in df.columns:
            if col == final_name_col:
                continue
            if any(k in col.lower() for k in keywords):
                return col
        return None

    col_cal = find_col(df_nut, ['calor', 'kcal', 'energy'])
    col_pro = find_col(df_nut, ['protein', 'prot'])

    # Fallback: if headers are not recognisable, take numeric columns by position.
    numerics = df_nut.select_dtypes(include=[np.number]).columns.tolist()
    if col_cal is None and len(numerics) > 0:
        col_cal = numerics[0]
    if col_pro is None and len(numerics) > 1:
        col_pro = numerics[1]

    if col_cal is None or col_pro is None:
        # Indexing with None would raise a KeyError; report it like the other failures.
        return None, "Error: Could not identify calorie and protein columns in the nutrition file."

    # 4. Build the final clean DataFrame.
    raw_names = df_nut[final_name_col]
    nutrition = pd.DataFrame()
    nutrition['food'] = raw_names.astype(str)
    # Non-numeric entries are coerced to NaN and then treated as 0.
    nutrition['calories'] = pd.to_numeric(df_nut[col_cal], errors='coerce').fillna(0)
    nutrition['protein'] = pd.to_numeric(df_nut[col_pro], errors='coerce').fillna(0)

    # Drop rows whose name is missing, the literal "nan", or blank.
    has_name = (
        raw_names.notna()
        & (nutrition['food'].str.lower() != 'nan')
        & (nutrition['food'].str.strip() != '')
    )
    # .copy() so the metric columns below are added to an independent frame.
    nutrition = nutrition[has_name].copy()

    # 5. Metrics for the recommender.
    nutrition['kcal_per_100g'] = nutrition['calories']
    # Foods with zero calories get a ratio of 0 instead of a division result.
    nutrition['protein_per_kcal'] = np.where(
        nutrition['calories'] > 0,
        nutrition['protein'] / nutrition['calories'],
        0
    )

    return nutrition, "Data Loaded Successfully"

# --- EXECUTE ---
# load_and_fix_data() returns (None, message) when loading fails, so only
# preview the table when a DataFrame actually came back; otherwise the
# message printed below is the whole report.
nutrition_df, msg = load_and_fix_data()
print(msg)
if nutrition_df is not None:
    print("First 5 rows of clean data:")
    print(nutrition_df.head())

Loading data...
Nutrition file is numeric only. extracting names from food.csv...
Selected Name Column: 'merged_name'
Data Loaded Successfully
First 5 rows of clean data:
                   food  calories  protein  kcal_per_100g  protein_per_kcal
0              Avocados       160      2.0            160          0.012500
1               Bananas        89      1.1             89          0.012360
2  Bagels made in wheat       349     14.0            349          0.040115
3               Berries       331     20.0            331          0.060423
4              Brocolli         2      0.3              2          0.150000


In [4]:
# --- 3. MODEL TRAINING ---
def train_calorie_model():
    """Fit a random forest on synthetic people to predict a calorie target.

    Each synthetic person gets a BMR from ``mifflin_st_jeor``, scaled by the
    activity multiplier and by a random goal factor in [0.8, 1.2). The features
    are age, a male indicator, weight, height and the activity's position in
    ``activity_multipliers``.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    rng = np.random.RandomState(42)
    n_samples = 2500
    activity_names = list(activity_multipliers.keys())
    # Position of each activity in the mapping, looked up once instead of per row.
    activity_position = {name: pos for pos, name in enumerate(activity_names)}

    # Synthetic population; the draw order below determines the generated data.
    ages = rng.randint(18, 65, n_samples)
    sexes = rng.choice(['male', 'female'], n_samples)
    weights = rng.uniform(45, 105, n_samples)
    heights = rng.uniform(150, 195, n_samples)
    activities = rng.choice(activity_names, n_samples)

    feature_names = ['age', 'sex_male', 'weight_kg', 'height_cm', 'activity_idx']
    records = []
    for i in range(n_samples):
        age, sex, weight, height, level = ages[i], sexes[i], weights[i], heights[i], activities[i]
        tdee = mifflin_st_jeor(age, sex, weight, height) * activity_multipliers[level]
        # Random loss/gain factor drawn per person to add variance to the target.
        target = tdee * rng.uniform(0.8, 1.2)
        records.append([
            age,
            1 if sex == 'male' else 0,
            weight,
            height,
            activity_position[level],
            target,
        ])

    data = pd.DataFrame(records, columns=feature_names + ['cal_target'])

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(data[feature_names], data['cal_target'])
    return model

# Train once when this cell runs; predict_and_recommend reads this
# module-level model when it makes a prediction.
rf_model = train_calorie_model()
print("Model trained successfully.")

Model trained successfully.


In [5]:
# --- 4. RECOMMENDATION ENGINE ---
def recommend_mealplan(cal_target, goal='loss', meals=3):
    """Greedily assemble a daily meal plan from the module-level ``nutrition_df``.

    Foods are ranked once according to ``goal`` and then added to each meal in
    50 g steps (falling back to 25 g when 50 g would overshoot) until the meal
    reaches 95% of its calorie share. A single food is capped at 200 g per
    meal, and a meal never exceeds 110% of its share.

    Note: every meal starts from the top of the same ranked list, so all meals
    in one plan come out identical.

    Args:
        cal_target: Total calories for the day.
        goal: ``'loss'`` ranks by protein per kcal (then calorie density,
            both descending); any other value ranks by calorie density only.
        meals: Number of meals. Floats such as ``3.0`` (as a UI slider may
            deliver) are truncated to an int.

    Returns:
        tuple: ``(plan, total_calories)`` where ``plan`` is a list of dicts
        with ``meal_no``, ``items`` and ``meal_calories``. Returns ``([], 0)``
        when ``meals`` is not positive.
    """
    # range() rejects floats and a zero meal count would divide by zero below.
    meals = int(meals)
    if meals <= 0:
        return [], 0

    per_meal = cal_target / meals
    fill_threshold = per_meal * 0.95   # a meal counts as "full" from here
    calorie_ceiling = per_meal * 1.1   # never add a step that goes past this
    max_grams_per_food = 200           # cap per food to ensure variety

    # Loss = protein-dense foods first; Gain = calorie-dense foods first.
    if goal == 'loss':
        ranked = nutrition_df.sort_values(
            ['protein_per_kcal', 'kcal_per_100g'], ascending=[False, False]
        )
    else:
        ranked = nutrition_df.sort_values('kcal_per_100g', ascending=False)

    # Plain tuples are much cheaper to index inside the loop than DataFrame rows.
    foods = list(zip(ranked['food'], ranked['kcal_per_100g'], ranked['protein']))

    plan = []
    for m in range(meals):
        meal_items = []
        item_by_food = {}  # food name -> its entry in meal_items
        meal_cal = 0.0
        idx = 0

        # Loop until the meal is full or the candidate list is exhausted.
        while meal_cal < fill_threshold and idx < len(foods):
            food_name, kcal_per_100g, protein_per_100g = foods[idx]

            # Skip invalid items (no calories to contribute).
            if kcal_per_100g <= 0:
                idx += 1
                continue

            existing_item = item_by_food.get(food_name)
            if existing_item is not None and existing_item['grams'] >= max_grams_per_food:
                idx += 1
                continue

            # Try the standard 50 g step first, then a smaller 25 g step.
            for step_g in (50, 25):
                cal_step = kcal_per_100g * (step_g / 100.0)
                if meal_cal + cal_step <= calorie_ceiling:
                    break
            else:
                # Even 25 g overshoots: move on to the next food.
                idx += 1
                continue

            prot_step = protein_per_100g * (step_g / 100.0)

            # Add or update the item.
            if existing_item is not None:
                existing_item['grams'] += step_g
                existing_item['calories'] += cal_step
                existing_item['protein'] += prot_step
            else:
                new_item = {
                    'food': food_name,
                    'grams': step_g,
                    'calories': cal_step,
                    'protein': prot_step
                }
                meal_items.append(new_item)
                item_by_food[food_name] = new_item

            meal_cal += cal_step
            # idx is deliberately NOT advanced here: the next iteration may add
            # more of the same food (up to the per-food cap).

        # Round values for display.
        for item in meal_items:
            item['calories'] = round(item['calories'], 1)
            item['protein'] = round(item['protein'], 1)

        plan.append({
            'meal_no': m + 1,
            'items': meal_items,
            'meal_calories': round(meal_cal, 1)
        })

    total_cals = sum(p['meal_calories'] for p in plan)
    return plan, round(total_cals, 1)

In [6]:
# --- 5. USER INTERFACE ---
def predict_and_recommend(age, sex, weight, height, activity, goal, meals):
    """Predict a calorie target with ``rf_model`` and render a meal plan as text.

    Args:
        age, weight, height: Numeric model features.
        sex: ``'male'`` maps to 1, anything else to 0.
        activity: A key of ``activity_multipliers``; encoded as its position.
        goal: ``'loss'`` scales the prediction by 0.85, anything else by 1.15.
        meals: Number of meals, forwarded to ``recommend_mealplan``.

    Returns:
        A multi-line string with the calorie summary followed by each meal.
    """
    # 1. Predict calories from the encoded feature row.
    activity_code = list(activity_multipliers.keys()).index(activity)
    feature_row = [age, 1 if sex == 'male' else 0, weight, height, activity_code]
    predicted = rf_model.predict([feature_row])[0]

    # Adjust for goal.
    goal_scale = 0.85 if goal == 'loss' else 1.15
    target_cals = predicted * goal_scale

    # 2. Get the plan.
    plan, total_planned = recommend_mealplan(target_cals, goal, meals)

    # 3. Collect the report pieces and join them once at the end.
    pieces = [
        f"Estimated Daily Needs: {int(target_cals)} kcal\n",
        f"Plan Total: {total_planned} kcal\n",
        "="*40 + "\n\n",
    ]
    for meal in plan:
        pieces.append(f"MEAL {meal['meal_no']} ({meal['meal_calories']} kcal):\n")
        if not meal['items']:
            pieces.append("  Could not find foods to fit this target.\n")
        pieces.extend(
            f"  - {item['food']}: {item['grams']}g  "
            f"({item['calories']} kcal, {item['protein']}g protein)\n"
            for item in meal['items']
        )
        pieces.append("\n")

    return "".join(pieces)

# Launch App
# The inputs list is positional: its order must match the parameters of
# predict_and_recommend (age, sex, weight, height, activity, goal, meals).
iface = gr.Interface(
    fn=predict_and_recommend,
    inputs=[
        gr.Number(label="Age", value=25),
        gr.Radio(["male", "female"], label="Sex", value="male"),
        gr.Number(label="Weight (kg)", value=70),
        gr.Number(label="Height (cm)", value=175),
        # Choices come from activity_multipliers so the UI and the encoding stay in sync.
        gr.Dropdown(list(activity_multipliers.keys()), label="Activity", value="moderate"),
        gr.Radio(["loss", "gain"], label="Goal", value="loss"),
        gr.Slider(2, 6, step=1, label="Meals per Day", value=3)
    ],
    outputs="text",
    title="AI Diet Recommender",
    description="Generates a meal plan with aggregated food items and correct names."
)

# share=True additionally publishes a temporary public gradio.live URL (see the
# cell output below), so anyone holding that link can reach this app.
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://fe15c9376c9fb5bb9d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [7]:
import gradio as gr
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import os

In [8]:
# --- CONFIGURATION ---
# All paths are relative, i.e. resolved against the current working directory.
MODEL_PATH = "diet_model.keras"
SCALER_X_PATH = "scaler_x.pkl"
SCALER_Y_PATH = "scaler_y.pkl"
FOOD_DATA_PATH = "food.csv"
NUT_DATA_PATH = "nutrition_distriution.csv"  # Note: Keeping the typo from your original filename

# --- 1. LOAD MODEL & SCALERS ---
# Any failure here is reported and leaves model/scaler_X/scaler_y as None;
# predict_and_recommend checks `model is None` and returns an error message.
print("Loading model and scalers...")
try:
    # Only the model file is checked up front; a missing scaler file surfaces
    # as an exception from joblib.load and is handled by the except below.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}. Run train_dl_model.py first.")
        
    model = tf.keras.models.load_model(MODEL_PATH)
    # NOTE(review): joblib.load deserializes pickled objects -- only load
    # scaler files that come from a trusted source.
    scaler_X = joblib.load(SCALER_X_PATH)
    scaler_y = joblib.load(SCALER_Y_PATH)
    print("Model loaded successfully.")
except Exception as e:
    print(f"CRITICAL ERROR: {e}")
    model, scaler_X, scaler_y = None, None, None

# --- 2. LOAD & CLEAN DATA ---
def load_data():
    """Load the food/nutrition CSVs and return a clean nutrition table.

    Reads ``FOOD_DATA_PATH`` and ``NUT_DATA_PATH``, resolves a food-name
    column, extracts calories and protein, and derives the ranking metrics
    used by ``recommend_mealplan``.

    Returns:
        DataFrame with the columns ``food``, ``calories``, ``protein``,
        ``kcal_per_100g`` and ``protein_per_kcal``, restricted to rows with a
        usable name and positive calories. On any failure the error is printed
        and an empty DataFrame is returned (best effort, so the app still
        starts).
    """
    try:
        df_food = pd.read_csv(FOOD_DATA_PATH)
        df_nut = pd.read_csv(NUT_DATA_PATH)

        # 1. Resolve the food-name column.
        # 'object' matches classic text columns; 'string' additionally matches
        # pandas' dedicated string dtype.
        text_dtypes = ['object', 'string']
        nut_text_cols = df_nut.select_dtypes(include=text_dtypes).columns.tolist()

        if not nut_text_cols:
            # Nutrition file is numeric only: borrow names from food.csv.
            # Assumes both files are row-aligned (assignment aligns on index).
            name_col = None
            if 'Food_items' in df_food.columns:
                name_values = df_food['Food_items']
            else:
                name_values = df_food.iloc[:, 0]
        else:
            # Only text columns can hold names; searching every column could
            # pick a numeric id such as "food_id" as the food name.
            candidates = [
                c for c in nut_text_cols
                if any(x in c.lower() for x in ['food', 'item', 'name'])
            ]
            name_col = candidates[0] if candidates else nut_text_cols[0]
            name_values = df_nut[name_col]

        df_nut['merged_name'] = name_values

        # 2. Extract nutrients.
        def get_col(df, terms):
            """Return the first non-name column whose header contains a term."""
            for col in df.columns:
                if col == 'merged_name' or col == name_col:
                    continue
                if any(t in col.lower() for t in terms):
                    return col
            return None

        # Positional fallback when headers are not recognisable; an IndexError
        # here (too few numeric columns) is handled by the except below.
        numeric_cols = df_nut.select_dtypes(include=[np.number]).columns
        col_cal = get_col(df_nut, ['calor', 'kcal', 'energy']) or numeric_cols[0]
        col_pro = get_col(df_nut, ['protein', 'prot']) or numeric_cols[1]

        nutrition = pd.DataFrame()
        nutrition['food'] = df_nut['merged_name'].astype(str)
        # Non-numeric entries are coerced to NaN and then treated as 0.
        nutrition['calories'] = pd.to_numeric(df_nut[col_cal], errors='coerce').fillna(0)
        nutrition['protein'] = pd.to_numeric(df_nut[col_pro], errors='coerce').fillna(0)

        # 3. Clean up: keep rows with a real name and positive calories.
        stripped = nutrition['food'].str.strip()
        keep = (
            df_nut['merged_name'].notna()
            & (stripped.str.lower() != 'nan')
            & (stripped != '')
            & (nutrition['calories'] > 0)
        )
        nutrition = nutrition[keep].copy()

        # 4. Metrics for the recommendation engine.
        nutrition['kcal_per_100g'] = nutrition['calories']
        # Calories are > 0 after the filter; the guard keeps the ratio
        # well-defined regardless.
        nutrition['protein_per_kcal'] = np.where(
            nutrition['calories'] > 0,
            nutrition['protein'] / nutrition['calories'],
            0
        )

        return nutrition
    except Exception as e:
        print(f"Error loading CSV data: {e}")
        return pd.DataFrame() # Return empty DF on failure

nutrition_df = load_data()

# --- 3. MEAL RECOMMENDATION LOGIC ---
def recommend_mealplan(cal_target, goal='loss', meals=3):
    """Build a greedy meal plan from the module-level ``nutrition_df``.

    Foods are ranked once by ``goal`` and added to each meal in 50 g portions
    until the meal reaches 95% of its calorie share. A portion is skipped when
    it has no calories, would push the meal past 110% of its share, or the
    food already sits at 200 g in this meal.

    Args:
        cal_target: Total calories for the day.
        goal: ``'loss'`` ranks by protein per kcal, then calorie density (both
            descending); any other value ranks by calorie density only.
        meals: Number of meals to build.

    Returns:
        tuple: ``(plan, total_calories)``; ``([], 0)`` when no food data is
        loaded.
    """
    if nutrition_df.empty:
        return [], 0

    portion_g = 50
    meal_budget = cal_target / meals

    # Rank the candidates according to the goal.
    ranked = nutrition_df.copy()
    if goal == 'loss':
        ranked = ranked.sort_values(
            ['protein_per_kcal', 'kcal_per_100g'], ascending=[False, False]
        )
    else:
        ranked = ranked.sort_values('kcal_per_100g', ascending=False)
    ranked = ranked.reset_index(drop=True)
    n_foods = len(ranked)

    plan = []
    for m in range(meals):
        chosen = {}  # food name -> item dict, kept in insertion order
        eaten_kcal = 0.0
        pos = 0

        while eaten_kcal < meal_budget * 0.95 and pos < n_foods:
            food = ranked.iloc[pos]
            kcal_add = food['kcal_per_100g'] * (portion_g / 100.0)
            protein_add = food['protein'] * (portion_g / 100.0)
            entry = chosen.get(food['food'])

            no_calories = kcal_add <= 0
            overshoots = eaten_kcal + kcal_add > meal_budget * 1.1
            at_cap = entry is not None and entry['grams'] >= 200
            if no_calories or overshoots or at_cap:
                # Nothing more to take from this food; try the next one.
                pos += 1
                continue

            if entry is None:
                chosen[food['food']] = {
                    'food': food['food'],
                    'grams': portion_g,
                    'calories': kcal_add,
                    'protein': protein_add
                }
            else:
                entry['grams'] += portion_g
                entry['calories'] += kcal_add
                entry['protein'] += protein_add

            eaten_kcal += kcal_add
            # pos stays put so the same food can be topped up on the next pass.

        plan.append({
            'meal_no': m + 1,
            'items': list(chosen.values()),
            'meal_calories': round(eaten_kcal, 1)
        })

    total_cals = sum(p['meal_calories'] for p in plan)
    return plan, round(total_cals, 1)

# --- 4. PREDICTION WRAPPER ---
# Activity levels in encoding order: predict_and_recommend turns an activity
# into its position in this mapping before feeding the model.
activity_multipliers = {
    'sedentary': 1.2,
    'light': 1.375,
    'moderate': 1.55,
    'active': 1.725,
    'very_active': 1.9,
}

def predict_and_recommend(age, sex, weight, height, activity, goal, meals):
    """Predict maintenance calories with the neural network and render a plan.

    The input row ``[age, sex, weight, height, activity_index]`` is scaled with
    ``scaler_X``, passed through ``model``, and the output is mapped back with
    ``scaler_y``. The result is scaled by 0.85 for ``'loss'`` or by 1.15
    otherwise and handed to ``recommend_mealplan``.

    Args:
        age, weight, height: Numeric model features.
        sex: ``'male'`` maps to 1, anything else to 0.
        activity: A key of ``activity_multipliers``; encoded as its position.
        goal: ``'loss'`` or any other value (treated as gain).
        meals: Number of meals; converted to ``int`` before use.

    Returns:
        The formatted report as a single string, or an error message when the
        model or either scaler failed to load.
    """
    # The loader sets all three to None on failure; the scalers are used
    # below as well, so check them together.
    if model is None or scaler_X is None or scaler_y is None:
        return "Error: Model not loaded. Please ensure diet_model.keras exists."

    # 1. Prepare input
    act_idx = list(activity_multipliers.keys()).index(activity)
    sex_val = 1 if sex == 'male' else 0

    # Input vector: [age, sex, weight, height, activity_index]
    input_data = np.array([[age, sex_val, weight, height, act_idx]])

    # 2. Scale input
    input_scaled = scaler_X.transform(input_data)

    # 3. Deep learning prediction
    pred_scaled = model.predict(input_scaled, verbose=0)

    # 4. Inverse-scale the output back to calories
    tdee_prediction = scaler_y.inverse_transform(pred_scaled)[0][0]

    # 5. Goal adjustment
    if goal == 'loss':
        target_cals = tdee_prediction * 0.85
    else:
        target_cals = tdee_prediction * 1.15

    # 6. Generate plan
    plan, total_planned = recommend_mealplan(target_cals, goal, int(meals))

    # 7. Format output string
    # The header emoji and the bullet were stored as mojibake (UTF-8 bytes
    # decoded as a single-byte encoding); they are restored here.
    separator = "-----------------------------------"
    output = [
        "🧠 AI ANALYSIS (Deep Learning Model)",
        separator,
        f"Maintenance Calories: {int(tdee_prediction)} kcal",
        f"Target for {goal.upper()}: {int(target_cals)} kcal",
        f"Generated Plan Total: {int(total_planned)} kcal",
        separator + "\n",
    ]

    for meal in plan:
        output.append(f"MEAL {meal['meal_no']} ({meal['meal_calories']} kcal):")
        if not meal['items']:
            output.append("  (No suitable foods found in database)")
        for item in meal['items']:
            food_line = f"  • {item['food']} | {item['grams']}g"
            food_line += f" ({int(item['calories'])} kcal, {int(item['protein'])}g protein)"
            output.append(food_line)
        output.append("")

    return "\n".join(output)

# --- 5. GRADIO INTERFACE ---
# The inputs list is positional: its order must match the parameters of
# predict_and_recommend (age, sex, weight, height, activity, goal, meals).
iface = gr.Interface(
    fn=predict_and_recommend,
    inputs=[
        gr.Number(label="Age", value=30),
        gr.Radio(["male", "female"], label="Sex", value="male"),
        gr.Number(label="Weight (kg)", value=75),
        gr.Number(label="Height (cm)", value=175),
        # Choices come from activity_multipliers so the UI and the encoding stay in sync.
        gr.Dropdown(list(activity_multipliers.keys()), label="Activity Level", value="moderate"),
        gr.Radio(["loss", "gain"], label="Goal", value="loss"),
        gr.Slider(2, 6, step=1, label="Meals per Day", value=3)
    ],
    outputs=gr.Textbox(label="Your Personalized Diet Plan", lines=25),
    # The salad emoji was stored as mojibake; restored to the intended character.
    title="🥗 Deep Learning Diet Recommender",
    description="This system uses a Neural Network to calculate your caloric needs and generates a meal plan based on your goal.",
    theme="default"
)

if __name__ == "__main__":
    # share=True additionally publishes a temporary public gradio.live URL,
    # so anyone holding that link can reach this app.
    iface.launch(share=True)

Loading model and scalers...
Model loaded successfully.
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://0a1f47468c15e280f4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
