In [16]:
import pandas as pd
import random
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# ------------------------------
# Step 1: Load datasets
# ------------------------------
# NOTE(review): both paths are absolute under "/content/" — presumably a
# Google Colab working directory; confirm before running elsewhere.
food_data = pd.read_csv("/content/Indian_Food_Nutrition_Categorized_Broad.csv")
employee_data = pd.read_csv("/content/employee_wellness_with_clerks_supervisors.csv")

# ------------------------------
# Step 2: Categorize food (same as your current code)
# ------------------------------
# Detailed category -> lowercase keywords looked for in a dish name.
# classify_dish() walks this dict in insertion order and returns the first
# category that owns a keyword occurring anywhere in the name (substring
# test), so BOTH the order of the categories and the order/length of the
# keywords influence the result:
#   * an earlier category shadows a later one when both match;
#   * a short keyword also matches inside longer words (e.g. "tea" is a
#     substring of "steamed");
#   * within one list, "roll" is checked before "spring roll" and "rice"
#     before "fried rice", so the longer entries never decide the outcome.
# "Others" has no keywords; it is the fallback label.
categories = {
    "Beverages": ["tea", "coffee", "milkshake", "juice", "lassi", "sherbet", "cooler", "cocoa", "thandai", "milk", "drink", "sorbet", "smoothie"],
    "Sandwiches": ["sandwich", "burger", "submarine"],
    "Breakfast / Cereals": ["porridge", "cornflakes", "oats", "poha", "upma", "idli", "dosa", "uttapam", "appam"],
    "Egg Dishes": ["egg", "omelette", "anda", "scrambled", "poached"],
    "Soups / Stocks / Stews": ["soup", "consomme", "stock", "stew", "rasam", "yakhni"],
    "Breads": ["roti", "naan", "parantha", "paratha", "puri", "poori", "bhature", "kulcha", "thepla"],
    "Rice & Grains": ["rice", "pulao", "biryani", "khichdi", "tahar", "chawal", "sadam", "anna", "fried rice"],
    "Pasta & Noodles": ["pasta", "macaroni", "spaghetti", "lasagne", "fettuccine", "chowmein", "noodle"],
    "Dal / Lentils": ["dal", "moong", "urad", "channa", "masoor", "moth", "lentil"],
    "Vegetable Dishes": ["aloo", "gobhi", "sabzi", "bhujia", "bhartha", "tinda", "okra", "baingan", "methi", "cabbage", "cauliflower", "beans", "bottle gourd", "lauki", "karela", "jackfruit", "yam"],
    "Paneer Dishes": ["paneer"],
    "Mutton Dishes": ["mutton", "keema", "gosht", "roghan josh", "lamb"],
    "Chicken Dishes": ["chicken"],
    "Fish / Seafood": ["fish", "prawn", "jhinga", "seafood"],
    "Appetizers / Snacks": ["samosa", "pakora", "vada", "cutlet", "roll", "spring roll", "kachori", "tikki", "sev", "bhaji", "bonda", "fritter"],
    "Salads & Raitas": ["salad", "raita", "kosambari"],
    "Desserts": ["halwa", "kheer", "payasam", "rasgulla", "gulab jamun", "laddu", "barfi", "sandesh", "rabri", "peda", "modak", "sheera", "shrikhand", "basundi", "malpua", "phirni", "pudding", "souffle", "custard", "mousse", "cake", "pastry", "cookies", "biscuit", "ice cream", "kulfi", "chikki"],
    "Chutneys & Pickles": ["chutney", "pickle", "achar", "achaar"],
    "Others": []
}

# Broad bucket -> detailed categories it contains. map_to_broad() returns the
# first bucket listing a detailed category and falls back to
# "Other Food Categories" for anything not listed here. These five bucket
# names are the ones recommend_food_ml() picks one dish from.
broad_category_map = {
    "Main Dish": ["Rice & Grains", "Pasta & Noodles", "Dal / Lentils", "Paneer Dishes", "Mutton Dishes", "Chicken Dishes", "Fish / Seafood", "Breads"],
    "Side Dish": ["Vegetable Dishes", "Egg Dishes", "Soups / Stocks / Stews", "Appetizers / Snacks"],
    "Dessert": ["Desserts"],
    "Drinks": ["Beverages"],
    "Other Food Categories": ["Sandwiches", "Salads & Raitas", "Chutneys & Pickles", "Others", "Breakfast / Cereals"]
}

def classify_dish(dish):
    """Return the detailed category whose keyword appears in the dish name.

    The dish is converted to a lower-cased string and ``categories`` is
    scanned in insertion order; the first category owning a keyword that
    occurs anywhere in the name wins. ``"Others"`` is returned when no
    keyword matches.

    NOTE(review): the match is a plain substring test, not a whole-word
    test, so a short keyword such as "tea" also hits names like
    "steamed ...".
    """
    name = str(dish).lower()
    hits = (
        label
        for label, terms in categories.items()
        if any(term in name for term in terms)
    )
    return next(hits, "Others")

# Label every dish with its detailed category (element-wise over the names).
food_data["Detailed_Category"] = food_data["Dish Name"].map(classify_dish)

def map_to_broad(detailed_category):
    """Return the broad bucket that lists ``detailed_category``.

    ``broad_category_map`` is searched in insertion order and the first
    bucket containing the detailed category is returned; anything not listed
    falls back to ``"Other Food Categories"``.
    """
    buckets = (
        bucket
        for bucket, members in broad_category_map.items()
        if detailed_category in members
    )
    return next(buckets, "Other Food Categories")

# Collapse each detailed category into its broad bucket.
food_data["Broad_Category"] = food_data["Detailed_Category"].map(map_to_broad)

# ------------------------------
# Step 3: Train ML model to predict Survival Score
# ------------------------------
# Encode categorical features
categorical_cols = ['Dietary_Preference','Health_Goals','Allergies','Role']
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(employee_data[categorical_cols])
# Reuse employee_data's index: pd.concat(axis=1) aligns on index labels, so a
# fresh 0..n-1 index would misalign rows (and introduce NaNs) whenever
# employee_data has been filtered, sorted, or re-indexed before this step.
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=employee_data.index,
)

# Features & Target
X = pd.concat([employee_data[['Step_Count','Daily_Calories_Required']], encoded_df], axis=1)
# NOTE(review): placeholder target. It is computed directly from the
# Step_Count feature, so the model can only learn to echo that column back;
# replace with a real survival score column if one is available.
y = employee_data['Step_Count'] + 10  # simple target as example, can be real survival score if available

# Train Random Forest
# NOTE(review): X_test / y_test are produced here but never used for
# evaluation in this script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict Survival Scores for every employee (training rows included)
employee_data['Predicted_Survival_Score'] = rf_model.predict(X)

# ------------------------------
# Step 4: Food Recommendation (ML-guided)
# ------------------------------
def recommend_food_ml(employee):
    """Pick one dish per broad food category for a single employee.

    The selection is rule-based: dishes are filtered by allergy and dietary
    preference using keyword matches on the dish name, ranked by the nutrient
    that fits the health goal, and one dish is drawn at random from the best
    candidates of each broad category. No trained model is consulted here.

    Parameters
    ----------
    employee : pandas.DataFrame
        Employee record; only the first row is read. Must contain the
        columns ``Dietary_Preference``, ``Allergies`` and ``Health_Goals``.
        Missing values are tolerated (they are treated as "no restriction" /
        "no specific goal").

    Returns
    -------
    tuple
        ``(reason, picks)`` where ``reason`` is a short explanatory string
        and ``picks`` maps each of the five broad categories to either the
        chosen dish row as a dict or the string ``"No suitable option"``.
    """
    top_n = 20  # how many best-ranked dishes per category are eligible

    def _first_text(column):
        # str() first so NaN / None become harmless text instead of raising.
        return str(employee[column].values[0]).lower()

    diet = _first_text('Dietary_Preference')
    allergy = _first_text('Allergies')
    goal = _first_text('Health_Goals')

    dairy_pattern = "milk|curd|paneer|cheese"
    meat_pattern = (
        "chicken|mutton|fish|egg|prawn|keema|gosht|lamb|jhinga|seafood"
        "|omelette|meat|roghan josh"
    )

    # "non-vegetarian" contains "vegetarian", so rule the "non" forms out
    # explicitly before applying the meat filter.
    is_vegan = "vegan" in diet
    is_vegetarian = "vegetarian" in diet and "non" not in diet

    # Apply allergy/diet restrictions
    blocked_patterns = []
    if "nut" in allergy:
        blocked_patterns.append("nut")
    if "lactose" in allergy or "milk" in allergy or is_vegan:
        blocked_patterns.append(dairy_pattern)
    if is_vegetarian or is_vegan:
        blocked_patterns.append(meat_pattern)

    recommended = food_data
    for pattern in blocked_patterns:
        # na=False keeps the mask boolean when a dish name is missing;
        # otherwise `~` fails on the resulting NaN entries.
        blocked = recommended['Dish Name'].str.contains(pattern, case=False, na=False)
        recommended = recommended[~blocked]

    # Rank by the nutrient that matches the goal.
    if "weight loss" in goal:
        recommended = recommended.sort_values(by="Calories (kcal)")
        ranked = True
        reason = "Weight loss goal: prioritizing low-calorie foods."
    elif "energy" in goal:
        recommended = recommended.sort_values(by="Carbohydrates (g)", ascending=False)
        ranked = True
        reason = "Energy boost goal: prioritizing high-carb foods."
    elif "muscle" in goal or "protein" in goal:
        recommended = recommended.sort_values(by="Protein (g)", ascending=False)
        ranked = True
        reason = "Muscle gain goal: prioritizing high-protein foods."
    else:
        ranked = False
        reason = "Balanced recommendation based on ML guidance."

    # Pick one item per broad category. The top-N cut is taken *within* each
    # category: a single global top-N is usually dominated by one or two
    # categories and leaves the rest with no option at all.
    final_recommendations = {}
    for cat in ["Main Dish","Side Dish","Dessert","Drinks","Other Food Categories"]:
        options = recommended[recommended["Broad_Category"]==cat]
        if ranked:
            options = options.head(top_n)
        if not options.empty:
            final_recommendations[cat] = options.sample(1).iloc[0].to_dict()
        else:
            final_recommendations[cat] = "No suitable option"
    return reason, final_recommendations

# ------------------------------
# Step 5: Ask employee name
# ------------------------------
# Typed input often carries stray spaces or different capitalisation, so the
# lookup trims whitespace and compares case-insensitively. Rows with a
# missing name never match (the comparison against NaN is False).
employee_name = input("Enter employee name: ").strip()
name_matches = (
    employee_data['Name'].str.strip().str.lower() == employee_name.lower()
)
employee_record = employee_data[name_matches]

if employee_record.empty:
    print("Employee not found!")
else:
    print("\nEmployee Wellness Data:")
    print(employee_record)

    reason, foods = recommend_food_ml(employee_record)
    print("\nFood Recommendation Reason:", reason)

    # Display recommendations: one table row per broad category. A pick is
    # either a dish row (dict) or a plain "no option" message, in which case
    # the nutrient cells are shown as "-".
    nutrient_columns = {
        "Calories": "Calories (kcal)",
        "Protein": "Protein (g)",
        "Carbs": "Carbohydrates (g)",
        "Fats": "Fats (g)",
    }
    table_rows = []
    for category, pick in foods.items():
        if isinstance(pick, dict):
            row = {"Category": category, "Dish Name": pick["Dish Name"]}
            for label, source_col in nutrient_columns.items():
                row[label] = pick.get(source_col, "-")
        else:
            row = {"Category": category, "Dish Name": pick}
            row.update(dict.fromkeys(nutrient_columns, "-"))
        table_rows.append(row)
    rec_df = pd.DataFrame(table_rows)
    print("\nRecommended Foods:")
    print(rec_df.to_string(index=False))

# ------------------------------
# Step 6: ML-based Team Leaderboard
# ------------------------------
def team_leaderboard_ml(df, group_col='Role', top_n=5):
    """Rank groups by the mean predicted score of their top performers.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``group_col`` and ``Predicted_Survival_Score``.
    group_col : str, default 'Role'
        Column whose distinct values form the leaderboard rows.
    top_n : int, default 5
        Number of highest-scoring members averaged per group (fewer are used
        when a group is smaller).

    Returns
    -------
    pandas.DataFrame
        Columns ``group_col`` and ``Top{top_n}_Avg_Survival_Score`` (rounded
        to 2 decimals), sorted by the average in descending order. An empty
        input yields an empty frame with those two columns.
    """
    score_col = 'Predicted_Survival_Score'
    avg_col = f"Top{top_n}_Avg_Survival_Score"

    rows = [
        {
            group_col: group,
            avg_col: round(
                df.loc[df[group_col] == group, score_col].nlargest(top_n).mean(),
                2,
            ),
        }
        for group in df[group_col].unique()
    ]
    # Naming the columns explicitly keeps sort_values() valid when `rows` is
    # empty; a frame built from [] alone has no columns to sort by.
    leaderboard = pd.DataFrame(rows, columns=[group_col, avg_col])
    return leaderboard.sort_values(by=avg_col, ascending=False)

# Print the header first, then the leaderboard table without the index column.
print("\n=== ML-based Team Leaderboard ===")
leaderboard_table = team_leaderboard_ml(employee_data)
print(leaderboard_table.to_string(index=False))


Enter employee name: Priya Reddy

Employee Wellness Data:
   Employee_ID         Name  Age Gender  Height_cm  Weight_kg   BMI  \
58        E059  Priya Reddy   28   Male        179         55  17.2   

   Dietary_Preference Allergies Health_Goals  ... Daily_Calories_Required  \
58         Vegetarian      Nuts  Muscle Gain  ...                    2391   

    Heart_Rate_bpm  Step_Count  Calories_Burned  SpO2  Respiratory_Rate  HRV  \
58              73        7377              202   100                14   54   

    Team_Name         Role Predicted_Survival_Score  
58   Theta IT  QA Engineer                  7375.14  

[1 rows x 21 columns]

Food Recommendation Reason: Muscle gain goal: prioritizing high-protein foods.

Recommended Foods:
             Category                    Dish Name Calories Protein  Carbs   Fats
            Main Dish Paneer stuffed cheela/chilla   205.19   11.44  19.16   8.89
            Side Dish         Consomme au julienne    27.76   12.49   1.49  10.66
      