In [2]:
import pandas as pd

# Read the final dataset CSV into a DataFrame.
# The location is hard-coded; point it at wherever the file actually lives.
final_df = pd.read_csv(filepath_or_buffer="/content/final_output.csv")


In [3]:
# Daily intake target for each nutrient; every recipe is compared against these
required_nutrients = dict(
    calories=2000,
    protein=50,
    fat=70,
    carbohydrates=310,
)

# Build the analysis frame in a single step. `assign` returns a new DataFrame,
# so `final_df` itself is left untouched. Columns are appended in the order
# written here: the four *_gap columns first, then the four *_deficiency flags.
gap_df = final_df.assign(
    # <nutrient>_gap = target - recipe value (positive when the recipe falls short)
    **{
        f"{nutrient}_gap": target - final_df[nutrient]
        for nutrient, target in required_nutrients.items()
    },
    # <nutrient>_deficiency = True when the recipe provides strictly less than the target
    **{
        f"{nutrient}_deficiency": final_df[nutrient] < target
        for nutrient, target in required_nutrients.items()
    },
)


MRR recommendation model

In [4]:
import pandas as pd

# Step 1: Read the final dataset (change the location if the file lives elsewhere)
final_df = pd.read_csv(filepath_or_buffer="/content/final_output.csv")

# Step 2: Daily nutrient targets (edit these values to suit)
required_nutrients = dict(
    calories=2000,
    protein=50,
    fat=70,
    carbohydrates=310,
)

# Steps 3-4: Nutrient gap analysis and deficiency flags in one pass.
# `assign` returns a new DataFrame, so `final_df` is not modified. Columns are
# appended in the order written: four *_gap columns, then four *_deficiency flags.
gap_df = final_df.assign(
    # <nutrient>_gap = target - recipe value (positive when the recipe falls short)
    **{
        f"{nutrient}_gap": target - final_df[nutrient]
        for nutrient, target in required_nutrients.items()
    },
    # <nutrient>_deficiency = True when the recipe provides strictly less than the target
    **{
        f"{nutrient}_deficiency": final_df[nutrient] < target
        for nutrient, target in required_nutrients.items()
    },
)

# Step 5: Write the analysis to CSV, leaving out the DataFrame index
output_filename = "nutrient_gap_analysis.csv"
gap_df.to_csv(path_or_buf=output_filename, index=False)

# Step 6: Show a link to the written file in the cell output
from IPython.display import FileLink, display
display(FileLink(output_filename))


In [6]:
import pandas as pd
import numpy as np

# Read the recipe nutrient table that the ranking below works on
df = pd.read_csv(filepath_or_buffer="/content/final_output.csv")

# Step 1: Five hand-written example users, each with one target value per nutrient
user_profiles = [
    dict(user_id='U1', calories=1800, protein=60, fat=50, carbohydrates=250),
    dict(user_id='U2', calories=2000, protein=50, fat=70, carbohydrates=310),
    dict(user_id='U3', calories=1500, protein=30, fat=40, carbohydrates=200),
    dict(user_id='U4', calories=2200, protein=70, fat=80, carbohydrates=330),
    dict(user_id='U5', calories=1600, protein=40, fat=60, carbohydrates=270),
]

# Step 2: Function to rank recipes by inverse distance for a user
def rank_recipes(user, df, nutrients=('calories', 'protein', 'fat', 'carbohydrates')):
    """Rank recipes by how closely their nutrients match a user's targets.

    Parameters
    ----------
    user : mapping
        Must provide a numeric target under each name in ``nutrients``.
    df : pandas.DataFrame
        Recipe table with one numeric column per name in ``nutrients``.
        It is not modified.
    nutrients : sequence of str, optional
        Columns that take part in the distance. Defaults to the four
        nutrients used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns, ``distance`` (Euclidean
        distance between the recipe and the targets) and ``score``
        (``1 / (1 + distance)``), sorted best-first with a fresh 0..n-1
        index. Recipes with equal scores keep their original relative order.

    Raises
    ------
    KeyError
        If ``user`` or ``df`` lacks one of ``nutrients``.

    Notes
    -----
    Differences are not rescaled, so nutrients measured on larger numeric
    scales contribute more to the distance.
    """
    df_copy = df.copy()
    # Sum the squared differences one nutrient at a time, in the order given.
    squared_error = sum((df[name] - user[name]) ** 2 for name in nutrients)
    df_copy['distance'] = np.sqrt(squared_error)
    df_copy['score'] = 1 / (1 + df_copy['distance'])
    # mergesort is stable: tied scores are not reordered, so the ranking
    # (and any metric derived from it) is reproducible from run to run.
    return (
        df_copy
        .sort_values(by='score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

# Step 3: Score each user by the rank of their first "relevant" recipe.
# A recipe is relevant when every nutrient lies within `margin` of the user's
# target, measured relative to that target.
margin = 0.05  # 5% tolerance; the same for every user, so it is set once
nutrient_columns = ('calories', 'protein', 'fat', 'carbohydrates')
reciprocal_ranks = []

for user in user_profiles:
    ranked_df = rank_recipes(user, df)

    # Vectorised relevance test: start from all-True and AND in one nutrient
    # at a time. Comparisons involving NaN are False, so a recipe with a
    # missing nutrient value is never marked relevant.
    within_margin = pd.Series(True, index=ranked_df.index)
    for nutrient in nutrient_columns:
        target = user[nutrient]
        within_margin = within_margin & (
            (ranked_df[nutrient] - target).abs() / target <= margin
        )
    ranked_df['relevant'] = within_margin

    # rank_recipes returns rows in ranked order with a 0..n-1 index,
    # so the first True position + 1 is the 1-based rank.
    relevant_positions = np.flatnonzero(within_margin.to_numpy())
    if relevant_positions.size:
        rank = int(relevant_positions[0]) + 1
        reciprocal_ranks.append(1 / rank)
    else:
        # No recipe within the margin: this user contributes 0 to the mean
        reciprocal_ranks.append(0)

# Step 4: Calculate final MRR (0.0 when there are no users, rather than dividing by zero)
mean_reciprocal_rank = (
    sum(reciprocal_ranks) / len(user_profiles) if user_profiles else 0.0
)
print("📊 Mean Reciprocal Rank (MRR):", round(mean_reciprocal_rank, 4))


📊 Mean Reciprocal Rank (MRR): 0.0224


IMPROVED MRR RECOMMENDATION MODEL

In [7]:
import pandas as pd
import numpy as np

# Read the final nutrient dataset
df = pd.read_csv(filepath_or_buffer="final_output.csv")

# Step 1: Draw 20 synthetic users with random nutrient targets.
# Seeding NumPy's global generator makes the draws repeatable.
np.random.seed(42)
# Dict entries are evaluated top to bottom, so each user consumes four draws
# in the order calories -> protein -> fat -> carbohydrates.
# randint's upper bound is exclusive.
user_profiles = [
    {
        'user_id': f'U{user_number}',
        'calories': np.random.randint(1500, 2500),
        'protein': np.random.randint(30, 90),
        'fat': np.random.randint(40, 100),
        'carbohydrates': np.random.randint(200, 350),
    }
    for user_number in range(1, 21)
]

# Step 2: Define ranking function based on inverse distance
def rank_recipes(user, df, nutrients=('calories', 'protein', 'fat', 'carbohydrates')):
    """Rank recipes by how closely their nutrients match a user's targets.

    Parameters
    ----------
    user : mapping
        Must provide a numeric target under each name in ``nutrients``.
    df : pandas.DataFrame
        Recipe table with one numeric column per name in ``nutrients``.
        It is not modified.
    nutrients : sequence of str, optional
        Columns that take part in the distance. Defaults to the four
        nutrients used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns, ``distance`` (Euclidean
        distance between the recipe and the targets) and ``score``
        (``1 / (1 + distance)``), sorted best-first with a fresh 0..n-1
        index. Recipes with equal scores keep their original relative order.

    Raises
    ------
    KeyError
        If ``user`` or ``df`` lacks one of ``nutrients``.

    Notes
    -----
    Differences are not rescaled, so nutrients measured on larger numeric
    scales contribute more to the distance.
    """
    df_copy = df.copy()
    # Sum the squared differences one nutrient at a time, in the order given.
    squared_error = sum((df[name] - user[name]) ** 2 for name in nutrients)
    df_copy['distance'] = np.sqrt(squared_error)
    df_copy['score'] = 1 / (1 + df_copy['distance'])
    # mergesort is stable: tied scores are not reordered, so the ranking
    # (and any metric derived from it) is reproducible from run to run.
    return (
        df_copy
        .sort_values(by='score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

# Step 3: Collect each user's five best-scoring recipes
top_recipes_per_user = []

for user in user_profiles:
    ranked_df = rank_recipes(user, df)

    # Keep only the display columns of the first five ranked rows, then attach
    # the user's id and nutrient targets so every row is self-describing.
    top_5 = (
        ranked_df
        .loc[:, ['recipe_name', 'calories', 'protein', 'fat', 'carbohydrates', 'score']]
        .head(5)
        .assign(
            user_id=user['user_id'],
            target_calories=user['calories'],
            target_protein=user['protein'],
            target_fat=user['fat'],
            target_carbs=user['carbohydrates'],
        )
    )
    top_recipes_per_user.append(top_5)

# Step 4: Stack the per-user frames into one table with a fresh 0..n-1 index
final_recommendation_df = pd.concat(top_recipes_per_user, ignore_index=True)

# Step 5: Show the first ten rows as plain text
print(final_recommendation_df.head(10))

# Optional export, left disabled:
# final_recommendation_df.to_csv("top_5_recommendations_per_user.csv", index=False)


                                         recipe_name     calories    protein  \
0  recipe: roasted cauliflower & lentil bowl with...  1591.282051  63.497222   
1  goat cheese crostini with blood orange and bla...  1607.310000  67.180000   
2                mediterranean barley paella recipes  1606.748813  84.261660   
3                             yellow vegetable curry  1618.660000  60.040000   
4                         mediterranean barley salad  1590.200000  57.890000   
5         nutella, banana, and caramel dessert pizza  1604.699000  21.771123   
6                         summer farro salad recipes  1599.940000  55.480000   
7            pumpkin maple cornbread muffins recipes  1585.626500  27.991580   
8  banana & cinnamon waffles - gluten free & vega...  1611.989000  38.067540   
9              crispy roasted potatoes with rosemary  1602.557580  37.491815   

         fat  carbohydrates     score user_id  target_calories  \
0  63.798277     216.019248  0.045359      U1        

MRR EVALUATION

In [9]:
# ✅ Mean Reciprocal Rank (MRR) Evaluation Code

# Step 1: Re-rank recipes per user using nutrient distance
def rank_recipes(user, df, nutrients=('calories', 'protein', 'fat', 'carbohydrates')):
    """Rank recipes by how closely their nutrients match a user's targets.

    Parameters
    ----------
    user : mapping
        Must provide a numeric target under each name in ``nutrients``.
    df : pandas.DataFrame
        Recipe table with one numeric column per name in ``nutrients``.
        It is not modified.
    nutrients : sequence of str, optional
        Columns that take part in the distance. Defaults to the four
        nutrients used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns, ``distance`` (Euclidean
        distance between the recipe and the targets) and ``score``
        (``1 / (1 + distance)``), sorted best-first with a fresh 0..n-1
        index. Recipes with equal scores keep their original relative order.

    Raises
    ------
    KeyError
        If ``user`` or ``df`` lacks one of ``nutrients``.

    Notes
    -----
    Differences are not rescaled, so nutrients measured on larger numeric
    scales contribute more to the distance.
    """
    df_copy = df.copy()
    # Sum the squared differences one nutrient at a time, in the order given.
    squared_error = sum((df[name] - user[name]) ** 2 for name in nutrients)
    df_copy['distance'] = np.sqrt(squared_error)
    df_copy['score'] = 1 / (1 + df_copy['distance'])
    # mergesort is stable: tied scores are not reordered, so the ranking
    # (and any metric derived from it) is reproducible from run to run.
    return (
        df_copy
        .sort_values(by='score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

# Step 2: Evaluate using MRR across every user in `user_profiles`
reciprocal_ranks = []
user_ranks = []  # 1-based rank of each user's first relevant recipe, or None

# A recipe is relevant when every nutrient lies within 10% of the user's
# target, measured relative to that target.
margin = 0.10
nutrient_columns = ('calories', 'protein', 'fat', 'carbohydrates')

for user in user_profiles:
    ranked_df = rank_recipes(user, df)

    # Vectorised relevance test: start from all-True and AND in one nutrient
    # at a time. Comparisons involving NaN are False, so a recipe with a
    # missing nutrient value is never marked relevant.
    within_margin = pd.Series(True, index=ranked_df.index)
    for nutrient in nutrient_columns:
        target = user[nutrient]
        within_margin = within_margin & (
            (ranked_df[nutrient] - target).abs() / target <= margin
        )
    ranked_df['relevant'] = within_margin

    # rank_recipes returns rows in ranked order with a 0..n-1 index,
    # so the first True position + 1 is the 1-based rank.
    relevant_positions = np.flatnonzero(within_margin.to_numpy())
    if relevant_positions.size:
        rank = int(relevant_positions[0]) + 1
        reciprocal_ranks.append(1 / rank)
        user_ranks.append(rank)
    else:
        # No recipe within the margin: this user contributes 0 to the mean
        reciprocal_ranks.append(0)
        user_ranks.append(None)

# Step 3: Compute final MRR (0.0 when there are no users, rather than dividing by zero)
mrr_score = (
    round(sum(reciprocal_ranks) / len(user_profiles), 4) if user_profiles else 0.0
)

# Step 4: Print the result
print("📊 Final Evaluation Metric:")
print("Mean Reciprocal Rank (MRR) =", mrr_score)

📊 Final Evaluation Metric:
Mean Reciprocal Rank (MRR) = 0.0322
