In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [6]:
df = pd.read_csv('final_Datasets.csv')

In [3]:
df.head()

Unnamed: 0,name,image,description,prep_time,cook_time,total_time,servings_number,servings_unit,course,cuisine,diet,ingredients,instructions,nutrition
0,7 Cups Sweet | 7 Cup Burfi Recipe,https://www.vegrecipesofindia.com/wp-content/u...,7 Cup Burfi recipe is an easy and delicious So...,5minutes,25minutes,30minutes,4.0,,Sweets,"South Indian, Tamil Nadu",Vegetarian,â–¢ Â½ cup besan (gram flour) or 50 grams â–¢ Â½ cup...,PreparationTake all the ingredients and keep t...,NutritionCalories: 512kcal | Carbohydrates: 59...
1,Aam Ka Murabba | Mango Murabba | Indian Mango ...,https://www.vegrecipesofindia.com/wp-content/u...,Aam Ka Murabba or Mango Murabba is a sweet and...,10minutes,15minutes,25minutes,450.0,grams,Side Dish,North Indian,"Gluten Free, Vegan","â–¢ 2 mangoes unripe, green and large â€“ 400 gra...",PreparationRinse and wipe dry the mangoes. The...,NutritionCalories: 70kcal | Carbohydrates: 18g...
2,Aam Panna Recipe â€“ With Boiled and Roasted Man...,https://www.vegrecipesofindia.com/wp-content/u...,Aam ka pannaÂ is a cooling Indian summer drink ...,15minutes,5minutes,20minutes,12.0,Aam Panna Drink,Beverages,Indian,Vegan,For boiled aam panna â€“ Recipe 1â–¢ 2 mangoes â€“ ...,Making Boiled Aam Panna â€“ Recipe 1Rinse the ra...,NutritionCalories: 118kcal | Carbohydrates: 30...
3,Aamras Recipe,https://www.vegrecipesofindia.com/wp-content/u...,Aamrasis a popular and traditional mango delic...,15minutes,0minutes,15minutes,4.0,,Desserts,"Gujarati, Maharashtrian","Gluten Free, Vegan",â–¢ 400 grams mangoes or 2 large alphonso or 3 t...,Making aamrasRinse the alphonso mangoes thorou...,NutritionCalories: 61kcal | Carbohydrates: 15g...
4,Achari Paneer Recipe (With Gravy),https://www.vegrecipesofindia.com/wp-content/u...,Achari Paneer is a creamy as well as robust fl...,15minutes,30minutes,45minutes,4.0,,Main Course,North Indian,Vegetarian,Pickling spicesâ–¢ 1 teaspoon fennel seedsâ–¢ 1 te...,Roasting Pickle SpicesFirst take the achari or...,NutritionCalories: 321kcal | Carbohydrates: 9g...


In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# 1. Load the Dataset
# The CSV is read from the current working directory.
df = pd.read_csv('final_Datasets.csv')

print(f"Loaded {len(df)} recipes successfully.")

# 2. Data Cleaning & Preprocessing
# Missing text would break the string concatenation done later, so every
# text feature gets an empty string in place of NaN (one vectorised call).
features = ['ingredients', 'description', 'cuisine', 'course', 'diet', 'name']
df[features] = df[features].fillna('')

# 3. Create a "Content Soup"
# We combine all important text features into one long string (soup) for each recipe.
# This gives the algorithm a full picture of the dish (what it is, how it's made, what's inside).
def create_soup(x):
    """Join a recipe's text fields into one space-separated string.

    Args:
        x: Mapping-like row (e.g. a DataFrame row) holding string values for
            'name', 'ingredients', 'description', 'cuisine', 'diet' and
            'course'.

    Returns:
        The six fields, in that order, separated by single spaces.
    """
    soup_fields = ('name', 'ingredients', 'description', 'cuisine', 'diet', 'course')
    return ' '.join(x[field] for field in soup_fields)

df['soup'] = df.apply(create_soup, axis=1)

# 4. TF-IDF Vectorization
# Convert the text 'soup' into a matrix of numbers.
# stop_words='english' removes common words like "the", "a", "in".
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['soup'])

# 5. Compute Cosine Similarity
# TfidfVectorizer L2-normalises every row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity (0 to 1) of each pair.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# 6. Create Index Mapping
# Maps a recipe name to its row index. Series.drop_duplicates() compares the
# *values* (row indices, which are always unique) and so never removed
# anything; repeated names have to be dropped through the index instead.
# The first occurrence is kept so every lookup yields a single integer.
indices = pd.Series(df.index, index=df['name'])
indices = indices[~indices.index.duplicated(keep='first')]

# 7. Define the Recommendation Function
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the recipes most similar to ``title``.

    Args:
        title: Exact recipe name as it appears in ``df['name']``.
        cosine_sim: Square similarity matrix indexed by row position; the
            default is the matrix that exists when this function is defined.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A DataFrame (name, cuisine, diet, course) ordered from most to least
        similar, or an explanatory string when ``title`` is unknown.
    """
    # Check if title exists
    if title not in indices:
        return f"Recipe '{title}' not found in the dataset."

    # Get the index of the recipe
    idx = indices[title]

    # Handle cases where multiple recipes might have the exact same name
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the query recipe by index rather than assuming it sorts first:
    # another recipe with an equally high score could otherwise take its
    # place and be discarded while the query itself is recommended.
    ranked = sorted(
        ((i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    recipe_indices = [i for i, _ in ranked[:top_n]]

    return df[['name', 'cuisine', 'diet', 'course']].iloc[recipe_indices]

# --- TEST THE SYSTEM ---
# Smoke test: look up neighbours for a dish name taken from the dataset.
test_dish = "7 Cups Sweet | 7 Cup Burfi Recipe"
recommendations = get_recommendations(test_dish)

print(f"\nFinding recommendations for: {test_dish}")
print(recommendations)

Loaded 1048 recipes successfully.

Finding recommendations for: 7 Cups Sweet | 7 Cup Burfi Recipe
                                                  name                cuisine  \
326                      Gajar Ki Barfi | Carrot Burfi           North Indian   
245                   Coconut Burfi | Nariyal ki Barfi  Indian, Maharashtrian   
118                                     Besan Ka Halwa                 Indian   
119           Besan Ladoo (Easy Besan ke Laddu Recipe)           North Indian   
622                                  Mysore Pak Recipe              Karnataka   
120                                     Besan ki Barfi           North Indian   
247  Coconut Ladoo Recipe | Nariyal ke Laddu Made I...  Indian, Maharashtrian   
626               Nankhatai Recipe | Nankhatai Biscuit           North Indian   
231              Chocolate Barfi Recipe (Mithai Burfi)                 Indian   
66                            Atta Laddu | Wheat Ladoo  North Indian, Punjabi   

          

In [4]:
import pandas as pd
import re

# 1. SETUP
df = pd.read_csv('final_Datasets.csv')
df = df.fillna('')

def parse_nutrition(text):
    """Extract calories and protein from a free-text nutrition string.

    Looks for ``Calories: <number>`` and ``Protein: <number>`` anywhere in the
    text. Only the integer part is kept; thousands separators ("1,200") are
    accepted.

    Args:
        text: Nutrition description; any object is accepted and converted
            with ``str``.

    Returns:
        ``(calories, protein)`` as ints. A missing calorie figure is reported
        as 9999 and a missing protein figure as 0.
    """
    text = str(text)
    # Either a comma-grouped number (1,200) or a plain run of digits.
    number = r':\s*(\d{1,3}(?:,\d{3})+|\d+)'

    def _find(label, default):
        match = re.search(label + number, text)
        return int(match.group(1).replace(',', '')) if match else default

    # No try/except: str(), re.search and int() on matched digits cannot fail
    # here, and a bare except would only hide unrelated errors.
    return _find('Calories', 9999), _find('Protein', 0)

# Parse every nutrition string once and spread the (calories, protein)
# tuples over two new columns.
df[['calories', 'protein']] = pd.DataFrame(
    df['nutrition'].map(parse_nutrition).tolist(),
    index=df.index,
    columns=['calories', 'protein'],
)

# Region name -> substrings that, when found in a recipe's cuisine, mark the
# recipe as local to that region.
location_map = {
    'Maharashtra': ['Maharashtrian', 'Mumbai', 'Konkani'],
    'Punjab': ['Punjabi', 'North Indian'],
    'South India': ['South Indian', 'Kerala', 'Tamil Nadu', 'Karnataka', 'Andhra'],
    'Gujarat': ['Gujarati'],
    'Bengal': ['Bengali']
}

# first function
def get_smart_recommendations(dish_name, user_location=None, user_diet=None, top_n=5):
    """
    1. Finds similar dishes (Content)
    2. Boosts score if cuisine matches location (Location)
    3. Removes dishes that don't match the diet (Diet)

    Args:
        dish_name: Exact recipe name to find neighbours for.
        user_location: Key of ``location_map``; unknown or None means no boost.
        user_diet: Diet label that must appear in a recipe's diet text.
            "Vegetarian" also accepts recipes labelled "Vegan".
        top_n: Maximum number of recommendations (default 5).

    Returns:
        A DataFrame (name, course, cuisine, diet) ordered by boosted score,
        or an explanatory string when ``dish_name`` is unknown.
    """
    # Validation
    if dish_name not in indices:
        return f"Recipe '{dish_name}' not found."

    idx = indices[dish_name]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Read both text columns once; df.iloc[i][...] inside the loops would
    # build a full row object for every recipe.
    cuisines = df['cuisine'].tolist()
    diets = df['diet'].tolist()

    # Get keywords for the user's location (empty when unknown)
    loc_keywords = location_map.get(user_location, [])

    def _diet_ok(recipe_diet):
        # No preference: everything passes.
        if not user_diet:
            return True
        # "Vegetarian" logic: allow "Vegetarian" OR "Vegan"
        if user_diet == "Vegetarian":
            return "Vegetarian" in recipe_diet or "Vegan" in recipe_diet
        # Strict logic for others (Vegan, Gluten Free, etc.)
        return user_diet in recipe_diet

    # A + B. Raw similarity, plus 0.15 for recipes from the user's region
    boosted_scores = []
    for i, score in enumerate(cosine_sim[idx]):
        # Skip the item itself
        if i == idx:
            continue
        if any(k in cuisines[i] for k in loc_keywords):
            score += 0.15
        boosted_scores.append((i, score))

    # Sort based on the (possibly boosted) score
    boosted_scores.sort(key=lambda pair: pair[1], reverse=True)

    # C. DIET STEP: walk down the ranking until top_n recipes pass the filter
    final_indices = []
    for i, _ in boosted_scores:
        if len(final_indices) >= top_n:
            break
        if _diet_ok(diets[i]):
            final_indices.append(i)

    # Return Result
    return df[['name', 'course', 'cuisine', 'diet']].iloc[final_indices]

# 2. THE RECOMMENDATION FUNCTION
def onboarding_recommendation(home_state, diet_type, is_gluten_free, diet_goal):
    """Recommend up to 10 recipes for a user with no viewing history.

    Desserts, sweets, spice blends, side dishes and condiments are excluded;
    the rest is filtered by diet, flagged when its cuisine matches the user's
    region, and ordered according to the dietary goal.

    Args:
        home_state: Key of ``location_map`` (e.g. ``'Punjab'``); any other
            value yields no location boost.
        diet_type: ``'Vegan'`` or ``'Vegetarian'`` (case and surrounding
            whitespace are ignored); anything else applies no diet filter.
        is_gluten_free: Bool, or a text answer such as ``'Yes'`` / ``'No'``.
        diet_goal: ``'weight_loss'``, ``'weight_gain'``, ``'high_protein'``
            or ``'lean_muscle'``; anything else sorts by location only.

    Returns:
        DataFrame with columns name, cuisine, protein, calories (max 10 rows).
    """
    # Value parse_nutrition stores when a recipe has no calorie figure.
    unknown_calories = 9999

    def _norm(value):
        return value.strip().lower() if isinstance(value, str) else value

    diet_type = _norm(diet_type)
    diet_goal = _norm(diet_goal)
    if isinstance(home_state, str):
        home_state = home_state.strip()
    # A raw text answer must be interpreted: bool("No") is True.
    if isinstance(is_gluten_free, str):
        is_gluten_free = is_gluten_free.strip().lower() in ('yes', 'y', 'true', '1')

    # A. STRICT COURSE FILTER
    exclude_pattern = 'Dessert|Sweet|Spice Blend|Side Dish|Condiment|Spices'
    filtered_df = df[~df['course'].str.contains(exclude_pattern, case=False, na=False)]

    # B. DIET FILTER
    if diet_type == 'vegan':
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Vegan', case=False)]
    elif diet_type == 'vegetarian':
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Vegetarian|Vegan', case=False)]

    if is_gluten_free:
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Gluten Free', case=False)]

    # C. LOCATION BOOST
    # assign() returns a new frame, so no column is ever written into a
    # filtered slice of df (which would trigger SettingWithCopyWarning).
    loc_keywords = location_map.get(home_state, [])
    filtered_df = filtered_df.assign(relevance_score=[
        100 if any(k in cuisine for k in loc_keywords) else 0
        for cuisine in filtered_df['cuisine']
    ])

    # D. GOAL SORTING logic
    # Calorie-driven goals ignore recipes whose calories are unknown;
    # otherwise the 9999 placeholder would rank first for weight gain.
    has_calories = filtered_df['calories'] != unknown_calories

    if diet_goal == 'weight_loss':
        # Prioritize Location, then Low Calories
        results = filtered_df[has_calories].sort_values(
            by=['relevance_score', 'calories'], ascending=[False, True])

    elif diet_goal == 'weight_gain':
        # Prioritize Location, then High Calories
        results = filtered_df[has_calories].sort_values(
            by=['relevance_score', 'calories'], ascending=[False, False])

    elif diet_goal == 'high_protein':
        # Prioritize Location, then Max Protein
        results = filtered_df.sort_values(
            by=['relevance_score', 'protein'], ascending=[False, False])

    elif diet_goal == 'lean_muscle':
        # Require meaningful protein (> 10 g) so tiny items with a high
        # ratio are not recommended, then rank by protein per calorie.
        # (+1 keeps the division safe for 0-calorie rows.)
        lean_df = filtered_df[has_calories & (filtered_df['protein'] > 10)]
        lean_df = lean_df.assign(
            protein_density=lean_df['protein'] / (lean_df['calories'] + 1))
        results = lean_df.sort_values(
            by=['relevance_score', 'protein_density'], ascending=[False, False])

    else:
        # mergesort is stable: ties keep their dataset order.
        results = filtered_df.sort_values(
            by='relevance_score', ascending=False, kind='mergesort')

    return results[['name', 'cuisine', 'protein', 'calories']].head(10)


In [14]:
# ==========================================================
#                 1. IMPORTS & DATA LOADING
# ==========================================================
import pandas as pd
import re

df = pd.read_csv("final_Datasets.csv")
df = df.fillna('')

# ==========================================================
#                 2. HELPER: PARSE NUTRITION
# ==========================================================
def parse_nutrition(text):
    """Extract calories and protein from a free-text nutrition string.

    Looks for ``Calories: <number>`` and ``Protein: <number>`` anywhere in the
    text. Only the integer part is kept; thousands separators ("1,200") are
    accepted.

    Args:
        text: Nutrition description; any object is accepted and converted
            with ``str``.

    Returns:
        ``(calories, protein)`` as ints. A missing calorie figure is reported
        as 9999 and a missing protein figure as 0.
    """
    text = str(text)
    # Either a comma-grouped number (1,200) or a plain run of digits.
    number = r':\s*(\d{1,3}(?:,\d{3})+|\d+)'

    def _find(label, default):
        match = re.search(label + number, text)
        return int(match.group(1).replace(',', '')) if match else default

    # No try/except: str(), re.search and int() on matched digits cannot fail
    # here, and a bare except would only hide unrelated errors.
    return _find('Calories', 9999), _find('Protein', 0)

# Parse every nutrition string once and spread the (calories, protein)
# tuples over two new columns.
df[['calories', 'protein']] = pd.DataFrame(
    df['nutrition'].map(parse_nutrition).tolist(),
    index=df.index,
    columns=['calories', 'protein'],
)

# ==========================================================
#                 3. LOCATION BOOST MAP
# ==========================================================
# Region name -> substrings that, when found in a recipe's cuisine, mark the
# recipe as local to that region.
location_map = {
    'Maharashtra': ['Maharashtrian', 'Mumbai', 'Konkani'],
    'Punjab': ['Punjabi', 'North Indian'],
    'South India': ['South Indian', 'Kerala', 'Tamil Nadu', 'Karnataka', 'Andhra'],
    'Gujarat': ['Gujarati'],
    'Bengal': ['Bengali']
}

# ==========================================================
#          4. CONTENT SIMILARITY MODEL (REQUIRED)
# ==========================================================
# `indices` and `cosine_sim` are expected to come from the TF-IDF cell run
# earlier in the notebook. Assigning the placeholders unconditionally would
# replace that model with an all-ones matrix, making every recipe look
# equally similar, so they are only created when no model exists yet.
try:
    indices, cosine_sim
except NameError:
    # Placeholder so the functions below stay callable; rankings produced
    # from it carry no content information.
    indices = {name: idx for idx, name in enumerate(df['name'])}
    cosine_sim = [[1] * len(df) for _ in range(len(df))]  # Dummy


# ==========================================================
#          5. SMART RECOMMENDATION (OLD USER)
# ==========================================================
def get_smart_recommendations(dish_name, user_location=None, user_diet=None, top_n=5):
    """Recommend dishes similar to ``dish_name``, tuned to location and diet.

    Similarity scores get +0.15 when a recipe's cuisine contains one of the
    keywords for ``user_location``; recipes failing the diet check are then
    skipped while walking down the ranking.

    Args:
        dish_name: Exact recipe name to find neighbours for.
        user_location: Key of ``location_map``; unknown or None means no boost.
        user_diet: Diet label that must appear in a recipe's diet text.
            "Vegetarian" also accepts recipes labelled "Vegan".
        top_n: Maximum number of recommendations (default 5).

    Returns:
        A DataFrame (name, course, cuisine, diet) ordered by boosted score,
        or an explanatory string when ``dish_name`` is unknown.
    """
    if dish_name not in indices:
        return f"Recipe '{dish_name}' not found."

    idx = indices[dish_name]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Read both text columns once; df.iloc[i][...] inside the loops would
    # build a full row object for every recipe.
    cuisines = df['cuisine'].tolist()
    diets = df['diet'].tolist()
    loc_keywords = location_map.get(user_location, [])

    def _diet_ok(recipe_diet):
        if not user_diet:
            return True
        if user_diet == "Vegetarian":
            return "Vegetarian" in recipe_diet or "Vegan" in recipe_diet
        return user_diet in recipe_diet

    # A + B. Content similarity with the location boost applied
    boosted_scores = []
    for i, score in enumerate(cosine_sim[idx]):
        if i == idx:
            continue
        if any(k in cuisines[i] for k in loc_keywords):
            score += 0.15
        boosted_scores.append((i, score))

    boosted_scores.sort(key=lambda pair: pair[1], reverse=True)

    # C. Diet filtering, stopping once top_n recipes have been collected
    final_indices = []
    for i, _ in boosted_scores:
        if len(final_indices) >= top_n:
            break
        if _diet_ok(diets[i]):
            final_indices.append(i)

    return df[['name', 'course', 'cuisine', 'diet']].iloc[final_indices]


# ==========================================================
#       6. ONBOARDING RECOMMENDATION (NEW USER)
# ==========================================================
def onboarding_recommendation(home_state, diet_type, is_gluten_free, diet_goal):
    """Recommend up to 10 recipes for a user with no viewing history.

    Desserts, sweets, spice blends, side dishes and condiments are excluded;
    the rest is filtered by diet, flagged when its cuisine matches the user's
    region, and ordered according to the dietary goal.

    Args:
        home_state: Key of ``location_map`` (e.g. ``'Punjab'``); any other
            value yields no location boost.
        diet_type: ``'Vegan'`` or ``'Vegetarian'`` (case and surrounding
            whitespace are ignored); anything else applies no diet filter.
        is_gluten_free: Bool, or a text answer such as ``'Yes'`` / ``'No'``.
        diet_goal: ``'weight_loss'``, ``'weight_gain'``, ``'high_protein'``
            or ``'lean_muscle'``; anything else sorts by location only.

    Returns:
        DataFrame with columns name, cuisine, protein, calories (max 10 rows).
    """
    # Value parse_nutrition stores when a recipe has no calorie figure.
    unknown_calories = 9999

    def _norm(value):
        return value.strip().lower() if isinstance(value, str) else value

    diet_type = _norm(diet_type)
    diet_goal = _norm(diet_goal)
    if isinstance(home_state, str):
        home_state = home_state.strip()
    # A raw text answer must be interpreted: bool("No") is True.
    if isinstance(is_gluten_free, str):
        is_gluten_free = is_gluten_free.strip().lower() in ('yes', 'y', 'true', '1')

    exclude_pattern = 'Dessert|Sweet|Spice Blend|Side Dish|Condiment|Spices'
    filtered_df = df[~df['course'].str.contains(exclude_pattern, case=False, na=False)]

    if diet_type == 'vegan':
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Vegan', case=False)]
    elif diet_type == 'vegetarian':
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Vegetarian|Vegan', case=False)]

    if is_gluten_free:
        filtered_df = filtered_df[filtered_df['diet'].str.contains('Gluten Free', case=False)]

    # assign() returns a new frame, so no column is ever written into a
    # filtered slice of df (which would trigger SettingWithCopyWarning).
    loc_keywords = location_map.get(home_state, [])
    filtered_df = filtered_df.assign(relevance_score=[
        100 if any(k in cuisine for k in loc_keywords) else 0
        for cuisine in filtered_df['cuisine']
    ])

    # Calorie-driven goals ignore recipes whose calories are unknown;
    # otherwise the 9999 placeholder would rank first for weight gain.
    has_calories = filtered_df['calories'] != unknown_calories

    if diet_goal == 'weight_loss':
        results = filtered_df[has_calories].sort_values(
            by=['relevance_score', 'calories'], ascending=[False, True])

    elif diet_goal == 'weight_gain':
        results = filtered_df[has_calories].sort_values(
            by=['relevance_score', 'calories'], ascending=[False, False])

    elif diet_goal == 'high_protein':
        results = filtered_df.sort_values(
            by=['relevance_score', 'protein'], ascending=[False, False])

    elif diet_goal == 'lean_muscle':
        # Protein per calorie among recipes with more than 10 g of protein;
        # +1 keeps the division safe for 0-calorie rows.
        lean_df = filtered_df[has_calories & (filtered_df['protein'] > 10)]
        lean_df = lean_df.assign(
            protein_density=lean_df['protein'] / (lean_df['calories'] + 1))
        results = lean_df.sort_values(
            by=['relevance_score', 'protein_density'], ascending=[False, False])

    else:
        # mergesort is stable: ties keep their dataset order.
        results = filtered_df.sort_values(
            by='relevance_score', ascending=False, kind='mergesort')

    return results[['name', 'cuisine', 'protein', 'calories']].head(10)


# ==========================================================
#                  7. USER HISTORY STORAGE
# ==========================================================
user_history = {}

def save_history(user_id, field, value):
    """Record one piece of activity for a user and return their record.

    A record is created on first use. Recognised ``field`` values:
    "last_viewed" (also appended to the view history), "liked",
    "recent_search" and "diet_profile". Any other field leaves the record
    untouched.

    Args:
        user_id: Key into the module-level ``user_history`` dict.
        field: Which part of the record to update.
        value: Value to store or append.

    Returns:
        The user's history dict (the stored object, not a copy).
    """
    record = user_history.setdefault(user_id, {
        "last_viewed": None,
        "liked": [],
        "recent_searches": [],
        "view_history": [],
        "diet_profile": {},
    })

    if field == "last_viewed":
        record["last_viewed"] = value
        record["view_history"].append(value)
        return record

    if field == "liked":
        record["liked"].append(value)
        return record

    if field == "recent_search":
        record["recent_searches"].append(value)
        return record

    if field == "diet_profile":
        record["diet_profile"] = value

    return record


def is_new_user(user_id):
    """Return True when the user has no history record or an empty diet profile."""
    if user_id in user_history:
        return user_history[user_id]["diet_profile"] == {}
    return True


# ==========================================================
#               8. SIMULATION FUNCTION
# ==========================================================
def simulate_user_flow(user_id, last_dish=None, profile=None):
    """Route a user to onboarding or to history-based recommendations.

    New users (see ``is_new_user``) get onboarding recommendations built from
    ``profile``, which is then stored. Returning users get recommendations
    similar to the dish they viewed last, using their stored profile.

    Args:
        user_id: Key identifying the user in ``user_history``.
        last_dish: Optional dish the user has just viewed; when given it is
            recorded as the last viewed dish before recommending.
        profile: Dict with keys 'home_state', 'diet_type', 'gluten_free' and
            'goal'. Required for new users, ignored for returning users.

    Returns:
        Whatever the underlying recommendation function returns (a DataFrame,
        or a message string when the dish is unknown).

    Raises:
        TypeError: If the user is new and no ``profile`` was supplied.
    """
    if is_new_user(user_id):
        if profile is None:
            raise TypeError("profile is required to onboard a new user")

        print("\nðŸŸ¢ NEW USER â€” Onboarding Recommendations\n")

        recs = onboarding_recommendation(
            home_state=profile['home_state'],
            diet_type=profile['diet_type'],
            is_gluten_free=profile['gluten_free'],
            diet_goal=profile['goal']
        )

        save_history(user_id, "diet_profile", profile)
        if last_dish is not None:
            save_history(user_id, "last_viewed", last_dish)
        return recs

    print("\nðŸ”µ RETURNING USER â€” Personalized Smart Recommendations\n")

    # Previously last_dish was accepted but never used.
    if last_dish is not None:
        save_history(user_id, "last_viewed", last_dish)

    stored = user_history[user_id]
    saved_profile = stored["diet_profile"]
    last_seen = stored["last_viewed"]

    if last_seen is None:
        # Nothing viewed yet: a similarity lookup for None could only answer
        # "not found", so reuse the profile-based ranking instead.
        return onboarding_recommendation(
            home_state=saved_profile["home_state"],
            diet_type=saved_profile["diet_type"],
            is_gluten_free=saved_profile.get("gluten_free", False),
            diet_goal=saved_profile.get("goal")
        )

    return get_smart_recommendations(
        dish_name=last_seen,
        user_location=saved_profile["home_state"],
        user_diet=saved_profile["diet_type"]
    )

In [15]:
# Collect the onboarding answers. Surrounding whitespace is stripped so the
# values compare equal to the keys and labels used by the recommenders.
home_state = input(f"Enter the Home Location ({', '.join(location_map)}) : ").strip()
diet_type = input("Enter your diet (Vegetarian, Vegan) : ").strip()
# Convert the answer to a real boolean: the raw string "No" is truthy and
# would otherwise switch the gluten-free filter on.
gluten_free = input("Gluten Free (Yes or No) : ").strip().lower() in ("yes", "y")
goal = input("Enter Your Goal like (lean_muscle, weight_loss, weight_gain, high_protein) : ").strip()

profile = {
    "home_state": home_state,
    "diet_type": diet_type,
    "gluten_free": gluten_free,
    "goal": goal
}

# First call: user 101 has no profile yet, so onboarding runs and stores it.
print(simulate_user_flow(101, profile=profile))

# NOTE(review): the dish name must match a value of df['name'] exactly,
# otherwise the next call reports it as not found; confirm "poha idli".
save_history(101, "last_viewed", "poha idli")

# Second call: the stored profile makes this a returning user.
print(simulate_user_flow(101))

Enter the Home Location :  North Indian
Enter your diet (Vegetarian, Vegan) :  Vegan
Gluten Free (Yes or No) :  Yes
Enter Your Goal like (lean_muscle, weight_loss, weight_gain, high_protein) :  weight_loss



ðŸŸ¢ NEW USER â€” Onboarding Recommendations

                                                  name  \
927  Tandoori Gobi | Gobi Tikka | Tandoori Cauliflower   
542    Matar Kulcha | Matar Chaat â€“ Delhi Style Recipe   
421          Kali Dal | Punjabi Black Dal (Maa Ki Dal)   
893                           Soya Chaap Masala Recipe   
708  Paneer Tikka Recipe (Tandoori Paneer Starter S...   
54                      Amritsari Dal | Langarwali Dal   
384                            Instant Pot Dal Makhani   
5                                  Achari Paneer Tikka   
236                 Chole Paneer Recipe | Chana Paneer   
495        Madra Recipe | Himachali Rajma Madra Recipe   

                     cuisine  protein  calories  
927    North Indian, Punjabi       11       133  
542    North Indian, Punjabi       23       360  
421          Indian, Punjabi       13       211  
893             North Indian       16       265  
708             North Indian       17       316  
54          