In [1]:
import pandas as pd
import ast
import os
import re
from langdetect import detect
from deep_translator import GoogleTranslator
from tqdm import tqdm

tqdm.pandas()

file_path = "/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/raw/cuisines.csv"
df = pd.read_csv(file_path)

CULINARY_STOPWORDS = [
    "to taste", "a pinch", "as needed", "optional", "for garnish",
    "as required", "as per taste", "few", "some", "handful", "a few"
]

# Clean ingredients (remove quantity, stopwords, punctuation, etc.)
def clean_ingredients(raw):
    try:
        items = ast.literal_eval(raw) if isinstance(raw, str) and raw.startswith("[") else raw.split("\n")
    except:
        items = raw.split("\n")

    cleaned = []
    for item in items:
        if not isinstance(item, str):
            continue

        item = item.lower().strip()
        item = item.replace("\t", " ")

        item = re.sub(r"\d+/\d+|\d+\s*(cups?|tablespoons?|tbsps?|teaspoons?|tsps?|grams?|g|kg|ml|liters?|l)", "", item)
        item = re.sub(r"[\d½¼¾⅓⅔\-]+", "", item)
        item = re.sub(r"[^\w\s]", "", item)

        for phrase in CULINARY_STOPWORDS:
            item = item.replace(phrase, "")

        item = re.sub(r"\s{2,}", " ", item).strip()
        if len(item) > 1:
            cleaned.append(item)

    return cleaned

def translate_ingredient_list(ingredients):
    translated = []
    for item in ingredients:
        try:
            lang = detect(item)
            if lang == "hi":
                item = GoogleTranslator(source='auto', target='en').translate(item)
        except:
            pass
        translated.append(item)
    return translated

def translate_instruction(text):
    try:
        lang = detect(text)
        if lang == "hi":
            return GoogleTranslator(source="auto", target="en").translate(text)
    except:
        pass
    return text

df['cleaned_ingredients'] = df['ingredients'].apply(clean_ingredients)
df['translated_ingredients'] = df['cleaned_ingredients'].progress_apply(translate_ingredient_list)
df['instructions_translated'] = df['instructions'].progress_apply(translate_instruction)

output_path = "/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)

df[['name', 'translated_ingredients', 'instructions_translated']].head()

100%|██████████| 4236/4236 [45:04<00:00,  1.57it/s]  
100%|██████████| 4236/4236 [15:33<00:00,  4.54it/s] 


Unnamed: 0,name,translated_ingredients,instructions_translated
0,Thayir Semiya Recipe (Curd Semiya),"[cup semiya vermicelli roasted, curd dahi yogu...","To begin making the Thayir Semiya recipe, firs..."
1,Chettinad Style Kara Kuzhambu Recipe with Pota...,"[for ground masala, teaspoon methi seeds fenug...",To begin making the Chettinad Style Kara Kuzha...
2,Goan Chana Ros Recipe (White Peas Curry),"[cup dried green peas vatana soaked overnight,...","To prepare Goan Chana Ros recipe, soak the whi..."
3,Minced Meat And Egg Croquettes Recipe,"[mutton minced, whole eggs boiled and peeled, ...",To begin making the Minced Meat And Egg Croque...
4,Thekera Tenga Recipe,"[potatoes aloo thinly sliced, mustard oil, tea...","To begin making the Thekera Tenga recipe, Heat..."


In [4]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity

# # Common pantry items to ignore in similarity scoring
# COMMON_INGREDIENTS = set([
#     "salt", "water", "sugar", "oil", "pepper", "chili powder",
#     "turmeric", "jeera", "hing", "garam masala", "mustard seeds",
#     "cumin", "coriander", "ginger", "garlic", "green chili"
# ])

# # Function to remove common pantry items
# def exclude_common_ingredients(ingredient_list):
#     return [i for i in ingredient_list if i not in COMMON_INGREDIENTS]

# # Final recipe recommender function
# def recommend_recipes(user_ingredients, user_diet=None):
#     # Load processed dataset
#     df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#     df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     # Filter by diet with fallback
#     if user_diet:
#         df_filtered = df[df['diet'].str.lower() == user_diet.lower()]
#         if df_filtered.empty:
#             print(f"⚠️ No recipes found for diet '{user_diet}'. Showing general results instead.")
#             df_filtered = df
#     else:
#         df_filtered = df

#     # Remove common ingredients from both user and recipe sides
#     df_filtered['match_ingredients'] = df_filtered['translated_ingredients'].apply(exclude_common_ingredients)
#     filtered_user_ingredients = exclude_common_ingredients(user_ingredients)

#     # Convert to text for TF-IDF
#     recipe_docs = df_filtered['match_ingredients'].apply(lambda x: " ".join(x)).tolist()
#     user_doc = " ".join(filtered_user_ingredients)

#     # Vectorize and compute cosine similarity
#     vectorizer = TfidfVectorizer()
#     tfidf_matrix = vectorizer.fit_transform(recipe_docs + [user_doc])
#     recipe_vectors = tfidf_matrix[:-1]
#     user_vector = tfidf_matrix[-1]
#     similarities = cosine_similarity(user_vector, recipe_vectors).flatten()

#     # Get top 10 results (with similarity > 0.1)
#     top_indices = similarities.argsort()[::-1][:10]
#     top_recipes = df_filtered.iloc[top_indices].copy()
#     top_recipes['similarity'] = similarities[top_indices]
#     top_recipes = top_recipes[top_recipes['similarity'] > 0.1]

#     # Return desired columns
#     return top_recipes[['name', 'translated_ingredients', 'instructions_translated', 'diet', 'prep_time', 'image_url', 'similarity']]

In [9]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import pandas as pd
# import ast

# # Common ingredients to ignore for matching
# COMMON_INGREDIENTS = set([
#     "salt", "water", "sugar", "oil", "pepper", "chili powder",
#     "turmeric", "jeera", "hing", "garam masala", "mustard seeds",
#     "cumin", "coriander", "ginger", "garlic", "green chili"
# ])

# def exclude_common_ingredients(ingredient_list):
#     return [i for i in ingredient_list if i not in COMMON_INGREDIENTS]

# def recommend_recipes(user_ingredients, user_diet=None, selected_courses=None, selected_cuisines=None):
#     df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#     df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     # --- Filter: Diet ---
#     if user_diet:
#         df = df[df['diet'].str.lower() == user_diet.lower()]
#         if df.empty:
#             print(f"⚠️ No recipes found for diet '{user_diet}'. Showing all diets instead.")
#             df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#             df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     # --- Filter: Course ---
#     if selected_courses:
#         df = df[df['course'].isin(selected_courses)]

#     # --- Filter: Cuisine ---
#     if selected_cuisines:
#         df = df[df['cuisine'].isin(selected_cuisines)]

#     if df.empty:
#         print("⚠️ No recipes matched the filters. Please broaden your filters.")
#         return pd.DataFrame()

#     # --- Ingredient Matching ---
#     df['match_ingredients'] = df['translated_ingredients'].apply(exclude_common_ingredients)
#     filtered_user_ingredients = exclude_common_ingredients(user_ingredients)
#     recipe_docs = df['match_ingredients'].apply(lambda x: " ".join(x)).tolist()
#     user_doc = " ".join(filtered_user_ingredients)

#     vectorizer = TfidfVectorizer()
#     tfidf_matrix = vectorizer.fit_transform(recipe_docs + [user_doc])
#     recipe_vectors = tfidf_matrix[:-1]
#     user_vector = tfidf_matrix[-1]
#     similarities = cosine_similarity(user_vector, recipe_vectors).flatten()

#     top_indices = similarities.argsort()[::-1][:10]
#     top_recipes = df.iloc[top_indices].copy()
#     top_recipes['similarity'] = similarities[top_indices]
#     top_recipes = top_recipes[top_recipes['similarity'] > 0.1]

#     return top_recipes[['name', 'translated_ingredients', 'instructions_translated',
#                         'diet', 'course', 'cuisine', 'prep_time', 'image_url', 'similarity']]

In [14]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import pandas as pd
# import ast

# # Common ingredients to ignore for matching
# COMMON_INGREDIENTS = set([
#     "salt", "water", "sugar", "oil", "pepper", "chili powder",
#     "turmeric", "jeera", "hing", "garam masala", "mustard seeds",
#     "cumin", "coriander", "ginger", "garlic", "green chili"
# ])

# # Expanded substitution dictionary
# SUBSTITUTIONS = {
#     "ginger": "dry ginger powder, galangal, or omit",
#     "garlic": "garlic powder, asafoetida (hing), or omit",
#     "green chili": "chili flakes, jalapeño, or black pepper",
#     "kasuri methi": "dried oregano, celery leaves, or omit",
#     "curry leaves": "bay leaf, kaffir lime leaf, or omit",
#     "tamarind paste": "lemon juice, amchur powder, or vinegar",
#     "onions": "shallots, leek, or omit with flavor adjustments",
#     "fresh coconut": "desiccated coconut, coconut milk, or cashew paste",
#     "tomatoes": "tomato puree, canned tomatoes, or red bell pepper puree",
#     "paneer": "tofu, cottage cheese, or omit for vegan",
#     "milk": "almond milk, soy milk, or water with cashew paste",
#     "yogurt": "plant-based yogurt, sour cream, or lemon juice with water",
#     "ghee": "butter, coconut oil, or vegetable oil",
#     "butter": "ghee, margarine, or neutral oil",
#     "mustard seeds": "caraway seeds or omit",
#     "fennel seeds": "anise seeds or caraway seeds",
#     "ajwain": "thyme or caraway seeds",
#     "cloves": "allspice, cinnamon, or omit",
#     "cinnamon": "nutmeg or allspice",
#     "poppy seeds": "chia seeds or omit",
#     "black peppercorns": "white pepper or ground pepper",
#     "bay leaf": "curry leaf, basil leaf, or omit",
#     "amchur": "lemon juice or tamarind",
#     "asafoetida": "garlic or onion (if diet allows)",
#     "cardamom": "cinnamon or nutmeg",
#     "star anise": "fennel or a pinch of cinnamon",
#     "saffron": "turmeric (for color), or omit"
# }

# def exclude_common_ingredients(ingredient_list):
#     return [i for i in ingredient_list if i not in COMMON_INGREDIENTS]

# def get_substitutes(missing_list):
#     return {
#         item: SUBSTITUTIONS.get(item, "no known substitute, optional or skip")
#         for item in missing_list
#     }

# def recommend_recipes(user_ingredients, user_diet=None, selected_courses=None, selected_cuisines=None):
#     df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#     df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     if user_diet:
#         df = df[df['diet'].str.lower() == user_diet.lower()]
#         if df.empty:
#             print(f"⚠️ No recipes found for diet '{user_diet}'. Showing all diets instead.")
#             df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#             df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     if selected_courses:
#         df = df[df['course'].isin(selected_courses)]

#     if selected_cuisines:
#         df = df[df['cuisine'].isin(selected_cuisines)]

#     if df.empty:
#         print("⚠️ No recipes matched the filters. Please broaden your filters.")
#         return pd.DataFrame()

#     df['match_ingredients'] = df['translated_ingredients'].apply(exclude_common_ingredients)
#     filtered_user_ingredients = exclude_common_ingredients(user_ingredients)
#     recipe_docs = df['match_ingredients'].apply(lambda x: " ".join(x)).tolist()
#     user_doc = " ".join(filtered_user_ingredients)

#     vectorizer = TfidfVectorizer()
#     tfidf_matrix = vectorizer.fit_transform(recipe_docs + [user_doc])
#     recipe_vectors = tfidf_matrix[:-1]
#     user_vector = tfidf_matrix[-1]
#     similarities = cosine_similarity(user_vector, recipe_vectors).flatten()

#     top_indices = similarities.argsort()[::-1][:10]
#     top_recipes = df.iloc[top_indices].copy()
#     top_recipes['similarity'] = similarities[top_indices]
#     top_recipes = top_recipes[top_recipes['similarity'] > 0.1]

#     top_recipes['missing_ingredients'] = top_recipes['match_ingredients'].apply(
#         lambda recipe_ings: list(set(recipe_ings) - set(filtered_user_ingredients))
#     )
#     top_recipes['missing_count'] = top_recipes['missing_ingredients'].apply(len)
#     top_recipes['suggested_substitutes'] = top_recipes['missing_ingredients'].apply(get_substitutes)

#     return top_recipes[['name', 'translated_ingredients', 'instructions_translated',
#                         'diet', 'course', 'cuisine', 'prep_time', 'image_url',
#                         'similarity', 'missing_ingredients', 'missing_count', 'suggested_substitutes']]

In [17]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import pandas as pd
# import ast

# # Common ingredients to ignore for matching and missing logic
# COMMON_INGREDIENTS = set([
#     "salt", "water", "sugar", "oil", "pepper", "chili powder",
#     "turmeric", "jeera", "hing", "garam masala", "mustard seeds",
#     "cumin", "coriander", "ginger", "garlic", "green chili"
# ])

# # Substitution dictionary
# SUBSTITUTIONS = {
#     "ginger": "dry ginger powder, galangal, or omit",
#     "garlic": "garlic powder, asafoetida (hing), or omit",
#     "green chili": "chili flakes, jalapeño, or black pepper",
#     "kasuri methi": "dried oregano, celery leaves, or omit",
#     "curry leaves": "bay leaf, kaffir lime leaf, or omit",
#     "tamarind paste": "lemon juice, amchur powder, or vinegar",
#     "onions": "shallots, leek, or omit with flavor adjustments",
#     "fresh coconut": "desiccated coconut, coconut milk, or cashew paste",
#     "tomatoes": "tomato puree, canned tomatoes, or red bell pepper puree",
#     "paneer": "tofu, cottage cheese, or omit for vegan",
#     "milk": "almond milk, soy milk, or water with cashew paste",
#     "yogurt": "plant-based yogurt, sour cream, or lemon juice with water",
#     "ghee": "butter, coconut oil, or vegetable oil",
#     "butter": "ghee, margarine, or neutral oil",
#     "mustard seeds": "caraway seeds or omit",
#     "fennel seeds": "anise seeds or caraway seeds",
#     "ajwain": "thyme or caraway seeds",
#     "cloves": "allspice, cinnamon, or omit",
#     "cinnamon": "nutmeg or allspice",
#     "poppy seeds": "chia seeds or omit",
#     "black peppercorns": "white pepper or ground pepper",
#     "bay leaf": "curry leaf, basil leaf, or omit",
#     "amchur": "lemon juice or tamarind",
#     "asafoetida": "garlic or onion (if diet allows)",
#     "cardamom": "cinnamon or nutmeg",
#     "star anise": "fennel or a pinch of cinnamon",
#     "saffron": "turmeric (for color), or omit"
# }

# def exclude_common_ingredients(ingredient_list):
#     return [i for i in ingredient_list if i not in COMMON_INGREDIENTS]

# def get_substitutes(missing_list):
#     return {
#         item: SUBSTITUTIONS.get(item, "no known substitute, optional or skip")
#         for item in missing_list
#     }

# def recommend_recipes(user_ingredients, user_diet=None, selected_courses=None, selected_cuisines=None):
#     df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#     df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     # Apply filters
#     if user_diet:
#         df = df[df['diet'].str.lower() == user_diet.lower()]
#         if df.empty:
#             print(f"⚠️ No recipes found for diet '{user_diet}'. Showing all diets instead.")
#             df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
#             df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)

#     if selected_courses:
#         df = df[df['course'].isin(selected_courses)]

#     if selected_cuisines:
#         df = df[df['cuisine'].isin(selected_cuisines)]

#     if df.empty:
#         print("⚠️ No recipes matched the filters. Please broaden your filters.")
#         return pd.DataFrame()

#     # Clean ingredient lists
#     df['match_ingredients'] = df['translated_ingredients'].apply(exclude_common_ingredients)
#     filtered_user_ingredients = exclude_common_ingredients(user_ingredients)

#     # Vectorize
#     recipe_docs = df['match_ingredients'].apply(lambda x: " ".join(x)).tolist()
#     user_doc = " ".join(filtered_user_ingredients)
#     vectorizer = TfidfVectorizer()
#     tfidf_matrix = vectorizer.fit_transform(recipe_docs + [user_doc])
#     recipe_vectors = tfidf_matrix[:-1]
#     user_vector = tfidf_matrix[-1]
#     similarities = cosine_similarity(user_vector, recipe_vectors).flatten()

#     # Select top similar recipes
#     top_indices = similarities.argsort()[::-1][:10]
#     top_recipes = df.iloc[top_indices].copy()
#     top_recipes['similarity'] = similarities[top_indices]
#     top_recipes = top_recipes[top_recipes['similarity'] > 0.1]

#     # Compute missing and substitution suggestions
#     top_recipes['missing_ingredients'] = top_recipes['match_ingredients'].apply(
#         lambda recipe_ings: [
#             ing for ing in recipe_ings
#             if ing not in filtered_user_ingredients and ing not in COMMON_INGREDIENTS
#         ]
#     )
#     top_recipes['missing_count'] = top_recipes['missing_ingredients'].apply(len)
#     top_recipes['suggested_substitutes'] = top_recipes['missing_ingredients'].apply(get_substitutes)

#     return top_recipes[['name', 'translated_ingredients', 'instructions_translated',
#                         'diet', 'course', 'cuisine', 'prep_time', 'image_url',
#                         'similarity', 'missing_ingredients', 'missing_count', 'suggested_substitutes']]

In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import ast
import re

COMMON_INGREDIENTS = set([
    "salt", "water", "sugar", "oil", "pepper", "chili powder",
    "turmeric", "jeera", "hing", "garam masala", "mustard seeds",
    "cumin", "coriander", "ginger", "garlic", "green chili","garam masala","laung","cloves"
])

SUBSTITUTIONS = {
    "ginger": "dry ginger powder, galangal, or omit",
    "garlic": "garlic powder, asafoetida (hing), or omit",
    "green chili": "chili flakes, jalapeño, or black pepper",
    "kasuri methi": "dried oregano, celery leaves, or omit",
    "curry leaves": "bay leaf, kaffir lime leaf, or omit",
    "tamarind paste": "lemon juice, amchur powder, or vinegar",
    "onions": "shallots, leek, or omit with flavor adjustments",
    "fresh coconut": "desiccated coconut, coconut milk, or cashew paste",
    "tomatoes": "tomato puree, canned tomatoes, or red bell pepper puree",
    "paneer": "tofu, cottage cheese, or omit for vegan",
    "milk": "almond milk, soy milk, or water with cashew paste",
    "yogurt": "plant-based yogurt, sour cream, or lemon juice with water",
    "ghee": "butter, coconut oil, or vegetable oil",
    "butter": "ghee, margarine, or neutral oil",
    "mustard seeds": "caraway seeds or omit",
    "fennel seeds": "anise seeds or caraway seeds",
    "ajwain": "thyme or caraway seeds",
    "cloves": "allspice, cinnamon, or omit",
    "cinnamon": "nutmeg or allspice",
    "poppy seeds": "chia seeds or omit",
    "black peppercorns": "white pepper or ground pepper",
    "bay leaf": "curry leaf, basil leaf, or omit",
    "amchur": "lemon juice or tamarind",
    "asafoetida": "garlic or onion (if diet allows)",
    "cardamom": "cinnamon or nutmeg",
    "star anise": "fennel or a pinch of cinnamon",
    "saffron": "turmeric (for color), or omit"
}

def normalize_ingredient(ingredient):
    ingredient = ingredient.lower()
    ingredient = re.sub(
        r"\b(chopped|sliced|grated|minced|diced|ground|paste|cut into quarters|crushed|powder|fresh|dry|roasted|boiled|blanched|soaked|julienned|peeled|mashed)\b",
        "", ingredient)
    ingredient = re.sub(
        r"\b(cup|cups|teaspoon|tablespoon|grams|kg|ml|liters|tbsp|tsp|½|¼|¾|⅓|⅔|dash|pinch|small|large|medium)\b",
        "", ingredient)
    ingredient = re.sub(r"[\d/]+", "", ingredient)
    ingredient = re.sub(r"\s+", " ", ingredient)
    return ingredient.strip()

def exclude_common_ingredients(ingredient_list):
    return [i for i in ingredient_list if i not in COMMON_INGREDIENTS]

def get_substitutes(missing_list):
    return {
        item: SUBSTITUTIONS.get(item, "no known substitute, optional or skip")
        for item in missing_list
    }

def recommend_recipes(user_ingredients, user_diet=None, selected_courses=None, selected_cuisines=None):
    df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
    df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)


    df['translated_ingredients'] = df['translated_ingredients'].apply(
        lambda lst: [normalize_ingredient(i) for i in lst]
    )

    user_ingredients = [normalize_ingredient(i) for i in user_ingredients]


    if user_diet:
        df = df[df['diet'].str.lower() == user_diet.lower()]
        if df.empty:
            print(f"⚠️ No recipes found for diet '{user_diet}'. Showing all diets instead.")
            df = pd.read_csv("/Users/dhruvkapur/Library/CloudStorage/Dropbox/recipe_recommender/data/processed/recipes_translated_full.csv")
            df['translated_ingredients'] = df['translated_ingredients'].apply(ast.literal_eval)
            df['translated_ingredients'] = df['translated_ingredients'].apply(
                lambda lst: [normalize_ingredient(i) for i in lst]
            )

    if selected_courses:
        df = df[df['course'].isin(selected_courses)]

    if selected_cuisines:
        df = df[df['cuisine'].isin(selected_cuisines)]

    if df.empty:
        print("⚠️ No recipes matched the filters. Please broaden your filters.")
        return pd.DataFrame()

    # Ingredient matching
    df['match_ingredients'] = df['translated_ingredients'].apply(exclude_common_ingredients)
    filtered_user_ingredients = exclude_common_ingredients(user_ingredients)
    recipe_docs = df['match_ingredients'].apply(lambda x: " ".join(x)).tolist()
    user_doc = " ".join(filtered_user_ingredients)

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(recipe_docs + [user_doc])
    recipe_vectors = tfidf_matrix[:-1]
    user_vector = tfidf_matrix[-1]
    similarities = cosine_similarity(user_vector, recipe_vectors).flatten()

    # Top recipes
    top_indices = similarities.argsort()[::-1][:10]
    top_recipes = df.iloc[top_indices].copy()
    top_recipes['similarity'] = similarities[top_indices]
    top_recipes = top_recipes[top_recipes['similarity'] > 0.1]

    # Compute missing ingredients
    top_recipes['missing_ingredients'] = top_recipes['match_ingredients'].apply(
        lambda recipe_ings: [
            ing for ing in recipe_ings
            if ing not in filtered_user_ingredients and ing not in COMMON_INGREDIENTS
        ]
    )
    top_recipes['missing_count'] = top_recipes['missing_ingredients'].apply(len)
    top_recipes['suggested_substitutes'] = top_recipes['missing_ingredients'].apply(get_substitutes)

    return top_recipes[['name', 'translated_ingredients', 'instructions_translated',
                        'diet', 'course', 'cuisine', 'prep_time', 'image_url',
                        'similarity', 'missing_ingredients', 'missing_count', 'suggested_substitutes']]

In [24]:
user_ingredients = ["onion", "peas", "wheat flour", "bread", "butter", "tomatoes"]
user_diet = "vegetarian"
selected_courses = ["Dinner", "Main Course"]
selected_cuisines = ["Punjabi", "Bengali Recipes"]

results = recommend_recipes(
    user_ingredients=user_ingredients,
    user_diet=user_diet,
    selected_courses=selected_courses,
    selected_cuisines=selected_cuisines
)

results.head()

Unnamed: 0,name,translated_ingredients,instructions_translated,diet,course,cuisine,prep_time,image_url,similarity,missing_ingredients,missing_count,suggested_substitutes
108,Dal Bukhara Recipe (Punjabi Style Black Urad Dal),"[black urad dal whole overnight, onion finely,...","To begin making the Dal Bukhara recipe, pressu...",Vegetarian,Dinner,Punjabi,Total in 80 M,https://www.archanaskitchen.com/images/archana...,0.156441,"[black urad dal whole overnight, onion finely,...",12,{'black urad dal whole overnight': 'no known s...
914,Punjabi Black Chickpea Curry Recipe - Kale Cha...,"[kala chana brown chickpeas, oil, cumin seeds ...",To begin making the Punjabi Black Chickpea Cur...,Vegetarian,Dinner,Punjabi,Total in 40 M,https://www.archanaskitchen.com/images/archana...,0.128629,"[kala chana brown chickpeas, cumin seeds jeera...",7,{'kala chana brown chickpeas': 'no known subst...
1050,Corn Methi Malai Gravy Recipe,"[methi leaves fenugreek leaves, sweet corn, on...","To begin making Corn Methi Malai Recipe, the ...",Vegetarian,Dinner,Punjabi,Total in 30 M,https://www.archanaskitchen.com/images/archana...,0.118104,"[methi leaves fenugreek leaves, sweet corn, on...",16,{'methi leaves fenugreek leaves': 'no known su...
452,Baked Amritsari Kulcha Recipe (Without Yeast),"[all purpose flour maida, salt, sugar, baking,...",To begin making the Baked Amritsari Kulcha rec...,Vegetarian,Dinner,Punjabi,Total in 20 M,https://www.archanaskitchen.com/images/archana...,0.117473,"[all purpose flour maida, baking, baking soda,...",8,{'all purpose flour maida': 'no known substitu...


In [25]:
user_ingredients = [
    "button mushrooms", "black peppercorns", "cloves", "ajwain",
    "fennel seeds", "cumin seeds", "coriander seeds", "cinnamon stick",
    "dry red chilli", "poppy seeds", "fresh coconut", "oil",
    "onions", "garlic", "salt", "tamarind paste"
]

user_diet = "vegetarian"
selected_courses = ["Main Course"]
selected_cuisines = [ "Goan Recipes"]
results = recommend_recipes(
    user_ingredients=user_ingredients,
    user_diet=user_diet,
    selected_courses=selected_courses,
    selected_cuisines=selected_cuisines
)

results.head(5)

Unnamed: 0,name,translated_ingredients,instructions_translated,diet,course,cuisine,prep_time,image_url,similarity,missing_ingredients,missing_count,suggested_substitutes
29,Goan Style Mushroom Shagoti Recipe,"[button mushrooms, whole black peppercorns, cl...","To begin making the Mushroom Shagoti Recipe, i...",Vegetarian,Main Course,Goan Recipes,Total in 35 M,https://www.archanaskitchen.com/images/archana...,0.791156,"[whole black peppercorns, cloves laung, ajwain...",9,{'whole black peppercorns': 'no known substitu...
