In [None]:
import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---------------------------
# Load and clean the dataset
# ---------------------------
df = pd.read_excel('data/recipes.xlsx')
df = df[['recipe_name', 'food_name_org']].copy()
df = df.dropna(subset=['recipe_name'])
df['food_name_org'] = df['food_name_org'].fillna('')

def clean_text(text):
    text = str(text).lower().strip()
    text = re.sub(r'[^a-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text

df['recipe_name'] = df['recipe_name'].apply(clean_text)
df['food_name_org'] = df['food_name_org'].apply(clean_text)

# ---------------------------
# Combine all ingredient names per recipe
# ---------------------------
recipes_grouped = (
    df.groupby('recipe_name')['food_name_org']
    .apply(lambda x: ' '.join(x))
    .reset_index()
)

recipes_grouped = recipes_grouped.drop_duplicates(subset=['recipe_name']).reset_index(drop=True)

# ---------------------------
# TF-IDF representation
# ---------------------------
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(recipes_grouped['food_name_org'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# ---------------------------
# Function to get similar recipes
# ---------------------------
def get_similar_recipes(recipe_name, cosine_sim=cosine_sim, recipes=recipes_grouped):
    recipe_name = clean_text(recipe_name)
    if recipe_name not in recipes['recipe_name'].values:
        return []

    idx = recipes[recipes['recipe_name'] == recipe_name].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:6]
    return [recipes.iloc[i[0]]['recipe_name'] for i in sim_scores]

# ---------------------------
# Test the model
# ---------------------------
results1 = get_similar_recipes('bhel puri')
print("Top similar recipes to 'bhel puri':")
print(results1)

#
results2 = get_similar_recipes('grilled chicken')
print("Top similar recipes to 'grilled chicken':")
print(results2)


Top similar recipes to 'grilled chicken':
['khakhra chaat', 'split bengal gram dal channa dal', 'oniongreen chilli paranthaparatha pyaaz aur hari mirch ka paranthaparatha', 'vegetarian nargisi kofta curry', 'potato samosa aloo ka samosa']


In [15]:
def get_recipes_by_ingredients(ingredients, tfidf=tfidf, tfidf_matrix=tfidf_matrix, recipes=recipes_grouped):
    ingredients = clean_text(ingredients)
    ingredients_vec = tfidf.transform([ingredients])
    sim_scores = cosine_similarity(ingredients_vec, tfidf_matrix).flatten()
    top_indices = sim_scores.argsort()[::-1][:5]
    return recipes.iloc[top_indices]['recipe_name'].tolist()
get_recipes_by_ingredients("potato tomato onion")



['shepherds pie with minced meat',
 'spinach and potato palak aloo',
 'vegetable soup',
 'stuffed tomatoes bharwa tamatar',
 'potato with curd']