In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

In [14]:
df = pd.read_csv("./Recipe/IndianFoodDatasetCSV.csv")
print(df.shape)
df = df.drop(columns=['Srno', 'RecipeName', 'Ingredients', 'Instructions'])
df = df.drop_duplicates(subset=['TranslatedRecipeName']).reset_index(drop=True)
df = df.dropna(subset=['TranslatedIngredients'])
print(df.shape)

def remove_non_english(text):
    return re.sub(r'[^\x00-\x7F]+', ' ', str(text))
df['TranslatedIngredients'] = df['TranslatedIngredients'].apply(remove_non_english)
df['TranslatedInstructions'] = df['TranslatedInstructions'].apply(remove_non_english)

def clean_ingredients(text):
    if pd.isna(text):
        return None
    text = str(text).lower()
    # Only keep letters,commas and spaces
    text = re.sub(r"[^a-z,\s]", "", text)
    # Remove measurement words
    text = re.sub(r"\b(cup|cups|tablespoon|tablespoons|tbsp|teaspoon|teaspoons|tsp|grams|g|kg|ml|ltr|litre|pinch|piece|pieces|slice|slices|cloves?|nos?|small|medium|large)\b", "", text, flags=re.IGNORECASE)
    # Remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()
    # Split by commas and remove duplicates
    ingredients = list(dict.fromkeys([i.strip() for i in text.split(",") if i.strip()]))
    return ", ".join(ingredients)

# Apply cleaning
df["CleanedIngredients"] = df["TranslatedIngredients"].apply(clean_ingredients)
df = df[df['CleanedIngredients'].notna()]
print(df[["TranslatedIngredients", "CleanedIngredients"]].head())

def split_steps(text):
    if pd.isna(text):
        return None
    if not re.search(r'[a-zA-Z]', text):
        return None
    # Split by sentence or numbered steps
    steps = re.split(r'(?:(?<=\.)|(?<=\d\)))\s+', text)
    steps = [step.strip() for step in steps if len(step.strip()) > 2]
    return steps

df["RecipeSteps"] = df["TranslatedInstructions"].apply(split_steps)
df = df[df['RecipeSteps'].notna()]
print(df[["TranslatedInstructions", "RecipeSteps"]].head())
print(df.shape)

# Save cleaned data
df.to_csv("cleaned_indian_food.csv", index=False)

print("Cleaned dataset saved as 'cleaned_indian_food.csv'")


(6871, 15)
(6838, 11)
                               TranslatedIngredients  \
0  6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...   
1  2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...   
2  1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...   
3  500 grams Chicken,2 Onion - chopped,1 Tomato -...   
4  1 tablespoon chana dal, 1 tablespoon white ura...   

                                  CleanedIngredients  
0  karela bitter gourd pavakkai deseeded, salt to...  
1  rice cooked, tomatoes, bc belle bhat powder, s...  
2  rice vermicelli noodles thin, onion sliced, ca...  
3  chicken, onion chopped, tomato chopped, green ...  
4  chana dal, white urad dal, red chillies, coria...  
                              TranslatedInstructions  \
0  To begin making the Masala Karela Recipe,de-se...   
1  To make tomato puliogere, first cut the tomato...   
2  To begin making the Ragi Vermicelli Recipe, fi...   
3  To begin making Gongura Chicken Curry Recipe f...   
4  To make Andhra Style Alam Pa

In [15]:
df = pd.read_csv("cleaned_indian_food.csv")
df = df.drop(columns=['TranslatedInstructions', 'TranslatedIngredients'])

# Step 3: Basic preprocessing
df.dropna(subset=["CleanedIngredients"], inplace=True)
df.reset_index(drop=True, inplace=True)

# Step 4: TF-IDF Vectorization on cleaned ingredients
vectorizer = TfidfVectorizer(stop_words="english")
ingredient_vectors = vectorizer.fit_transform(df["CleanedIngredients"])

In [16]:
def recommend_recipe(user_ingredients, top_n=5):
    # top_n: number of results to return
    if isinstance(user_ingredients, list):
        user_input = ", ".join(user_ingredients)
    else:
        user_input = user_ingredients

    user_vector = vectorizer.transform([user_input])
    similarity = cosine_similarity(user_vector, ingredient_vectors).flatten()

    indices = np.argsort(similarity)[::-1][:top_n]
    recommendations = df.iloc[indices][["TranslatedRecipeName", "CleanedIngredients", "RecipeSteps", "Cuisine", "Course", "Diet"]]
    
    return recommendations

# Step 6: Function to neatly display recipe steps
def show_recipe_steps(recipe_name):
    """
    Display the steps of a recipe given its name.
    """
    recipe = df[df["TranslatedRecipeName"].str.lower() == recipe_name.lower()]
    if recipe.empty:
        print("Recipe not found.")
        return
    steps = eval(recipe.iloc[0]["RecipeSteps"]) if isinstance(recipe.iloc[0]["RecipeSteps"], str) else recipe.iloc[0]["RecipeSteps"]
    print(f"\nSteps for {recipe_name}:\n")
    for i, step in enumerate(steps, start=1):
        print(f"Step {i}: {step}")

In [17]:
user_input = "onion, tomato, garlic, ginger"
print(f"\nTop recipe recommendations for: {user_input}\n")

recommendations = recommend_recipe(user_input, top_n=5)
display(recommendations)

# Step 8: Show recipe steps (example)
show_recipe_steps(recommendations.iloc[0]["TranslatedRecipeName"])


Top recipe recommendations for: onion, tomato, garlic, ginger



Unnamed: 0,TranslatedRecipeName,CleanedIngredients,RecipeSteps,Cuisine,Course,Diet
1409,Spinach Egg Muffins Recipe,"whole eggs, onion, spinach leaves palak, salt ...",['To bgein making the Spinach Egg Muffins reci...,Continental,World Breakfast,Eggetarian
4960,Chettinad Prawn Curry Recipe,"prawns, onion finely chopped, tomato, ginger g...","['To begin making the Chettinad Prawn Curry, f...",Chettinad,Lunch,Non Vegeterian
3096,Mushroom Do Pyaza Recipe,"button mushrooms, onion cut into cubes, onion ...",['To begin making the Mushroom Do Pyaza Recipe...,North Indian Recipes,Lunch,Vegetarian
5458,Hurali Saaru Recipe - Healthy Horse Gram and C...,horse gram dal kollu kulith soaked for at leas...,"['To begin, pressure cook the soaked horse-gra...",Tamil Nadu,Dinner,High Protein Vegetarian
5743,Chicken Wings With Bhuna Masala Recipe,"chicken wings, onion chopped, tomato chopped, ...","[""To begin making the Chicken Wings With Bhuna...",Indian,Appetizer,High Protein Non Vegetarian



Steps for Spinach Egg Muffins Recipe:

Step 1: To bgein making the Spinach Egg Muffins recipe, preheat oven to 180 C.
Step 2: Grease the muffin tray with oil.Heat a heavy bottomed pan adding oil, add chopped onion and saute until translucent.
Step 3: Add chopped tomato and cook until mushy.Once the tomatoes are mushy, add washed spinach leaves and saute for a minute until it shrinks.
Step 4: Switch off the flame.Take a mixing bowl, break eggs and add to the bowl.
Step 5: Now add salt, pepper powder, garlic and saute spinach mixture to the eggs in bowl.
Step 6: Whisk well until combined.Ladle the egg mixture into greased muffin tray and bake for 15-20 minutes until the top becomes little firm.Allow it to cool completely.
Step 7: Remove from the tray using spoon.
Step 8: Serve Spinach Egg Muffins for breakfast with Orange Juice or for tea time snacks with a cup of Adrak Chai.
