In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

In [None]:
df = pd.read_csv("./IndianFoodDatasetCSV.csv")
print(df.shape)
df = df.drop(columns=['Srno', 'RecipeName', 'Ingredients', 'Instructions'])
df = df.drop_duplicates(subset=['TranslatedRecipeName']).reset_index(drop=True)
df = df.dropna(subset=['TranslatedIngredients'])
print(df.shape)

def remove_non_english(text):
    return re.sub(r'[^\x00-\x7F]+', ' ', str(text))
df['TranslatedIngredients'] = df['TranslatedIngredients'].apply(remove_non_english)
df['TranslatedInstructions'] = df['TranslatedInstructions'].apply(remove_non_english)

def clean_ingredients(text):
    if pd.isna(text):
        return None
    text = str(text).lower()
    # Only keep letters,commas and spaces
    text = re.sub(r"[^a-z,\s]", "", text)
    # Remove measurement words
    text = re.sub(r"\b(cup|cups|tablespoon|tablespoons|tbsp|teaspoon|teaspoons|tsp|grams|g|kg|ml|ltr|litre|pinch|piece|pieces|slice|slices|cloves?|nos?|small|medium|large)\b", "", text, flags=re.IGNORECASE)
    # Remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()
    # Split by commas and remove duplicates
    ingredients = list(dict.fromkeys([i.strip() for i in text.split(",") if i.strip()]))
    return ", ".join(ingredients)

# Apply cleaning
df["CleanedIngredients"] = df["TranslatedIngredients"].apply(clean_ingredients)
df = df[df['CleanedIngredients'].notna()]
print(df[["TranslatedIngredients", "CleanedIngredients"]].head())

def split_steps(text):
    if pd.isna(text):
        return None
    if not re.search(r'[a-zA-Z]', text):
        return None
    # Split by sentence or numbered steps
    steps = re.split(r'(?:(?<=\.)|(?<=\d\)))\s+', text)
    steps = [step.strip() for step in steps if len(step.strip()) > 2]
    return steps

df["RecipeSteps"] = df["TranslatedInstructions"].apply(split_steps)
df = df[df['RecipeSteps'].notna()]
print(df[["TranslatedInstructions", "RecipeSteps"]].head())
print(df.shape)

# Save cleaned data
df.to_csv("cleaned_indian_food_1.csv", index=False)

print("Cleaned dataset saved as 'cleaned_indian_food_1.csv'")


(6871, 15)
(6838, 11)
                               TranslatedIngredients  \
0  6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...   
1  2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...   
2  1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...   
3  500 grams Chicken,2 Onion - chopped,1 Tomato -...   
4  1 tablespoon chana dal, 1 tablespoon white ura...   

                                  CleanedIngredients  
0  karela bitter gourd pavakkai deseeded, salt to...  
1  rice cooked, tomatoes, bc belle bhat powder, s...  
2  rice vermicelli noodles thin, onion sliced, ca...  
3  chicken, onion chopped, tomato chopped, green ...  
4  chana dal, white urad dal, red chillies, coria...  
                              TranslatedInstructions  \
0  To begin making the Masala Karela Recipe,de-se...   
1  To make tomato puliogere, first cut the tomato...   
2  To begin making the Ragi Vermicelli Recipe, fi...   
3  To begin making Gongura Chicken Curry Recipe f...   
4  To make Andhra Style Alam Pa

In [None]:
df = pd.read_csv("cleaned_indian_food_1.csv")
df = df.drop(columns=['TranslatedInstructions', 'TranslatedIngredients'])

# Step 3: Basic preprocessing
df.dropna(subset=["CleanedIngredients"], inplace=True)
df.reset_index(drop=True, inplace=True)

# Step 4: TF-IDF Vectorization on cleaned ingredients
vectorizer = TfidfVectorizer(stop_words="english")
ingredient_vectors = vectorizer.fit_transform(df["CleanedIngredients"])

In [None]:
import re

def normalize_ingredient_name(text):
    """
    Simplify ingredient names by removing adjectives and unnecessary words.
    Example: 'onion chopped finely' -> 'onion'
    """
    text = text.lower().strip()
    # remove preparation words and descriptors
    text = re.sub(r'\b(chopped|finely|roughly|sliced|diced|grated|minced|crushed|powder|paste|optional|to taste|fresh|whole|small|medium|large|inch)\b', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def recommend_recipe_precise(user_ingredients, top_n=10):
    """
    Rank recipes so that those needing fewer extra ingredients come first.
    """

    # Normalize user ingredients
    if isinstance(user_ingredients, str):
        user_ingredients = [i.strip().lower() for i in user_ingredients.split(",")]
    else:
        user_ingredients = [i.strip().lower() for i in user_ingredients]
    
    # Apply normalization
    user_set = set([normalize_ingredient_name(i) for i in user_ingredients])

    results = []
    for idx, row in df.iterrows():
        recipe_ingredients = [normalize_ingredient_name(i) for i in row["CleanedIngredients"].split(",")]
        recipe_set = set(recipe_ingredients)

        intersection = len(user_set & recipe_set)
        if intersection == 0:
            continue

        extra_ingredients = list(recipe_set - user_set)
        num_extras = len(extra_ingredients)

        results.append((idx, intersection, num_extras, extra_ingredients))

    if not results:
        print("No matching recipes found.")
        return pd.DataFrame(columns=[
            "Rank", "TranslatedRecipeName", "CleanedIngredients",
            "Cuisine", "Course", "Diet", "ExtraIngredientsCount", "MissingIngredients"
        ])

    # Sort by fewer extra ingredients, then more matches
    results = sorted(results, key=lambda x: (x[2], -x[1]))

    # Build final dataframe
    indices = [r[0] for r in results]
    extra_counts = [r[2] for r in results]
    missing_ings = [", ".join(r[3]) if r[3] else "None" for r in results]

    recommendations = df.iloc[indices][[
        "TranslatedRecipeName", "CleanedIngredients", "Cuisine", "Course", "Diet"
    ]].copy()

    recommendations["ExtraIngredientsCount"] = extra_counts
    recommendations["MissingIngredients"] = missing_ings
    recommendations["Rank"] = range(1, len(recommendations) + 1)
    # recommendations.to_csv("rec.csv", index=False)

    return recommendations


# Step 6: Function to neatly display recipe steps
def show_recipe_steps(recipe_name):
    """
    Display the steps of a recipe given its name.
    """
    recipe = df[df["TranslatedRecipeName"].str.lower() == recipe_name.lower()]
    if recipe.empty:
        print("Recipe not found.")
        return
    steps = eval(recipe.iloc[0]["RecipeSteps"]) if isinstance(recipe.iloc[0]["RecipeSteps"], str) else recipe.iloc[0]["RecipeSteps"]
    print(f"\nSteps for {recipe_name}:\n")
    for i, step in enumerate(steps, start=1):
        print(f"Step {i}: {step}")

In [13]:
user_input = "onion, tomato, garlic, ginger"
print(f"\nTop recipe recommendations for: {user_input}\n")

recommendations = recommend_recipe_precise(user_input, top_n=10)
display(recommendations)

# Step 8: Show recipe steps (example)
show_recipe_steps(recommendations.iloc[0]["TranslatedRecipeName"])


Top recipe recommendations for: onion, tomato, garlic, ginger



Unnamed: 0,TranslatedRecipeName,CleanedIngredients,Cuisine,Course,Diet,ExtraIngredientsCount,MissingIngredients,Rank
1920,Buttered Broccoli Poriyal Sabzi Recipe - Finge...,"broccoli cut into florets, butter, garlic minced",Continental,Side Dish,High Protein Vegetarian,2,"butter, broccoli cut into florets",1
3974,Garlic Bread With Herb Butter Recipe,"french loaf, garlic finely minced, herb butter...",Continental,Dinner,Vegetarian,2,"french loaf, herb butter as required",2
282,Orange Peppermint Green Tea Punch Recipe,"fresh orange juice, ginger grated, tea monk se...",Continental,Appetizer,Vegetarian,3,"tea monk seiki peppermint green tea, chilled w...",3
1092,Vietnamese Rice Pocket Recipe With Caramelised...,"rice, onion sliced, dry red chillies sliced, s...",Vietnamese,Lunch,Vegetarian,3,"dry red chillies, sunflower oil, rice",4
2009,Pudina Buttermilk Recipe,"curd dahi yogurt, sprig mint leaves pudina cho...",North Indian Recipes,Appetizer,Vegetarian,3,"curd dahi yogurt, cumin seeds jeera roasted an...",5
...,...,...,...,...,...,...,...,...
1441,Mixed Vegetable Handi Recipe,"sunflower oil, cumin seeds jeera, bay leaves t...",North Indian Recipes,Lunch,Vegetarian,28,"green beans french beans, bay leaves tej patta...",3015
4083,Makhana Moongphali Kadhi With Samvat Rice Recipe,"curd dahi yogurt, singoda flour water chestnut...",Indian,Lunch,Vegetarian,28,"phool makhana lotus seeds powdered, few curry ...",3016
1941,Bengali Style Khichuri Aar Labra Recipe-Khichd...,"govind bhog rice, yellow moong dal split, caul...",Bengali Recipes,Main Course,Vegetarian,30,"sugar or adjust, cauliflower gobi cut to flore...",3017
1316,Sagu Masala Dosa Recipe,"white urad dal split, red matta rice, dosa ric...",Karnataka,South Indian Breakfast,Vegetarian,31,"kaddu parangikai pumpkin red one with skin, as...",3018



Steps for Buttered Broccoli Poriyal Sabzi Recipe - Finger Food For Babies And Toddlers:

Step 1: To begin making Buttered Broccoli Poriyal Sabzi, we need to first steam the Broccoli floretsFirstly make sure to wash the broccoli thoroughly as there might be particles of mud or worms in between the florets.Add the broccoli florets into a pressure cooker along with 2 tablespoons of water and a pinch of salt.Allow to pressure cook for 1 whistle.
Step 2: Turn off the flame and run the pressure cooker under water to release the pressure immediately.
Step 3: Take out the steamed broccoli florets and keep aside.
Step 4: If you are cooking on the stove, add the broccoli florets to a pot of water (enough to cover all the florets).
Step 5: Allow the florets to boil in the water for 5 minutes.
Step 6: The broccoli should have sufficiently cooked.
Step 7: Make sure to not over cook broccoli as it might lose the nutrition when over cooked.
Step 8: In a heavy bottomed pan, add 1 teaspoon of butter a