In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

In [2]:
df = pd.read_csv("./IndianFoodDatasetCSV.csv")
print(df.shape)
df = df.drop(columns=['Srno', 'RecipeName', 'Ingredients', 'Instructions'])
df = df.drop_duplicates(subset=['TranslatedRecipeName']).reset_index(drop=True)
df = df.dropna(subset=['TranslatedIngredients'])
print(df.shape)

def remove_non_english(text):
    return re.sub(r'[^\x00-\x7F]+', ' ', str(text))
df['TranslatedIngredients'] = df['TranslatedIngredients'].apply(remove_non_english)
df['TranslatedInstructions'] = df['TranslatedInstructions'].apply(remove_non_english)

def clean_ingredients(text):
    if pd.isna(text):
        return None
    text = str(text).lower()
    # Only keep letters,commas and spaces
    text = re.sub(r"[^a-z,\s]", "", text)
    # Remove measurement words
    text = re.sub(r"\b(cup|cups|tablespoon|tablespoons|tbsp|teaspoon|teaspoons|tsp|grams|g|kg|ml|ltr|litre|pinch|piece|pieces|slice|slices|cloves?|nos?|small|medium|large)\b", "", text, flags=re.IGNORECASE)
    # Remove extra spaces
    text = re.sub(r"\s+", " ", text).strip()
    # Split by commas and remove duplicates
    ingredients = list(dict.fromkeys([i.strip() for i in text.split(",") if i.strip()]))
    return ", ".join(ingredients)

# Apply cleaning
df["CleanedIngredients"] = df["TranslatedIngredients"].apply(clean_ingredients)
df = df[df['CleanedIngredients'].notna()]
print(df[["TranslatedIngredients", "CleanedIngredients"]].head())

def split_steps(text):
    if pd.isna(text):
        return None
    if not re.search(r'[a-zA-Z]', text):
        return None
    # Split by sentence or numbered steps
    steps = re.split(r'(?:(?<=\.)|(?<=\d\)))\s+', text)
    steps = [step.strip() for step in steps if len(step.strip()) > 2]
    return steps

df["RecipeSteps"] = df["TranslatedInstructions"].apply(split_steps)
df = df[df['RecipeSteps'].notna()]
print(df[["TranslatedInstructions", "RecipeSteps"]].head())
print(df.shape)

# Save cleaned data
df.to_csv("cleaned_indian_food_1.csv", index=False)

print("Cleaned dataset saved as 'cleaned_indian_food_1.csv'")


(6871, 15)
(6838, 11)
                               TranslatedIngredients  \
0  6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...   
1  2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...   
2  1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...   
3  500 grams Chicken,2 Onion - chopped,1 Tomato -...   
4  1 tablespoon chana dal, 1 tablespoon white ura...   

                                  CleanedIngredients  
0  karela bitter gourd pavakkai deseeded, salt to...  
1  rice cooked, tomatoes, bc belle bhat powder, s...  
2  rice vermicelli noodles thin, onion sliced, ca...  
3  chicken, onion chopped, tomato chopped, green ...  
4  chana dal, white urad dal, red chillies, coria...  
                              TranslatedInstructions  \
0  To begin making the Masala Karela Recipe,de-se...   
1  To make tomato puliogere, first cut the tomato...   
2  To begin making the Ragi Vermicelli Recipe, fi...   
3  To begin making Gongura Chicken Curry Recipe f...   
4  To make Andhra Style Alam Pa

In [3]:
df = pd.read_csv("cleaned_indian_food_1.csv")
df = df.drop(columns=['TranslatedInstructions', 'TranslatedIngredients'])

# Step 3: Basic preprocessing
df.dropna(subset=["CleanedIngredients"], inplace=True)
df.reset_index(drop=True, inplace=True)

# Step 4: TF-IDF Vectorization on cleaned ingredients
vectorizer = TfidfVectorizer(stop_words="english")
ingredient_vectors = vectorizer.fit_transform(df["CleanedIngredients"])

In [4]:
import re

def normalize_ingredient_name(text):
    """
    Simplify ingredient names by removing adjectives and unnecessary words.
    Example: 'onion chopped finely' -> 'onion'
    """
    text = text.lower().strip()
    # remove preparation words and descriptors
    text = re.sub(r'\b(chopped|finely|roughly|sliced|diced|grated|minced|crushed|powder|paste|optional|to taste|fresh|whole|small|medium|large|inch)\b', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def recommend_recipe_precise(user_ingredients, top_n=10):
    """
    Rank recipes so that those needing fewer extra ingredients come first.
    """

    # Normalize user ingredients
    if isinstance(user_ingredients, str):
        user_ingredients = [i.strip().lower() for i in user_ingredients.split(",")]
    else:
        user_ingredients = [i.strip().lower() for i in user_ingredients]
    
    # Apply normalization
    user_set = set([normalize_ingredient_name(i) for i in user_ingredients])

    results = []
    for idx, row in df.iterrows():
        recipe_ingredients = [normalize_ingredient_name(i) for i in row["CleanedIngredients"].split(",")]
        recipe_set = set(recipe_ingredients)

        intersection = len(user_set & recipe_set)
        if intersection == 0:
            continue

        extra_ingredients = list(recipe_set - user_set)
        num_extras = len(extra_ingredients)

        results.append((idx, intersection, num_extras, extra_ingredients))

    if not results:
        print("No matching recipes found.")
        return pd.DataFrame(columns=[
            "Rank", "TranslatedRecipeName", "CleanedIngredients",
            "Cuisine", "Course", "Diet", "ExtraIngredientsCount", "MissingIngredients"
        ])

    # Sort by fewer extra ingredients, then more matches
    results = sorted(results, key=lambda x: (x[2], -x[1]))

    # Build final dataframe
    indices = [r[0] for r in results]
    extra_counts = [r[2] for r in results]
    missing_ings = [", ".join(r[3]) if r[3] else "None" for r in results]

    recommendations = df.iloc[indices][[
        "TranslatedRecipeName", "CleanedIngredients", "Cuisine", "Course", "Diet"
    ]].copy()

    recommendations["ExtraIngredientsCount"] = extra_counts
    recommendations["MissingIngredients"] = missing_ings
    recommendations["Rank"] = range(1, len(recommendations) + 1)
    # recommendations.to_csv("rec.csv", index=False)

    return recommendations


# Step 6: Function to neatly display recipe steps
def show_recipe_steps(recipe_name):
    """
    Display the steps of a recipe given its name.
    """
    recipe = df[df["TranslatedRecipeName"].str.lower() == recipe_name.lower()]
    if recipe.empty:
        print("Recipe not found.")
        return
    steps = eval(recipe.iloc[0]["RecipeSteps"]) if isinstance(recipe.iloc[0]["RecipeSteps"], str) else recipe.iloc[0]["RecipeSteps"]
    print(f"\nSteps for {recipe_name}:\n")
    for i, step in enumerate(steps, start=1):
        print(f"Step {i}: {step}")

In [5]:
user_input = "chicken"
print(f"\nTop recipe recommendations for: {user_input}\n")

recommendations = recommend_recipe_precise(user_input, top_n=10)
display(recommendations)

# Step 8: Show recipe steps (example)
show_recipe_steps(recommendations.iloc[0]["TranslatedRecipeName"])


Top recipe recommendations for: chicken



Unnamed: 0,TranslatedRecipeName,CleanedIngredients,Cuisine,Course,Diet,ExtraIngredientsCount,MissingIngredients,Rank
4869,Chicken Mole With Brown Rice Recipe,"chicken, homemade mole sauce, brown rice, swee...",Mexican,Lunch,Non Vegeterian,5,"brown rice, sweet corn, homemade mole sauce, s...",1
5145,Thai Style Kai Jeow Moo Sab Recipe - Omelette ...,"whole eggs, chicken minced, soy sauce, stalk s...",Thai,World Breakfast,Non Vegeterian,5,"soy sauce, salt to season, stalk spring onion ...",2
5533,Chicken Dimsums Recipe - Steamed Chicken Dumpl...,"chicken minced, spring onion bulb greens finel...",Asian,Appetizer,High Protein Vegetarian,6,"sunflower oil as required, soy sauce, all purp...",3
3991,Coriander Chicken Roast Recipe,"chicken, red chilli powder, garam masala powde...",North Indian Recipes,Appetizer,High Protein Non Vegetarian,7,"sunflower oil, red chilli, turmeric haldi, sal...",4
4075,Devil Chicken Recipe,"chicken, roasted tomato pasta sauce, worcester...",Continental,Appetizer,High Protein Non Vegetarian,7,"ginger, worcestershire sauce, roasted tomato p...",5
3063,Restaurant Style Chicken 555 Recipe,"chicken chopped, red chilli powder, turmeric p...",Indian,Appetizer,High Protein Non Vegetarian,8,"curry leaves a few, rice flour, sooji semolina...",6
4626,Naga Style Chicken With Bamboo Shoot Recipe,"chicken, tomatoes crushed in pestle mortar, ga...",North East India Recipes,Side Dish,High Protein Non Vegetarian,8,"green chillies, ginger, cabbage patta gobi mut...",7
2443,Chicken Schezwan Momo - Steamed Chicken Dumpli...,"chicken minced, carrot gajjar finely minced, o...",Chinese,Appetizer,Non Vegeterian,10,"ginger, carrot gajjar, schezwan sauce, stalk s...",8
2914,Penne Rigate With Minced Chicken Sauce & Chees...,"penne pasta i used whole wheat pasta, homemade...",Italian Recipes,Dinner,Non Vegeterian,10,"extra virgin olive oil, red chilli flakes or b...",9
3024,Coriander Mint Chicken Tikka Recipe - Dhaniya ...,"chicken chopped, lemon juice, chaat masala pow...",North Indian Recipes,Appetizer,High Protein Non Vegetarian,10,"oil as per use, green chillies, ginger, hung c...",10



Steps for Chicken Mole With Brown Rice Recipe:

Step 1: To begin making the Chicken Mole With Brown Rice Recipe, make the mole sauce first by following the recipe link.Heat a pan with oil, add the chicken pieces and season them with salt, pepper and dried herbs.
Step 2: Once the chicken starts to brown.
Step 3: Add the mole sauce and mix well and bring it to a boil for 5 minutes and keep it aside.Boil the brown rice with 2 cups of water in a pressure cooker for about 5 whistle.
Step 4: Allow the pressure to release by itself.Once done, add dried oregano, salt and sweet corn and toss it over in the same pressure cooker over a medium heat.
Step 5: To serve, place some rice to side of a plate and pour the mole sauce next to it with chunks of chicken pieces and sprinkle with some toasted sesame seeds and serve.Serve the Chicken Mole With Brown Rice Recipe as a one dish meal along with a glass of Red Wine Sangria Cocktail to enjoy your Sunday brunch.
