# Data Preprocessing .

In [17]:
import pandas as pd
df = pd.read_csv("recipes.csv")
print(df.columns)


Index(['TranslatedRecipeName', 'TranslatedIngredients', 'TotalTimeInMins',
       'Cuisine', 'TranslatedInstructions', 'URL', 'Cleaned-Ingredients',
       'image-url', 'Ingredient-count'],
      dtype='object')


In [18]:
df["recipe_name"] = df["TranslatedRecipeName"]
df["ingredient_required"] = df["TranslatedIngredients"]
df["time_to_cook"] = df["TotalTimeInMins"]
df["cuisine"] = df["Cuisine"]
df["long_description"] = df["TranslatedInstructions"]
df["cleaned_ingredients"] = df["Cleaned-Ingredients"]
df["image_url"] = df["image-url"]
df["ingredient_count"] = df["Ingredient-count"]

df.drop(['TranslatedRecipeName', 'TranslatedIngredients', 'TotalTimeInMins',
       'Cuisine', 'TranslatedInstructions', 'Cleaned-Ingredients',
       'image-url', 'Ingredient-count'] , axis = 1 , inplace = True)

print("columns : " , df.columns)
df.head()

columns :  Index(['URL', 'recipe_name', 'ingredient_required', 'time_to_cook', 'cuisine',
       'long_description', 'cleaned_ingredients', 'image_url',
       'ingredient_count'],
      dtype='object')


Unnamed: 0,URL,recipe_name,ingredient_required,time_to_cook,cuisine,long_description,cleaned_ingredients,image_url,ingredient_count
0,https://www.archanaskitchen.com/masala-karela-...,Masala Karela Recipe,"1 tablespoon Red Chilli powder,3 tablespoon Gr...",45,Indian,"To begin making the Masala Karela Recipe,de-se...","salt,amchur (dry mango powder),karela (bitter ...",https://www.archanaskitchen.com/images/archana...,10
1,https://www.archanaskitchen.com/spicy-tomato-r...,Spicy Tomato Rice (Recipe),"2 teaspoon cashew - or peanuts, 1/2 Teaspoon ...",15,South Indian Recipes,"To make tomato puliogere, first cut the tomato...","tomato,salt,chickpea lentils,green chilli,rice...",https://www.archanaskitchen.com/images/archana...,12
2,https://www.archanaskitchen.com/ragi-vermicell...,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1 Onion - sliced,1 teaspoon White Urad Dal (Sp...",50,South Indian Recipes,"To begin making the Ragi Vermicelli Recipe, fi...","salt,rice vermicelli noodles (thin),asafoetida...",https://www.archanaskitchen.com/images/archana...,12
3,https://www.archanaskitchen.com/gongura-chicke...,Gongura Chicken Curry Recipe - Andhra Style Go...,"1/2 teaspoon Turmeric powder (Haldi),1 tablesp...",45,Andhra,To begin making Gongura Chicken Curry Recipe f...,"tomato,salt,ginger,sorrel leaves (gongura),fen...",https://www.archanaskitchen.com/images/archana...,15
4,https://www.archanaskitchen.com/andhra-style-a...,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"oil - as per use, 1 tablespoon coriander seed...",30,Andhra,"To make Andhra Style Alam Pachadi, first heat ...","tomato,salt,ginger,red chillies,curry,asafoeti...",https://www.archanaskitchen.com/images/archana...,12


In [19]:
# Check missing values
print(df.isnull().sum())

# Drop rows if necessary (if any column has many missing values)
df = df.dropna()


URL                    0
recipe_name            0
ingredient_required    0
time_to_cook           0
cuisine                0
long_description       0
cleaned_ingredients    0
image_url              0
ingredient_count       0
dtype: int64


In [20]:
import ast

def categorize_time(minutes):
    if minutes <= 15:
        return "Quick (<15 mins)"
    elif minutes <= 30:
        return "Fast (15-30 mins)"
    elif minutes <= 60:
        return "Moderate (30-60 mins)"
    else:
        return "Long (>60 mins)"

# Convert cuisine names to lowercase & remove inconsistencies
df['cuisine'] = df['cuisine'].str.lower().str.strip()

# Convert cooking time into integers
df['time_to_cook'] = pd.to_numeric(df['time_to_cook'], errors='coerce')
df['time_category'] = df['time_to_cook'].apply(categorize_time)

# Convert all text data to lowercase
df['recipe_name'] = df['recipe_name'].str.lower().str.strip()

# Convert "Cleaned-Ingredients" from a comma-separated string to a list
df['cleaned_ingredients'] = df['cleaned_ingredients'].apply(lambda x: [i.strip().lower() for i in x.split(',')])

# Ensure all URLs & image links are valid
df = df[df['URL'].str.startswith('http')]
df = df[df['image_url'].str.startswith('http')]

# Check final cleaned data
df.head()


Unnamed: 0,URL,recipe_name,ingredient_required,time_to_cook,cuisine,long_description,cleaned_ingredients,image_url,ingredient_count,time_category
0,https://www.archanaskitchen.com/masala-karela-...,masala karela recipe,"1 tablespoon Red Chilli powder,3 tablespoon Gr...",45,indian,"To begin making the Masala Karela Recipe,de-se...","[salt, amchur (dry mango powder), karela (bitt...",https://www.archanaskitchen.com/images/archana...,10,Moderate (30-60 mins)
1,https://www.archanaskitchen.com/spicy-tomato-r...,spicy tomato rice (recipe),"2 teaspoon cashew - or peanuts, 1/2 Teaspoon ...",15,south indian recipes,"To make tomato puliogere, first cut the tomato...","[tomato, salt, chickpea lentils, green chilli,...",https://www.archanaskitchen.com/images/archana...,12,Quick (<15 mins)
2,https://www.archanaskitchen.com/ragi-vermicell...,ragi semiya upma recipe - ragi millet vermicel...,"1 Onion - sliced,1 teaspoon White Urad Dal (Sp...",50,south indian recipes,"To begin making the Ragi Vermicelli Recipe, fi...","[salt, rice vermicelli noodles (thin), asafoet...",https://www.archanaskitchen.com/images/archana...,12,Moderate (30-60 mins)
3,https://www.archanaskitchen.com/gongura-chicke...,gongura chicken curry recipe - andhra style go...,"1/2 teaspoon Turmeric powder (Haldi),1 tablesp...",45,andhra,To begin making Gongura Chicken Curry Recipe f...,"[tomato, salt, ginger, sorrel leaves (gongura)...",https://www.archanaskitchen.com/images/archana...,15,Moderate (30-60 mins)
4,https://www.archanaskitchen.com/andhra-style-a...,andhra style alam pachadi recipe - adrak chutn...,"oil - as per use, 1 tablespoon coriander seed...",30,andhra,"To make Andhra Style Alam Pachadi, first heat ...","[tomato, salt, ginger, red chillies, curry, as...",https://www.archanaskitchen.com/images/archana...,12,Fast (15-30 mins)


In [21]:
from sklearn.feature_extraction.text import CountVectorizer

# Convert list of ingredients into a space-separated string for vectorization
df['ingredients_text'] = df['cleaned_ingredients'].apply(lambda x: ' '.join(x))

# Create Bag-of-Words representation
vectorizer = CountVectorizer()
ingredient_vectors = vectorizer.fit_transform(df['ingredients_text'])
df['ingredients_text'] = df['ingredients_text'].astype(str)

In [22]:
# Vectorizing all conditions

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize TF-IDF Vectorizer
ingredient_vectorizer = TfidfVectorizer()
ingredient_vectors = ingredient_vectorizer.fit_transform(df['ingredients_text'])

recipe_vectorizer = TfidfVectorizer()
recipe_vectors = recipe_vectorizer.fit_transform(df['recipe_name'])


# Recommending recipes

In [23]:
## Recommendation based on Ingredients .
import numpy as np

def recommend_by_ingredients(user_ingredients, df, vectorizer, ingredient_vectors):
    user_input_str = ' '.join(user_ingredients)
    user_input_vector = vectorizer.transform([user_input_str])

    similarity_scores = cosine_similarity(user_input_vector, ingredient_vectors)

    # Create a copy of df to avoid overwriting
    df_copy = df.copy()

    # Assign similarity scores correctly
    df_copy['similarity_score'] = similarity_scores.flatten()[:len(df_copy)]

    return df_copy.sort_values(by='similarity_score', ascending=False).head(10)

## Recommendation using Recipe_name

def recommend_by_recipe_name(recipe_name, df, vectorizer, recipe_vectors):
    user_recipe_vector = vectorizer.transform([recipe_name])
    similarity_scores = cosine_similarity(user_recipe_vector, recipe_vectors)

    df_copy = df.copy()
    df_copy['name_similarity_score'] = similarity_scores.flatten()[:len(df_copy)]

    return df_copy.sort_values(by='name_similarity_score', ascending=False).head(10)

# filtring based on time.
def filter_by_time(df, max_time):
    return df[df['time_to_cook'] <= max_time]

# Combining all recommendation.
def get_recommendations(selected_time, user_ingredients, recipe_name, df):
    # Step 1: Filter by time
    time_filtered_df = df[df['time_to_cook'] <= selected_time].copy()

    # Step 2: Recommend by ingredients
    ingredient_recommendations = recommend_by_ingredients(user_ingredients, time_filtered_df, ingredient_vectorizer, ingredient_vectors)

    # Step 3: Recommend by name
    final_recommendations = recommend_by_recipe_name(recipe_name, ingredient_recommendations, recipe_vectorizer, recipe_vectors)

    return final_recommendations[['recipe_name', 'time_to_cook', 'cleaned_ingredients', 'image_url', 'URL']].head(10)


In [24]:
# # Use case of Ingredient based recommendation.

# user_ingredients = ["tomato", "onion", "salt"]
# ingredient_recommendations = recommend_by_ingredients(user_ingredients, df, ingredient_vectorizer, ingredient_vectors)
# print(ingredient_recommendations[['recipe_name', 'time_to_cook', 'URL']])


In [25]:
# # Use case of recipe_name based recommendation.

# recipe_name = "tomato rice"
# name_recommendations = recommend_by_recipe_name(recipe_name, df, recipe_vectorizer, recipe_vectors)
# print(name_recommendations[['recipe_name', 'time_to_cook', 'URL']])


In [26]:
# # Use case of time based recommendation.

# filtered_df = filter_by_time(df, 30)  # Recipes that take 30 minutes or less
# filtered_df[['recipe_name', 'time_to_cook' , 'URL', 'image_url']]


In [27]:
# Use case of all conditions based recommendation.
recommendations = get_recommendations(5, ["tomato", "onion", "salt"], "tomato rice", df)
print(recommendations)


                                            recipe_name  time_to_cook  \
2896           coffee recipe with instant coffee powder             4   
5149  kakka orotti recipe - steamed mini rice balls ...             0   
4567          burani raita recipe - garlic based yogurt             5   
4336             sweet & spicy soy dipping sauce recipe             5   
2055  kerala thenga chammanthi recipe- coconut onion...             5   
5620                             besan vegetable recipe             0   
394        indian beaten coffee recipe | dalgona coffee             5   
431     banana digestive pudding recipe with pistachios             5   
1851                       grated cucumber raita recipe             5   
3601  kanji recipe - probiotic drink made from carro...             5   

                                    cleaned_ingredients  \
2896                       [sugar, milk, coffee powder]   
5149  [tomato, kashmiri red chilli powder, onion, ch...   
4567  [cumin powder

In [28]:
!pip install gradio



In [29]:

def recommend_recipes(time, ingredients, recipe_name):
    ingredients_list = ingredients.split(",")
    recommendations = get_recommendations(time, ingredients, recipe_name, df)

    results = []
    for _, row in recommendations.iterrows():
        results.append({
            "Recipe Name": row['recipe_name'],
            "Time to Cook": f"{row['time_to_cook']} mins",
            "Ingredients": ', '.join(row['cleaned_ingredients']),
            "Image": row['image_url'],
            "Recipe URL": row['URL']
        })
    return results


In [15]:
import gradio as gr

def get_recommendations_gradio(selected_time, ingredients, recipe_name):
    user_ingredients = [ing.strip() for ing in ingredients.split(",")]
    recommendations = get_recommendations(selected_time, user_ingredients, recipe_name, df)

    # Convert DataFrame to a list of lists (Gradio-friendly format)
    return recommendations.values.tolist()

# Define Gradio UI
inputs = [
    gr.Slider(0, 120, value=30, label="Max Cooking Time (minutes)"),
    gr.Textbox(label="Enter Ingredients (comma-separated)"),
    gr.Textbox(label="Enter Recipe Name (optional)")
]

outputs = gr.Dataframe(headers=["Recipe Name", "Time to Cook", "Ingredients", "Image URL", "Recipe URL"])

# Create Gradio app
app = gr.Interface(fn=get_recommendations_gradio, inputs=inputs, outputs=outputs, title="Food Recommendation System")
app.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fc1f6b214ed83b10fe.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


