In [7]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv('recipes_vegetarian_strict_v2.csv')

print("üìä SHAPE:", df.shape)
print("\nüìã COLUMNS:", df.columns.tolist())
print("\nüëÄ SAMPLE:")
print(df.head(3).to_string())
print("\n‚ùå MISSING VALUES:")
print(df.isnull().sum())
print("\nüìà STATS:")
print(df[['num_ingredients', 'num_steps']].describe())
print("\nüè∑Ô∏è TOP CATEGORIES:")
print(df['category'].value_counts().head())

üìä SHAPE: (34391, 8)

üìã COLUMNS: ['recipe_title', 'category', 'subcategory', 'description', 'ingredients', 'directions', 'num_ingredients', 'num_steps']

üëÄ SAMPLE:
                                 recipe_title           category        subcategory                                                                                                                                                                                                                                                  description                                                                                                                                                                                                                                          ingredients                                                                                                                                                                                                                                                       

In [8]:
import os, pandas as pd
print("CWD:", os.getcwd())
print("Files:", os.listdir())

df_clean = pd.read_csv("recipes_cleaned_final.csv")
print(df_clean.shape)
df_clean.head(2)

CWD: c:\Users\Chantal Silva\OneDrive\Escritorio\vegetarian_chatbot
Files: ['app.py', 'audio1922855144.m4a', 'audio2922855144.m4a', 'chroma_low_budget_veg_recipes', 'cookbook_chatbot.py', 'data', 'rag_core.py', 'recipes_cleaned_final.csv', 'recipes_vegetarian_strict_v2.csv', 'recommender_core.py', 'recording.conf', 'vegetarian_rag_chatbot.ipynb', 'video1922855144.mp4', 'video2922855144.mp4', '__pycache__']
(31164, 11)


Unnamed: 0,recipe_title,category,subcategory,description,ingredients,directions,num_ingredients,num_steps,ingredients_list,directions_list,ingredient_tokens
0,Air Fryer Potato Slices with Dipping Sauce,Air Fryer Recipes,Air Fryer Recipes,"These air fryer potato slices, served with a b...","[""3/4 cup ketchup"", ""1/2 cup beer"", ""1 tablesp...","[""Combine ketchup, beer, Worcestershire sauce,...",9,5,"['3/4 cup ketchup', '1/2 cup beer', '1 tablesp...","['Combine ketchup, beer, Worcestershire sauce,...","['ketchup', 'beer', '1', 'tablespoon', 'worces..."
1,Air Fryer Corn on The Cob,Air Fryer Recipes,Air Fryer Recipes,Air fryer corn on the cob is super quick to ma...,"[""\u00bc cup mayonnaise"", ""2 teaspoons crumble...","[""Preheat an air fryer to 400 degrees F (200 d...",6,4,"['¬º cup mayonnaise', '2 teaspoons crumbled cot...",['Preheat an air fryer to 400 degrees F (200 d...,"['¬º', 'cup', 'mayonnaise', '2', 'teaspoons', '..."


In [9]:
import ast

if "ingredients_list" in df_clean.columns:
    def list_to_text(lst):
        if isinstance(lst, str):
            try:
                lst = ast.literal_eval(lst)
            except:
                lst = []
        return " ".join([str(x).lower() for x in lst])
    df_clean["ingredients_text"] = df_clean["ingredients_list"].apply(list_to_text)
else:
    df_clean["ingredients_text"] = df_clean["ingredients"].astype(str).str.lower()

df_clean["ingredients_text"].head()


0    3/4 cup ketchup 1/2 cup beer 1 tablespoon worc...
1    ¬º cup mayonnaise 2 teaspoons crumbled cotija c...
2    1/4 cup all-purpose flour 1/2 teaspoon cayenne...
3                                                     
4    1 large onion 1 1/2 teaspoons olive oil salt a...
Name: ingredients_text, dtype: object

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

vectorizer = TfidfVectorizer(stop_words="english")
X_ingredients = vectorizer.fit_transform(df_clean["ingredients_text"])
X_ingredients.shape

(31164, 2940)

In [11]:
def recommend_recipes(user_ingredients: str, top_n: int = 5):
    """
    Recebe ingredientes do utilizador (string com v√≠rgulas)
    e devolve top_n receitas mais semelhantes pelos ingredientes.
    """
    # normalizar input
    query_text = " ".join(
        [x.strip().lower() for x in user_ingredients.split(",") if x.strip()]
    )
    if not query_text:
        return df_clean.head(0)

    # vetor da query
    query_vec = vectorizer.transform([query_text])
    sims = cosine_similarity(query_vec, X_ingredients)[0]  # array (n_recipes,)

    # √≠ndices ordenados por similaridade (maior ‚Üí menor)
    top_idx = sims.argsort()[::-1][:top_n]
    results = df_clean.iloc[top_idx].copy()
    results["similarity"] = sims[top_idx]
    return results[["recipe_title", "category", "ingredients_text", "similarity"]]

In [12]:
recommend_recipes(
    "tomatoes, onion, garlic, olive oil, pasta, cheese",
    top_n=5
)

Unnamed: 0,recipe_title,category,ingredients_text,similarity
25958,Suki's Spinach and Feta Pasta,Mushrooms,1 (8 ounce) package penne pasta 2 tablespoons ...,0.411703
12650,4-Ingredient Creamy Tomato Soup for Two,Cooking For Two,"1/2 onion, quartered 1 (14.5 ounce) can diced ...",0.404545
23400,4-Ingredient Creamy Tomato Soup for Two,Lunch,"1/2 onion, quartered 1 (14.5 ounce) can diced ...",0.404545
14723,4-Ingredient Creamy Tomato Soup for Two,Dinner,"1/2 onion, quartered 1 (14.5 ounce) can diced ...",0.404545
101,Air Fryer Pasta Chips,Air Fryer Recipes,2 cups farfalle pasta 1 tablespoon olive oil ¬Ω...,0.404277


In [13]:
cols = ["recipe_title", "category", "subcategory", "ingredients", "num_ingredients", "num_steps"]
df_clean[cols].head(5)

Unnamed: 0,recipe_title,category,subcategory,ingredients,num_ingredients,num_steps
0,Air Fryer Potato Slices with Dipping Sauce,Air Fryer Recipes,Air Fryer Recipes,"[""3/4 cup ketchup"", ""1/2 cup beer"", ""1 tablesp...",9,5
1,Air Fryer Corn on The Cob,Air Fryer Recipes,Air Fryer Recipes,"[""\u00bc cup mayonnaise"", ""2 teaspoons crumble...",6,4
2,Air Fryer Crispy Onions,Air Fryer Recipes,Air Fryer Recipes,"[""1/4 cup all-purpose flour"", ""1/2 teaspoon ca...",5,3
3,Air Fryer Hot Honey Glazed Carrots,Air Fryer Recipes,Air Fryer Recipes,"[""nonstick cooking spray"", ""2 tablespoons butt...",4,3
4,Air Fryer Sauteed Onions,Air Fryer Recipes,Air Fryer Recipes,"[""1 large onion"", ""1 1/2 teaspoons olive oil"",...",3,3
