# FAISS

In [None]:
import faiss
import pandas as pd
import numpy as np
import ast

# Read the preprocessed recipe table and the raw recipe table from disk
pp_recipes = pd.read_csv("data/PP_recipes_updated.csv")
raw_recipes = pd.read_csv("data/RAW_recipes.csv")

# Left-join on the shared 'id' column: every row of pp_recipes is kept
recipes = pp_recipes.merge(raw_recipes, how='left', on='id')

# Drop the columns that are not used further down in this notebook
recipes = recipes.drop(columns=[
    'name_tokens', 'ingredient_tokens', 'steps_tokens',
    'techniques', 'ingredient_ids', 'contributor_id',
    'submitted', 'tags', 'steps', 'description', 'ingredients',
])

# Reorder: 'name' becomes the first column, 'n_ingredients' moves to position 4
recipes.insert(0, 'name', recipes.pop('name'))
recipes.insert(4, 'n_ingredients', recipes.pop('n_ingredients'))

# 'ingredient_names' is parsed from its string form with ast.literal_eval
# (the loop in Step 1 iterates each parsed value as a collection of names)
recipes['ingredient_names'] = recipes['ingredient_names'].map(ast.literal_eval)

# Step 1: Encode Ingredients
# Vocabulary: every distinct ingredient name across all recipes, in sorted order
unique_ingredients = sorted({
    name
    for names in recipes['ingredient_names']
    for name in names
})
# Lookup from ingredient name to its position in the sorted vocabulary
ingredient_to_idx = dict(zip(unique_ingredients, range(len(unique_ingredients))))

def encode_ingredients(ingredients):
    """Turn a collection of ingredient names into a multi-hot float32 vector.

    The vector has one slot per entry of ``unique_ingredients``; a slot is 1.0
    when the corresponding ingredient appears in ``ingredients`` and 0.0
    otherwise. Names missing from ``ingredient_to_idx`` are skipped.
    """
    multi_hot = np.zeros(len(unique_ingredients), dtype='float32')
    positions = [
        ingredient_to_idx[name]
        for name in ingredients
        if name in ingredient_to_idx
    ]
    if positions:
        # Duplicate positions are harmless: the slot is simply set to 1.0 again
        multi_hot[positions] = 1.0
    return multi_hot

# Step 2: Build FAISS Index
# One dimension per vocabulary entry; the flat index compares vectors by L2 (Euclidean) distance
vector_size = len(unique_ingredients)
index = faiss.IndexFlatL2(vector_size)

# Encode every recipe and stack the results into an (n_recipes, vector_size) matrix.
# Rows are added in the same order as `recipes`, so the position of a vector in the
# index is also the positional row of its recipe.
vectors = np.vstack(list(map(encode_ingredients, recipes['ingredient_names'])))
index.add(vectors)


Recommended Recipes: ['spanish hot chocolate', 'chocolate gelato without ice cream maker', 'spanish thick hot chocolate']


In [26]:
# Step 3: Recommendation Function
def recommend_recipes(user_ingredients, top_n=5):
    """Return the names of the recipes closest to a set of ingredients.

    Parameters
    ----------
    user_ingredients : iterable
        Ingredient names to search for. They are encoded with
        ``encode_ingredients``, which skips names outside the vocabulary.
    top_n : int, default 5
        Maximum number of recipe names to return.

    Returns
    -------
    list
        Values of the ``name`` column for the matching rows of ``recipes``,
        in the order produced by ``index.search``. The list is shorter than
        ``top_n`` when the index holds fewer than ``top_n`` vectors.
    """
    # FAISS expects a 2-D batch of queries, so the single vector becomes one row
    user_vector = encode_ingredients(user_ingredients).reshape(1, -1)
    _distances, indices = index.search(user_vector, top_n)

    # FAISS fills unused result slots with -1 when it cannot find top_n
    # neighbours. Passing -1 to .iloc would silently select the LAST recipe,
    # so those placeholder labels are removed before the lookup.
    hits = indices[0]
    hits = hits[hits >= 0]

    # Index labels are positional: vectors were added in `recipes` row order
    return recipes.iloc[hits]['name'].tolist()

In [31]:
# Use the first 5 rows of `recipes` as sample queries: recipe name -> its ingredient list
user_inputs = {
    name: ingredients
    for name, ingredients in zip(
        recipes['name'].iloc[:5],
        recipes['ingredient_names'].iloc[:5],
    )
}

# One column per sample recipe, filled with the names returned for top_n=11
output = pd.DataFrame({
    query_name: recommend_recipes(query_ingredients, top_n=11)
    for query_name, query_ingredients in user_inputs.items()
})

output

Unnamed: 0,aromatic basmati rice rice cooker,pumpkin pie a la easy,cheesy tomato soup with potatoes,mini tacos,rosemary s hanky panky s
0,aromatic basmati rice rice cooker,pumpkin pie a la easy,cheesy tomato soup with potatoes,mini tacos,rosemary s hanky panky s
1,spiced rice with fresh ginger,cross stitch cookies,stoved tatties,crock pot cheese dip,party rye sandwiches
2,basmati chaaval,heavenly apple crisp,fluffy whipped potatoes,quick and lite chicken quesadillas,dipstir crock pot dip
3,persian style steamed rice,1 2 3 apple crisp,libby s scalloped potatoes,seven layer dip,gator bait
4,basmati rice,crock pot apple crisp dessert,easy restaurant style macaroni and cheese,weight watchers mexican chicken breasts,party pizzas
5,cinnamon basmati rice,pecan pie mini muffins,caramelized scandinavian potatoes,nachos in a bowl,crock pot meaty cheese dip
6,coconut rice from zanzibar,apple crumble in a jar,easy baguettes,salsa cakes,hanky pankies
7,fragrant basmati rice with apple juice and ginger,peach crisp any fresh fruit can be substituted,egg roll wrappers,easy layered dip,oklahoma dip
8,boiled rice,janet s apple crisp,native biscuit bread,fiesta chicken,texas cheese dip
9,coconut basmati rice,praline mini muffins,weight watchers cheese fries,working mom s layered nachos,cheese mexi melt
