# FAISS

In [1]:
import faiss
import pandas as pd
import numpy as np
import ast

# Load data
pp_recipes = pd.read_csv("../../data/PP_recipes.csv")
raw_recipes = pd.read_csv("../../data/RAW_recipes.csv")

# Merge
recipes = pd.merge(pp_recipes, raw_recipes, how='left', left_on='id', right_on='id')

# Drop unncessary columns
recipes.drop(['name_tokens','ingredient_tokens','steps_tokens',
              'techniques','ingredient_ids','contributor_id',
              'submitted','tags','steps','description', 'ingredients'],
              axis=1,
              inplace=True)

# Format
recipes.insert(0, 'name', recipes.pop('name'))
recipes.insert(4, 'n_ingredients', recipes.pop('n_ingredients'))
recipes['ingredient_names'] = recipes['ingredient_names'].apply(ast.literal_eval)

# Initialize lists
calories = []
total_fat = []
sugar = []
sodium = []
protein = []
saturated_fat = []
carbs = []

# Append nutrition values
def get_nutrition(recipe):
    calories.append(recipe[0])
    total_fat.append(recipe[1])
    sugar.append(recipe[2])
    sodium.append(recipe[3])
    protein.append(recipe[4])
    saturated_fat.append(recipe[5])
    carbs.append(recipe[6])

# Get nutrition for each recipe
for index, row in recipes.iterrows():
    get_nutrition(ast.literal_eval(row['nutrition']))

# Convert into pandas columns
recipes['calories (#)'] = calories
recipes['total_fat (%DV)'] = total_fat
recipes['sugar (%DV)'] = sugar
recipes['sodium (%DV)'] = sodium
recipes['protein (%DV)'] = protein
recipes['saturated_fat (%DV)'] = saturated_fat
recipes['carbs (%DV)'] = carbs

# Drop nutrition
recipes.drop('nutrition',axis=1,inplace=True)

In [2]:
def encode_ingredients(ingredients, ingredient_to_idx, unique_ingredients):
    """
    Generate vector encodings for ingredients-to-recipes.

    Args:
    - ingredients (list): A list of ingredients in the recipe.
    - ingredient_to_idx (dict): A dict mapping ingredients to their respective ids.
    - unique_ingredients (list): A list of unique ingredients.

    Returns:
    - vector (np.array): An encoding of which ingredients are in the recipe w.r.t to all available ingredients.
    """
    vector = np.zeros(len(unique_ingredients), dtype='float32')
    for ingredient in ingredients:
        if ingredient in ingredient_to_idx:
            vector[ingredient_to_idx[ingredient]] = 1.0
    return vector

# Get unique ingredients
unique_ingredients = sorted(set(ingredient for ingredients in recipes['ingredient_names'] for ingredient in ingredients))
ingredient_to_idx = {ingredient: idx for idx, ingredient in enumerate(unique_ingredients)}

# BUILD: Initialize index for FAISS
vector_size = len(unique_ingredients)
index = faiss.IndexFlatL2(vector_size)
vectors = np.vstack([encode_ingredients(ing, ingredient_to_idx, unique_ingredients) for ing in recipes['ingredient_names']])
index.add(vectors)

In [7]:
def recommend_recipes(user_ingredients, allergens=[''], calories=None, total_fat=None, sugar=None,
                      sodium=None, protein=None, saturated_fat=None, carbs=None, top_n=5):
    """
    Gives a list of top_n recommended recipes based on the given user_ingredients.

    Args:
    - user_ingredients (list): A list of ingredients provided by the user.
    - allergens (list): A list of allergens from the user.
                        Recipes that contain ingredients in allergens will not be recommended.
    - calories (float): A user-specified calorie constraint.
    - total_fat (float): A user-specified total_fat constraint.
    - sugar (float): A user-specified sugar constraint.
    - sodium (float): A user-specified sodium constraint.
    - protein (float): A user-specified protein constraint.
    - saturated_fat (float): A user-specified saturated_fat constraint.
    - carbs (float): A user-specified carbs constraint.
    - top_n (int): The number of recommended recipes to return.

    Returns:
    - list: A list of recommended recipes.
    """
    # FILTER: Filter out index of filtered recipes
    filtered_recipes = recipes[~recipes['ingredient_names'].apply(lambda ingredients: any(item in allergens for item in ingredients))]
    filtered_recipes = nutrition_filter(filtered_recipes, calories, total_fat, sugar, sodium, protein, saturated_fat, carbs)
    filtered_ids = filtered_recipes.index
    filtered_ids  = [id_ for id_ in range(index.ntotal) if id_ in filtered_ids]
    id_selector =faiss.IDSelectorArray(filtered_ids)

    # SEARCH
    user_vector = encode_ingredients(user_ingredients, ingredient_to_idx, unique_ingredients).reshape(1,-1)
    _, filtered_indices = index.search(user_vector, k=top_n, params=faiss.SearchParametersIVF(sel=id_selector))
    return recipes.iloc[filtered_indices[0]]['name'].tolist()

def nutrition_filter(recipes_df, calories, total_fat, sugar, sodium, protein, saturated_fat, carbs):
    """
    Filter recipes based on user-specified nutritional constraints.

    Args:
    - recipes_df (DataFrame): The dataframe containing all relevant recipes.
    - calories (float): A user-specified calorie constraint.
    - total_fat (float): A user-specified total_fat constraint.
    - sugar (float): A user-specified sugar constraint.
    - sodium (float): A user-specified sodium constraint.
    - protein (float): A user-specified protein constraint.
    - saturated_fat (float): A user-specified saturated_fat constraint.
    - carbs (float): A user-specified carbs constraint.

    Returns:
    - DataFrame: A dataframe that only contains recipes within the specified constraint.
    """

    # Calculate the min-max ranges for each nutritional component
    calorie_min, calorie_max = get_min_max_calories(calories)
    total_fat_min, total_fat_max = get_min_max(total_fat)
    sugar_min, sugar_max = get_min_max(sugar)
    sodium_min, sodium_max = get_min_max(sodium)
    protein_min, protein_max = get_min_max(protein)
    saturated_fat_min, saturated_fat_max = get_min_max(saturated_fat)
    carbs_min, carbs_max = get_min_max(carbs)

    # Filter recipes within the specified range for any nutritional component
    filtered_recipes = recipes_df[
        (recipes_df['calories (#)'] > calorie_min) & (recipes_df['calories (#)'] < calorie_max) &
        (recipes_df['total_fat (%DV)'] > total_fat_min) & (recipes_df['total_fat (%DV)'] < total_fat_max) &
        (recipes_df['sugar (%DV)'] > sugar_min) & (recipes_df['sugar (%DV)'] < sugar_max) &
        (recipes_df['sodium (%DV)'] > sodium_min) & (recipes_df['sodium (%DV)'] < sodium_max) &
        (recipes_df['protein (%DV)'] > protein_min) & (recipes_df['protein (%DV)'] < protein_max) &
        (recipes_df['saturated_fat (%DV)'] > saturated_fat_min) & (recipes_df['saturated_fat (%DV)'] < saturated_fat_max) &
        (recipes_df['carbs (%DV)'] > carbs_min) & (recipes_df['carbs (%DV)'] < carbs_max)
    ]
    return filtered_recipes

def get_min_max_calories(value):
    """
    Calculate the minimum and maximum calorie range.

    This function returns a range of values within ±10% of the given value.
    If the input value is None, it defaults to the range (0, 10000).

    Args:
    - value (float or None): The calorie value to calculate the range for.

    Returns:
    - tuple: A tuple containing the minimum and maximum values.
             If value is not None, returns (value * 0.9, value * 1.1).
             Otherwise, returns (0, 10000).
    """
    return (value * 0.9, value * 1.1) if value is not None else (0, 10000)

def get_min_max(value):
    """
    Calculate the minimum and maximum nutritional range.

    This function returns a range of values within ±50% of the given value.
    If the input value is None, it defaults to the range (0, 10000).

    Args:
    - value (float or None): The nutritional value to calculate the range for.

    Returns:
    - tuple: A tuple containing the minimum and maximum values.
             If value is not None, returns (value * 0.9, value * 1.1).
             Otherwise, returns (0, 10000).
    """
    return (value * 0.5, value * 1.5) if value is not None else (0, 10000)

In [9]:
# Example recommendation
recs = recommend_recipes(user_ingredients=['basmati rice', 'water', 'salt', 'cinnamon stick', 'green cardamom pod'],
                         allergens=['taro','raw cashew'],
                         calories=225,  # range = (202.5, 247.5)
                         sugar=2,
                         top_n=11)

recipes[recipes['name'].isin(recs)]

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),sugar (%DV),sodium (%DV),protein (%DV),saturated_fat (%DV),carbs (%DV)
0,aromatic basmati rice rice cooker,424415,"['combine', 'drain', 'strain']",0,5,"[basmati rice, water, salt, cinnamon stick, gr...",61,6,228.2,2.0,2.0,8.0,9.0,1.0,15.0
10960,steamin mussels,482033,"['boil', 'combine']",0,5,"[chicken broth, dry white wine, salt, pepper, ...",20,5,235.5,9.0,2.0,78.0,60.0,6.0,3.0
24560,browned rice,18445,"['bake', 'melt']",0,7,"[butter, long grain rice, salt, pepper, consom...",80,6,244.5,19.0,2.0,28.0,12.0,26.0,9.0
28433,fried barley,321152,"['fry', 'melt']",0,5,"[butter, onion, cooked barley, salt, pepper]",35,3,206.0,16.0,2.0,11.0,4.0,32.0,8.0
29214,basmati chaaval,6586,"['boil', 'combine', 'drain', 'pour', 'soak']",0,4,"[basmati rice, salt, unsalted butter, water]",0,10,245.1,5.0,2.0,12.0,9.0,7.0,15.0
65731,nuked basmati rice,156190,"['drain', 'microwave']",0,7,"[basmati rice, cumin seed, ground coriander, s...",23,10,230.4,2.0,2.0,8.0,10.0,1.0,15.0
74166,whole roasted garlic with goat cheese,80111,"['bake', 'broil', 'pour']",0,5,"[garlic head, olive oil, salt, fresh ground bl...",70,10,215.7,27.0,2.0,3.0,9.0,24.0,3.0
76987,perfect oven baked brown rice,407156,"['boil', 'steam']",0,4,"[brown rice, water, salt, vegetable oil]",70,13,244.1,5.0,2.0,10.0,9.0,2.0,15.0
124467,hungarian cream cheese spread,201751,[],0,4,"[cream cheese, hungarian paprika, green pepper...",5,5,205.8,30.0,2.0,7.0,9.0,62.0,1.0
125663,crunchy nut pie pastry,146594,"['blend', 'combine']",0,5,"[nut, salt, unbleached flour, butter, ice water]",10,6,230.5,21.0,2.0,6.0,9.0,28.0,7.0


In [6]:
# Extract the top 5 recipes into a dictionary
user_inputs = dict(zip(
    recipes['name'].head(5),
    recipes['ingredient_names'].head(5)
))

# Generate recommendations and store in DataFrame
output = pd.DataFrame({
    recipe_name: recommend_recipes(ingredients, top_n=11)
    for recipe_name, ingredients in user_inputs.items()
})

output

Unnamed: 0,aromatic basmati rice rice cooker,pumpkin pie a la easy,cheesy tomato soup with potatoes,mini tacos,rosemary s hanky panky s
0,aromatic basmati rice rice cooker,pumpkin pie a la easy,cheesy tomato soup with potatoes,mini tacos,crock pot meaty cheese dip
1,spiced rice with fresh ginger,cross stitch cookies,fluffy whipped potatoes,crock pot cheese dip,hanky pankies
2,basmati chaaval,heavenly apple crisp,easy restaurant style macaroni and cheese,quick and lite chicken quesadillas,oklahoma dip
3,persian style steamed rice,1 2 3 apple crisp,easy baguettes,seven layer dip,texas cheese dip
4,basmati rice,crock pot apple crisp dessert,native biscuit bread,weight watchers mexican chicken breasts,yummy queso
5,coconut rice from zanzibar,pecan pie mini muffins,weight watchers cheese fries,nachos in a bowl,cheesy breakfast casserole
6,fragrant basmati rice with apple juice and ginger,peach crisp any fresh fruit can be substituted,rick s mashed potatoes,working mom s layered nachos,college girl s best queso ever
7,coconut basmati rice,janet s apple crisp,whitley goose,salsa wraps stolen from sharon,sausage velveeta slow cooker dip
8,baked boston peanuts,praline mini muffins,garlic potatoes gspeed,incredible and easy taco dip,velveeta rye party squares
9,easy baguettes,apple filled crepes,smoky cheese bites,david s taco salad,party rye sludge


## Evaluation

### Mean Average Ingrement Matching (MAIM)

In [None]:
def ingredient_similarity_score(user_ingredients, recommended_recipe_ingredients):
    common_items = set(user_ingredients) & set(recommended_recipe_ingredients)
    total_unique_items = set(user_ingredients) | set(recommended_recipe_ingredients)
    ratio = len(common_items) / len(total_unique_items)
    return ratio

def average_ingredient_similarity(recipe_example):
    # Get list of unique recommended_recipes
    the_list = list(set(output[recipe_example]))

    # Filter recipes based on the_list
    filtered_recipes = recipes[recipes['name'].isin(the_list)].sort_values(
        by='name',
        key=lambda x: pd.Categorical(x, categories=the_list, ordered=True)
    ).reset_index(drop=True)[['name','ingredient_names']]

    # Get average sim score
    average_sim_score = 0
    for index, row in filtered_recipes.iterrows():
        if index!= 0:
            average_sim_score += ingredient_similarity_score(recipes.iloc[0]['ingredient_names'], row['ingredient_names'])

    return average_sim_score / (len(filtered_recipes)-1)

# Calculation
mean_average_sim_score = 0
for col in output.columns:
    mean_average_sim_score += average_ingredient_similarity(col)
print("Mean average similarity score is:")
mean_average_sim_score / len(output.columns)


In [None]:
def mean_reciprocal_rank(y_true, y_pred):
    """
    Calculate Mean Reciprocal Rank (MRR)

    Parameters:
    y_true (list of lists): Ground truth relevant items per user
    y_pred (list of lists): Predicted ranked items per user

    Returns:
    float: MRR score
    """
    reciprocal_ranks = []
    for true_items, pred_items in zip(y_true, y_pred):
        rank = 0
        for idx, item in enumerate(pred_items, start=1):
            if item in true_items:
                rank = idx
                break
        reciprocal_ranks.append(1 / rank if rank > 0 else 0)
    return np.mean(reciprocal_ranks)


def mean_average_precision(y_true, y_pred, k=10):
    """
    Calculate Mean Average Precision (MAP) at K

    Parameters:
    y_true (list of lists): Ground truth relevant items per user
    y_pred (list of lists): Predicted ranked items per user
    k (int): Cutoff for precision calculation

    Returns:
    float: MAP score
    """
    average_precisions = []
    for true_items, pred_items in zip(y_true, y_pred):
        hits = 0
        sum_precisions = 0
        for i, item in enumerate(pred_items[:k], start=1):
            if item in true_items:
                hits += 1
                sum_precisions += hits / i
        if hits > 0:
            average_precisions.append(sum_precisions / hits)
        else:
            average_precisions.append(0)
    return np.mean(average_precisions)


# Example usage
y_true = [[1, 3], [2], [4, 5]]  # Ground truth relevant items
y_pred = [[3, 2, 1], [1, 2, 3], [5, 4, 6]]  # Predicted ranked items

mrr_score = mean_reciprocal_rank(y_true, y_pred)
map_score = mean_average_precision(y_true, y_pred, k=3)

print(f"Mean Reciprocal Rank (MRR): {mrr_score:.4f}")
print(f"Mean Average Precision (MAP)@3: {map_score:.4f}")
