In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import re
from preprocessing import *
from white_list import *

In [2]:
# Read the raw recipe table, then filter it by the "beverages" tag.
# NOTE(review): load_data and filter_tags come from the star imports above;
# whether filter_tags keeps or drops the listed tags is not visible here.
df_all = load_data("RAW_recipes.csv")

# Renumber the rows 0..N-1 without inplace=True, so the object returned by
# filter_tags is never mutated and this cell can be re-run safely.
df_food = filter_tags(df_all, ["beverages"]).reset_index(drop=True)

In [3]:
# Allowed ingredient names, derived from the filtered recipes.
ingredients_white_list = get_white_list(df_food)

# Map each white-listed name to its position in the list; that position is
# the integer code used when ingredient lists are encoded.
ingredients_dict = {
    name: code
    for code, name in enumerate(ingredients_white_list)
}


In [6]:
def ingredient_to_int(ingredients_dict, ingredients):
    """Translate ingredient names into a sorted list of integer codes.

    Parameters
    ----------
    ingredients_dict : dict
        Mapping from ingredient name to integer code. It is only read;
        unknown names are never inserted into it.
    ingredients : iterable
        Ingredient names to encode.

    Returns
    -------
    list
        One code per input name, in ascending order. A name that is not a
        key of ``ingredients_dict`` is encoded as ``-1``.
    """
    # dict.get, unlike dict.setdefault, does not add the missing key, so a
    # lookup can no longer grow the shared vocabulary as a side effect.
    return sorted(ingredients_dict.get(name, -1) for name in ingredients)

def encode_ingredients_df(df, ingredients_dict):
    """Add an 'ingredients_encoded' column to ``df`` and return ``df``.

    Each value of the 'ingredients' column is passed to ingredient_to_int
    together with ``ingredients_dict``; the results are stored in the
    'ingredients_encoded' column. The frame is modified in place and the
    same object is returned.
    """
    def _encode_row(names):
        # Encode the ingredient list of one recipe.
        return ingredient_to_int(ingredients_dict, names)

    encoded_column = df['ingredients'].apply(_encode_row)
    df['ingredients_encoded'] = encoded_column
    return df

def encode_ingredients_arr(arr, ingredients_dict):
    """Encode a single collection of ingredient names.

    Thin wrapper around ingredient_to_int with the argument order swapped so
    that it mirrors encode_ingredients_df; returns whatever ingredient_to_int
    returns for ``arr``.
    """
    return ingredient_to_int(ingredients_dict, arr)

""" def levenstein_dist(arr1,arr2,score):

    if not len(arr1) or not len(arr2):
        # one of them is empty
        return score
    
    if arr1[0] == arr2[0]:
        return levenstein_dist(arr1[1:], arr2[1:], score)
    
    else:
        return min(levenstein_dist(arr1[1:],    arr2,       score+1),
                   levenstein_dist(arr1,        arr2[1:],   score+1),
                   levenstein_dist(arr1[1:],    arr2[1:],   score+1)) """

def levenstein_dist(arr1, arr2, score):
    """Count the elements two sequences do not share, added to ``score``.

    Both sequences are walked from the front in one merge-style pass:

    * equal heads are consumed together at no cost;
    * otherwise the sequence whose head is smaller drops that head at a
      cost of 1 (when the heads differ and ``arr1``'s is not smaller,
      ``arr2``'s head is dropped);
    * once one sequence is exhausted, every element left in the other
      costs 1.

    For inputs sorted in ascending order this is the size of their multiset
    symmetric difference. There is no substitution step, so despite the
    name this is not the classic Levenshtein edit distance.

    Parameters
    ----------
    arr1, arr2 : sequence
        Sequences supporting ``len()`` and integer indexing, whose items can
        be compared with ``==`` and ``<``.
    score : int
        Starting value the cost is added to (callers pass 0).

    Returns
    -------
    int
        ``score`` plus the number of unmatched elements.
    """
    len1, len2 = len(arr1), len(arr2)
    pos1 = pos2 = 0

    # Iterative two-pointer walk: same decisions as a head-by-head recursion,
    # but without slicing copies or any dependence on the recursion limit.
    while pos1 < len1 and pos2 < len2:
        head1, head2 = arr1[pos1], arr2[pos2]
        if head1 == head2:
            pos1 += 1
            pos2 += 1
        elif head1 < head2:
            pos1 += 1
            score += 1
        else:
            pos2 += 1
            score += 1

    # Whatever remains on either side has nothing left to match against.
    return score + (len1 - pos1) + (len2 - pos2)
    
def get_recipe_levenstein_dists(df, ingredients):
    """Compute the distance from ``ingredients`` to every row of ``df``.

    Applies levenstein_dist (starting score 0) to each value of the
    'ingredients_encoded' column, with the row's value as the first sequence
    and ``ingredients`` as the second. Returns the result of that apply,
    which shares the index of ``df``.
    """
    def _dist_to_query(recipe_codes):
        # Distance between one recipe's codes and the query codes.
        return levenstein_dist(recipe_codes, ingredients, 0)

    return df['ingredients_encoded'].apply(_dist_to_query)

def get_recipes_levenstein(df, user_ingredients, ingredients_dict, n):
    """Return the ``n`` rows of ``df`` closest to ``user_ingredients``.

    Parameters
    ----------
    df : pandas.DataFrame
        Recipe table. If it has no 'ingredients_encoded' column, one is
        added via encode_ingredients_df, which writes the column onto the
        frame that was passed in.
    user_ingredients : iterable
        Ingredient names to search for.
    ingredients_dict : dict
        Mapping from ingredient name to integer code.
    n : int
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``n`` rows of ``df``, ordered by ascending distance as
        computed by get_recipe_levenstein_dists.
    """
    if "ingredients_encoded" not in df.columns:
        df = encode_ingredients_df(df, ingredients_dict)

    user_ingredients_encoded = encode_ingredients_arr(user_ingredients, ingredients_dict)

    dists = get_recipe_levenstein_dists(df, user_ingredients_encoded)

    # `dists` carries the index *labels* of df, so the rows must be selected
    # by label (.loc). Positional .iloc only happened to work for a frame
    # whose index is exactly 0..N-1.
    top_recipes_ids = dists.sort_values().index[:n]
    return df.loc[top_recipes_ids]

In [7]:
# Encode the ingredient list of every recipe once, before querying.
df_food = encode_ingredients_df(df_food, ingredients_dict)

# Ingredients to search for.
user_ingredients = np.array(['garlic', 'onion', 'onions', 'tomatoes'])

# Fetch the 10 recipes with the smallest distance to the query.
top_recipes = get_recipes_levenstein(
    df_food,
    user_ingredients,
    ingredients_dict,
    10,
)

In [8]:
# Display the rows returned by get_recipes_levenstein.
top_recipes

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,ingredients_encoded
22171,big batch cooked ground beef,94535,35,1533,2004-06-28,"[60-minutes-or-less, time-to-make, course, mai...","[410.4, 41.0, 7.0, 5.0, 68.0, 53.0, 1.0]",9,"['cook ground beef in stockpot or dutch oven ,...",freeze in 1 to 2 cup portion containers or in ...,"[ground beef, onions, garlic]",3,"[2212, 2412, 3498]"
178312,simple roasted tomato and garlic sauce,179148,60,314719,2006-07-25,"[60-minutes-or-less, time-to-make, course, mai...","[43.4, 1.0, 13.0, 0.0, 3.0, 0.0, 2.0]",12,"['slice tomatoes in half', 'slice top 1 / 2 in...",so simple and good for you! perfect sauce for...,"[tomatoes, garlic, olive oil]",3,"[2212, 3483, 5002]"
21551,best marinara sauce,10878,10,10175,2001-08-11,"[15-minutes-or-less, time-to-make, course, mai...","[78.5, 5.0, 25.0, 0.0, 4.0, 2.0, 3.0]",6,"['saute onion in oil until translucent', 'add ...",so many people have posted crappy spaghetti sa...,"[tomatoes, onion, garlic, fresh basil leaf, ex...",5,"[1742, 1917, 2212, 3486, 5002]"
74318,easy roasted garlic salsa fresca,42217,15,56645,2002-10-03,"[15-minutes-or-less, time-to-make, course, mai...","[65.7, 0.0, 28.0, 7.0, 5.0, 0.0, 4.0]",8,"['place garlic cloves in a small , heavy botto...",this is adapted from rick bayless's mexican co...,"[tomatoes, onion, garlic, cilantro, salt]",5,"[1040, 2212, 3486, 4324, 5002]"
1190,3 ingredient fabulous salsa delicious fres...,468765,45,2076275,2011-11-22,"[60-minutes-or-less, time-to-make, course, cui...","[38.1, 0.0, 13.0, 0.0, 3.0, 0.0, 2.0]",9,"['core the tomatoes', 'dig the cloves from the...","born and raised in texas, i am a snob when it ...","[tomatoes, garlic, jalapenos]",3,"[2212, 2776, 5002]"
157363,potsie s famous fried bolony sandwich,483916,6,183964,2012-07-20,"[15-minutes-or-less, time-to-make, course, pre...","[152.2, 3.0, 13.0, 17.0, 9.0, 2.0, 9.0]",4,"[""it ain't rocket science or quantum physics"",...",i was raised on fried bolony sandwiches. i rem...,"[white bread, yellow mustard, onion, tomatoes]",4,"[3486, 5002, 5290, 5457]"
104525,how to peel a head of garlic in less than 10 s...,475026,1,527607,2012-02-26,"[15-minutes-or-less, time-to-make, course, pre...","[89.4, 0.0, 2.0, 0.0, 7.0, 0.0, 6.0]",4,['smash the head of garlic with the heel of yo...,here is an unbelievable way to peel a whole he...,"[garlic, water]",2,"[2212, 5253]"
141109,onion stuffed hamburgers,301148,30,592575,2008-04-29,"[30-minutes-or-less, time-to-make, course, mai...","[340.5, 34.0, 6.0, 4.0, 56.0, 44.0, 1.0]",5,"['split the burger into 8 parts', 'make into b...",hamburger pattie surrounding a slab of uncooke...,"[ground beef, onions]",2,"[2412, 3498]"
112020,kikkoman chops,78531,25,66919,2003-12-10,"[30-minutes-or-less, time-to-make, course, mai...","[255.3, 21.0, 5.0, 77.0, 52.0, 24.0, 1.0]",6,"['in a large bowl , place the chops in and add...",this is another quick and easy recipe to make....,"[pork chops, garlic, onion, kikkoman soy sauce]",4,"[2212, 2835, 3486, 3832]"
186193,spicy cheese dip microwave,391223,22,383346,2009-09-21,"[30-minutes-or-less, time-to-make, course, mai...","[791.5, 84.0, 96.0, 172.0, 84.0, 180.0, 10.0]",4,"['mix cheese , tomato and onion in a bowl', 'a...",this is from qu'est-ce qu'on mange volume 3. ...,"[velveeta cheese, tomatoes, onion, salsa]",4,"[3486, 4321, 5002, 5221]"
