# kNN

In [1]:
# Imports
import pandas as pd
import numpy as np
import ast
from sklearn.neighbors import NearestNeighbors

## Load Data

In [2]:
# Load data
pp_recipes = pd.read_csv("data/PP_recipes_updated.csv")
raw_recipes = pd.read_csv("data/RAW_recipes.csv")

# Merge
recipes = pd.merge(pp_recipes, raw_recipes, how='left', left_on='id', right_on='id')

# Drop unncessary columns
recipes.drop(['name_tokens','ingredient_tokens','steps_tokens',
              'techniques','ingredient_ids','contributor_id',
              'submitted','tags','steps','description', 'ingredients'],
              axis=1,
              inplace=True)

# Format
recipes.insert(0, 'name', recipes.pop('name'))
recipes.insert(4, 'n_ingredients', recipes.pop('n_ingredients'))

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,nutrition,n_steps
0,aromatic basmati rice rice cooker,424415,"['combine', 'drain', 'strain']",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,"[228.2, 2.0, 2.0, 8.0, 9.0, 1.0, 15.0]",6
1,pumpkin pie a la easy,146223,"['bake', 'combine', 'melt', 'pour', 'refrigera...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,"[249.4, 16.0, 92.0, 8.0, 11.0, 27.0, 11.0]",10
2,cheesy tomato soup with potatoes,312329,"['boil', 'crush', 'melt', 'pour', 'simmer']",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,"[351.3, 34.0, 15.0, 50.0, 25.0, 70.0, 8.0]",6
3,mini tacos,74301,"['bake', 'drain', 'simmer']",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,"[79.7, 5.0, 2.0, 11.0, 11.0, 7.0, 2.0]",8
4,rosemary s hanky panky s,76272,"['combine', 'drain', 'fry']",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,"[240.7, 29.0, 9.0, 28.0, 27.0, 42.0, 0.0]",5
...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"['bake', 'smooth']",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,"[304.1, 30.0, 0.0, 11.0, 10.0, 60.0, 8.0]",6
178261,sausage pancake strata,149114,"['bake', 'pour']",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,"[235.9, 26.0, 3.0, 19.0, 35.0, 37.0, 0.0]",14
178262,baked beef patties,34200,"['bake', 'combine', 'pour', 'skillet']",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,"[577.5, 51.0, 26.0, 38.0, 84.0, 83.0, 8.0]",15
178263,good and garlicky sweet and sour pork,30618,"['boil', 'combine', 'drain', 'fry', 'simmer', ...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,"[240.1, 5.0, 96.0, 12.0, 41.0, 5.0, 10.0]",13


## Convert nutrition into individual columns

In [3]:
# Initialize lists
calories = []
total_fat = []
sugar = []
sodium = []
protein = []
saturated_fat = []
carbs = []

# Append nutrition values
def get_nutrition(recipe):
    calories.append(recipe[0])
    total_fat.append(recipe[1])
    sugar.append(recipe[2])
    sodium.append(recipe[3])
    protein.append(recipe[4])
    saturated_fat.append(recipe[5])
    carbs.append(recipe[6])

# Get nutrition for each recipe
for index, row in recipes.iterrows():
    get_nutrition(ast.literal_eval(row['nutrition']))

# Convert into pandas columns
recipes['calories (#)'] = calories
recipes['total_fat (%DV)'] = total_fat
recipes['sugar (%DV)'] = sugar
recipes['sodium (%DV)'] = sodium
recipes['protein (%DV)'] = protein
recipes['saturated_fat (%DV)'] = saturated_fat
recipes['carbs (%DV)'] = carbs

# Drop nutrition
recipes.drop('nutrition',axis=1,inplace=True)

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),sugar (%DV),sodium (%DV),protein (%DV),saturated_fat (%DV),carbs (%DV)
0,aromatic basmati rice rice cooker,424415,"['combine', 'drain', 'strain']",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,6,228.2,2.0,2.0,8.0,9.0,1.0,15.0
1,pumpkin pie a la easy,146223,"['bake', 'combine', 'melt', 'pour', 'refrigera...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,10,249.4,16.0,92.0,8.0,11.0,27.0,11.0
2,cheesy tomato soup with potatoes,312329,"['boil', 'crush', 'melt', 'pour', 'simmer']",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,6,351.3,34.0,15.0,50.0,25.0,70.0,8.0
3,mini tacos,74301,"['bake', 'drain', 'simmer']",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,8,79.7,5.0,2.0,11.0,11.0,7.0,2.0
4,rosemary s hanky panky s,76272,"['combine', 'drain', 'fry']",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,5,240.7,29.0,9.0,28.0,27.0,42.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"['bake', 'smooth']",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,6,304.1,30.0,0.0,11.0,10.0,60.0,8.0
178261,sausage pancake strata,149114,"['bake', 'pour']",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,14,235.9,26.0,3.0,19.0,35.0,37.0,0.0
178262,baked beef patties,34200,"['bake', 'combine', 'pour', 'skillet']",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,15,577.5,51.0,26.0,38.0,84.0,83.0,8.0
178263,good and garlicky sweet and sour pork,30618,"['boil', 'combine', 'drain', 'fry', 'simmer', ...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,13,240.1,5.0,96.0,12.0,41.0,5.0,10.0


## Convert techniques into dummy variables

In [4]:
# Convert to lists
recipes['technique_names'] = recipes['technique_names'].apply(ast.literal_eval)

# Extract unique values
unique_values = set(item for sublist in recipes['technique_names'] for item in sublist)
unique_list = list(unique_values)

# Perform one-hot encoding
for category in unique_list:
    recipes[category] = recipes['technique_names'].apply(lambda x: 1 if category in x else 0)

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),...,pour,slow cook,parboil,distill,thicken,shred,fry,devein,ferment,broil
0,aromatic basmati rice rice cooker,424415,"[combine, drain, strain]",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,6,228.2,2.0,...,0,0,0,0,0,0,0,0,0,0
1,pumpkin pie a la easy,146223,"[bake, combine, melt, pour, refrigerate, smoot...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,10,249.4,16.0,...,1,0,0,0,0,0,0,0,0,0
2,cheesy tomato soup with potatoes,312329,"[boil, crush, melt, pour, simmer]",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,6,351.3,34.0,...,1,0,0,0,0,0,0,0,0,0
3,mini tacos,74301,"[bake, drain, simmer]",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,8,79.7,5.0,...,0,0,0,0,0,0,0,0,0,0
4,rosemary s hanky panky s,76272,"[combine, drain, fry]",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,5,240.7,29.0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"[bake, smooth]",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,6,304.1,30.0,...,0,0,0,0,0,0,0,0,0,0
178261,sausage pancake strata,149114,"[bake, pour]",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,14,235.9,26.0,...,1,0,0,0,0,0,0,0,0,0
178262,baked beef patties,34200,"[bake, combine, pour, skillet]",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,15,577.5,51.0,...,1,0,0,0,0,0,0,0,0,0
178263,good and garlicky sweet and sour pork,30618,"[boil, combine, drain, fry, simmer, skillet, t...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,13,240.1,5.0,...,0,0,0,0,1,0,1,0,0,0


## Model Building

In [44]:
# Drop columns
X = recipes.drop(['name', 'id', 'technique_names', 'ingredient_names'],axis=1)
y = recipes['id']

# Fit model
knn = NearestNeighbors(n_neighbors=10)
knn.fit(X, y)

# Create user input
user_input = np.array([[0,5,20,10,500,20,10,5,10,25,30,10,
                       1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                       0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
                       0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

# Apply knn
distances, indices = knn.kneighbors(user_input)

# Get list of recommended recipe ids
recommended_recipes_ids = list(X.iloc[indices[0]].index)

# Get recommended recipes
recommended_recipes = recipes[recipes['id'].isin(recommended_recipes_ids)]

# Display
recommended_recipes



Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),...,pour,slow cook,parboil,distill,thicken,shred,fry,devein,ferment,broil
8987,foul mud a la alfanoose,133315,"[combine, drain, marinate, refrigerate]",1,10,"['fava bean', 'chickpea', 'tomato', 'red onion...",20,4,308.2,8.0,...,0,0,0,0,0,0,0,0,0,0
37217,poppy seed biscotti,78101,"[bake, combine]",0,10,"['flmy', 'white sugar', 'almond', 'baking powd...",50,8,92.7,3.0,...,0,0,0,0,0,0,0,0,0,0
38694,spicy shrimp avocado salad,177198,"[blend, boil, dice]",2,12,"['firm tomato', 'tomatillo', 'jalapeno pepper'...",25,11,758.4,57.0,...,0,0,0,0,0,0,0,0,0,0
132921,end of the week sausage casserole,48377,"[combine, drain, pour, skillet]",2,9,"['sausage', 'onion', 'green pepper', 'zucchini...",50,7,755.5,39.0,...,1,0,0,0,0,0,0,0,0,0
171398,smoked egg dip spread,33314,"[blend, combine, smooth]",2,10,"['hard-boiled egg', 'mayonnaise', 'liquid smok...",10,5,579.4,67.0,...,0,0,0,0,0,0,0,0,0,0
