# kNN

In [1]:
# Imports
import pandas as pd
import numpy as np
import ast
from sklearn.neighbors import NearestNeighbors
from sklearn.discriminant_analysis import StandardScaler

## Load Data

In [2]:
# Load data
pp_recipes = pd.read_csv("data/PP_recipes_updated.csv")
raw_recipes = pd.read_csv("data/RAW_recipes.csv")

# Merge
recipes = pd.merge(pp_recipes, raw_recipes, how='left', left_on='id', right_on='id')

# Drop unncessary columns
recipes.drop(['name_tokens','ingredient_tokens','steps_tokens',
              'techniques','ingredient_ids','contributor_id',
              'submitted','tags','steps','description', 'ingredients'],
              axis=1,
              inplace=True)

# Format
recipes.insert(0, 'name', recipes.pop('name'))
recipes.insert(4, 'n_ingredients', recipes.pop('n_ingredients'))

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,nutrition,n_steps
0,aromatic basmati rice rice cooker,424415,"['combine', 'drain', 'strain']",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,"[228.2, 2.0, 2.0, 8.0, 9.0, 1.0, 15.0]",6
1,pumpkin pie a la easy,146223,"['bake', 'combine', 'melt', 'pour', 'refrigera...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,"[249.4, 16.0, 92.0, 8.0, 11.0, 27.0, 11.0]",10
2,cheesy tomato soup with potatoes,312329,"['boil', 'crush', 'melt', 'pour', 'simmer']",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,"[351.3, 34.0, 15.0, 50.0, 25.0, 70.0, 8.0]",6
3,mini tacos,74301,"['bake', 'drain', 'simmer']",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,"[79.7, 5.0, 2.0, 11.0, 11.0, 7.0, 2.0]",8
4,rosemary s hanky panky s,76272,"['combine', 'drain', 'fry']",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,"[240.7, 29.0, 9.0, 28.0, 27.0, 42.0, 0.0]",5
...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"['bake', 'smooth']",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,"[304.1, 30.0, 0.0, 11.0, 10.0, 60.0, 8.0]",6
178261,sausage pancake strata,149114,"['bake', 'pour']",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,"[235.9, 26.0, 3.0, 19.0, 35.0, 37.0, 0.0]",14
178262,baked beef patties,34200,"['bake', 'combine', 'pour', 'skillet']",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,"[577.5, 51.0, 26.0, 38.0, 84.0, 83.0, 8.0]",15
178263,good and garlicky sweet and sour pork,30618,"['boil', 'combine', 'drain', 'fry', 'simmer', ...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,"[240.1, 5.0, 96.0, 12.0, 41.0, 5.0, 10.0]",13


## Convert nutrition into individual columns

In [3]:
# Initialize lists
calories = []
total_fat = []
sugar = []
sodium = []
protein = []
saturated_fat = []
carbs = []

# Append nutrition values
def get_nutrition(recipe):
    calories.append(recipe[0])
    total_fat.append(recipe[1])
    sugar.append(recipe[2])
    sodium.append(recipe[3])
    protein.append(recipe[4])
    saturated_fat.append(recipe[5])
    carbs.append(recipe[6])

# Get nutrition for each recipe
for index, row in recipes.iterrows():
    get_nutrition(ast.literal_eval(row['nutrition']))

# Convert into pandas columns
recipes['calories (#)'] = calories
recipes['total_fat (%DV)'] = total_fat
recipes['sugar (%DV)'] = sugar
recipes['sodium (%DV)'] = sodium
recipes['protein (%DV)'] = protein
recipes['saturated_fat (%DV)'] = saturated_fat
recipes['carbs (%DV)'] = carbs

# Drop nutrition
recipes.drop('nutrition',axis=1,inplace=True)

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),sugar (%DV),sodium (%DV),protein (%DV),saturated_fat (%DV),carbs (%DV)
0,aromatic basmati rice rice cooker,424415,"['combine', 'drain', 'strain']",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,6,228.2,2.0,2.0,8.0,9.0,1.0,15.0
1,pumpkin pie a la easy,146223,"['bake', 'combine', 'melt', 'pour', 'refrigera...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,10,249.4,16.0,92.0,8.0,11.0,27.0,11.0
2,cheesy tomato soup with potatoes,312329,"['boil', 'crush', 'melt', 'pour', 'simmer']",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,6,351.3,34.0,15.0,50.0,25.0,70.0,8.0
3,mini tacos,74301,"['bake', 'drain', 'simmer']",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,8,79.7,5.0,2.0,11.0,11.0,7.0,2.0
4,rosemary s hanky panky s,76272,"['combine', 'drain', 'fry']",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,5,240.7,29.0,9.0,28.0,27.0,42.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"['bake', 'smooth']",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,6,304.1,30.0,0.0,11.0,10.0,60.0,8.0
178261,sausage pancake strata,149114,"['bake', 'pour']",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,14,235.9,26.0,3.0,19.0,35.0,37.0,0.0
178262,baked beef patties,34200,"['bake', 'combine', 'pour', 'skillet']",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,15,577.5,51.0,26.0,38.0,84.0,83.0,8.0
178263,good and garlicky sweet and sour pork,30618,"['boil', 'combine', 'drain', 'fry', 'simmer', ...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,13,240.1,5.0,96.0,12.0,41.0,5.0,10.0


## Convert techniques into dummy variables

In [4]:
# Convert to lists
recipes['technique_names'] = recipes['technique_names'].apply(ast.literal_eval)

# Extract unique values
unique_values = set(item for sublist in recipes['technique_names'] for item in sublist)
unique_list = list(unique_values)

# Perform one-hot encoding
for category in unique_list:
    recipes[category] = recipes['technique_names'].apply(lambda x: 1 if category in x else 0)

# Display
recipes

Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),...,brine,barbecue,smoke,blend,griddle,distill,toss,broil,steam,deglaze
0,aromatic basmati rice rice cooker,424415,"[combine, drain, strain]",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,6,228.2,2.0,...,0,0,0,0,0,0,0,0,0,0
1,pumpkin pie a la easy,146223,"[bake, combine, melt, pour, refrigerate, smoot...",0,12,"['flmy', 'oat', 'brown sugar', 'pecan', 'butte...",55,10,249.4,16.0,...,0,0,0,0,0,0,0,0,0,0
2,cheesy tomato soup with potatoes,312329,"[boil, crush, melt, pour, simmer]",1,15,"['chicken broth', 'water', 'salt', 'black pepp...",25,6,351.3,34.0,...,0,0,0,0,0,0,0,0,0,0
3,mini tacos,74301,"[bake, drain, simmer]",0,8,"['wonton wrapper', 'hamburger', 'taco seasonin...",15,8,79.7,5.0,...,0,0,0,0,0,0,0,0,0,0
4,rosemary s hanky panky s,76272,"[combine, drain, fry]",0,4,"['ground beef', 'sausage', 'velveeta cheese', ...",20,5,240.7,29.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178260,sugar free snickerdoodles,323143,"[bake, smooth]",1,7,"['butter', 'artificial sweetener', 'egg', 'flm...",23,6,304.1,30.0,...,0,0,0,0,0,0,0,0,0,0
178261,sausage pancake strata,149114,"[bake, pour]",0,7,"['egg', 'milk', 'cheddar', 'salt', 'sausage', ...",70,14,235.9,26.0,...,0,0,0,0,0,0,0,0,0,0
178262,baked beef patties,34200,"[bake, combine, pour, skillet]",2,14,"['dried thyme', 'water', 'garlic salt', 'dried...",55,15,577.5,51.0,...,0,0,0,0,0,0,0,0,0,0
178263,good and garlicky sweet and sour pork,30618,"[boil, combine, drain, fry, simmer, skillet, t...",0,12,"['pork tenderloin', 'fresh garlic', 'pineapple...",40,13,240.1,5.0,...,0,0,0,0,0,0,0,0,0,0


## Convert ingredients into dummy variables

In [None]:
# Convert to lists
recipes['ingredient_names'] = recipes['ingredient_names'].apply(ast.literal_eval)

# Extract unique values
ingredient_map = pd.read_csv("data/ingredient_map.csv")
unique_list = list(set(ingredient_map['ingredient_names']))

# Perform one-hot encoding
for category in unique_list:
    recipes[category] = recipes['ingredient_names'].apply(lambda x: 1 if category in x else 0)

## Model Building

In [6]:
# Drop columns
X = recipes.drop(['name', 'id', 'technique_names', 'ingredient_names'],axis=1)

# Scaling the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X.values)

# Fit model
knn = NearestNeighbors(n_neighbors=10)
knn.fit(X_scaled)

# Find neighbors for "aromatic basmati rice rice cooker"
user_input = recipes.drop(['name', 'id', 'technique_names', 'ingredient_names'],axis=1).iloc[0].values.reshape(1,-1)
user_input_scaled = scaler.transform(user_input)

# Apply knn
distances, indices = knn.kneighbors(user_input_scaled)

# Map indices to original DataFrame
recommended_recipes = recipes.iloc[indices[0]]

# Display
print("Input: Recipe for Aromatic Basmati Rice Rice Cooker")
recommended_recipes

Input: Recipe for Aromatic Basmati Rice Rice Cooker


Unnamed: 0,name,id,technique_names,calorie_level,n_ingredients,ingredient_names,minutes,n_steps,calories (#),total_fat (%DV),...,brine,barbecue,smoke,blend,griddle,distill,toss,broil,steam,deglaze
0,aromatic basmati rice rice cooker,424415,"[combine, drain, strain]",0,5,"['basmati rice', 'water', 'salt', 'cinnamon st...",61,6,228.2,2.0,...,0,0,0,0,0,0,0,0,0,0
38761,easy horseradish mustard sauce,71038,"[combine, drain, strain]",0,4,"['horseradish', 'dijon mustard', 'sour cream',...",2,3,42.9,6.0,...,0,0,0,0,0,0,0,0,0,0
2558,ultimate tuna salad sandwiches,282672,"[combine, drain, strain]",1,9,"['tuna in water', 'braggs liquid amino', 'mayo...",10,6,297.6,9.0,...,0,0,0,0,0,0,0,0,0,0
33056,cucumber dill salad with feta cheese,171510,"[drain, strain]",0,8,"['english cucumber', 'salt', 'scallion', 'oliv...",25,9,119.1,14.0,...,0,0,0,0,0,0,0,0,0,0
55977,danablu tomatoes,427642,"[combine, drain, strain]",0,7,"['tomato', 'golden delicious apple', 'green ol...",20,16,148.8,14.0,...,0,0,0,0,0,0,0,0,0,0
34410,ruby red rocket cocktail,286409,"[combine, strain]",0,5,"['vodka', 'triple sec', 'ruby red grapefruit j...",5,6,143.5,0.0,...,0,0,0,0,0,0,0,0,0,0
108079,margarita 1 to 1,225979,"[combine, strain]",0,5,"['lime wedge', 'kosher salt', 'tequila', 'oran...",5,6,10.3,0.0,...,0,0,0,0,0,0,0,0,0,0
134564,frappuccino the way i do it starbucks copycat,256089,"[combine, strain]",0,5,"['instant coffee granule', 'hot cocoa mix', 'h...",3,7,224.7,2.0,...,0,0,0,0,0,0,0,0,0,0
165867,meyer lemon drop martini,347238,"[combine, strain]",0,5,"['water', 'sugar', 'lemon, zest of', 'lemons, ...",10,7,169.4,0.0,...,0,0,0,0,0,0,0,0,0,0
152998,brazilian bloom,302917,"[combine, strain]",0,5,"['lime wedge', 'sugar', 'dried hibiscus flower...",3,7,166.2,0.0,...,0,0,0,0,0,0,0,0,0,0
