In [6]:
import numpy as np
import pandas as pd
import ast
import sys
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

from sklearn.feature_extraction.text import CountVectorizer
#Import TfIdfVectorizer (scikit-learn)
from sklearn.feature_extraction.text import TfidfVectorizer

In [7]:
# import model functions
sys.path.insert(0,'/home/david/Projects/Hackathons/DeveloperWeek2022/RecipeSuggestions/app/')
import model

# Food-com

## Load recipe data

In [3]:
raw_recipes = model.load_recipe_data()
raw_recipes.head(5)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...",8


## Reduce columns of dataframe

In [4]:
selection = ["name", "id", "minutes", "ingredients"]
df = raw_recipes[selection]
df.head(5)

Unnamed: 0,name,id,minutes,ingredients
0,arriba baked winter squash mexican style,137739,55,"[winter squash, mexican seasoning, mixed spice..."
1,a bit different breakfast pizza,31490,30,"[prepared pizza crust, sausage patty, eggs, mi..."
2,all in the kitchen chili,112140,130,"[ground beef, yellow onions, diced tomatoes, t..."
3,alouette potatoes,59389,45,"[spreadable cheese with garlic and herbs, new ..."
4,amish tomato ketchup for canning,44061,190,"[tomato juice, apple cider vinegar, sugar, sal..."


## Get random subset of data

In [6]:
if len(df) > 30000:
    df = df.sample(30000)

## Add dummy recipe

In [7]:
my_ingredients = ['Tomato', 'Onion', 'Garlic']
# add dummy recipe built from input ingredients list to dataframe
df, idx_dummy = model.add_dummy_recipe(df=df, ingredients=my_ingredients)
df.tail(3)

Unnamed: 0,name,id,minutes,ingredients
29998,sherri s jalapeno cheese squares,320944,55,jalapenos cheddar cheese eggs
29999,turkey scaloppine with leeks currants and mar...,375367,60,leeks low sodium chicken broth sugar bay leaf ...
30000,dummy,537242,30,Tomato Onion Garlic


In [8]:
len(df)

30001

## Calculate similarities

In [9]:
count_vec = CountVectorizer()
matrix = count_vec.fit_transform(df['ingredients'])

In [10]:
matrix.shape

(30001, 2798)

In [11]:
# Compute the cosine similarity matrix
cosine_sim = linear_kernel(matrix, matrix)

In [12]:
# get reverse mapping of indices
indices = pd.Series(df.index, index=df['ingredients']).drop_duplicates()

In [13]:
# get pairwise similarity scores and sort recipes based on them
sim_scores = list(enumerate(cosine_sim[idx_dummy]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

In [14]:
# select top 10 recommendations 
sim_scores = sim_scores[1:11] # first one is dummy recipe itself
sim_scores

[(20882, 6.0),
 (23175, 6.0),
 (659, 5.0),
 (2007, 5.0),
 (2120, 5.0),
 (3195, 5.0),
 (5104, 5.0),
 (5199, 5.0),
 (8746, 5.0),
 (9035, 5.0)]

In [15]:
# get the recipe indices
recipe_indices = [i[0] for i in sim_scores]
recipe_indices

[20882, 23175, 659, 2007, 2120, 3195, 5104, 5199, 8746, 9035]

In [16]:
recommendations_df = df.loc[recipe_indices]
recommendations_df

Unnamed: 0,name,id,minutes,ingredients
20882,nikki s spaghetti sauce,371582,55,ground beef onion salt and pepper tomato sauce...
23175,mihc s black bean enchilada bake,363590,30,tomato sauce tomato juice water ground cumin s...
659,turkey meatball pasta bake,325393,120,vegetable oil onion garlic cloves crushed toma...
2007,roasted vegetable lasagna vegetarian,244233,240,olive oil onion minced garlic cloves green pep...
2120,sunken meat burrito,16238,180,flour tortillas dried onion flakes colby chees...
3195,real fajitas a dissertation on fajitas,222092,1500,skirt steaks flour tortillas onions bell peppe...
5104,thanksgiving leftovers turkey shepherd s pie,146119,60,unsalted butter olive oil yellow onion carrots...
5199,easy spaghetti meat sauce,491594,75,lean ground beef onion garlic cloves sliced mu...
8746,simple and easy hot dog sauce,380048,40,tomato sauce tomato paste water garlic powder ...
9035,great low cal low fat chili,111513,75,ground chicken purple onion celery & leaves fr...


In [21]:
'nikki s spaghetti sauce' in recommendations_df['name'].values

True

In [26]:
newdf = recommendations_df.drop(recommendations_df[recommendations_df['name']=='nikki s spaghetti sauce'].index)
newdf

Unnamed: 0,name,id,minutes,ingredients
23175,mihc s black bean enchilada bake,363590,30,tomato sauce tomato juice water ground cumin s...
659,turkey meatball pasta bake,325393,120,vegetable oil onion garlic cloves crushed toma...
2007,roasted vegetable lasagna vegetarian,244233,240,olive oil onion minced garlic cloves green pep...
2120,sunken meat burrito,16238,180,flour tortillas dried onion flakes colby chees...
3195,real fajitas a dissertation on fajitas,222092,1500,skirt steaks flour tortillas onions bell peppe...
5104,thanksgiving leftovers turkey shepherd s pie,146119,60,unsalted butter olive oil yellow onion carrots...
5199,easy spaghetti meat sauce,491594,75,lean ground beef onion garlic cloves sliced mu...
8746,simple and easy hot dog sauce,380048,40,tomato sauce tomato paste water garlic powder ...
9035,great low cal low fat chili,111513,75,ground chicken purple onion celery & leaves fr...


In [None]:
recommendations_df.drop()

# Salad recipes

## Load preprocessed dataframe

In [8]:
df = pd.read_pickle("../data/Salad_Recipes.pkl")
df.head()

Unnamed: 0,category,cooking_method,cuisine,image,ingredients,prep_time,recipe_name,serves,tags,parsed_ingredients
0,,['Set the racks in the middle and upper thirds...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",20 minutes,Mediterranean Sea Bass,4 servings,"Dairy Free, Gluten Free, Keto Recipes, Kid Fri...",onion garlic tomato wine kalamata chilean bass...
1,,['Place the eggs in the air fryer basket and c...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",15 minutes,Air Fryer Hard Boiled Eggs,4 eggs,"Air Fryer Recipes, Dairy Free, Gluten Free, Ke...",egg bagel
2,,"['Air Fryer directions:', 'Preheat air fryer t...",['American'],https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",5 minutes,Air Fryer Basil-Parmesan Salmon,4 servings,"Air Fryer Recipes, Gluten Free, Keto Recipes, ...",salmon lemon mayonnaise like sir parmesan cheese
3,,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",['American'],https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,15 minutes,Everything Parmesan Crisps,4 servings,"Gluten Free, Keto Recipes, Kid Friendly, Low C...",parmesan pre using hole box sesame minced onio...
4,,['Cook potatoes in a large pot of salted water...,['American'],https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,10 minutes,Potato and Green Bean Salad,12 servings,"Dairy Free, Gluten Free, Kid Friendly, Vegetar...",potato bean bean scallion


## Load preparations steps

In [16]:
steps = ast.literal_eval(df.iloc[5]['cooking_method'])
i = 1
for step in steps:
    print("Step ", i, ": \n", step)
    i+=1

Step  1 : 
 To Make the soup:
Step  2 : 
 Heat 1 tablespoon of oil in a large pot over medium-high heat.
Step  3 : 
 When it shimmers, add the onions, chopped fennel bulb, and 14 teaspoon of salt, and cook, stirring occasionally, until the onions just begin to brown, 3 to 5 minutes.
Step  4 : 
 Stir in the kale and cook, stirring occasionally, until it wilts, 3 to 4 minutes. If any browned bits stick to the bottom of the pot while cooking, add 1 tablespoon water at a time to loosen them up.
Step  5 : 
 Pour in the vegetable broth and bring to a boil, then reduce the heat to low and cover.
Step  6 : 
 Simmer, covered, for 10 minutes. Remove the pot from the heat.
Step  7 : 
 Immediately stir in two-thirds of the minced garlic, then let the soup cool, uncovered, for about 10 minutes.
Step  8 : 
 Meanwhile, make the gremolata:
Step  9 : 
 Heat the remaining 1 tablespoon of oil in a medium skillet over medium heat.
Step  10 : 
 When it shimmers, stir in the hemp seeds, remaining 14 teaspoo

## Exclude recipes with far more ingredients than user input

In [19]:
df.iloc[2]['parsed_ingredients']

'salmon lemon mayonnaise like sir parmesan cheese'