In [30]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
import plotly_express as px
import warnings
import collections
warnings.filterwarnings("ignore")
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity



##  <font color='CornflowerBlue'><center><strong>I. <ins>Creating the NLP Model for the Recipe Recommender </ins> </strong><center><font color='blue'></font>


In [31]:
df = pd.read_csv("ALL_RECIPES.csv", index_col=0)
#df2 = pd.read_csv("ALL_RECIPES.csv", index_col=0)

In [32]:
df.columns

Index(['recipe_id', 'recipe', 'url', 'list_ingredients_raw',
       'instructions_raw', 'category_id', 'serves', 'time_prep',
       'nutrients_table', 'diet_id', 'time_cook', 'list_ingredients',
       'list_instructions', 'website_id', 'description', 'serving_size',
       'steps', 'tags', 'search_terms', 'meal', 'ingredients_raw', 'category',
       'instructions', 'meal_id'],
      dtype='object')

In [33]:
df.dropna(subset=['recipe'], inplace=True)
df.reset_index

<bound method DataFrame.reset_index of        recipe_id                                        recipe  \
0            JO1                             Tortilla frittata   
1            JO2                      Stuffed folded flatbread   
2            JO3                                  Island salad   
3            JO4                       Magnolia blossom pickle   
4            JO5                             Tortelli al magro   
...          ...                                           ...   
496339     VN594                SWEET POTATO & WATERCRESS SOUP   
496340     VN595  WHITE MISO SUMMER SOUP WITH WAKAME & SAFFRON   
496341     VN596                             SOBA NOODLE SALAD   
496342     VN597                            SESAME GINGER TOFU   
496343     VN598           SWEET & SOUR WAKAME WITH RICE SALAD   

                                                      url  \
0       https://www.jamieoliver.com/recipes/lunch-reci...   
1       https://www.jamieoliver.com/recipes/lu

In [34]:
df["recipe"] = df["recipe"].str.lower()
df["description"] = df["description"].str.lower()
df["list_ingredients"] = df["list_ingredients"].str.lower()
df['list_instructions'] = df['list_instructions'].str.lower()

df["website_id"].fillna("3")
df.fillna("")

Unnamed: 0,recipe_id,recipe,url,list_ingredients_raw,instructions_raw,category_id,serves,time_prep,nutrients_table,diet_id,...,description,serving_size,steps,tags,search_terms,meal,ingredients_raw,category,instructions,meal_id
0,JO1,tortilla frittata,https://www.jamieoliver.com/recipes/lunch-reci...,1 onion ...,Method\n\n ...,undefined,6,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
1,JO2,stuffed folded flatbread,https://www.jamieoliver.com/recipes/lunch-reci...,700g frozen chargrilled Mediterranean veg ...,Method\n\n ...,undefined,4,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
2,JO3,island salad,https://www.jamieoliver.com/recipes/fruit-reci...,1 cucumber ...,Method\n\n ...,undefined,2,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
3,JO4,magnolia blossom pickle,https://www.jamieoliver.com/recipes/sides-and-...,100 ...,Method\n\nWash the blossom leaves and shake dr...,undefined,r,undefined,['Calories \n\n...,1.0,...,,,,,,,,,,
4,JO5,tortelli al magro,https://www.jamieoliver.com/recipes/pasta-reci...,FOR THE PASTA DOUGH ...,Method\n\nFirst make the pasta dough. Place th...,undefined,error,undefined,error,2.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496339,VN594,sweet potato & watercress soup,https://veganuary.com/recipes/vegan-fish-fingers/,,,5.0,4,15M,,1.0,...,these vegan fish fingers are so yummy and burs...,,,diner,,dinner,,seafood,These vegan fish fingers are so yummy and burs...,3.0
496340,VN595,white miso summer soup with wakame & saffron,https://veganuary.com/recipes/easy-vegan-mozza...,,,0.0,5,10M,,1.0,...,yes! you can make your own vegan cheese at home.,,,diner,,dinner,,,Yes! You can make your own vegan cheese at hom...,3.0
496341,VN596,soba noodle salad,https://veganuary.com/recipes/vegan-spinach-gn...,,,0.0,6,10M,,1.0,...,find out how to make your own vegan spinach gn...,,,diner,,dinner,,,Find out how to make your own Vegan Spinach Gn...,3.0
496342,VN597,sesame ginger tofu,https://veganuary.com/recipes/applewood-vegan-...,,,0.0,3,5M,,1.0,...,makes 3 stuffed peppers,,,diner,,dinner,,,Makes 3 stuffed peppers\nInspired by this reci...,3.0


In [35]:
df.dropna(subset=["recipe_id"], inplace=True)

df["recipe_id"] = df["recipe_id"].astype(str)

In [36]:
def custom_sort_key(x):
    if x[0].isalpha():
        return (0, x)  
    else:
        return (1, x)  

df_sorted = df.iloc[df['recipe_id'].apply(custom_sort_key).argsort()]




In [37]:
df_sorted.shape

(496342, 24)

In [38]:
def combine_features(row):
    combined = ' '.join([
        str(row['recipe']), 
        str(row['list_ingredients']), 
        str(row['list_instructions']), 
        str(row['meal']), 
        str(row['category']), 
        str(row['tags']), 
        str(row['search_terms']),
        str(row['description']) 
    ])
    return combined

df_sorted['combined_features'] = df_sorted.apply(combine_features, axis=1)

In [39]:
df.head()

Unnamed: 0,recipe_id,recipe,url,list_ingredients_raw,instructions_raw,category_id,serves,time_prep,nutrients_table,diet_id,...,description,serving_size,steps,tags,search_terms,meal,ingredients_raw,category,instructions,meal_id
0,JO1,tortilla frittata,https://www.jamieoliver.com/recipes/lunch-reci...,1 onion ...,Method\n\n ...,undefined,6,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
1,JO2,stuffed folded flatbread,https://www.jamieoliver.com/recipes/lunch-reci...,700g frozen chargrilled Mediterranean veg ...,Method\n\n ...,undefined,4,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
2,JO3,island salad,https://www.jamieoliver.com/recipes/fruit-reci...,1 cucumber ...,Method\n\n ...,undefined,2,undefined,['Calories \n\n...,2.0,...,,,,,,,,,,
3,JO4,magnolia blossom pickle,https://www.jamieoliver.com/recipes/sides-and-...,100 ...,Method\n\nWash the blossom leaves and shake dr...,undefined,r,undefined,['Calories \n\n...,1.0,...,,,,,,,,,,
4,JO5,tortelli al magro,https://www.jamieoliver.com/recipes/pasta-reci...,FOR THE PASTA DOUGH ...,Method\n\nFirst make the pasta dough. Place th...,undefined,error,undefined,error,2.0,...,,,,,,,,,,


In [62]:
dfs = df_sorted.iloc[:10000]


In [77]:
dfs.shape
dfs.reset_index(drop=True, inplace=True)


In [78]:
dfs["website_id"].value_counts()

website_id
1.0    573
2.0    521
4.0    285
Name: count, dtype: int64

In [43]:
#cv_char = CountVectorizer(analyzer='char_wb', ngram_range = (2,2))

In [67]:
cv_char = CountVectorizer(analyzer='char_wb', ngram_range = (2,2))

In [68]:
user_recipe = 'tacos' 

In [69]:
list_recipes = list(dfs["recipe"])

In [70]:
len(list_recipes)

10000

In [71]:
list_recipes.append(user_recipe)

In [72]:
count_matrix = cv_char.fit_transform(list_recipes)


In [90]:
cosine_sim = cosine_similarity(count_matrix)
similar_recipes = list(enumerate(cosine_sim[-1]))
sorted_similar_recipes = sorted(similar_recipes,key=lambda x:x[1],reverse=True)[1:]
suggested_ids = [d[0] for d in sorted_similar_recipes[:5]]
dfs.loc[suggested_ids]

Unnamed: 0,recipe_id,recipe,url,list_ingredients_raw,instructions_raw,category_id,serves,time_prep,nutrients_table,diet_id,...,serving_size,steps,tags,search_terms,meal,ingredients_raw,category,instructions,meal_id,combined_features
5786,104284,beef tacos,,"[""1 tablespoon olive oil"",""1/2 cup cho...",,,4,,,,...,1 (305 g),"['Coat a frying pan with 1 Tbsp olive oil.', '...","['60-minutes-or-less', 'time-to-make', 'course...","{'beef', 'dinner', 'low-sodium', 'low-carb'}","['60-minutes-or-less', 'time-to-make', 'course...",,,,,"beef tacos ['olive oil', 'onion', 'ground sirl..."
8942,107343,easy tacos,,"[""1 lb lean ground beef"",""1 yellow ...",,,4,,,,...,1 (134 g),"['Brown beef with onion and garlic, and drain ...","['60-minutes-or-less', 'time-to-make', 'main-i...","{'mexican', 'low-calorie', 'dinner', 'low-carb'}","['60-minutes-or-less', 'time-to-make', 'main-i...",,,,,"easy tacos ['lean ground beef', 'yellow onion'..."
1481,100098,couscous tacos,,"[""1 small onion"",""1 (15 ounce) can di...",,,2,,,,...,1 (419 g),"['Dice onion.', 'Place diced onions, diced tom...","['15-minutes-or-less', 'time-to-make', 'course...","{'dinner', 'vegetarian', 'lunch'}",lunch,,,,,"couscous tacos ['onion', 'diced tomatoes', 'wa..."
3637,102225,tacos in no time,,"[""1/2 lb ground beef"",""1 (8 ounce) can ...",,,2,,,,...,1 (281 g),"['In an 8"" skillet, over medium high heat, coo...","['30-minutes-or-less', 'time-to-make', 'course...","{'breakfast', 'dinner', 'mexican'}",breakfast,,,,,"tacos in no time ['ground beef', 'tomato sauce..."
5117,103639,chicken tacos,,"[""3/4 cup shredded monterey jack cheese or ...",,,6,,,,...,1 (265 g),['Sprinkle 1 Tbs cheese in bottom of each taco...,"['15-minutes-or-less', 'time-to-make', 'course...","{'mexican', 'dinner', 'low-carb', 'chicken'}","['15-minutes-or-less', 'time-to-make', 'course...",,,,,"chicken tacos ['monterey jack cheese', 'taco s..."


In [93]:
cosine_sim = cosine_similarity(count_matrix)
similar_recipes = list(enumerate(cosine_sim[-1]))
sorted_similar_recipes = sorted(similar_recipes,key=lambda x:x[1],reverse=True)[1:]
suggested_ids = [d[0] for d in sorted_similar_recipes[:1]]
dfs.loc[suggested_ids]

Unnamed: 0,recipe_id,recipe,url,list_ingredients_raw,instructions_raw,category_id,serves,time_prep,nutrients_table,diet_id,...,serving_size,steps,tags,search_terms,meal,ingredients_raw,category,instructions,meal_id,combined_features
5786,104284,beef tacos,,"[""1 tablespoon olive oil"",""1/2 cup cho...",,,4,,,,...,1 (305 g),"['Coat a frying pan with 1 Tbsp olive oil.', '...","['60-minutes-or-less', 'time-to-make', 'course...","{'beef', 'dinner', 'low-sodium', 'low-carb'}","['60-minutes-or-less', 'time-to-make', 'course...",,,,,"beef tacos ['olive oil', 'onion', 'ground sirl..."
