In [468]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import pickle

In [469]:
recipes = pd.read_csv('recipes.csv')

In [470]:
recipes.shape

(522517, 28)

In [471]:
recipes.isnull().sum()

RecipeId                           0
Name                               0
AuthorId                           0
AuthorName                         0
CookTime                       82545
PrepTime                           0
TotalTime                          0
DatePublished                      0
Description                        5
Images                             1
RecipeCategory                   751
Keywords                       17237
RecipeIngredientQuantities         3
RecipeIngredientParts              0
AggregatedRating              253223
ReviewCount                   247489
Calories                           0
FatContent                         0
SaturatedFatContent                0
CholesterolContent                 0
SodiumContent                      0
CarbohydrateContent                0
FiberContent                       0
SugarContent                       0
ProteinContent                     0
RecipeServings                182911
RecipeYield                   348071
R

In [472]:
recipes.dropna(inplace=True)

In [473]:
recipes.isnull().sum()

RecipeId                      0
Name                          0
AuthorId                      0
AuthorName                    0
CookTime                      0
PrepTime                      0
TotalTime                     0
DatePublished                 0
Description                   0
Images                        0
RecipeCategory                0
Keywords                      0
RecipeIngredientQuantities    0
RecipeIngredientParts         0
AggregatedRating              0
ReviewCount                   0
Calories                      0
FatContent                    0
SaturatedFatContent           0
CholesterolContent            0
SodiumContent                 0
CarbohydrateContent           0
FiberContent                  0
SugarContent                  0
ProteinContent                0
RecipeServings                0
RecipeYield                   0
RecipeInstructions            0
dtype: int64

In [474]:
recipes.shape

(28014, 28)

In [475]:
recipes = recipes.drop_duplicates(subset=['Name'])

In [476]:
recipes = recipes.reset_index()

In [477]:
recipes['Keywords'] = recipes['Keywords'] + ' ' + recipes['RecipeCategory'] + ' ' + recipes['RecipeIngredientParts'] + ' ' + recipes['Name']

In [478]:
recipes['Keywords'] = recipes['Keywords'].str.translate(str.maketrans('','','(c",)'))

In [479]:
recipes.iloc[0]['Keywords']

"Beans Vegetable Low Cholesterol Weeknight Broil/Grill Oven Soy/Tofu extra firm tofu eggplant zuhini mushrooms soy saue low sodium soy saue olive oil maple syrup honey red wine vinegar lemon juie garli loves mustard powder blak pepper Carina's Tofu-Vegetable Kebabs"

In [480]:
cv = CountVectorizer(max_features=1000, stop_words='english')

In [481]:
vector = cv.fit_transform(recipes['Keywords'].values.astype('U')).toarray()

In [482]:
similar = cosine_similarity(vector)

In [483]:
distance = sorted(list(enumerate(similar[2])), reverse=True, key=lambda vector:vector[1])
for i in distance[0:5]:
    print(recipes.iloc[i[0]].Name)

Carrot Cake
Freeman Allen's Carrot Cake
1940's Best Carrot Cake Recipe
Family Heirloom Carrot Cake
Kathy's Carrot Cake


In [505]:
def recommend(recipes_input):
    index = recipes[recipes['Name']==recipes_input].index[0]
    distance = sorted(list(enumerate(similar[index])), reverse=True, key=lambda vector:vector[1])
    for i in distance[0:5]:
        print(recipes.iloc[i[0]].Name)
        print(recipes.iloc[i[0]].Images)

In [485]:
recipes.head(5)

Unnamed: 0,index,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
0,3,41,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03T14:54:00Z,This dish is best prepared a day in advance to...,...,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"c(""Drain the tofu, carefully squeezing out exc..."
1,5,43,Best Blackbottom Pie,34879,Barefoot Beachcomber,PT2H,PT20M,PT2H20M,1999-08-21T10:35:00Z,Make and share this Best Blackbottom Pie recip...,...,10.9,94.3,267.6,58.0,1.8,42.5,7.0,8.0,1 9-inch pie,"c(""Graham Cracker Crust: In small bowl, combin..."
2,16,54,Carrot Cake,1535,Marg CaymanDesigns,PT50M,PT45M,PT1H35M,1999-09-13T15:20:00Z,This is one of the few recipes my husband ever...,...,4.9,69.8,534.8,67.0,1.6,47.9,5.0,12.0,1 bundt,"c(""Beat together the eggs, oil, and white suga..."
3,26,64,Almond Pound Cake,125579,GrandmaIsCooking,PT1H,PT15M,PT1H15M,1999-08-07T16:33:00Z,Make and share this Almond Pound Cake recipe f...,...,9.0,118.9,159.4,56.0,1.3,29.0,6.9,10.0,1 cake,"c(""Preheat oven to 350 degrees Fahrenheit."", ""..."
4,54,94,Blueberry Buttertarts,1556,Strawberry Girl,PT25M,PT15M,PT40M,1999-09-12T05:46:00Z,Make and share this Blueberry Buttertarts reci...,...,4.7,17.6,125.7,40.2,1.2,11.5,4.2,12.0,12 tarts,"c(""Preheat oven to 375F."", ""Leave shells in fo..."


In [488]:
recommend("Carrot Cake")

Carrot Cake
Freeman Allen's Carrot Cake
1940's Best Carrot Cake Recipe
Family Heirloom Carrot Cake
Kathy's Carrot Cake


In [506]:
recommend("Blueberry Buttertarts")

Blueberry Buttertarts
character(0)
Blueberry Pie
c("https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/24/52/1/BTwby3XUTuaSEL4kDrbq_blueberry%20pie.JPG", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/24/52/1/uKRhI1DGTGSALk8cOBFP_Blueberry%20Pie.jpg", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/24/52/1/fsIqH6UvROYzTIGjc6X5_1506802820188558617535.jpg", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/24/52/1/ps3zGkhXRkaUEPNURRtg_1502050180870248498672.jpg", 
"https://img.sndimg.com/food/image/upload/v1/img/feed/24521/2FZoBcxSvOvu3Lr7nEfw_20170627_195827.jpg", "https://img.sndimg.com/food/image/upload/v1/img/feed/24521/QwZxtIksS6e0WTJ9M2yK_Pie%20in%20the%20Sky.png", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/24/52/1/nhPUTJBQwiLTfGMc45DQ_image.jpeg", "h

In [493]:
def get_images(recipes_input):
    index = recipes[recipes['Name']==recipes_input].index[0]
    distance = sorted(list(enumerate(similar[index])), reverse=True, key=lambda vector:vector[1])
    for i in distance[0:1]:
        print(recipes.iloc[i[0]].Images)

In [494]:
get_images("Carrot Cake")

c("https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/54/picQ2X4D8.jpg", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/54/pic3oloIV.jpg", "https://img.sndimg.com/food/image/upload/w_555,h_416,c_fit,fl_progressive,q_95/v1/img/recipes/54/picf0dw0o.jpg")


In [489]:
pickle.dump(recipes, open('recipes.pkl', 'wb'))
pickle.dump(similar, open('similar.pkl', 'wb'))