# Epicurious Data Prep
## This notebook contains EDA for the Epicurious data 
### It also contains functions to prepare the data for word vectorization

In [1]:
import json
import csv
import re
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import string
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
from sklearn.feature_extraction.text import TfidfVectorizer

## Using some stopwords from https://github.com/AlludedCrabb/sound-tasty

In [2]:
stopwords_loc = "../write_data/food_stopwords.csv"
with open(stopwords_loc, "r") as myfile:
    reader = csv.reader(myfile)
    food_stopwords = [col for row in reader for col in row]

stopwords_list = stopwords.words('english') + list(string.punctuation) + food_stopwords
lemmatizer = WordNetLemmatizer()

In [3]:
stopwords_list

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

# Define functions to use

In [4]:
def cuisine_namer(text):
    if text == 'Central American/Caribbean':
        return 'Caribbean'
    elif text == 'Jewish':
        return 'Kosher'
    elif text == 'Eastern European/Russian':
        return 'Eastern European'
    elif text in ['Spanish/Portuguese', 'Greek']:
        return 'Mediterranean'
    elif text == 'Central/South American':
        return 'Latin American'
    elif text == 'Sushi':
        return 'Japanese'
    elif text == 'Southern Italian':
        return 'Italian'
    elif text in ['Southern', 'Tex-Mex']:
        return 'American'
    elif text in ['Southeast Asian', 'Korean']:
        return 'Asian'
    else:
        return text

In [5]:
filename = "../raw_data/recipes-en-201706/epicurious-recipes_m2.json"

In [6]:
with open(filename, 'r') as f:
    datastore = json.load(f)

In [7]:
def load_data(filepath, test_size=0.1, random_state=10):
    """ This function uses a filepath, test_size, and random_state
    to load the Epicurious JSON into a dataframe and then split into 
    train/test sets."""
    with open(filepath, 'r') as f:
        datastore = json.load(f)
    datastore_df = pd.DataFrame(datastore)
    X_train, X_test = train_test_split(datastore_df,
                                       test_size=test_size,
                                       random_state=random_state)
    return X_train, X_test

In [8]:
def prep_data(X):
    """ This function takes a dataframe X, drops columns that will not be used,
    expands the hierarchical column into the dataframe, renames the columns
    to be more human-readable, and drops one column created during dataframe
    expansion"""
    X.drop(['pubDate', 'author', 'type', 'aggregateRating', 'reviewsCount', 
            'willMakeAgainPct', 'dateCrawled'], 
           axis=1,
           inplace=True)
    
    concat = pd.concat([X.drop(['tag'], axis=1), 
                        X['tag'].apply(pd.Series)], 
                       axis=1)
    concat.drop([0, 'photosBadgeAltText', 'photosBadgeFileName', 'photosBadgeID',
                 'photosBadgeRelatedUri'], 
                axis=1, 
                inplace=True)
    
    cols = ['id', 'description', 'title', 'url', 'photo_data', 'ingredients', 
            'steps', 'category', 'name', 'remove']
    
    concat.columns = cols
    
    concat.drop('remove', axis=1, inplace=True)
    
    cuisine_only = concat[concat['category'] == 'cuisine']
    cuisine_only.dropna(axis=0, inplace=True)
    cuisine_only['imputed_label'] = cuisine_only['name'].apply(cuisine_namer)
    
    return cuisine_only

In [9]:
def fit_transform_tfidf_matrix(X_df, stopwords_list):
    tfidf = sklearn.feature_extraction.text.TfidfVectorizer(stop_words=stopwords_list,
                                                            min_df=2,
                                                            token_pattern=r'(?u)\b[a-zA-Z]{2,}\b', 
                                                            preprocessor=lemmatizer.lemmatize, 
                                                            )
    
    temp = X_df['ingredients'].apply(' '.join).str.lower()
    tfidf.fit(temp)
    response = tfidf.transform(temp)
    print(response.shape)
    word_matrix = pd.DataFrame(response.toarray(),
                               columns=tfidf.get_feature_names(),
                               index=X_df.index)
    
    return tfidf, word_matrix

In [10]:
def transform_tfidf(tfidf, recipe):
    response = tfidf.transform(recipe['ingredients'])
    
    transformed_recipe = pd.DataFrame(response.toarray(),
                                 columns=tfidf.get_feature_names(),
                                 index=recipe.index)
    return transformed_recipe

In [11]:
def transform_from_test_tfidf(tfidf, df, idx):
    recipe = [' '.join(df.iloc[idx]['ingredients'])]
    response = tfidf.transform(recipe)
    transformed_recipe = pd.DataFrame(response.toarray(),
                                      columns=tfidf.get_feature_names())
    return transformed_recipe

In [12]:
def filter_out_cuisine(ingred_word_matrix, X_df, cuisine_name, tfidf):
    combo = pd.concat([ingred_word_matrix, X_df['imputed_label']], axis=1)
    filtered_ingred_word_matrix = combo[combo['imputed_label'] != cuisine_name].drop('imputed_label', 
                                                                     axis=1)
    return filtered_ingred_word_matrix

In [13]:
def find_closest_recipes(filtered_ingred_word_matrix, recipe_tfidf, X_df):
    search_vec = np.array(recipe_tfidf).reshape(1,-1)
    res_cos_sim = cosine_similarity(filtered_ingred_word_matrix, search_vec)
    top_five = np.argsort(res_cos_sim.flatten())[-5:][::-1]
    proximity = res_cos_sim[top_five]
    recipe_ids = [filtered_ingred_matrix.iloc[idx].name for idx in top_five]
    suggest_df = X_df.loc[recipe_ids]
    return suggest_df, proximity

# Create the dataframe

In [14]:
X_train, X_test = load_data(filename)

In [15]:
prepped = prep_data(X_train)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [16]:
prepped.head()

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
16343,54a438226529d92b2c01925e,This dip is our version of a Catalan sauce tha...,Romesco Sauce,/recipes/food/views/romesco-sauce-232504,"{'id': '560de5a3f3a00aeb2f1d549c', 'filename':...","[1 large tomato (1/2 lb), cored, 1 (1/2-oz) dr...",[Put oven rack in middle position and preheat ...,cuisine,Spanish/Portuguese,Mediterranean
11650,54a42f1e6529d92b2c012d34,Casônsèi della Val Camonica\nCasônsèi dates ba...,Casônsèi from Val Camonica,/recipes/food/views/casonsei-from-val-camonica...,"{'id': '5609a7a66a59cdb91b5ff773', 'filename':...","[2 3/4 cups all-purpose flour, plus extra for ...",[Make the pasta dough. Sift the flour into a m...,cuisine,Italian,Italian
21621,54a4533619925f464b38f6f9,Dried pears lend a subtle sweetness to the dee...,Wild Rice Stuffing with Wild Mushrooms,/recipes/food/views/wild-rice-stuffing-with-wi...,"{'id': '560ea89df3a00aeb2f1d727d', 'filename':...","[8 tablespoons (1 stick) butter, 4 large onion...",[Melt 4 tablespoons butter in heavy large pot ...,cuisine,American,American
9853,54a42c1a6529d92b2c010564,Editor's note: The recipe below is excerpted f...,Beet and Apple Salad,/recipes/food/views/beet-and-apple-salad-233504,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[2 tablespoons honey, 2 tablespoons apple cide...","[1. Make dressing: Whisk together honey, vineg...",cuisine,American,American
23180,54a455c06529d92b2c021e24,What makes these moist bars double cherry? The...,Double-Cherry Streusel Bars,/recipes/food/views/double-cherry-streusel-bar...,"{'id': '560ea1737b55306961bff0ec', 'filename':...",[1 cup dried Bing (sweet) cherries (about 6 ou...,"[Combine dried cherries, tart cherry preserves...",cuisine,American,American


In [17]:
print(prepped['ingredients'].apply(' '.join))

16343    1 large tomato (1/2 lb), cored 1 (1/2-oz) drie...
11650    2 3/4 cups all-purpose flour, plus extra for d...
21621    8 tablespoons (1 stick) butter 4 large onions ...
9853     2 tablespoons honey 2 tablespoons apple cider ...
23180    1 cup dried Bing (sweet) cherries (about 6 oun...
                               ...                        
24420    2 cups all purpose flour 1/4 cup plus 2 tables...
2102     1/2 pound seedless red grapes two 1/4-ounce pa...
31210    2 1/4 cups water 1 cup cider vinegar 1 medium ...
17904    2 cups (packed) golden brown sugar 2 cups wate...
10201    1/2 cup tamarind pulp (from a pliable block) 1...
Name: ingredients, Length: 13335, dtype: object


# Create the ingredients TFIDF matrix that will be the database

In [18]:
ingred_tfidf, ingred_word_matrix = fit_transform_tfidf_matrix(prepped, stopwords_list)

  'stop_words.' % sorted(inconsistent))


(13335, 2017)


In [19]:
ingred_word_matrix

Unnamed: 0,aceto,achiote,acid,acini,acorn,adobo,adrianascaravan,adzuki,african,agave,...,yuzu,za,zabaglione,zealand,zest,zested,zinfandel,zingermans,ziti,zucchini
16343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11650,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9853,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17904,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# X_test is using the test split from train_test_split to return test recipes

In [20]:
X_test

Unnamed: 0,id,dek,hed,pubDate,author,type,url,photoData,tag,aggregateRating,ingredients,prepSteps,reviewsCount,willMakeAgainPct,dateCrawled
12590,54a430de19925f464b383ac3,"Brandi Neuwirth of Cary, North Carolina, write...",Watermelon-Mint Ice Cubes,2006-06-13T16:02:42.000Z,[{'name': 'Brandi Neuwirth'}],recipe,/recipes/food/views/watermelon-mint-ice-cubes-...,"{'id': '560dd76c7b55306961bfaa0b', 'filename':...","{'category': 'cuisine', 'name': 'American', 'u...",4.00,[6 cups 1-inch cubes seeded watermelon (about ...,[Puree watermelon in processor (there should b...,1,100,1498548022
32007,54a47d4b19925f464b39bff7,,Chicken Piccata,2004-08-20T12:48:48.000Z,[],recipe,/recipes/food/views/chicken-piccata-5154,"{'id': '578d20ad95824bf90525e55a', 'filename':...","{'category': 'cuisine', 'name': 'Italian', 'ur...",3.33,"[4 skinless boneless chicken breast halves, 3 ...",[Place chicken between 2 large sheets of plast...,244,92,1498548435
27434,54a4671019925f464b396a4a,Can be prepared in 45 minutes or less.,"Roasted Red Pepper, Pepper Jack, and Pepperoni...",2004-08-20T12:48:48.000Z,[{'name': 'Kenneth Walther'}],recipe,/recipes/food/views/roasted-red-pepper-pepper-...,"{'id': '56746183b47c050a284a4e15', 'filename':...","{'category': 'type', 'name': 'Sandwich', 'url'...",3.14,[1 round loaf sourdough bread (about 7 inches ...,"[Preheat oven to 375° F., Cut eight 1/2-inch-t...",7,100,1498548377
2233,54a4189d6529d92b2c006364,,Oriental Pudding,2004-08-20T04:00:00.000Z,[{'name': 'James Beard'}],recipe,/recipes/food/views/oriental-pudding-20049,"{'id': '56746183b47c050a284a4e15', 'filename':...","{'category': 'cuisine', 'name': 'Asian', 'url'...",2.67,"[1 8-ounce package pitted dates, 1/2 cup sugar...",[Cut the dates into small pieces and combine t...,3,100,1498548860
15258,54a4353219925f464b3872d9,This dish is great alongside the Lemony Chicke...,Spicy Spinach Linguine with Olive Oil and Garlic,2007-01-04T04:25:28.000Z,[],recipe,/recipes/food/views/spicy-spinach-linguine-wit...,"{'id': '560d9943f9a84192308a1930', 'filename':...","{'category': 'cuisine', 'name': 'Italian', 'ur...",3.47,"[12 ounces spinach linguine, 6 tablespoons ext...",[Cook linguine in large pot of boiling salted ...,29,89,1498547930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34193,581793b758b6c2cf3d999849,"For deep apple flavor, the fruit in this pie i...",Classic Apple Pie,2016-11-01T19:44:38.491Z,[{'name': 'Paula Haney'}],recipe,/recipes/food/views/classic-apple-pie,"{'id': '5818a37eee26df88234c36e0', 'filename':...","{'category': 'type', 'name': 'Pie', 'url': '',...",3.67,"[1/2 teaspoon all-purpose flour, 1/2 teaspoon ...","[In a small bowl, whisk together flour and sug...",3,67,1498546548
15695,54a435e66529d92b2c01841e,"Katy Hees of Santa Fe, New Mexico, writes: ""Ea...",Fried Eggs on Toast with Pepper Jack and Avocado,2006-02-01T04:00:00.000Z,[{'name': 'Katy Hees'}],recipe,/recipes/food/views/fried-eggs-on-toast-with-p...,"{'id': '5674617e47d1a28026045e4f', 'filename':...","{'category': 'cuisine', 'name': 'American', 'u...",3.14,"[3 tablespoons butter, room temperature, divid...",[Preheat broiler. Melt 1 tablespoon butter in ...,5,100,1498551821
1867,54a415ef6529d92b2c005c45,,Buffalo Meat Loaf,2004-08-20T04:00:00.000Z,[],recipe,/recipes/food/views/buffalo-meat-loaf-103049,"{'id': '561025bb7b55306961bffe6b', 'filename':...","{'category': 'ingredient', 'name': 'Game', 'ur...",3.54,"[1 cup chopped onion, 2 celery ribs, cut into ...","[Preheat oven to 375°F., Cook onion, celery, c...",34,89,1498548853
29544,54a470fb19925f464b39911c,,Pumpkin-Seed Brittle,2004-08-20T04:00:00.000Z,[],recipe,/recipes/food/views/pumpkin-seed-brittle-15334,"{'id': '56746183b47c050a284a4e15', 'filename':...","{'category': 'type', 'name': 'Candy', 'url': '...",1.89,"[1/3 cup sugar, 1/4 cup water, 1/2 cup toasted...",[In a heavy skillet combine the sugar and the ...,13,43,1498549053


In [21]:
test_prepped = prep_data(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [22]:
test_prepped.head()

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
12590,54a430de19925f464b383ac3,"Brandi Neuwirth of Cary, North Carolina, write...",Watermelon-Mint Ice Cubes,/recipes/food/views/watermelon-mint-ice-cubes-...,"{'id': '560dd76c7b55306961bfaa0b', 'filename':...",[6 cups 1-inch cubes seeded watermelon (about ...,[Puree watermelon in processor (there should b...,cuisine,American,American
32007,54a47d4b19925f464b39bff7,,Chicken Piccata,/recipes/food/views/chicken-piccata-5154,"{'id': '578d20ad95824bf90525e55a', 'filename':...","[4 skinless boneless chicken breast halves, 3 ...",[Place chicken between 2 large sheets of plast...,cuisine,Italian,Italian
2233,54a4189d6529d92b2c006364,,Oriental Pudding,/recipes/food/views/oriental-pudding-20049,"{'id': '56746183b47c050a284a4e15', 'filename':...","[1 8-ounce package pitted dates, 1/2 cup sugar...",[Cut the dates into small pieces and combine t...,cuisine,Asian,Asian
15258,54a4353219925f464b3872d9,This dish is great alongside the Lemony Chicke...,Spicy Spinach Linguine with Olive Oil and Garlic,/recipes/food/views/spicy-spinach-linguine-wit...,"{'id': '560d9943f9a84192308a1930', 'filename':...","[12 ounces spinach linguine, 6 tablespoons ext...",[Cook linguine in large pot of boiling salted ...,cuisine,Italian,Italian
19843,54a44e7b6529d92b2c01d98b,This slaw would pair nicely with grilled chick...,Curried Coleslaw with Green Onions and Currants,/recipes/food/views/curried-coleslaw-with-gree...,"{'id': '5674617e47d1a28026045e4f', 'filename':...","[1/2 cup light mayonnaise, 2 tablespoons fresh...","[Whisk mayonnaise, lime juice, and curry powde...",cuisine,American,American


In [23]:
sample_recipe = test_prepped.iloc[300]
sample_recipe

id                                        54a46d016529d92b2c028812
description      An easy-to-make sauce to accompany grilled chi...
title                                              Avocado "Pesto"
url                         /recipes/food/views/avocado-pesto-2011
photo_data       {'id': '56746182accb4c9831e45e0a', 'filename':...
ingredients      [1 medium avocado, peeled, halved, 1 cup light...
steps            [Place avocado, basil, 1/2 cup broth, garlic a...
category                                                   cuisine
name                                                       Italian
imputed_label                                              Italian
Name: 28699, dtype: object

In [24]:
sample_recipe['ingredients']

['1 medium avocado, peeled, halved',
 '1 cup lightly packed fresh basil leaves',
 '1/2 cup (or more) canned chicken broth',
 '4 large garlic cloves',
 '2 tablespoons fresh lime juice',
 '1/2 cup vegetable oil']

In [25]:
remove_cuisine = sample_recipe['imputed_label']
remove_cuisine

'Italian'

In [26]:
sample_words = transform_from_test_tfidf(ingred_tfidf, test_prepped, 300)

In [27]:
sample_words

Unnamed: 0,aceto,achiote,acid,acini,acorn,adobo,adrianascaravan,adzuki,african,agave,...,yuzu,za,zabaglione,zealand,zest,zested,zinfandel,zingermans,ziti,zucchini
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


 # Make a sub DataFrame without the target's cuisine style

In [28]:
filtered_ingred_matrix = filter_out_cuisine(ingred_word_matrix, 
                                            prepped, 
                                            remove_cuisine, 
                                            ingred_tfidf)

In [29]:
filtered_ingred_matrix

Unnamed: 0,aceto,achiote,acid,acini,acorn,adobo,adrianascaravan,adzuki,african,agave,...,yuzu,za,zabaglione,zealand,zest,zested,zinfandel,zingermans,ziti,zucchini
16343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9853,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17904,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Calculate Cosine Similarity between provided recipe and database

In [30]:
filtered_ingred_matrix.iloc[10201].name

18788

In [31]:
res_cos_sim, proximity = find_closest_recipes(filtered_ingred_matrix, 
                                              sample_words, 
                                              prepped)

In [32]:
res_cos_sim

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
27135,54a465976529d92b2c026a40,,Black Bean Ancho Chili,/recipes/food/views/black-bean-ancho-chili-11354,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[1 pound (about 2 1/2 cups) dried black beans,...",[Halve and peel the avocado and cut it into 1/...,cuisine,Mexican,Mexican
19328,54a44c9c19925f464b38c6a6,,Basil Lime Syrup,/recipes/food/views/basil-lime-syrup-107041,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[3/4 cup sugar, Zest of 1 lime, removed in str...","[Bring sugar, zest, juice, and water to a boil...",cuisine,American,American
2031,54a416b16529d92b2c005f6b,"Margaritas, crisp Mexican beer or sangria woul...",Tortilla Soup,/recipes/food/views/tortilla-soup-103113,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[6 6-inch-diameter corn tortillas, Nonstick ve...",[Preheat oven to 350°F. Cut 2 tortillas into m...,cuisine,Mexican,Mexican
11527,54a42ee96529d92b2c012a55,,Tomato-Avocado Salsa,/recipes/food/views/tomato-avocado-salsa-238809,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[One 15-ounce container purchased salsa, 1 dic...","[Mix salsa with avocado, lime juice, olive oil...",cuisine,Mexican,Mexican
26330,54a461c36529d92b2c025ac7,"(Spicy Chicken Broth with Chicken, Vegetables,...",Caldo Tlalpeno,/recipes/food/views/caldo-tlalpeno-13156,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[8 cups chicken broth, a 1 1/4-pound whole chi...",[In a large saucepan bring the broth just to a...,cuisine,Mexican,Mexican


In [33]:
proximity

array([[0.57024383],
       [0.50456492],
       [0.4883199 ],
       [0.48273546],
       [0.45938696]])

In [34]:
sample_recipe['ingredients'], [res_cos_sim['ingredients'].iloc[idx] for idx in range(0,5)]

(['1 medium avocado, peeled, halved',
  '1 cup lightly packed fresh basil leaves',
  '1/2 cup (or more) canned chicken broth',
  '4 large garlic cloves',
  '2 tablespoons fresh lime juice',
  '1/2 cup vegetable oil'],
 [['1 pound (about 2 1/2 cups) dried black beans, picked over and rinsed',
   '2 medium onions, chopped',
   '6 garlic cloves',
   '3 tablespoons vegetable oil',
   '1 tablespoon ground cumin',
   '7 1/2 cups water',
   '3 ounces (about 6) dried ancho chilies*, stemmed, seeded, and torn into pieces (wear rubber gloves)',
   'a 28-ounce can tomatoes including the juice, puréed coarse',
   '1 cup chicken broth',
   '1 red bell pepper, chopped',
   '1 teaspoon dried orégano, crumbled',
   '1/3 cup chopped fresh coriander, or to taste',
   '2 tablespoons fresh lime juice, or to taste',
   'avocado salsa (recipe follows) as an accompaniment if desired',
   'sour cream as an accompaniment if desired',
   '1 avocado (preferably California)',
   '1 1/2 tablespoons fresh lime juic

In [35]:
avo_pesto_query = [27135, 19328, 2031, 11527, 26330]

In [36]:
prepped.loc[avo_pesto_query]

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
27135,54a465976529d92b2c026a40,,Black Bean Ancho Chili,/recipes/food/views/black-bean-ancho-chili-11354,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[1 pound (about 2 1/2 cups) dried black beans,...",[Halve and peel the avocado and cut it into 1/...,cuisine,Mexican,Mexican
19328,54a44c9c19925f464b38c6a6,,Basil Lime Syrup,/recipes/food/views/basil-lime-syrup-107041,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[3/4 cup sugar, Zest of 1 lime, removed in str...","[Bring sugar, zest, juice, and water to a boil...",cuisine,American,American
2031,54a416b16529d92b2c005f6b,"Margaritas, crisp Mexican beer or sangria woul...",Tortilla Soup,/recipes/food/views/tortilla-soup-103113,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[6 6-inch-diameter corn tortillas, Nonstick ve...",[Preheat oven to 350°F. Cut 2 tortillas into m...,cuisine,Mexican,Mexican
11527,54a42ee96529d92b2c012a55,,Tomato-Avocado Salsa,/recipes/food/views/tomato-avocado-salsa-238809,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[One 15-ounce container purchased salsa, 1 dic...","[Mix salsa with avocado, lime juice, olive oil...",cuisine,Mexican,Mexican
26330,54a461c36529d92b2c025ac7,"(Spicy Chicken Broth with Chicken, Vegetables,...",Caldo Tlalpeno,/recipes/food/views/caldo-tlalpeno-13156,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[8 cups chicken broth, a 1 1/4-pound whole chi...",[In a large saucepan bring the broth just to a...,cuisine,Mexican,Mexican


In [37]:
prepped.loc[27135]['ingredients']

['1 pound (about 2 1/2 cups) dried black beans, picked over and rinsed',
 '2 medium onions, chopped',
 '6 garlic cloves',
 '3 tablespoons vegetable oil',
 '1 tablespoon ground cumin',
 '7 1/2 cups water',
 '3 ounces (about 6) dried ancho chilies*, stemmed, seeded, and torn into pieces (wear rubber gloves)',
 'a 28-ounce can tomatoes including the juice, puréed coarse',
 '1 cup chicken broth',
 '1 red bell pepper, chopped',
 '1 teaspoon dried orégano, crumbled',
 '1/3 cup chopped fresh coriander, or to taste',
 '2 tablespoons fresh lime juice, or to taste',
 'avocado salsa (recipe follows) as an accompaniment if desired',
 'sour cream as an accompaniment if desired',
 '1 avocado (preferably California)',
 '1 1/2 tablespoons fresh lime juice, or to taste',
 '1/2 cup finely chopped red onion',
 '1 fresh or pickled jalapeño, seeded and minced (wear rubber gloves)',
 '*available at Hispanic markets, some specialty foods shops, and some supermarkets']

---

In [38]:
brint = 74

In [39]:
brian_tries = test_prepped.iloc[brint]

In [40]:
brian_tries

id                                        54a42b4819925f464b37f61d
description      Lemon juice and sugar bring out the natural ju...
title                                        Strawberry Shortcakes
url               /recipes/food/views/strawberry-shortcakes-242601
photo_data       {'id': '560d788d7b55306961bf3424', 'filename':...
ingredients      [4 pints strawberries, lightly rinsed, hulled ...
steps            [1. Preheat the oven to 400°F. Grease a baking...
category                                                   cuisine
name                                                      American
imputed_label                                             American
Name: 9400, dtype: object

In [42]:
brian_sample_words = transform_from_test_tfidf(ingred_tfidf, 
                                               test_prepped, 
                                               brint)

In [43]:
brian_filtered_ingred_matrix = filter_out_cuisine(ingred_word_matrix, 
                                                  prepped, 
                                                  remove_cuisine, 
                                                  ingred_tfidf)

In [44]:
br_res_cos_sim, br_proximity = find_closest_recipes(brian_filtered_ingred_matrix, 
                                                    brian_sample_words, 
                                                    prepped)

In [45]:
br_res_cos_sim

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
1021,54a4101d6529d92b2c004a90,,Strawberry-Cheesecake Ice Cream,/recipes/food/views/strawberry-cheesecake-ice-...,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[3/4 pound (1 quart) strawberries, 8 ounces so...","[To make the ice cream, purée the strawberries...",cuisine,American,American
15663,54a435da6529d92b2c01837c,This recipe is an accompaniment for Vanilla Mo...,Strawberry Coulis,/recipes/food/views/strawberry-coulis-234239,"{'id': '57ab6bfa5dc5a16b3e65cb4c', 'filename':...","[1 cup frozen unsweetened strawberries, 1/2 cu...","[1. In a medium saucepan, combine the strawber...",cuisine,American,American
17480,54a43ca919925f464b38a227,"Almost any berry, or combination of berries, c...",Summer Strawberry Jam,/recipes/food/views/summer-strawberry-jam-234745,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[1 1/2 cups sugar, 1/2 cup water, 1 tablespoon...",[Bring first 3 ingredients to boil in large sa...,cuisine,American,American
32402,54a47f6519925f464b39c7a1,Fool is a classic English dessert made by comb...,Strawberry Fool Tartlets,/recipes/food/views/strawberry-fool-tartlets-1290,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[2 cups unbleached all purpose flour, 1/4 teas...",[Mix four and salt in processor. Add butter an...,cuisine,English,English
16603,54a4392319925f464b388fac,Actress Marissa Matrone shares a recipe her gr...,Sour Cream-Strawberry Surprise,/recipes/food/views/sour-cream-strawberry-surp...,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[2 pints strawberries, hulled and halved, 16 o...",[Mix strawberries into sour cream. Add sugar t...,cuisine,American,American


In [46]:
br_proximity

array([[0.79190194],
       [0.77745165],
       [0.72749184],
       [0.71655004],
       [0.71207769]])

In [47]:
br_res_cos_sim['ingredients'].iloc[4]

['2 pints strawberries, hulled and halved',
 '16 oz lowfat sour cream',
 'Raw sugar']

---

In [48]:
eint = 200
erin_tries = test_prepped.iloc[eint]
erin_tries

id                                        54a432836529d92b2c01577b
description                                                       
title                                      Apple Prune Brown Betty
url              /recipes/food/views/apple-prune-brown-betty-10...
photo_data       {'id': '56746182accb4c9831e45e0a', 'filename':...
ingredients      [3/4 cup pitted prunes (dried plums; 6 ounces)...
steps            [Put oven rack in middle position and preheat ...
category                                                   cuisine
name                                                      American
imputed_label                                             American
Name: 13595, dtype: object

In [49]:
erin_sample_words = transform_from_test_tfidf(ingred_tfidf, 
                                               test_prepped, 
                                               eint)
erin_filtered_ingred_matrix = filter_out_cuisine(ingred_word_matrix, 
                                                  prepped, 
                                                  remove_cuisine, 
                                                  ingred_tfidf)
er_res_cos_sim, er_proximity = find_closest_recipes(erin_filtered_ingred_matrix, 
                                                    erin_sample_words, 
                                                    prepped)

In [50]:
er_res_cos_sim

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
13042,54a4319319925f464b3843f7,"Sprinkled with cinnamon sugar, these little pr...",Sugar-Glazed Prune Tartlets,/recipes/food/views/sugar-glazed-prune-tartlet...,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[Butter for greasing baking sheet, 1 frozen pu...",[Put oven rack in upper third of oven and preh...,cuisine,American,American
19743,54a44e156529d92b2c01d73e,"""I'm a social worker running my own practice,""...",Cinnamon-Sugar Plum Cake,/recipes/food/views/cinnamon-sugar-plum-cake-1...,"{'id': '5674617eb47c050a284a4e11', 'filename':...","[1 1/4 cups all purpose flour, 1 teaspoon baki...",[Preheat oven to 350°F. Butter 9-inch-diameter...,cuisine,American,American
20891,54a451dd6529d92b2c01ef45,This recipe is an accompaniment for Apple Cris...,Prune Tequila Ice Cream,/recipes/food/views/prune-tequila-ice-cream-10...,"{'id': '56746182accb4c9831e45e0a', 'filename':...","[3/4 cup prunes (dried plums; about 5 oz), 1/3...",[Pack prunes into a 1/2-pint jar and add tequi...,cuisine,Mexican,Mexican
20225,54a44fda19925f464b38d9c6,Oregon's Willamette Valley produces great wine...,Apple-Prune Crisp with Hazelnut Topping,/recipes/food/views/apple-prune-crisp-with-haz...,"{'id': '560ea13ef9a84192308a94c7', 'filename':...","[1/3 cup sugar, 3 tablespoons (packed) golden ...",[Preheat oven to 375°F. Butter 8-inch glass ba...,cuisine,American,American
16934,54a43ab16529d92b2c019efc,,Prune and Caraway Ice Cream,/recipes/food/views/prune-and-caraway-ice-crea...,"{'id': '56746183b47c050a284a4e15', 'filename':...","[6 cups half-and-half (48 fluid ounces), 2 tab...",[Bring half-and-half with caraway seeds just t...,cuisine,Eastern European/Russian,Eastern European


In [51]:
er_proximity

array([[0.50917087],
       [0.50650263],
       [0.42804867],
       [0.42274583],
       [0.41150213]])