In [30]:
import json
import joblib
import csv
import re
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize, FreqDist
from nltk.stem.wordnet import WordNetLemmatizer
import string
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
from sklearn.feature_extraction.text import TfidfVectorizer

In [37]:
stopwords_loc = "../write_data/food_stopwords.csv"
with open(stopwords_loc, "r") as myfile:
    reader = csv.reader(myfile)
    food_stopwords = [col for row in reader for col in row]

stopwords_list = stopwords.words('english') + list(string.punctuation) + food_stopwords
lemmatizer = WordNetLemmatizer()

In [38]:
stopwords_list

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [3]:
filename = "../write_data/test_api_hits.json"
with open(filename, 'r') as f:
    datastore = json.load(f)

In [4]:
datastore[0]['recipe'].keys()

dict_keys(['uri', 'label', 'image', 'source', 'url', 'shareAs', 'yield', 'dietLabels', 'healthLabels', 'cautions', 'ingredientLines', 'ingredients', 'calories', 'totalWeight', 'totalNutrients', 'totalDaily', 'digest'])

In [5]:
url = datastore[0]['recipe']['url']

In [6]:
[item['food'] for item in datastore[0]['recipe']['ingredients']]

['bread',
 'refried beans',
 'salsa',
 'mozzarella cheese',
 'red onions',
 'jalapeño peppers']

In [7]:
urls = []
labels = []
sources = []
ingreds = []

for recipe in datastore:
    recipe_path = recipe['recipe']
    urls.append(recipe_path['url'])
    labels.append(recipe_path['label'])
    sources.append(recipe_path['source'])
    ingreds.append([item['food'] for item in recipe_path['ingredients']])
    
all_recipes = {'url': urls,
               'label': labels, 
               'source': sources, 
               'ingredients': ingreds
              }

recipe_df = pd.DataFrame(all_recipes)

In [8]:
recipe_df

Unnamed: 0,url,label,source,ingredients
0,http://www.seriouseats.com/recipes/2014/03/por...,Pork Carnitas and Oaxaca Cheese Shooter's-Styl...,Serious Eats,"[bread, refried beans, salsa, mozzarella chees..."
1,http://www.marthastewart.com/1140889/carnitas,Carnitas,Martha Stewart,"[meat, coarse salt, water]"
2,http://pinchofyum.com/carnitas-tamale-pie,Carnitas Tamale Pie recipes,Pinch of Yum,"[all-purpose flour, yellow corn meal, granulat..."
3,http://www.davidlebovitz.com/carnitas/,Carnitas Recipe,David Lebovitz,"[pork shoulder, sea salt, vegetable oil, Water..."
4,http://www.myrecipes.com/recipe/beef-carnitas-...,Beef Carnitas Tacos,My Recipes,"[plum tomato, onion, avocado, cilantro, lime j..."
5,http://www.bonappetit.com/recipe/beer-braised-...,Beer-Braised Carnitas,Bon Appetit,"[guajillo chiles, pork shoulder, lager, garlic..."
6,http://www.thekitchn.com/slowcooking-in-the-su...,Heatwave Carnitas,The Kitchn,"[pork shoulder, coarse salt, cumin, black pepp..."
7,https://food52.com/recipes/12328-less-is-more-...,Less is more Carnitas,Food52,"[pork shoulder, bay leaves, Salt, red onion, c..."
8,http://honestcooking.com/crockpot-beef-carnitas/,Crockpot Beef Carnitas,Honest Cooking,"[flat iron steak, yellow onion, white onion, g..."
9,http://chezus.com/2013/05/02/pork-carnitas/,Pork Carnitas Tacos recipes,Chez Us,"[pork shoulder, sweetened condensed milk, wate..."


In [23]:
one_recipe =[]
for listing in recipe_df['ingredients']:
    for ingred in listing:
        one_recipe.append(ingred.lower())
    
one_recipe = list(set(one_recipe))
one_recipe

['onion powder',
 'beef',
 'chipotle peppers',
 'green chiles',
 'jalapeno',
 'lager',
 'flat iron steak',
 'oregano',
 'coarse salt',
 'granulated sugar',
 'bread',
 'red chili pepper',
 'corn tortillas',
 'tomato juice',
 'white onion',
 'avocado',
 'garlic',
 'cilantro',
 'chili powder',
 'plum tomato',
 'cheese',
 'baking powder',
 'garlic powder',
 'dried oregano',
 'corn',
 'salt',
 'lime',
 'black pepper',
 'sea salt',
 'water',
 'refried beans',
 'jalapeño peppers',
 'milk',
 'enchilada sauce',
 'ancho chile powder',
 'ground cinnamon',
 'vegetable oil',
 'yellow onion',
 'bay leaves',
 'red onion',
 'limes',
 'meat',
 'sweetened condensed milk',
 'orange juice',
 'salsa',
 'chile powder',
 'cumin',
 'yellow corn meal',
 'pork shoulder',
 'jalapeño',
 'mozzarella cheese',
 'egg',
 'cayenne pepper',
 'onion',
 'red onions',
 'guajillo chiles',
 'green bell pepper',
 'all-purpose flour',
 'lime juice',
 'kosher salt',
 'cinnamon stick']

In [27]:
search_recipe = pd.DataFrame(data= {'name': "carnitas", 'ingredients': [one_recipe], 'cuisine': "Mexican"})
search_recipe

Unnamed: 0,name,ingredients,cuisine
0,carnitas,"[onion powder, beef, chipotle peppers, green c...",Mexican


In [58]:
search_recipe['ingredients']

0    [onion powder, beef, chipotle peppers, green c...
Name: ingredients, dtype: object

In [31]:
with open("joblib/recipe_dataframe.joblib", "rb") as fo:
  prepped = joblib.load("joblib/recipe_dataframe.joblib")

with open("joblib/recipe_tfidf.joblib", "rb") as fo:
  ingred_tfidf = joblib.load("joblib/recipe_tfidf.joblib")

with open("joblib/recipe_word_matrix.joblib", "rb") as fo:
  ingred_word_matrix = joblib.load("joblib/recipe_word_matrix.joblib")

In [92]:
def transform_tfidf(tfidf, recipe):
    recipe = [' '.join(recipe['ingredients'][0])]
    response = tfidf.transform(recipe)
    transformed_recipe = pd.DataFrame(response.toarray(),
                                      columns=tfidf.get_feature_names())
    return transformed_recipe

In [33]:
def filter_out_cuisine(ingred_word_matrix, X_df, cuisine_name, tfidf):
    combo = pd.concat([ingred_word_matrix, X_df['imputed_label']], axis=1)
    filtered_ingred_word_matrix = combo[combo['imputed_label'] != cuisine_name].drop('imputed_label', 
                                                                     axis=1)
    return filtered_ingred_word_matrix

In [109]:
def find_closest_recipes(filtered_ingred_word_matrix, recipe_tfidf, X_df):
    search_vec = np.array(recipe_tfidf).reshape(1,-1)
    res_cos_sim = cosine_similarity(filtered_ingred_word_matrix, search_vec)
    top_five = np.argsort(res_cos_sim.flatten())[-5:][::-1]
    proximity = res_cos_sim[top_five]
    recipe_ids = [filtered_ingred_word_matrix.iloc[idx].name for idx in top_five]
    suggest_df = X_df.loc[recipe_ids]
    return suggest_df, proximity

In [90]:
carnitas_test = transform_tfidf(ingred_tfidf, search_recipe)

['onion powder beef chipotle peppers green chiles jalapeno lager flat iron steak oregano coarse salt granulated sugar bread red chili pepper corn tortillas tomato juice white onion avocado garlic cilantro chili powder plum tomato cheese baking powder garlic powder dried oregano corn salt lime black pepper sea salt water refried beans jalapeño peppers milk enchilada sauce ancho chile powder ground cinnamon vegetable oil yellow onion bay leaves red onion limes meat sweetened condensed milk orange juice salsa chile powder cumin yellow corn meal pork shoulder jalapeño mozzarella cheese egg cayenne pepper onion red onions guajillo chiles green bell pepper all-purpose flour lime juice kosher salt cinnamon stick']
  (0, 1998)	0.17598389088906438
  (0, 1979)	0.056080614380353555
  (0, 1968)	0.05285686089460608
  (0, 1932)	0.059865504719016166
  (0, 1896)	0.10794720860072066
  (0, 1888)	0.1772438351124114
  (0, 1808)	0.045450332127821846
  (0, 1796)	0.11969684737826837
  (0, 1717)	0.11875639643

In [91]:
carnitas_test

Unnamed: 0,aceto,achiote,acid,acini,acorn,adobo,adrianascaravan,adzuki,african,agave,...,yuzu,za,zabaglione,zealand,zest,zested,zinfandel,zingermans,ziti,zucchini
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [103]:
carnitas_matrix = filter_out_cuisine(ingred_word_matrix=ingred_word_matrix, 
                                     X_df=prepped, 
                                     cuisine_name=search_recipe['cuisine'].values[0], 
                                     tfidf=ingred_tfidf)

In [100]:
cuisine_name=search_recipe['cuisine'].values[0]

In [101]:
cuisine_name

'Mexican'

In [104]:
carnitas_matrix

Unnamed: 0,aceto,achiote,acid,acini,acorn,adobo,adrianascaravan,adzuki,african,agave,...,yuzu,za,zabaglione,zealand,zest,zested,zinfandel,zingermans,ziti,zucchini
16343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11650,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9853,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17904,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [110]:
carnitas_similar, carnitas_similar_vals = find_closest_recipes(filtered_ingred_word_matrix=carnitas_matrix, 
                                                               recipe_tfidf=carnitas_test, 
                                                               X_df=prepped)

In [111]:
carnitas_similar

Unnamed: 0,id,description,title,url,photo_data,ingredients,steps,category,name,imputed_label
9726,54a42be66529d92b2c010298,,Southwestern Shepherd's Pie,/recipes/food/views/southwestern-shepherds-pie...,"{'id': '54b3ea30460b4423363f7cd7', 'filename':...","[2 tablespoons olive oil, 1 medium onion, dice...",[1. Heat oil in a large pot over medium heat. ...,cuisine,Southwestern,Southwestern
17639,54a43dde19925f464b38a4e7,"Robyn Fuoco of Mainly Café and Bakery, Philade...",Vegetarian Black Bean Chili,/recipes/food/views/vegetarian-black-bean-chil...,"{'id': '578d1fe51edb4f17303e3f0f', 'filename':...","[1/4 cup olive oil, 2 cups chopped onions, 1 2...",[Heat oil in heavy large pot over medium-high ...,cuisine,American,American
14117,54a433566529d92b2c016251,Chipotle chiles are quite spicy; choosing a mi...,Turkey Enchiladas Adobo,/recipes/food/views/turkey-enchiladas-adobo-10...,"{'id': '56746183b47c050a284a4e15', 'filename':...","[8 6-inch-diameter corn tortillas, 2 tablespoo...",[Preheat oven to 350°F. Stack tortillas and wr...,cuisine,American,American
22671,54a454ef6529d92b2c021387,"Serve with: Warmed tortillas and steamed, butt...",Chipotle Turkey Cutlets with Charred Corn Salsa,/recipes/food/views/chipotle-turkey-cutlets-wi...,"{'id': '560df908f3a00aeb2f1d64b1', 'filename':...","[1 1/2 cups frozen corn kernels, thawed, 1 1/4...",[Char corn in heavy medium nonstick skillet ov...,cuisine,Southwestern,Southwestern
12019,54a42fc26529d92b2c013593,Editor's note: This recipe was included with t...,Tomato and Corn Salsa,/recipes/food/views/tomato-and-corn-salsa-235222,"{'id': '5674617e47d1a28026045e4f', 'filename':...","[1 small red onion, chopped, 1/2 teaspoon salt...","[Place the chopped onion in a bowl, sprinkle w...",cuisine,American,American


In [112]:
carnitas_similar_vals

array([[0.48331816],
       [0.44876434],
       [0.44436822],
       [0.43775869],
       [0.43029497]])

In [9]:
def isFloat(string):
    try:
        float(string)
        return True
    except ValueError:
        return False

In [10]:
def convert_to_float(frac_str):
    try:
        return float(frac_str)
    except ValueError:
        num, denom = frac_str.split('/')
        try:
            leading, num = num.split(' ')
            whole = float(leading)
        except ValueError:
            return frac_str
        frac = float(num) / float(denom)
        return whole - frac if whole < 0 else whole + frac

In [11]:
def tokenize_recipes(df):
    token_recipes = []
    for recipe in df['ingredients']:
        tokens = [word_tokenize(word) for word in recipe]
        flat_tkns = list(_flatten(tokens))
        stopped_tokens = [ w.lower() #re.search([0-9]+\/[0-9]+,
                          for w in flat_tkns 
                          if w not in stopwords_list 
                          if (isFloat(w) == False)]
        lemma_tokens = [lemmatizer.lemmatize(token) for token in stopped_tokens]
        token_recipes.append(lemma_tokens)
    return token_recipes

In [12]:
def _flatten(list_of_lists):
    for x in list_of_lists:
        if hasattr(x, '__iter__') and not isinstance(x, str):
            for y in _flatten(x):
                yield y
        else:
            yield x

In [13]:
all_ingreds = tokenize_recipes(recipe_df)
all_ingreds

[['bread',
  'refried',
  'bean',
  'salsa',
  'mozzarella',
  'cheese',
  'red',
  'onion',
  'jalapeño',
  'pepper'],
 ['meat', 'salt', 'water'],
 ['all-purpose',
  'flour',
  'yellow',
  'corn',
  'meal',
  'granulated',
  'sugar',
  'baking',
  'powder',
  'salt',
  'vegetable',
  'oil',
  'green',
  'chile',
  'milk',
  'egg',
  'corn',
  'enchilada',
  'sauce',
  'cheese'],
 ['pork',
  'shoulder',
  'salt',
  'vegetable',
  'oil',
  'water',
  'cinnamon',
  'stick',
  'chile',
  'powder',
  'ancho',
  'chile',
  'powder',
  'bay',
  'leaf',
  'cumin',
  'garlic'],
 ['plum',
  'tomato',
  'onion',
  'avocado',
  'cilantro',
  'lime',
  'juice',
  'salt',
  'black',
  'pepper',
  'corn',
  'tortilla',
  'beef',
  'lime'],
 ['guajillo', 'chile', 'pork', 'shoulder', 'lager', 'garlic', 'salt'],
 ['pork',
  'shoulder',
  'salt',
  'cumin',
  'black',
  'pepper',
  'oregano',
  'cinnamon',
  'cayenne',
  'pepper',
  'garlic',
  'chipotle',
  'pepper',
  'tomato',
  'juice',
  'orange',


In [14]:
'bone' in(set(list(_flatten(all_ingreds))))

False

In [15]:
list(_flatten(all_ingreds))

['bread',
 'refried',
 'bean',
 'salsa',
 'mozzarella',
 'cheese',
 'red',
 'onion',
 'jalapeño',
 'pepper',
 'meat',
 'salt',
 'water',
 'all-purpose',
 'flour',
 'yellow',
 'corn',
 'meal',
 'granulated',
 'sugar',
 'baking',
 'powder',
 'salt',
 'vegetable',
 'oil',
 'green',
 'chile',
 'milk',
 'egg',
 'corn',
 'enchilada',
 'sauce',
 'cheese',
 'pork',
 'shoulder',
 'salt',
 'vegetable',
 'oil',
 'water',
 'cinnamon',
 'stick',
 'chile',
 'powder',
 'ancho',
 'chile',
 'powder',
 'bay',
 'leaf',
 'cumin',
 'garlic',
 'plum',
 'tomato',
 'onion',
 'avocado',
 'cilantro',
 'lime',
 'juice',
 'salt',
 'black',
 'pepper',
 'corn',
 'tortilla',
 'beef',
 'lime',
 'guajillo',
 'chile',
 'pork',
 'shoulder',
 'lager',
 'garlic',
 'salt',
 'pork',
 'shoulder',
 'salt',
 'cumin',
 'black',
 'pepper',
 'oregano',
 'cinnamon',
 'cayenne',
 'pepper',
 'garlic',
 'chipotle',
 'pepper',
 'tomato',
 'juice',
 'orange',
 'juice',
 'pork',
 'shoulder',
 'bay',
 'leaf',
 'salt',
 'red',
 'onion',
 

In [None]:
test = set(all_ingreds[1]).intersection(_flatten(all_ingreds))
test

In [None]:
isFloat('1/3')

In [None]:
convert_to_float('1/3')

In [None]:
for element in recipe_df['ingredients'][1]:
    print(re.search('[0-9]+\/[0-9]+', element.lower()))

In [None]:
for element in recipe_df['ingredients'][1]:
    print(element)