In [8]:
import pandas as pd
import numpy as np

# from sentence_transformers import SentenceTransformer, util
# import torch

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import string

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import ast

In [10]:
combined_foodcom = pd.read_csv('combined_foodcom.csv')
# food_data

In [181]:
combined_foodcom['ingredients_x'] = combined_foodcom['ingredients_x'].apply(lambda x:ast.literal_eval(x))
combined_foodcom['NER'] = combined_foodcom['NER'].apply(lambda x:ast.literal_eval(x))

In [123]:
ingredients = 'bow-tie pasta, olive oil, tomato sauce, chedder, mushrooms, zucchini, chicken, onion, garlic'
condiments = ', salt, sugar'
query = ingredients + condiments

### Concerns
- keyword: exact item match 
- semantic: understanding of category/exchange (pasta - specific types of pasta; oil - exchange with other oil; lemon - lemon juice and lemon zest) 
- assumed condiments: salt, pepper, sugar, soy sauce(?), water
- high similarty, at the same time only 1-3 ingredients not in the input
- if in the title, key ingredient that must be included
- use only items I have / allows for a few that I don't have 
- allows exchange 


semantic search seems to assume too much based on the input

### More refinement on TFidf / keyword search
- currently it works the best 
- plus NLTK stopwords removal and stemming in name and do keyword search

### Name Entity Recognition

In [101]:
len(query.split())

15

In [182]:
data = combined_foodcom[combined_foodcom['n_ingredients'] <= 12]

In [125]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/jix028/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [126]:
names = data['name']
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
def clean_doc(document):
    doc_clean = re.sub(r'[^\x00-\x7F]+', ' ', document)
    doc_clean = re.sub(r'@\w+', '', doc_clean)
    doc_clean = doc_clean.lower()
    doc_clean = re.sub(r'[%s]' % re.escape(string.punctuation), ' ', doc_clean)
    doc_clean = re.sub(r'[0-9]', '', doc_clean)
    doc_clean = re.sub(r'\s{2,}', ' ', doc_clean)
    
    return doc_clean

In [127]:
name_tokens = []
for doc in names.fillna('missing'):
    doc = clean_doc(doc)
    cur_token = doc.lower().split()
    filtered_token = [lemmatizer.lemmatize(w) for w in cur_token if not w in stop_words]
    name_tokens.append(' '.join(filtered_token))
name_vectorizer = TfidfVectorizer().fit(name_tokens)
name_emb = name_vectorizer.transform(name_tokens)

In [128]:
## query part
query_name = name_vectorizer.transform([query])
cosine_sim_name = cosine_similarity(name_emb, query_name)

In [129]:
data['n_score'] = cosine_sim_name
output_name = data.sort_values(by = 'n_score', ascending=False)[:1000]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['n_score'] = cosine_sim_name


In [130]:
output_name

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,n_ingredients,title,ingredients_y,directions,link,source,NER,website,n_score,score
26796,bow ties with zucchini in lemon garlic sauce,190435,30,142878,2006-10-14,"['30-minutes-or-less', 'time-to-make', 'course...","[584.4, 36.0, 37.0, 4.0, 35.0, 57.0, 25.0]",11,['cook pasta in large sacuepan of boiling wate...,this is a great vegetarian recipe from the aus...,...,10,bow ties with zucchini in lemon garlic sauce,"[""375 g bow tie pasta"", ""3 medium yellow zucch...","[""Cook pasta in large sacuepan of boiling wate...",www.food.com/recipe/bow-ties-with-zucchini-in-...,Gathered,"[""pasta"", ""yellow zucchini"", ""green zucchini"",...",www.food.com,0.550898,0.641703
26762,bow tie pasta with zucchini tomato and basil,71851,25,95743,2003-09-30,"['30-minutes-or-less', 'time-to-make', 'course...","[454.8, 28.0, 17.0, 7.0, 34.0, 38.0, 18.0]",9,"['boil pasta in salted water until al dente', ...","this meatless dish is fast, easy, healthy and ...",...,7,"bow tie pasta with zucchini, tomato and basil","[""10 ounces bow tie pasta"", ""2 small zucchini,...","[""Boil pasta in salted water until al dente; d...",www.food.com/recipe/bow-tie-pasta-with-zucchin...,Gathered,"[""pasta"", ""zucchini"", ""salt"", ""olive oil"", ""go...",www.food.com,0.543916,0.699601
88454,garlic olive oil pasta with tomatoes and chicken,447723,40,1809501,2011-01-31,"['60-minutes-or-less', 'time-to-make', 'course...","[809.0, 53.0, 19.0, 6.0, 67.0, 58.0, 29.0]",9,['in a medium size skillet add 1 can chicken b...,this is an easy pasta recipe with tons of flav...,...,11,garlic olive oil pasta with tomatoes and chicken,"[""3/4 lb chicken breast, cubed"", ""1 lb spaghet...","[""In a medium size skillet add 1 can chicken b...",www.food.com/recipe/garlic-olive-oil-pasta-wit...,Gathered,"[""chicken breast"", ""garlic"", ""garlic"", ""parsle...",www.food.com,0.527381,0.177952
42289,chicken bow tie pasta,99986,45,162774,2004-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[909.9, 99.0, 7.0, 27.0, 73.0, 89.0, 15.0]",11,['cook chicken breast in 2 tbsp of oil until l...,this is one of my 11 year olds favorite recipe...,...,9,chicken bow tie pasta,"[""1/2 cup margarine"", ""1 cup half-and-half"", ""...","[""Cook chicken breast in 2 TBSP of oil until l...",www.food.com/recipe/chicken-bow-tie-pasta-99986,Gathered,"[""margarine"", ""garlic"", ""pasta"", ""chicken brea...",www.food.com,0.526300,0.517247
41605,chicken and bow tie pasta,189024,40,357591,2006-10-04,"['60-minutes-or-less', 'time-to-make', 'course...","[654.8, 53.0, 5.0, 15.0, 72.0, 64.0, 15.0]",14,"['cook pasta according to package directions',...",tender chicken tossed in a sun dried tomato wi...,...,12,chicken and bow tie pasta,"[""8 ounces bow tie pasta"", ""2 garlic cloves, m...","[""Cook pasta according to package directions; ...",www.food.com/recipe/chicken-and-bow-tie-pasta-...,Gathered,"[""pasta"", ""garlic"", ""olive oil"", ""chicken brea...",www.food.com,0.526300,0.468190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80983,fava beans with tomato garlic sauce,34718,45,4470,2002-07-23,"['60-minutes-or-less', 'time-to-make', 'course...","[224.0, 8.0, 21.0, 2.0, 22.0, 4.0, 11.0]",4,['heat oil in a large saucepan& over medium he...,if you haven't tried fava beans you are in for...,...,10,fava beans with tomato garlic sauce,"[""2 (19 ounce) cans fava beans (use fresh if y...","[""Heat oil in a large saucepan& over medium he...",www.food.com/recipe/fava-beans-with-tomato-gar...,Gathered,"[""fava beans"", ""olive oil"", ""onions"", ""garlic""...",www.food.com,0.193606,0.166446
42302,chicken breast and white garlic sauce,386991,25,1362303,2009-08-24,"['30-minutes-or-less', 'time-to-make', 'course...","[608.8, 83.0, 22.0, 4.0, 38.0, 156.0, 4.0]",7,"['take a saucepan and add the oil , the diced ...",a nice recipe that my husband ate so fast....:...,...,9,chicken breast and white garlic sauce,"[""1 big chicken breasts or 1 big several small...","[""Take a saucepan and add the oil, the diced o...",www.food.com/recipe/chicken-breast-and-white-g...,Recipes1M,"[""chicken breasts"", ""onion"", ""garlic"", ""cream""...",www.food.com,0.193546,0.193933
129062,mashed garlic onion potatoes,10445,40,4470,2001-07-26,"['60-minutes-or-less', 'time-to-make', 'main-i...","[225.1, 6.0, 13.0, 9.0, 10.0, 8.0, 14.0]",12,"['cook potatoes until fork tender', 'while pot...",garlic lovers unite! this is a really tasty wa...,...,9,mashed garlic & onion potatoes,"[""6 medium russet potatoes, scrubbed"", ""1 tabl...","[""Cook potatoes until fork tender (about 30 mi...",www.food.com/recipe/mashed-garlic-onion-potato...,Gathered,"[""russet potatoes"", ""olive oil"", ""garlic"", ""ye...",www.food.com,0.193490,0.107996
87943,garlic onion mashed potatoes,64122,35,86093,2003-06-09,"['60-minutes-or-less', 'time-to-make', 'course...","[181.3, 8.0, 8.0, 1.0, 7.0, 17.0, 9.0]",8,"['peel potatoes , cut into large chunks and pl...",i got this recipe from a friend who comes from...,...,9,garlic & onion mashed potatoes,"[""2 lbs russet potatoes"", ""1 medium yellow oni...","[""Peel potatoes, cut into large chunks and pla...",www.food.com/recipe/garlic-onion-mashed-potato...,Gathered,"[""potatoes"", ""yellow onion"", ""garlic"", ""sour c...",www.food.com,0.193490,0.123760


In [131]:
data['name']

0           arriba   baked winter squash mexican style
1                     a bit different  breakfast pizza
3                                   alouette  potatoes
4                   amish  tomato ketchup  for canning
5                              apple a day  milk shake
                              ...                     
226884                                    zydeco sauce
226885                              zydeco shrimp wrap
226888                       zydeco ya ya deviled eggs
226889          cookies by design   cookies on a stick
226890    cookies by design   sugar shortbread cookies
Name: name, Length: 189160, dtype: object

In [185]:
input_text = []
for i in data['NER']:
    input_text.append(', '.join(i))
corpus = input_text
vectorizer = TfidfVectorizer().fit(corpus)
corpus_emb = vectorizer.transform(corpus)

In [186]:

query_emb = vectorizer.transform([query])

In [187]:
cosine_sim = cosine_similarity(corpus_emb, query_emb)
data['score'] = cosine_sim
output = data.sort_values(by = 'score', ascending=False)[:1000]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['score'] = cosine_sim


In [188]:
output[:20]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,n_ingredients,title,ingredients_y,directions,link,source,NER,website,n_score,score
88198,garlic chicken pasta,307421,25,643714,2008-06-05,"['30-minutes-or-less', 'time-to-make', 'course...","[437.4, 21.0, 14.0, 5.0, 41.0, 37.0, 20.0]",10,"['cook pasta according to box directions', 'sa...","if you're a big garlic fan, you may want to in...",...,12,garlic chicken pasta,"[""2 boneless skinless chicken breasts, chopped...","[""Cook pasta according to box directions."", ""S...",www.food.com/recipe/garlic-chicken-pasta-307421,Gathered,"[chicken breasts, tomatoes, red bell pepper, r...",www.food.com,0.304217,0.576416
34443,caprese pasta salad,421785,70,622402,2010-04-26,"['time-to-make', 'preparation', '4-hours-or-le...","[291.4, 15.0, 5.0, 4.0, 17.0, 7.0, 14.0]",10,['toss the mozzarella and cherry tomatoes with...,this is a great little side dish for a bbq and...,...,9,caprese pasta salad,"[""1 1/2 cups bite-size fresh mozzarella cheese...","[""Toss the mozzarella and cherry tomatoes with...",www.food.com/recipe/caprese-pasta-salad-421785,Gathered,"[bite-size fresh mozzarella cheese, cherry tom...",www.food.com,0.088928,0.5048
61278,creamy chicken bacon tomato pasta,349029,25,346383,2009-01-11,"['30-minutes-or-less', 'time-to-make', 'course...","[267.7, 20.0, 13.0, 18.0, 62.0, 28.0, 1.0]",16,"['cook pasta as directed on package', 'meanwhi...",this recipe was featured in an email from the ...,...,8,"creamy chicken, bacon & tomato pasta","[""3 cups whole wheat bow tie pasta, uncooked (...","[""COOK pasta as directed on package."", ""MEANWH...",www.food.com/recipe/creamy-chicken-bacon-tomat...,Gathered,"[whole wheat bow tie pasta, chicken breast, ba...",www.food.com,0.210312,0.50116
213977,turkey or chicken sandwich,222284,5,464972,2007-04-12,"['15-minutes-or-less', 'time-to-make', 'course...","[206.1, 13.0, 18.0, 19.0, 7.0, 6.0, 9.0]",6,['put the turkey / chicken on your bottom piec...,"a quick yummy sandwich, that i know you'll love!",...,4,turkey/or chicken sandwich,"[""chopped chicken (amount to your liking) or t...","[""Put the turkey/chicken on your bottom piece ...",www.food.com/recipe/turkey-or-chicken-sandwich...,Recipes1M,"[chicken, chedder cheese, swiss cheese, dressi...",www.food.com,0.051144,0.402822
140716,nice easy italian chicken,392001,360,883095,2009-09-28,"['course', 'main-ingredient', 'preparation', '...","[459.9, 17.0, 80.0, 40.0, 112.0, 13.0, 10.0]",4,['combine all ingredients except pasta in slow...,this is a perfect busy weeknight meal that can...,...,7,nice easy italian chicken,"[""4 bonelelss skinless chicken breasts"", ""250 ...","[""Combine all ingredients except pasta in slow...",www.food.com/recipe/nice-easy-italian-chicken-...,Recipes1M,"[chicken breasts, mushrooms, green bell pepper...",www.food.com,0.034274,0.335119
226208,zucchini and mushroom skillet,121202,35,35526,2005-05-09,"['60-minutes-or-less', 'time-to-make', 'course...","[43.2, 0.0, 19.0, 0.0, 6.0, 0.0, 2.0]",5,['cut zucchini in half and then into 1 inch pi...,"low carb, delicious and simple to make.",...,7,zucchini and mushroom skillet,"[""2 medium zucchini"", ""8 ounces fresh mushroom...","[""Cut zucchini in half and then into 1 inch pi...",www.food.com/recipe/zucchini-and-mushroom-skil...,Gathered,"[zucchini, mushrooms, onion, olive oil, garlic...",www.food.com,0.127436,0.328996
132020,mexican zucchini,231112,40,481092,2007-05-29,"['60-minutes-or-less', 'time-to-make', 'course...","[56.1, 2.0, 14.0, 12.0, 6.0, 1.0, 3.0]",6,['saut onion and mushrooms in skillet with oil...,low fat and delicious. rotel makes everything ...,...,6,mexican zucchini,"[""3 medium zucchini, sliced into discs"", ""1/2 ...","[""Saute onion and mushrooms in skillet with oi...",www.food.com/recipe/mexican-zucchini-231112,Gathered,"[zucchini, onion, olive oil, mushrooms, Tomato...",www.food.com,0.155318,0.328054
39985,cheesy vegetable pasta,160794,35,9717,2006-03-20,"['course', 'main-ingredient', 'side-dishes', '...","[387.0, 22.0, 17.0, 8.0, 32.0, 24.0, 16.0]",5,"['heat oil in saucepan over medium heat', 'add...",another great pasta recipe. from taste of home.,...,6,cheesy vegetable pasta,"[""1 tablespoon olive oil"", ""2 medium zucchini,...","[""Heat oil in saucepan over medium heat."", ""Ad...",www.food.com/recipe/cheesy-vegetable-pasta-160794,Recipes1M,"[olive oil, zucchini, vegetable oil, zucchini,...",www.food.com,0.103365,0.32252
131658,mexican pasta sopa,144607,30,257358,2005-11-12,"['30-minutes-or-less', 'time-to-make', 'course...","[284.2, 15.0, 14.0, 38.0, 17.0, 12.0, 13.0]",8,"['heat the olive oil in a 2 qt sauce pan', 'pu...",i put sopa in the title because that is what m...,...,8,mexican pasta (sopa?),"[""2 cups of uncooked pasta (I use small pasta ...","[""Heat the olive oil in a 2 qt sauce pan."", ""P...",www.food.com/recipe/mexican-pasta-sopa-144607,Gathered,"[pasta, olive oil, onion, garlic, salt, tomato...",www.food.com,0.082181,0.321138
182486,sicilian lentil pasta sauce,49597,100,59064,2002-12-22,"['weeknight', 'time-to-make', 'course', 'main-...","[146.7, 2.0, 32.0, 19.0, 18.0, 1.0, 8.0]",12,"['in a large pot , heat oil over medium heat',...",this is a great chunky vegetarian pasta sauce ...,...,10,sicilian lentil pasta sauce,"[""2 teaspoons olive oil"", ""1 cup onion, choppe...","[""In a large pot, heat oil over medium heat."",...",www.food.com/recipe/sicilian-lentil-pasta-sauc...,Gathered,"[olive oil, onion, mushrooms, zucchini, garlic...",www.food.com,0.130618,0.319552


In [189]:
output['ingredients_x'].iloc[0]

['boneless skinless chicken breasts',
 'cherry tomatoes',
 'red bell pepper',
 'red onion',
 'butter',
 'garlic cloves',
 'flour',
 'sour cream',
 'milk',
 'lemon juice',
 'whole wheat bow tie pasta',
 'feta cheese']

In [190]:
merged_out = output_name.merge(output[['id']], left_on = 'id', right_on='id', how='inner')
merged_out

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,n_ingredients,title,ingredients_y,directions,link,source,NER,website,n_score,score
0,bow ties with zucchini in lemon garlic sauce,190435,30,142878,2006-10-14,"['30-minutes-or-less', 'time-to-make', 'course...","[584.4, 36.0, 37.0, 4.0, 35.0, 57.0, 25.0]",11,['cook pasta in large sacuepan of boiling wate...,this is a great vegetarian recipe from the aus...,...,10,bow ties with zucchini in lemon garlic sauce,"[""375 g bow tie pasta"", ""3 medium yellow zucch...","[""Cook pasta in large sacuepan of boiling wate...",www.food.com/recipe/bow-ties-with-zucchini-in-...,Gathered,"[""pasta"", ""yellow zucchini"", ""green zucchini"",...",www.food.com,0.550898,0.641703
1,bow tie pasta with zucchini tomato and basil,71851,25,95743,2003-09-30,"['30-minutes-or-less', 'time-to-make', 'course...","[454.8, 28.0, 17.0, 7.0, 34.0, 38.0, 18.0]",9,"['boil pasta in salted water until al dente', ...","this meatless dish is fast, easy, healthy and ...",...,7,"bow tie pasta with zucchini, tomato and basil","[""10 ounces bow tie pasta"", ""2 small zucchini,...","[""Boil pasta in salted water until al dente; d...",www.food.com/recipe/bow-tie-pasta-with-zucchin...,Gathered,"[""pasta"", ""zucchini"", ""salt"", ""olive oil"", ""go...",www.food.com,0.543916,0.699601
2,chicken bow tie pasta,99986,45,162774,2004-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[909.9, 99.0, 7.0, 27.0, 73.0, 89.0, 15.0]",11,['cook chicken breast in 2 tbsp of oil until l...,this is one of my 11 year olds favorite recipe...,...,9,chicken bow tie pasta,"[""1/2 cup margarine"", ""1 cup half-and-half"", ""...","[""Cook chicken breast in 2 TBSP of oil until l...",www.food.com/recipe/chicken-bow-tie-pasta-99986,Gathered,"[""margarine"", ""garlic"", ""pasta"", ""chicken brea...",www.food.com,0.526300,0.517247
3,bow tie pasta and vodka sauce,156582,15,1634,2006-02-20,"['15-minutes-or-less', 'time-to-make', 'course...","[583.7, 45.0, 33.0, 34.0, 15.0, 78.0, 14.0]",8,['cook the pasta in at least 2 quarts of salte...,this solo side dish can also be an excellent c...,...,8,bow tie pasta and vodka sauce,"[""1 cup bow tie pasta, uncooked"", ""1/3 cup oni...","[""Cook the pasta in at least 2 quarts of salte...",www.food.com/recipe/bow-tie-pasta-and-vodka-sa...,Gathered,"[""pasta"", ""onion"", ""tomato sauce"", ""vodka"", ""b...",www.food.com,0.459191,0.739207
4,bow ties with chicken and spinach,159958,18,55221,2006-03-14,"['30-minutes-or-less', 'time-to-make', 'course...","[529.1, 28.0, 13.0, 27.0, 61.0, 23.0, 19.0]",7,"['boil the pasta until done', 'meanwhile , coo...","a quick, tasty, and satisfying pasta dish.",...,12,bow ties with chicken and spinach,"[""16 ounces bow tie pasta"", ""1 lb chicken brea...","[""Boil the pasta until done."", ""Meanwhile, coo...",www.food.com/recipe/bow-ties-with-chicken-and-...,Gathered,"[""pasta"", ""chicken breast"", ""olive oil"", ""onio...",www.food.com,0.445603,0.667834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,angel hair pasta with tomato lemon and garlic,248052,10,84272,2007-08-22,"['15-minutes-or-less', 'time-to-make', 'course...","[248.1, 4.0, 9.0, 5.0, 19.0, 4.0, 15.0]",6,['put the olive oil and garlic in a saute pan ...,"this is a nice, light summer dish. it's easy ...",...,8,angel hair pasta with tomato lemon and garlic,"[""1 teaspoon olive oil"", ""2 garlic cloves"", ""1...","[""Put the olive oil and garlic in a saute pan ...",www.food.com/recipe/angel-hair-pasta-with-toma...,Recipes1M,"[""olive oil"", ""garlic"", ""chicken broth"", ""toma...",www.food.com,0.198120,0.245408
87,doctored pasta sauce tomato,293142,55,720912,2008-03-20,"['60-minutes-or-less', 'time-to-make', 'course...","[143.5, 11.0, 33.0, 22.0, 5.0, 5.0, 4.0]",8,"['chop onions and garlic', 'saute onions and g...",easy sauce and it tastes like you cooked all d...,...,7,doctored pasta sauce -- tomato,"[""26 ounces tomato and basil pasta sauce (jar ...","[""Chop onions and garlic."", ""Saute onions and ...",www.food.com/recipe/doctored-pasta-sauce-tomat...,Gathered,"[""tomato"", ""olive oil"", ""sweet onion"", ""garlic...",www.food.com,0.197747,0.370428
88,roasted tomato sauce,349956,45,1063418,2009-01-15,"['60-minutes-or-less', 'time-to-make', 'main-i...","[123.3, 14.0, 24.0, 1.0, 3.0, 6.0, 3.0]",13,"['preheat oven to 160 degrees celsius', 'place...",i love the rich taste of this tomato sauce whi...,...,7,roasted tomato sauce,"[""1 kg red ripe tomato, halved and cored"", ""14...","[""Preheat oven to 160 degrees celsius."", ""Plac...",www.food.com/recipe/roasted-tomato-sauce-349956,Recipes1M,"[""red ripe tomato"", ""olive oil"", ""salt"", ""brow...",www.food.com,0.195433,0.265143
89,tuna zucchini pasta shells,278198,75,471300,2008-01-12,"['time-to-make', 'course', 'main-ingredient', ...","[413.1, 51.0, 20.0, 18.0, 38.0, 83.0, 3.0]",14,"['cook the shells until al dente , set aside t...",this is a recipe that came in a basket of food...,...,12,tuna-zucchini pasta shells,"[""16 big pasta shells"", ""1 small zucchini"", ""2...","[""Cook the shells until al dente, set aside to...",www.food.com/recipe/tuna-zucchini-pasta-shells...,Recipes1M,"[""pasta shells"", ""zucchini"", ""extra virgin oli...",www.food.com,0.194023,0.253175


In [193]:
merged_out['ingredients_y'][0]

'["375 g bow tie pasta", "3 medium yellow zucchini (360g)", "3 medium green zucchini (360g)", "30 g butter", "1 tablespoon olive oil", "2 garlic cloves, crushed", "1/3 cup vegetable stock", "1/2 cup cream", "2 teaspoons lemon rind, finely grated (I used a tablespoon of lemon juice in its place)", "1/3 cup fresh chives, coarsely chopped"]'

In [194]:
merged_out['ingredients_x'].iloc[0]

['bow tie pasta',
 'yellow zucchini',
 'green zucchini',
 'butter',
 'olive oil',
 'garlic cloves',
 'vegetable stock',
 'cream',
 'lemon rind',
 'fresh chives']

In [162]:
## from https://github.com/vishwapardeshi/NL_Parser_using_Spacy/tree/master

remove_words = ['ground','to','taste', 'and', 'or', 'powder','white','red','green','yellow', 'can', 'seed', 'into', 'cut', 'grated',\
                'leaf','package','finely','divided','a','piece','optional','inch','needed','more','drained','for','flake','juice','dry','breast',\
                'extract','yellow','thinly','boneless','skinless','cubed','bell','bunch','cube','slice','pod','beaten','seeded','broth','uncooked',\
                'root','plain','baking','heavy','halved','crumbled','sweet','with','hot','confectioner','room','temperature','trimmed',\
                'all-purpose','sauce','crumb','deveined','bulk','seasoning','jar','food','sundried','italianstyle','if','bag','mix','in',\
                'each','roll','instant','double','such','extra-virgin','frying','thawed','whipping','stock','rinsed','mild','sprig','brown',\
                'freshly','toasted','link','boiling','cooked','basmati','unsalted','container','split','cooking','thin','lengthwise','warm',\
                'softened','thick','quartered','juiced','pitted','chunk','melted','cold','coloring','puree','cored','stewed',\
                'floret','coarsely','the','clarified','blanched','zested','sweetened','powdered','longgrain','garnish','indian','dressing',\
                'soup','at','active','french','lean','chip','sour','condensed','long','smoked','ripe','skinned','fillet','from','stem','flaked',\
                'removed','zest','stalk','unsweetened','baby','cover','crust', 'extra', 'prepared', 'blend', 'of', 'ring','plus','firmly', 'packed',\
                'lightly','level','even','rounded','heaping','heaped','sifted','bushel','peck','stick','chopped','sliced','halves', 'shredded',\
                'slivered','sliced','whole','paste','whole',' fresh', 'peeled', 'diced','mashed','dried','frozen','fresh','peeled','candied',\
                'no', 'pulp','crystallized','canned','crushed','minced','julienned','clove','head', 'small','large','medium', 'good', 'quality', \
                'freshly']

In [172]:
cleaned = []
for row in data['ingredients_x']:
    cleaned_ig = []
    for ing in row:
        ing = [word for word in ing.split() if word not in remove_words]
        cleaned_ig.append(' '.join(ing))
    cleaned.append(cleaned_ig)

In [176]:
data['ingredients_x']

0         [winter squash, mexican seasoning, mixed spice...
1         [prepared pizza crust, sausage patty, eggs, mi...
3         [spreadable cheese with garlic and herbs, new ...
4         [tomato juice, apple cider vinegar, sugar, sal...
5         [milk, vanilla ice cream, frozen apple juice c...
                                ...                        
226884    [mayonnaise, prepared horseradish, worcestersh...
226885    [white rice, vegetable oil, onion, green bell ...
226888    [hard-cooked eggs, mayonnaise, dijon mustard, ...
226889    [butter, eagle brand condensed milk, light bro...
226890    [granulated sugar, shortening, eggs, flour, cr...
Name: ingredients_x, Length: 189160, dtype: object

In [183]:
data['NER']

0         [winter, seasoning, mixed spice, honey, butter...
1         [pizza crust, sausage patty, eggs, milk, salt,...
3         [spreadable cheese with garlic, new potatoes, ...
4         [tomato juice, apple cider vinegar, sugar, sal...
5         [milk, vanilla ice cream, apple juice concentr...
                                ...                        
226884    [mayonnaise, horseradish, Worcestershire sauce...
226885    [white rice, vegetable oil, onion, green bell ...
226888    [eggs, mayonnaise, Dijon mustard, mustard, sal...
226889    [butter, Milk, brown sugar, sour cream, egg, c...
226890    [sugar, shortening, eggs, flour, cream of tart...
Name: NER, Length: 189160, dtype: object

In [173]:
cleaned

[['winter squash',
  'mexican',
  'mixed spice',
  'honey',
  'butter',
  'olive oil',
  'salt'],
 ['pizza', 'sausage patty', 'eggs', 'milk', 'salt pepper', 'cheese'],
 ['spreadable cheese garlic herbs',
  'new potatoes',
  'shallots',
  'parsley',
  'tarragon',
  'olive oil',
  'wine vinegar',
  'salt',
  'pepper',
  'pepper',
  'pepper'],
 ['tomato',
  'apple cider vinegar',
  'sugar',
  'salt',
  'pepper',
  'oil',
  'cinnamon oil',
  'mustard'],
 ['milk', 'vanilla ice cream', 'apple concentrate', 'apple'],
 ['fennel seeds',
  'olives',
  'olives',
  'garlic',
  'peppercorn',
  'orange rind',
  'orange',
  'chile',
  'virgin olive oil'],
 ['chocolate sandwich style cookies',
  'chocolate syrup',
  'vanilla ice cream',
  'bananas',
  'strawberry ice cream',
  'whipped cream'],
 ['sugar',
  'butter',
  'bananas',
  'eggs',
  'lemon',
  'orange rind',
  'cake flour',
  'soda',
  'salt'],
 ['berry cranberry', 'cream', 'horseradish'],
 ['vanilla wafers',
  'butter',
  'sugar',
  'eggs',


In [171]:
cleaned_ig

[['winter', 'squash'],
 ['mexican'],
 ['mixed', 'spice'],
 ['honey'],
 ['butter'],
 ['olive', 'oil'],
 ['salt'],
 ['pizza'],
 ['sausage', 'patty'],
 ['eggs'],
 ['milk'],
 ['salt', 'pepper'],
 ['cheese'],
 ['spreadable', 'cheese', 'garlic', 'herbs'],
 ['new', 'potatoes'],
 ['shallots'],
 ['parsley'],
 ['tarragon'],
 ['olive', 'oil'],
 ['wine', 'vinegar'],
 ['salt'],
 ['pepper'],
 ['pepper'],
 ['pepper'],
 ['tomato'],
 ['apple', 'cider', 'vinegar'],
 ['sugar'],
 ['salt'],
 ['pepper'],
 ['oil'],
 ['cinnamon', 'oil'],
 ['mustard'],
 ['milk'],
 ['vanilla', 'ice', 'cream'],
 ['apple', 'concentrate'],
 ['apple'],
 ['fennel', 'seeds'],
 ['olives'],
 ['olives'],
 ['garlic'],
 ['peppercorn'],
 ['orange', 'rind'],
 ['orange'],
 ['chile'],
 ['virgin', 'olive', 'oil'],
 ['chocolate', 'sandwich', 'style', 'cookies'],
 ['chocolate', 'syrup'],
 ['vanilla', 'ice', 'cream'],
 ['bananas'],
 ['strawberry', 'ice', 'cream'],
 ['whipped', 'cream'],
 ['sugar'],
 ['butter'],
 ['bananas'],
 ['eggs'],
 ['lemon']

In [69]:
query = 'pasta, olive oil, tomato sauce, chedder, mushrooms, zucchini, chicken, onion, garlic, salt, sugar, pepper'

'pasta, olive oil, tomato sauce, chedder, mushrooms, zucchini, chicken, onion, garlic, salt, sugar, pepper'

In [143]:
data['ingredients_x']

0         [winter squash, mexican seasoning, mixed spice...
1         [prepared pizza crust, sausage patty, eggs, mi...
3         [spreadable cheese with garlic and herbs, new ...
4         [tomato juice, apple cider vinegar, sugar, sal...
5         [milk, vanilla ice cream, frozen apple juice c...
                                ...                        
226884    [mayonnaise, prepared horseradish, worcestersh...
226885    [white rice, vegetable oil, onion, green bell ...
226888    [hard-cooked eggs, mayonnaise, dijon mustard, ...
226889    [butter, eagle brand condensed milk, light bro...
226890    [granulated sugar, shortening, eggs, flour, cr...
Name: ingredients_x, Length: 189160, dtype: object

## Must contain

In [158]:
out = []
for row in data.iterrows():
#     print(row)
    ingredients = row[1]['ingredients_x']
    flag = True
    for item in ingredients:
        flag = (flag and (item in query))
    
    if flag == True:
        out.append(row)

In [153]:
True and ('flour' in query)

False

In [160]:
display(out)

[(35054,
  name                              caramelized garlic and garlic oil
  id                                                           186303
  minutes                                                          25
  contributor_id                                               290010
  submitted                                                2006-09-14
  tags              ['30-minutes-or-less', 'time-to-make', 'course...
  nutrition                      [25.1, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0]
  n_steps                                                          12
  steps             ['arrange the garlic cloves in a frying pan la...
  description       i found this recipe in peter reinhart's book a...
  ingredients_x                                   [garlic, olive oil]
  n_ingredients                                                     2
  title                             caramelized garlic and garlic oil
  ingredients_y     ["10 heads garlic, cloves separated and peeled...
  direction