In [2]:
import pandas as pd
import random
import ast

## reading random sample of csv

In [4]:
# Reading only 5% of the dataset because it represents 200 000 lines approximatively which we considered enough for our use case
random.seed(4321)
df=pd.read_csv('/kaggle/input/recipenlg/RecipeNLG_dataset.csv',
               converters={'NER': ast.literal_eval,'ingredients':ast.literal_eval,'directions':ast.literal_eval}, skiprows=lambda x: x > 0 and random.random() >=0.05)

In [5]:
print(len(df))

111636


In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER
0,6,Rhubarb Coffee Cake,"[1 1/2 c. sugar, 1/2 c. butter, 1 egg, 1 c. bu...","[Cream sugar and butter., Add egg and beat wel...",www.cookbooks.com/Recipe-Details.aspx?id=210288,Gathered,"[sugar, butter, egg, buttermilk, flour, salt, ..."
1,50,Chicken Ole,"[4 chicken breasts, cooked, 1 can cream of chi...","[Dice chicken., Mix all ingredients together.,...",www.cookbooks.com/Recipe-Details.aspx?id=445786,Gathered,"[chicken breasts, cream of chicken soup, cream..."
2,87,Fast Real Good Fudge,"[4 lb. confectioners sugar, 1 c. cocoa, 1 c. c...","[In large bowl, put confectioners sugar, cocoa...",www.cookbooks.com/Recipe-Details.aspx?id=505741,Gathered,"[confectioners sugar, cocoa, nuts, margarine, ..."
3,93,Chicken Roll-Ups,"[1 (8 oz.) can crescent rolls, 1 1/2 c. grated...",[Unroll crescent rolls; add cooked chicken and...,www.cookbooks.com/Recipe-Details.aspx?id=449613,Gathered,"[crescent rolls, cheese, hen, cream of chicken..."
4,99,Brickle Bars,"[1 box yellow cake mix, 2 eggs, 1/3 c. soft ma...","[Mix together with a fork:, cake mix, 1 egg an...",www.cookbooks.com/Recipe-Details.aspx?id=351194,Gathered,"[yellow cake mix, eggs, margarine, milk, pecans]"


In [7]:
print(type(df['ingredients'][0]))

<class 'list'>


In [8]:
count=df.explode('NER')

In [9]:
distinct=len(pd.unique(count['NER']))

In [10]:
distinct

34750

## From seperated ingredients to sentence ingredients to feed to Word2vec

In [11]:
df['sentence_ingredients']=df['NER'].apply(lambda x:' '.join(x))

In [13]:
df['sentence_ingredients']

0         sugar butter egg buttermilk flour salt soda bu...
1         chicken breasts cream of chicken soup cream of...
2         confectioners sugar cocoa nuts margarine Velve...
3         crescent rolls cheese hen cream of chicken sou...
4                yellow cake mix eggs margarine milk pecans
                                ...                        
111631    tomatoes zucchini Stove Top Stuffing Mix chedd...
111632    chicken broth water orzo pasta fresh asparagus...
111633    lemon juice Italian salad dressing salad oil c...
111634                               Chicken koji Olive oil
111635                                Eggs Water Water Salt
Name: sentence_ingredients, Length: 111636, dtype: object

In [13]:
https://medium.com/red-buffer/doc2vec-computing-similarity-between-the-documents-47daf6c828cd

In [14]:
pip install gensim==4.0.1

[0mNote: you may need to restart the kernel to use updated packages.


## Building Doc2vec model

In [3]:
import gensim
import nltk
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
from gensim.models.doc2vec import Doc2Vec



In [16]:
tagged_data = [TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) for i, _d in enumerate(df['sentence_ingredients'])]

In [None]:
print (tagged_data)

In [19]:
# We chose vector_size=30 because our documents are very small: they are sentence of ingredients.
model = gensim.models.doc2vec.Doc2Vec(vector_size=30, min_count=2, epochs=80)

In [20]:
model.build_vocab(tagged_data)

In [21]:
model.train(tagged_data, total_examples=model.corpus_count, epochs=80)

In [22]:
model.save("d2v.model")

## Saving CSV for later inferences

In [18]:
df=pd.read_csv('dataset_nlp.csv',converters={'NER': ast.literal_eval,'ingredients':ast.literal_eval,'directions':ast.literal_eval})

## Loading Trained model

In [7]:
model = Doc2Vec.load("d2v.model")

## Trying dataset sample inference

In [24]:
similar_doc = model.docvecs.most_similar('0')
print(similar_doc[0])

('19184', 0.8977519869804382)


  """Entry point for launching an IPython kernel.


In [29]:
tagged_data[0]


TaggedDocument(words=['sugar', 'butter', 'egg', 'buttermilk', 'flour', 'salt', 'soda', 'buttermilk', 'rhubarb', 'vanilla'], tags=['0'])

In [7]:
df.head(1)

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,sentence_ingredients
0,6,Rhubarb Coffee Cake,"['1 1/2 c. sugar', '1/2 c. butter', '1 egg', '...","['Cream sugar and butter.', 'Add egg and beat ...",www.cookbooks.com/Recipe-Details.aspx?id=210288,Gathered,"['sugar', 'butter', 'egg', 'buttermilk', 'flou...",sugar butter egg buttermilk flour salt soda bu...


In [28]:
tagged_data[19184]

TaggedDocument(words=['brown', 'sugar', 'shortening', 'egg', 'salt', 'buttermilk', 'vanilla', 'flour', 'rhubarb'], tags=['19184'])

In [36]:
df.iloc[[19184]]

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,sentence_ingredients
19184,385296,Rhubarb Cake,"[1 1/2 c. brown sugar, 1/2 c. shortening, 1 eg...","[Mix ingredients (except flour and rhubarb), t...",www.cookbooks.com/Recipe-Details.aspx?id=1064570,Gathered,"[brown sugar, shortening, egg, salt, buttermil...",brown sugar shortening egg salt buttermilk van...


## User sentence inference

In [8]:
inference="Hi i have garlic, rice, tuna, and parsley"

#Using Gensim simple_preprocess function to preprocess user_sentences
inference=gensim.utils.simple_preprocess(inference, deacc=True)


#Creating a lambda function to keep only nouns
is_noun = lambda pos: pos[:2] == 'NN'

#keeping only nouns as ingredients are all nouns
nouns = [word for (word, pos) in nltk.pos_tag(inference) if is_noun(pos)] 
nouns = set(nouns)
print(list(nouns))

#Getting a vector out of user_sentence with our previously trained model
v1 = model.infer_vector(list(nouns))
print("V1_infer", v1)

['parsley', 'hi', 'rice', 'tuna']
V1_infer [-0.7002352  -0.5583579  -0.69221276 -1.1413443   0.42203835  0.7594628
 -0.971392    0.02627951 -0.20771532 -1.259438   -0.40509945  0.07357277
  1.0832782   0.5588141   0.6541464   0.17353287  1.152601   -0.25852248
 -0.5073473  -0.00888474 -0.37436202  0.43274587  0.36300123 -1.5919521
  0.39835835 -0.73046196 -0.05922333  0.21414538  0.5444254  -0.19825031]


In [9]:
#Performing most similar recipe (in the base of ingredient list)
similar_docs = model.docvecs.most_similar(positive=[v1])

  similar_docs = model.docvecs.most_similar(positive=[v1])


In [10]:
print(similar_docs)

[('45412', 0.8034965395927429), ('62961', 0.7637102007865906), ('154', 0.750877857208252), ('84023', 0.7486373782157898), ('66570', 0.7395302653312683), ('97677', 0.737420380115509), ('28322', 0.7360421419143677), ('24806', 0.7329263091087341), ('2384', 0.7326154112815857), ('27466', 0.7263097167015076)]


In [11]:
indexes=[int(i[0]) for i in similar_docs]


In [12]:
print(indexes)

[45412, 62961, 154, 84023, 66570, 97677, 28322, 24806, 2384, 27466]


In [13]:
pd.set_option("display.max_colwidth", 100)

In [14]:
#Displaying most similar recipes
for i in indexes:
    print(df.iloc[[int(i)]][['link','NER']])

                                                      link  \
45412  www.allrecipes.com/recipe/17696/shrimp-creole-bake/   

                                                                                                       NER  
45412  ['water', 'white rice', 'onions', 'mushrooms', 'carrots', 'green bell pepper', 'celery', 'zucchi...  
                                          link  \
62961  recipes-plus.com/api/v2.0/recipes/20997   

                                                                                                       NER  
62961  ['mayonnaise', 'garlic', 'lemon juice', 'shrimp', 'extra virgin olive oil', 'paprika', 'lemon pe...  
                                                 link  \
154  www.cookbooks.com/Recipe-Details.aspx?id=1017901   

                                                                                                     NER  
154  ['shrimp', 'onion', 'pepper', 'clove garlic', 'tomatoes', 'celery', 'lemon', 'salt', 'sugar', 'p...  
         

In [22]:
new_df=df.iloc[indexes]
print(nouns)
new_df['missing_ingredients']=new_df['NER'].apply(lambda lst: [ingredient for ingredient in lst if ingredient not in nouns])
new_df

{'parsley', 'hi', 'rice', 'tuna'}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['missing_ingredients']=new_df['NER'].apply(lambda lst: [ingredient for ingredient in lst if ingredient not in nouns])


Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,sentence_ingredients,missing_ingredients
45412,910619,Shrimp Creole Bake,"[2 cups water, 1 1/4 cups uncooked white rice, 2 onions, diced, 2 cups sliced mushrooms, 5 carro...","[In a saucepan bring salted water to a boil. Add rice, reduce heat, cover and simmer for 20 minu...",www.allrecipes.com/recipe/17696/shrimp-creole-bake/,Gathered,"[water, white rice, onions, mushrooms, carrots, green bell pepper, celery, zucchini, tomato sauc...",water white rice onions mushrooms carrots green bell pepper celery zucchini tomato sauce tomatoe...,"[water, white rice, onions, mushrooms, carrots, green bell pepper, celery, zucchini, tomato sauc..."
62961,1257298,Spanish-Style Grilled Seafood With Garlic Mayo,"[3/4 cup mayonnaise, 3 cloves garlic, crushed, 1 tbsp lemon juice, 1 lb large shrimp, peeled and...","[For the mayonnaise, mix mayonnaise, 1/3 of the crushed garlic and lemon juice in a small bowl. ...",recipes-plus.com/api/v2.0/recipes/20997,Gathered,"[mayonnaise, garlic, lemon juice, shrimp, extra virgin olive oil, paprika, lemon peel, fish, zuc...",mayonnaise garlic lemon juice shrimp extra virgin olive oil paprika lemon peel fish zucchini red...,"[mayonnaise, garlic, lemon juice, shrimp, extra virgin olive oil, paprika, lemon peel, fish, zuc..."
154,2855,Shrimp With Tomato Sauce,"[1 (10 oz.) pkg. frozen medium shrimp, 1 medium onion, chopped, 1 jalapeno pepper (optional), 1 ...","[Remove stem, seeds and membrane from jalapeno; chop., Cook and stir pepper, onion and garlic in...",www.cookbooks.com/Recipe-Details.aspx?id=1017901,Gathered,"[shrimp, onion, pepper, clove garlic, tomatoes, celery, lemon, salt, sugar, pepper, parsley, ric...",shrimp onion pepper clove garlic tomatoes celery lemon salt sugar pepper parsley rice butter,"[shrimp, onion, pepper, clove garlic, tomatoes, celery, lemon, salt, sugar, pepper, butter]"
84023,1676910,Poor Mans' Kedgeree,"[1 -2 cup cooked flaked fish (or to taste, we use 2 or 3 cans tuna), 4 cups cooked white rice, h...","[Mix first 8 ingredients together thoroughly., Now this is where a lot of recipes differ., This ...",www.food.com/recipe/poor-mans-kedgeree-183477,Recipes1M,"[fish, white rice, eggs, green peas, salt, pepper, garlic salt, parsley, margarine]",fish white rice eggs green peas salt pepper garlic salt parsley margarine,"[fish, white rice, eggs, green peas, salt, pepper, garlic salt, margarine]"
66570,1327600,Cindy'S Seafood Stew,"[6 Tbsp olive oil, 1 large onion, chopped, 4 stalks of celery, 1/4 inch slices, 2 green peppers,...","[Place chopped onions, celery, green peppers and olive oil in large soup pot. Cook over medium h...",www.epicurious.com/recipes/member/views/cindys-seafood-stew-50121223,Gathered,"[olive oil, onion, stalks of celery, green peppers, bacon, tomatoes, tomatoe sauce, sweet red pe...",olive oil onion stalks of celery green peppers bacon tomatoes tomatoe sauce sweet red pepper sou...,"[olive oil, onion, stalks of celery, green peppers, bacon, tomatoes, tomatoe sauce, sweet red pe..."
97677,1949353,Easy Chunky Sweet Chilli Pumpkin Stir-Fry,"[3 tablespoons oil, 4 tablespoons water, 500 -750 g pumpkin (a small or medium sized one), 100 -...","[Chop the pumpkin, eggplant, bok choy and tomatoes into 2 inch-size chunks., Keep in separate bo...",www.food.com/recipe/easy-chunky-sweet-chilli-pumpkin-stir-fry-136876,Recipes1M,"[oil, water, eggplants, choy, tomatoes, garlic, sweet chili sauce, ginger, fish sauce, parsley, ...",oil water eggplants choy tomatoes garlic sweet chili sauce ginger fish sauce parsley oregano salt,"[oil, water, eggplants, choy, tomatoes, garlic, sweet chili sauce, ginger, fish sauce, oregano, ..."
28322,568439,Tuna And Rice Dinner,"[1 Tbsp. oleo, 1 cup chopped onions, 1 package frozen peas, defrosted 10 oz., 1 1/2 cup milk, 1 ...","[Melt oleo in skillet on medium heat., Add onion, cook and stir until tender, but not browned., ...",www.cookbooks.com/Recipe-Details.aspx?id=966100,Gathered,"[oleo, onions, frozen peas, milk, cream of chicken soup, pepper, tuna, parsley, rice]",oleo onions frozen peas milk cream of chicken soup pepper tuna parsley rice,"[oleo, onions, frozen peas, milk, cream of chicken soup, pepper]"
24806,496618,Crusty Tuna Surprise,"[1 can cheese soup, 1/2 c. milk, 2 c. cooked rice, 2 cans tuna, 1/4 c. chopped parsley, 3/4 c. c...","[Combine cheese, soup and milk., In a greased 1 1/2-quart casserole, arrange layers of rice, tun...",www.cookbooks.com/Recipe-Details.aspx?id=161542,Gathered,"[cheese soup, milk, rice, tuna, parsley, corn flake crumbs]",cheese soup milk rice tuna parsley corn flake crumbs,"[cheese soup, milk, corn flake crumbs]"
2384,48240,Shrimp Delight,"[3/4 c. margarine, 1 onion, chopped, 1 bell pepper, chopped, 2 c. raw rice, 1 can Ro-Tel tomatoe...","[Preheat oven to 350°., Melt margarine in casserole dish. Combine all ingredients in dish with m...",www.cookbooks.com/Recipe-Details.aspx?id=551453,Gathered,"[margarine, onion, bell pepper, rice, Ro-Tel tomatoes, onion soup, cream of mushroom soup, shrim...",margarine onion bell pepper rice Ro-Tel tomatoes onion soup cream of mushroom soup shrimp salt,"[margarine, onion, bell pepper, Ro-Tel tomatoes, onion soup, cream of mushroom soup, shrimp, salt]"
27466,550647,Salmon Mousse,"[8 oz. cream cheese, 1 can tomato soup, 2 pkg. plain gelatin, 1/2 c. cold water, 1/4 c. onion, m...","[Warm cheese and soup in saucepan until well blended., Dissolve gelatin in water., Add gelatin, ...",www.cookbooks.com/Recipe-Details.aspx?id=985965,Gathered,"[cream cheese, tomato soup, gelatin, cold water, onion, celery, salmon, mayonnaise, lemon juice,...",cream cheese tomato soup gelatin cold water onion celery salmon mayonnaise lemon juice salt dill,"[cream cheese, tomato soup, gelatin, cold water, onion, celery, salmon, mayonnaise, lemon juice,..."
