In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
import spacy 


In [2]:
# Load the two CSV tables used below: df1 holds the recipes (it is merged on
# `id` later) and df2 holds the interactions (it supplies `recipe_id` and
# `rating` for the per-recipe average).
df1 = pd.read_csv('RAW_recipes.csv')
df2 = pd.read_csv('RAW_interactions.csv')

# Load the custom-trained NER pipeline from its output directory.
# Forward slashes are used on purpose: they resolve on Windows as well as on
# Linux/macOS, whereas the previous '.\NER\output\model-best' spelling is only
# a valid path on Windows.
nlp_NER = spacy.load('./NER/output/model-best')

In [3]:
# Mean rating per recipe as a one-column DataFrame; the index is renamed from
# `recipe_id` to `id`.
rating = (
    df2.groupby('recipe_id')['rating']
    .mean()
    .to_frame()
    .rename_axis('id')
)
rating

Unnamed: 0_level_0,rating
id,Unnamed: 1_level_1
38,4.250000
39,3.000000
40,4.333333
41,4.500000
43,1.000000
...,...
537459,5.000000
537485,5.000000
537543,0.000000
537671,0.000000


In [39]:
# Labels for the seven values held in each parsed `nutrition` entry, in order.
nutrition_columns = [
    'calories',
    'total_fat (%DV)',
    'sugar (%DV)',
    'sodium (%DV)',
    'protein (%DV)',
    'saturated_fat (%DV)',
    'total_carbohydrate (%DV)',
]

# Attach the mean rating to each recipe (merge on the shared `id` key).
df = df1.merge(rating, on='id')

# Parse every `nutrition` entry with literal_eval, then spread the parsed
# values across the named columns.
df['nutrition'] = df['nutrition'].apply(literal_eval)
df[nutrition_columns] = df['nutrition'].tolist()

# Drop columns that are not used further, discard recipes without a name and
# order the result from highest to lowest rating.
df = (
    df.drop(columns=['contributor_id', 'submitted', 'tags', 'description', 'nutrition'])
    .loc[lambda frame: frame['name'].notnull()]
    .sort_values(by='rating', ascending=False)
)

In [22]:
def _singularize(word):
    """Best-effort plural-to-singular conversion for an ingredient string.

    Rules, applied in order:
      * ``...ies`` -> ``...y`` (``berries`` -> ``berry``); very short words
        such as ``pies`` fall through to the plain ``s`` rule instead.
      * ``es`` is removed only after ``ch``/``sh``/``ss``/``x``/``z``/``o``
        (``peaches`` -> ``peach``, ``potatoes`` -> ``potato``), so regular
        plurals like ``sausages`` keep their ``e``.
      * a trailing ``s`` is removed unless the word ends in ``ss`` or ``us``
        (``asparagus`` and ``couscous`` are left alone).

    This is a heuristic, not a dictionary lookup: e.g. ``cookies`` still
    becomes ``cooky``.
    """
    if len(word) > 4 and word.endswith('ies'):
        return word[:-3] + 'y'
    if word.endswith(('ches', 'shes', 'sses', 'xes', 'zes', 'oes')):
        return word[:-2]
    if word.endswith('s') and not word.endswith(('ss', 'us')):
        return word[:-1]
    return word


def rizz(text):
    """Extract ingredient and style mentions from a free-text question.

    Runs the ``nlp_NER`` pipeline on ``text`` and sorts its entities by label:
    ``ingredient`` entities are lowercased and singularized, ``product``
    entities are kept verbatim, and every other label is ignored.

    Parameters
    ----------
    text : str
        The question to analyse.

    Returns
    -------
    dict
        ``{'ingredients': [...], 'style': [...]}`` in order of appearance.
    """
    doc = nlp_NER(text)
    component = {'ingredients': [], 'style': []}
    for ent in doc.ents:
        if ent.label_ == 'ingredient':
            # Lowercase so the term can match the lowercase recipe text.
            component['ingredients'].append(_singularize(ent.text.lower()))
        elif ent.label_ == 'product':
            component['style'].append(ent.text)

    return component

# Try the extractor on a sample question; the bare name on the last line
# displays the resulting dict.
question = 'What is a filling breakfast recipe with eggs, sausage, and potato?'
compo = rizz(question)
compo

{'ingredients': ['egg', 'sausage', 'potato'], 'style': ['breakfast']}

In [50]:
def to_query(df_, component):
    """Return the rows of ``df_`` whose ``ingredients`` text mentions every requested ingredient.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Table with a string column named ``ingredients``.
    component : dict
        Mapping with an ``'ingredients'`` list of search terms. A ``'style'``
        entry may be present but is not used for filtering. A missing
        ``'ingredients'`` key is treated as "no terms".

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is never modified) containing only the rows
        that match all terms; with no terms, a copy of ``df_``.

    Notes
    -----
    Matching is a plain, case-sensitive substring test, so ``'egg'`` also
    matches ``'eggplant'``.
    """
    # Look the list up by key rather than unpacking .values(), which depended
    # on the dict having exactly two entries in a fixed order.
    wanted = component.get('ingredients', [])

    filtered_df = df_.copy()
    for ingredient in wanted:
        # regex=False: the term is literal text, not a pattern.
        # na=False: rows with a missing ingredients value simply do not match.
        hits = filtered_df['ingredients'].str.contains(
            str(ingredient), regex=False, na=False
        )
        filtered_df = filtered_df[hits]

    return filtered_df

# Filter the recipe table with the components extracted from the sample question.
to_query(df_=df, component=compo)


Unnamed: 0,name,id,minutes,n_steps,steps,ingredients,n_ingredients,rating,calories,total_fat (%DV),sugar (%DV),sodium (%DV),protein (%DV),saturated_fat (%DV),total_carbohydrate (%DV)
125264,loaded spanish omelet bites 5fix,497429,35,13,"['preheat oven to 375 degrees f', 'in a large ...","['chorizo sausage', 'olive oil', 'simply potat...",5,5.0,391.6,52.0,1.0,20.0,38.0,45.0,0.0
115957,just about everything breakfast casserole,343542,105,17,"['melt butter in large skillet', 'brown togeth...","['butter', 'shredded hash brown potatoes', 'br...",13,5.0,693.2,75.0,11.0,59.0,67.0,109.0,10.0
115660,judy s breakfast casserole,386707,40,8,"['grease a 9x13"" baking dish', 'press potatoes...","['hash brown potatoes', 'salt', 'sausage', 'on...",9,5.0,490.9,45.0,9.0,31.0,34.0,50.0,12.0
111857,irish goose with potato stuffing,159074,170,40,"['remove giblets from goose', 'wash well', 'pa...","['goose', 'potatoes', 'salt', 'lean salt pork'...",10,5.0,1115.0,126.0,4.0,49.0,140.0,144.0,6.0
112992,italian sausage potato quiche 5fix,496524,65,10,"['preheat oven to 375 degrees f', 'bake the pi...","['9-inch pie shell', 'italian sausages', 'simp...",5,5.0,783.9,95.0,4.0,57.0,58.0,122.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113069,italian sausage rice balls with marinara sauce...,524962,50,13,['saut onion over medium heat with olive oil i...,"['brown rice', 'cooked italian sausage', 'oliv...",14,0.0,573.6,26.0,43.0,31.0,40.0,29.0,28.0
40649,cheesy potato breakfast burro burritos,319753,40,23,"['dice your potatoes and onions into 1 / 4"" cu...","['eggs', 'milk', 'potatoes', 'white onion', 'v...",15,0.0,1874.7,206.0,21.0,122.0,167.0,240.0,26.0
117652,kielbasa cabbage potato onion diversity ca...,208354,60,18,"['heat oven to 325', 'butter or spray 9 x 13 c...","['polish sausage', 'butter', 'extra virgin oli...",17,0.0,469.6,50.0,38.0,55.0,33.0,75.0,9.0
189025,slow cooker overnight breakfast casserole,512088,510,12,['cook sausage according to package directions...,"['johnsonville hot & spicy breakfast sausage',...",11,0.0,207.4,16.0,4.0,16.0,24.0,25.0,5.0


In [100]:
# Ingredient vocabulary to look up in the recipe table.
# NOTE(review): 'Spinach' and 'Pea' each appear twice; left as-is in case the
# positions in this list matter elsewhere — confirm before de-duplicating.
words = [
    'Dalle', 'Apple', 'Artichoke', 'Winter Melon',
    'Asparagus', 'Avocado', 'Bacon', 'Bamboo Shoot',
    'Banana', 'Bean', 'Beaten Rice', 'Beef', 'Beetroot',
    'Bethu ko Saag', 'Bitter Gourd', 'Black Lentil', 'Black bean',
    'Calabash', 'Bread', 'Eggplant', 'Broad Bean',
    'Broccoli', 'Buffalo Meat', 'Butter', 'Cabbage', 'Capsicum', 'Carrot',
    'Cassava', 'Cauliflower', 'Chayote', 'Cheese',
    'Chicken', 'Chicken Gizzard', 'Chickpea', 'Chili Pepper',
    'Chili Powder', 'Chow mein', 'Cinnamon', 'Coriander',
    'Corn', 'Cornflake', 'Crab Meat', 'Cucumber', 'Egg', 'Pumpkin shoot',
    'Fiddlehead Fern', 'Fish', 'Garden Pea', 'Garden cress',
    'Garlic', 'Ginger', 'Brinjal', 'Green Lentil', 'Mint',
    'Pea', 'Edamame', 'Gundruk', 'Ham', 'Ice',
    'Jackfruit', 'Ketchup', 'laapsi', 'Lemon',
    'Lime', 'Long Bean', 'Masyaura', 'Milk', 'Minced Meat',
    'Moringa Leaves', 'Mushroom', 'Mutton', 'Nutrela Soya Chunk',
    'Okra', 'Olive Oil', 'Onion', 'Green Onion', 'Orange',
    'Spinach', 'Spinach', 'Paneer',
    'Papaya', 'Pea', 'Pear', 'Pointed Gourd', 'Pork',
    'Potato', 'Pumpkin', 'Radish', 'Pigeon pea', 'Mustard Green',
    'Red Bean', 'Red Lentil', 'Rice', 'Moringa Drumstick',
    'Salt', 'Sausage', 'Snake Gourd', 'Soy Sauce', 'Soybean',
    'Luffa', 'Stinging Nettle', 'Strawberry', 'Sugar',
    'Sweet Potato', 'Taro', 'Taro Root',
    'Thukpa Noodles', 'Tofu', 'Tomato', 'Tori ko Saag', 'Tamarillo',
    'Turnip', 'Walnut', 'Watermelon', 'Wheat', 'Yellow Lentil',
    'kimchi', 'mayonnaise', 'noodle', 'seaweed',
]

In [101]:
# Print every vocabulary word whose lowercase form never occurs in any
# recipe's `ingredients` text.
for word in words:
    found = df['ingredients'].str.contains(word.lower()).any()
    if found:
        continue
    print(word)

Dalle
Bethu ko Saag
Calabash
Cornflake
Pumpkin shoot
Garden Pea
Gundruk
laapsi
Masyaura
Moringa Leaves
Nutrela Soya Chunk
Pointed Gourd
Moringa Drumstick
Snake Gourd
Luffa
Stinging Nettle
Thukpa Noodles
Tori ko Saag
