In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
import spacy 


In [4]:
df1 = pd.read_csv(r'RAW_recipes.csv')
df2 = pd.read_csv(r'RAW_interactions.csv')
nlp_NER = spacy.load(r'.\NER\output\model-best')

In [5]:
rating = df2.groupby('recipe_id')['rating'].mean()
rating = pd.DataFrame(rating)
rating.index.names = ['id']
rating

Unnamed: 0_level_0,rating
id,Unnamed: 1_level_1
38,4.250000
39,3.000000
40,4.333333
41,4.500000
43,1.000000
...,...
537459,5.000000
537485,5.000000
537543,0.000000
537671,0.000000


In [6]:
df = df1.merge(rating, on= 'id')
df.nutrition = df.nutrition.apply(literal_eval)
df[['calories',
    'total_fat (%DV)',
    'sugar (%DV)',
    'sodium (%DV)',
    'protein (%DV)',
    'saturated_fat (%DV)',
    'total_carbohydrate (%DV)']] = list(n for n in df.nutrition)

df = df.drop(columns= ['contributor_id', 'submitted', 'tags', 'description', 'nutrition'])
df = df[df.name.notnull()]
df = df.sort_values(by= 'rating', ascending= False)

In [8]:
df.to_csv("Recipes.csv", sep='\t', encoding='utf-8', index=False, header=True)

In [7]:
def rizz(text):
    doc = nlp_NER(text)
    component = {'ingredients': [], 'style': []} 
    for ent in doc.ents:
        text = ent.text
        if ent.label_ == 'ingredient':
            if text[-3:] == 'ies':
                text = text.removesuffix('ies')
                text += 'y'
            elif text[-2:] == 'es':
                text = text.removesuffix('es')
            elif text[-1:] == 's':
                text = text.removesuffix('s')
            component['ingredients'].append(text)
            
        elif ent.label_ == 'product':
            component['style'].append(text)
    
    return component

compo = rizz('What is a filling breakfast recipe with eggs, sausages, and potatoes?')
compo

{'ingredients': ['egg', 'sausag', 'potato'], 'style': ['breakfast']}

In [9]:
def to_query(df_, component):
    ingredients, style = component.values()
    filtered_df = df_.copy()
    for ingredient in ingredients:
        filtered_df = filtered_df[filtered_df.ingredients.str.contains(f'{ingredient}')]

    for s in style:
        filtered_df = filtered_df[filtered_df.name.str.contains(f'{s}')]

    return filtered_df

to_query(df, compo)


Unnamed: 0,name,id,minutes,n_steps,steps,ingredients,n_ingredients,rating,calories,total_fat (%DV),sugar (%DV),sodium (%DV),protein (%DV),saturated_fat (%DV),total_carbohydrate (%DV)
115957,just about everything breakfast casserole,343542,105,17,"['melt butter in large skillet', 'brown togeth...","['butter', 'shredded hash brown potatoes', 'br...",13,5.000000,693.2,75.0,11.0,59.0,67.0,109.0,10.0
115660,judy s breakfast casserole,386707,40,8,"['grease a 9x13"" baking dish', 'press potatoes...","['hash brown potatoes', 'salt', 'sausage', 'on...",9,5.000000,490.9,45.0,9.0,31.0,34.0,50.0,12.0
117003,kentucky breakfast casserole,273458,75,15,['place thawed hashbrown potatoes between seve...,"['cooking spray', 'bread', 'eggs', 'evaporated...",10,5.000000,628.2,60.0,5.0,45.0,68.0,89.0,11.0
142782,napa dave s individual breakfast casseroles,314968,45,6,['grease 4 individual sized baking dishes with...,"['country sausage', 'eggs', 'cheddar cheese', ...",5,5.000000,886.9,91.0,10.0,51.0,96.0,125.0,12.0
151317,overnight hash browns breakfast casserole,274324,85,14,['spray a 9 x 13 pan with non-stick cooking sp...,"['breakfast sausage', 'croutons', 'cheddar che...",9,5.000000,801.2,76.0,11.0,73.0,67.0,84.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28634,breakfast sausage pizza,357332,35,6,"['brown and break up sausage , drain well', 'r...","['sausage', 'cheddar cheese', 'hash brown pota...",9,2.000000,354.3,42.0,2.0,31.0,32.0,52.0,3.0
115269,jimmy dean s sausage and egg breakfast pizza,420680,40,14,"['preheat oven to 375', 'in large skillet , co...","['pork sausage', 'refrigerated crescent dinner...",10,1.666667,378.1,36.0,6.0,32.0,36.0,46.0,7.0
81220,everything but the kitchen sink breakfast cass...,463250,65,13,"['preheat oven to 350', 'cook breakfast sausag...","['breakfast sausage', 'red potatoes', 'white b...",10,0.000000,456.8,43.0,16.0,47.0,45.0,57.0,9.0
40649,cheesy potato breakfast burro burritos,319753,40,23,"['dice your potatoes and onions into 1 / 4"" cu...","['eggs', 'milk', 'potatoes', 'white onion', 'v...",15,0.000000,1874.7,206.0,21.0,122.0,167.0,240.0,26.0


In [100]:
words = ['Dalle', 'Apple', 'Artichoke', 'Winter Melon', 
         'Asparagus', 'Avocado', 'Bacon', 'Bamboo Shoot', 
         'Banana', 'Bean', 'Beaten Rice', 'Beef', 'Beetroot', 
         'Bethu ko Saag', 'Bitter Gourd', 'Black Lentil', 'Black bean', 
         'Calabash', 'Bread', 'Eggplant', 'Broad Bean',
         'Broccoli', 'Buffalo Meat', 'Butter', 'Cabbage', 'Capsicum', 'Carrot', 
         'Cassava', 'Cauliflower', 'Chayote', 'Cheese', 
         'Chicken', 'Chicken Gizzard', 'Chickpea', 'Chili Pepper', 
         'Chili Powder', 'Chow mein', 'Cinnamon', 'Coriander', 
         'Corn', 'Cornflake', 'Crab Meat', 'Cucumber', 'Egg', 'Pumpkin shoot', 
         'Fiddlehead Fern', 'Fish', 'Garden Pea', 'Garden cress', 
         'Garlic', 'Ginger', 'Brinjal', 'Green Lentil', 'Mint', 
         'Pea', 'Edamame', 'Gundruk', 'Ham', 'Ice', 
         'Jackfruit', 'Ketchup', 'laapsi', 'Lemon', 
         'Lime', 'Long Bean', 'Masyaura', 'Milk', 'Minced Meat', 
         'Moringa Leaves', 'Mushroom', 'Mutton', 'Nutrela Soya Chunk', 
         'Okra', 'Olive Oil', 'Onion', 'Green Onion', 'Orange', 
         'Spinach', 'Spinach', 'Paneer', 
         'Papaya', 'Pea', 'Pear', 'Pointed Gourd', 'Pork', 
         'Potato', 'Pumpkin', 'Radish', 'Pigeon pea', 'Mustard Green', 
         'Red Bean', 'Red Lentil', 'Rice', 'Moringa Drumstick', 
         'Salt', 'Sausage', 'Snake Gourd', 'Soy Sauce', 'Soybean', 
         'Luffa', 'Stinging Nettle', 'Strawberry', 'Sugar', 
         'Sweet Potato', 'Taro', 'Taro Root', 
         'Thukpa Noodles', 'Tofu', 'Tomato', 'Tori ko Saag', 'Tamarillo', 
         'Turnip', 'Walnut', 'Watermelon', 'Wheat', 'Yellow Lentil', 'kimchi', 'mayonnaise', 'noodle', 'seaweed']

In [101]:
for word in words:
    if not df.ingredients.str.contains(word.lower()).any():
        print(word)

Dalle
Bethu ko Saag
Calabash
Cornflake
Pumpkin shoot
Garden Pea
Gundruk
laapsi
Masyaura
Moringa Leaves
Nutrela Soya Chunk
Pointed Gourd
Moringa Drumstick
Snake Gourd
Luffa
Stinging Nettle
Thukpa Noodles
Tori ko Saag
