In [1]:
import pandas as pd
import spacy
import os
import spacy
import mlflow
import mlflow.spacy
from pathlib import Path
import json
from spacy.util import minibatch, compounding
from spacy.cli.train import train
from dotenv import load_dotenv

In [2]:
# Load environment variables via python-dotenv so later cells can read them with os.getenv
# (MLFLOW_TRACKING_URL is read when MLflow is configured).
load_dotenv()

True

In [3]:
# Point MLflow at the tracking server named by the MLFLOW_TRACKING_URL environment variable.
# NOTE(review): os.getenv returns None when the variable is unset — confirm that MLflow's
# fallback tracking location is acceptable, or fail early here.
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URL"))
# Select the experiment this notebook works under.
mlflow.set_experiment("jupyter_NER")

<Experiment: artifact_location='mlflow-artifacts:/466776196716148841', creation_time=1741610247334, experiment_id='466776196716148841', last_update_time=1741610247334, lifecycle_stage='active', name='jupyter_NER', tags={}>

In [4]:
# Load the spaCy model stored in MLflow under the URI below and bind it to `ner`;
# later cells call `ner(text)` and read `.ents` from the result.
# NOTE(review): presumably "recipe_NER" is the registered model name and "prod" its alias — confirm.
model_uri = "models:/recipe_NER@prod"
ner = mlflow.spacy.load_model(model_uri=model_uri)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|████████████████████████████████████████████████████████| 17/17 [00:09<00:00,  1.86it/s]


In [5]:
# General-purpose English pipeline, loaded with its 'ner' and 'parser' components disabled.
# process_ingredient uses it to lemmatise entity text.
nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])

In [142]:
# Read the first 10 rows of the RecipeNLG CSV; the 'ingredients' column is converted from
# its string form into a Python list (one item per ingredient line).
# NOTE(review): pd.eval evaluates each cell as an expression. For list literals coming from an
# external file, ast.literal_eval is the narrower, safer parser — consider switching.
nlg_ds = pd.read_csv('../data/recipenlg/RecipeNLG_dataset.csv', converters={'ingredients': pd.eval}, nrows=10)

In [84]:
# Inspect the parsed ingredient list of the row labelled 0.
nlg_ds['ingredients'][0]

['1 c. firmly packed brown sugar',
 '1/2 c. evaporated milk',
 '1/2 tsp. vanilla',
 '1/2 c. broken nuts (pecans)',
 '2 Tbsp. butter or margarine',
 '3 1/2 c. bite size shredded rice biscuits']

In [8]:
# Run the NER model over every ingredient line of the row labelled 5; `out` holds one
# tagged document per line and is printed/processed in a later cell.
out = [ner(line) for line in nlg_ds['ingredients'][5]]

In [31]:
def process_ingredient(ingredient_doc: "spacy.tokens.Doc", include_variety: bool = False):
    """Group the lemmas of one NER-tagged ingredient line by entity label.

    Entities are visited in document order. The text of each 'Food',
    'Preparation' and (optionally) 'Variety' entity is lemmatised with the
    module-level ``nlp`` pipeline and lower-cased.

    Args:
        ingredient_doc: Document whose ``ents`` carry the labels 'Food',
            'Preparation', 'Variety' and 'Optional'.
        include_variety: When True, also collect 'Variety' entities and
            return them under the ``'var'`` key.

    Returns:
        dict with keys:
            'food': list of lemma lists, one per 'Food' entity.
            'prep': list of preparation-lemma groups. A new (initially empty)
                group is opened each time a food is seen while the current
                group is non-empty, so the list may end with an empty group.
            'optional': True if any 'Optional' entity is present.
            'var': (only when ``include_variety``) variety-lemma groups,
                grouped the same way as 'prep'.
    """
    def _lemmas(span):
        # Re-parse the entity text so lemmas come from the general English pipeline.
        return [tok.lemma_.lower() for tok in nlp(str(span))]

    food_lemmas = []
    prep_lemmas = [[]]
    var_lemmas = [[]]
    optional = False

    for ent in ingredient_doc.ents:
        label = ent.label_
        if label == 'Food':
            food_lemmas.append(_lemmas(ent))
            # A new food item on the same line closes the current non-empty
            # preparation (and variety) group and opens a fresh one.
            if prep_lemmas[-1]:
                prep_lemmas.append([])
            if include_variety and var_lemmas[-1]:
                var_lemmas.append([])
        elif label == 'Preparation':
            prep_lemmas[-1].extend(_lemmas(ent))
        elif include_variety and label == 'Variety':
            var_lemmas[-1].extend(_lemmas(ent))
        elif label == 'Optional':
            optional = True

    ret_obj = {'food': food_lemmas,
               'prep': prep_lemmas,
               'optional': optional}
    if include_variety:
        # Fix: the original indexed with a slice (ret_obj['var': var_lemmas]),
        # which raised instead of storing the variety groups.
        ret_obj['var'] = var_lemmas
    return ret_obj

In [32]:
# Debug view: print the tagged ingredient lines, then for each line the food and
# preparation lemma groups returned by process_ingredient (separated by '----').
print('\n'.join([str(o) for o in out]))
for ingredient_line in out:
    process_obj = process_ingredient(ingredient_line)
    food_lemmas = process_obj['food']
    prep_lemmas = process_obj['prep']
    print('----')
    print(food_lemmas)
    print(prep_lemmas)


6 baking potatoes
1 lb. of extra lean ground beef
2/3 c. butter or margarine
6 c. milk
3/4 tsp. salt
1/2 tsp. pepper
1 1/2 c (6 oz.) shredded Cheddar cheese, divided
12 sliced bacon, cooked, crumbled and divided
4 green onion, chopped and divided
1 (8 oz.) carton sour cream (optional)
----
[['bake', 'potato']]
[[]]
----
[['beef']]
[['ground'], []]
----
[['butter']]
[[]]
----
[['milk']]
[[]]
----
[['salt']]
[[]]
----
[['pepper']]
[[]]
----
[['cheddar', 'cheese']]
[['shred'], []]
----
[['bacon']]
[['slice'], ['cook', 'crumble']]
----
[['green', 'onion']]
[['chop']]
----
[['sour', 'cream']]
[[]]


In [54]:
# Global set of food tokens; transform_ingredients_to_tokens adds to it when called
# with create_vocab=True. Re-running this cell resets the vocabulary.
vocab = set()

In [134]:
def transform_ingredients_to_tokens(ingredients: list, create_vocab=False):
    """Turn one recipe's ingredient lines into food tokens, preparations and optional flags.

    Assumes the RecipeNLG layout, where each item of ``ingredients`` is one
    ingredient line. Every line is tagged with the module-level ``ner`` model
    and parsed with ``process_ingredient``. Lines without any food are skipped.

    Only the first food of a line is kept; its lemmas are joined with '_' to
    form the token. If the line has exactly one food, all preparation groups
    are flattened and attributed to it; otherwise only the first preparation
    group is used.

    Args:
        ingredients: Ingredient lines of a single recipe.
        create_vocab: When True, each food token is also added to the global
            ``vocab`` set.

    Returns:
        dict with 'foods' (list of tokens), 'preps' (token -> preparation
        lemmas, only for tokens with at least one preparation; a repeated
        token overwrites the earlier entry) and 'optionals' (one bool per
        kept line).
    """
    global vocab

    # Tag all lines up front, before any of them is processed.
    tagged_docs = [ner(text) for text in ingredients]

    food_tokens = []
    prep_by_food = {}
    optional_flags = []

    for doc in tagged_docs:
        parsed = process_ingredient(doc)
        line_foods = parsed['food']
        if not line_foods:
            # Nothing recognised as food on this line.
            continue

        token = "_".join(line_foods[0])
        food_tokens.append(token)

        if len(line_foods) == 1:
            # Single food: every preparation on the line belongs to it.
            line_preps = []
            for group in parsed['prep']:
                line_preps.extend(group)
        else:
            # Several foods: assume only the first group describes the first food.
            line_preps = parsed['prep'][0]

        if line_preps:
            prep_by_food[token] = line_preps

        optional_flags.append(parsed['optional'])

        if create_vocab:
            vocab.add(token)

    return {
        'foods': food_tokens,
        'preps': prep_by_food,
        'optionals': optional_flags,
    }


In [145]:
def transform_ds_to_BOW(ds: pd.DataFrame, create_vocab=False):
    """Add bag-of-words style columns derived from ``ds['ingredients']``, in place.

    Assumes the RecipeNLG layout, where each cell of 'ingredients' is a list
    of ingredient lines. Each row is passed through
    ``transform_ingredients_to_tokens`` and the result is stored in three
    object-dtype columns:

        'BOW_tokens': list of food tokens,
        'preps':      dict mapping food token -> preparation lemmas,
        'optionals':  list of per-food optional flags.

    Values are aligned on ``ds.index``, so the frame does not need a default
    0..n-1 index (the original wrote by enumerate position via ``ds.at``,
    which mis-assigns on a filtered or re-indexed frame).

    Args:
        ds: DataFrame with an 'ingredients' column; modified in place.
        create_vocab: Forwarded to ``transform_ingredients_to_tokens``.

    Returns:
        None.
    """
    records = [
        transform_ingredients_to_tokens(ingredients, create_vocab)
        for ingredients in ds['ingredients']
    ]
    column_to_key = (
        ('BOW_tokens', 'foods'),
        ('preps', 'preps'),
        ('optionals', 'optionals'),
    )
    for column, key in column_to_key:
        # An explicit object Series keeps each list/dict as a single cell value.
        ds[column] = pd.Series([rec[key] for rec in records], index=ds.index, dtype='object')

In [147]:
# Annotate the sample frame in place (adds BOW_tokens / preps / optionals columns)
# and collect every food token into the global `vocab`.
transform_ds_to_BOW(nlg_ds, create_vocab=True)

In [152]:
# Spot-check the food tokens extracted for the row labelled 1.
nlg_ds['BOW_tokens'][1]

['beef', 'chicken_breast', 'cream_of_mushroom_soup', 'sour_cream']

In [149]:
# Display the whole annotated sample (only 10 rows were loaded).
nlg_ds

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,BOW_tokens,preps,optionals
0,0,No-Bake Nut Cookies,"[1 c. firmly packed brown sugar, 1/2 c. evapor...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""bu...","[brown_sugar, milk, nut, butter]",{},"[False, False, False, False]"
1,1,Jewell Ball'S Chicken,"[1 small jar chipped beef, cut up, 4 boned chi...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom...","[beef, chicken_breast, cream_of_mushroom_soup,...","{'beef': ['chip', 'cut', 'up']}","[False, False, False, False]"
2,2,Creamy Corn,"[2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. c...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""gar...","[corn, cream_cheese, butter, garlic_powder, sa...","{'cream_cheese': ['cube'], 'butter': ['cube']}","[False, False, False, False, False, False]"
3,3,Chicken Funny,"[1 large whole chicken, 2 (10 1/2 oz.) cans ch...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroo...","[chicken, chicken_gravy, cream_of_mushroom_soup]",{},"[False, False, False]"
4,4,Reeses Cups(Candy),"[1 c. peanut butter, 3/4 c. graham cracker cru...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""bu...","[peanut_butter, graham_cracker_crumb, butter, ...",{},"[False, False, False, False, False]"
5,5,Cheeseburger Potato Soup,"[6 baking potatoes, 1 lb. of extra lean ground...","[""Wash potatoes; prick several times with a fo...",www.cookbooks.com/Recipe-Details.aspx?id=20115,Gathered,"[""baking potatoes"", ""extra lean ground beef"", ...","[bake_potato, beef, butter, milk, salt, pepper...","{'beef': ['ground'], 'cheddar_cheese': ['shred...","[False, False, False, False, False, False, Fal..."
6,6,Rhubarb Coffee Cake,"[1 1/2 c. sugar, 1/2 c. butter, 1 egg, 1 c. bu...","[""Cream sugar and butter."", ""Add egg and beat ...",www.cookbooks.com/Recipe-Details.aspx?id=210288,Gathered,"[""sugar"", ""butter"", ""egg"", ""buttermilk"", ""flou...","[sugar, butter, egg, buttermilk, flour, salt, ...","{'rhubarb': ['finely', 'cut']}","[False, False, False, False, False, False, Fal..."
7,7,Scalloped Corn,"[1 can cream-style corn, 1 can whole kernel co...","[""Mix together both cans of corn, crackers, eg...",www.cookbooks.com/Recipe-Details.aspx?id=876969,Gathered,"[""cream-style corn"", ""whole kernel corn"", ""cra...","[cream_-_style, corn, saltine_cracker, egg, bu...","{'saltine_cracker': ['crush'], 'egg': ['beat']}","[False, False, False, False, False, False]"
8,8,Nolan'S Pepper Steak,"[1 1/2 lb. round steak (1-inch thick), cut int...","[""Roll steak strips in flour."", ""Brown in skil...",www.cookbooks.com/Recipe-Details.aspx?id=375254,Gathered,"[""tomatoes"", ""water"", ""onions"", ""Worcestershir...","[tomato, worcestershire_sauce, green_pepper]","{'tomato': ['drain', 'cut', 'up'], 'green_pepp...","[False, False, False]"
9,9,Millionaire Pie,"[1 large container Cool Whip, 1 large can crus...","[""Empty Cool Whip into a bowl."", ""Drain juice ...",www.cookbooks.com/Recipe-Details.aspx?id=794547,Gathered,"[""pineapple"", ""condensed milk"", ""lemons"", ""pec...","[pineapple, condense_milk, pecan, graham_crack...",{'pineapple': ['crush']},"[False, False, False, False]"
