In [1]:
import os
import spacy
import polars as pl
import mlflow
import mlflow.spacy
from pathlib import Path
import json
from spacy.util import minibatch, compounding
from spacy.cli.train import train
from dotenv import load_dotenv
import sys
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath('../..'))
from app.data_handling.NER_parquet import process_ingredient, transform_ingredients_to_tokens, transform_ds_to_tokens, construct_ingredient_query

# Create the DataFrame and save it as Parquet

In [2]:
# Load key/value pairs from a .env file into the process environment.
# Presumably this is where MLFLOW_TRACKING_URL (read below) comes from — confirm.
load_dotenv()

True

In [3]:
# Point MLflow at the tracking/registry server configured in the environment.
# os.getenv returns None when the variable is missing; passing None through
# would leave MLflow on its default tracking location, and the registry lookup
# in the next cell would then fail with an error unrelated to the real cause.
# Fail fast here with an actionable message instead.
tracking_uri = os.getenv("MLFLOW_TRACKING_URL")
if not tracking_uri:
    raise RuntimeError(
        "MLFLOW_TRACKING_URL is not set; define it in .env or the environment "
        "before running this notebook"
    )
mlflow.set_tracking_uri(tracking_uri)

In [4]:
# Model-registry URI: the registered model "recipe_NER" addressed via its "prod" alias.
model_uri = "models:/recipe_NER@prod"
# Fetches the model artifacts from the tracking server and returns the spaCy
# pipeline used for tokenizing ingredients below.
ner = mlflow.spacy.load_model(model_uri=model_uri)


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|████████████████████████████████████████████████████████| 17/17 [00:03<00:00,  4.87it/s]


In [5]:
# Sanity check: confirm the loaded object is a spaCy language pipeline.
type(ner)

spacy.lang.en.English

In [6]:
# Read a 1,000-row sample of RecipeNLG. The "ingredients" and "directions"
# columns arrive as JSON-encoded text, so decode both into list columns, then
# append a 0-based row number as "index".
nlg_ds = (
    pl.read_csv('../../data/recipenlg/RecipeNLG_dataset.csv', n_rows=1000)
    .with_columns(
        pl.col("ingredients").str.json_decode(),
        pl.col("directions").str.json_decode(),
    )
    .with_columns(pl.arange(pl.len()).alias("index"))
)

In [7]:
# Preview the first rows to verify that the JSON columns decoded into lists.
nlg_ds.head()

Unnamed: 0_level_0,title,ingredients,directions,link,source,NER,index
i64,str,list[str],list[str],str,str,str,i64
0,"""No-Bake Nut Cookies""","[""1 c. firmly packed brown sugar"", ""1/2 c. evaporated milk"", … ""3 1/2 c. bite size shredded rice biscuits""]","[""In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine."", ""Stir over medium heat until mixture bubbles all over top."", … ""Let stand until firm, about 30 minutes.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""brown sugar"", ""milk"", ""vanil…",0
1,"""Jewell Ball'S Chicken""","[""1 small jar chipped beef, cut up"", ""4 boned chicken breasts"", … ""1 carton sour cream""]","[""Place chipped beef on bottom of baking dish."", ""Place chicken on top of beef."", ""Mix soup and cream together; pour over chicken. Bake, uncovered, at 275° for 3 hours.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""beef"", ""chicken breasts"", ""c…",1
2,"""Creamy Corn""","[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", … ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""frozen corn"", ""cream cheese""…",2
3,"""Chicken Funny""","[""1 large whole chicken"", ""2 (10 1/2 oz.) cans chicken gravy"", … ""4 oz. shredded cheese""]","[""Boil and debone chicken."", ""Put bite size pieces in average size square casserole dish."", … ""Sprinkle shredded cheese on top and bake at 350° for approximately 20 minutes or until golden and bubbly.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""chicken"", ""chicken gravy"", ""…",3
4,"""Reeses Cups(Candy) ""","[""1 c. peanut butter"", ""3/4 c. graham cracker crumbs"", … ""1 large pkg. chocolate chips""]","[""Combine first four ingredients and press in 13 x 9-inch ungreased pan."", ""Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets hard."", ""Keep in refrigerator.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""peanut butter"", ""graham crac…",4


In [8]:
# (rows, columns) — should match the n_rows sample size requested at load time.
nlg_ds.shape

(1000, 8)

In [9]:
# Run the project helper that applies the NER pipeline to every recipe.
# NOTE(review): the exact columns it adds and the effect of create_vocab=True
# are defined in app.data_handling.NER_parquet — confirm there.
# NOTE(review): this rebinds nlg_ds, so re-running this cell on its own feeds
# the already-transformed frame back into the helper; re-run from the load cell.
nlg_ds = transform_ds_to_tokens(nlg_ds, ner, create_vocab=True)

In [10]:
# Persist the transformed frame to the working directory; the DuckDB cells
# below query this file by path.
nlg_ds.write_parquet('test.parquet')

# Test some queries using DuckDB and parquet

In [11]:
import duckdb

In [12]:
# Recipes whose tokens list contains the exact token 'egg'. DuckDB reads the
# parquet file directly by path; .pl() converts the result to a Polars DataFrame.
duckdb.sql("SELECT * FROM 'test.parquet' AS tbl WHERE list_contains(tbl.tokens, 'egg')").pl()

C0,title,ingredients,directions,link,source,NER,index,tokens,preps,optionals
i64,str,list[str],list[str],str,str,str,i64,list[str],str,list[bool]
6,"""Rhubarb Coffee Cake""","[""1 1/2 c. sugar"", ""1/2 c. butter"", … ""1 tsp. vanilla""]","[""Cream sugar and butter."", ""Add egg and beat well."", … ""Pour into greased 9 x 13-inch pan and add Topping.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""sugar"", ""butter"", ""egg"", ""bu…",6,"[""sugar"", ""butter"", … ""rhubarb""]","""{""rhubarb"": [""finely"", ""cut""]}""","[false, false, … false]"
7,"""Scalloped Corn""","[""1 can cream-style corn"", ""1 can whole kernel corn"", … ""pepper to taste""]","[""Mix together both cans of corn, crackers, egg, 2 teaspoons of melted butter and pepper and place in a buttered baking dish."", ""Dot with remaining 4 teaspoons of butter."", ""Bake at 350° for 1 hour.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""cream-style corn"", ""whole ke…",7,"[""cream_-_style"", ""corn"", … ""pepper""]","""{""saltine_cracker"": [""crush""],…","[false, false, … false]"
20,"""Grandma Hanrath'S Banana Bread…","[""1 c. sugar"", ""1/2 c. shortening"", … ""2 over-ripe bananas (chopped)""]","[""Cream sugar and shortening."", ""Add eggs, salt and soda, then bananas and flour."", … ""Bake in loaf pan at 350° for 1 hour.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""sugar"", ""shortening"", ""eggs""…",20,"[""sugar"", ""egg"", … ""banana""]","""{""banana"": [""chop""]}""","[false, false, … false]"
21,"""Chocolate Frango Mints""","[""1 pkg. devil's food cake mix"", ""1 pkg. chocolate fudge pudding mix (instant)"", … ""6 oz. chopped Frango mints""]","[""Mix ingredients together for 5 minutes."", ""Scrape bowl often. Last fold in chocolate chip mints."", ""Bake at 350° for 35 to 40 minutes or until done (cake mix directions).""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""cake mix"", ""chocolate fudge …",21,"[""cake_mix"", ""sour_cream"", ""egg""]","""{}""","[false, false, false]"
24,"""Prize-Winning Meat Loaf""","[""1 1/2 lb. ground beef"", ""1 c. tomato juice"", … ""1 1/2 tsp. salt""]","[""Mix well."", ""Press firmly into an 8 1/2 x 4 1/2 x 2 1/2-inch loaf pan."", … ""Makes 8 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""ground beef"", ""tomato juice""…",24,"[""beef"", ""tomato_juice"", … ""salt""]","""{""beef"": [""ground""], ""egg"": [""…","[false, false, … false]"
…,…,…,…,…,…,…,…,…,…,…
987,"""French Toast And Sauce""","[""4 eggs, beaten"", ""1/2 c. milk"", … ""8 slices day-old bread""]","[""In a wide shallow bowl, beat 1st four ingredients. For each slice, melt 1 tablespoon of butter in skillet. Dip bread in egg mixture; turn and coat the other side. Cook slices in butter, browning on both sides. Serve with Sauce as follows.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""eggs"", ""milk"", ""frozen lemon…",987,"[""egg"", ""milk"", … ""bread""]","""{""egg"": [""beat""]}""","[false, false, … false]"
988,"""Spaghetti Pie""","[""6 oz. spaghetti"", ""2 Tbsp. olive oil"", … ""1/2 c. shredded Mozzarella (2 oz.)""]","[""Cook spaghetti (about 3 cups)."", ""Stir olive oil into hot spaghetti."", … ""Form spaghetti mixture into a ""crust"" in a 10-inch pie plate sprayed with Pam. Spread cottage cheese over bottom of spaghetti crust.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""spaghetti"", ""olive oil"", ""Pa…",988,"[""olive_oil"", ""parmesan_cheese"", … ""mozzarella""]","""{""parmesan_cheese"": [""grate""],…","[false, false, … false]"
991,"""Favorite Chocolate Cake""","[""1 3/4 c. flour"", ""2 eggs"", … ""1 c. buttermilk""]","[""Sift all dry ingredients in a large mixing bowl."", ""Stir."", … ""Tube pan takes 40 minutes.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""flour"", ""eggs"", ""baking powd…",991,"[""egg"", ""bake_powder"", … ""buttermilk""]","""{}""","[false, false, … false]"
992,"""Blue Muffins""","[""1 egg"", ""1 c. milk"", … ""2 c. blueberries""]","[""Mix thoroughly."", ""Pour into muffin pans and bake at 350°.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""egg"", ""milk"", ""shortening"", …",992,"[""egg"", ""milk"", … ""blueberry""]","""{}""","[false, false, … false]"


In [13]:
# Rewrite test.parquet with the preps column cast to DuckDB's JSON type.
# The SELECT lists preps first, so it becomes the leading column; every other
# column is carried over unchanged via * EXCLUDE (preps).
# NOTE(review): the statement reads from and writes to the same file — confirm
# the installed DuckDB version stages the write through a temporary file,
# otherwise write to a separate output path.
duckdb.sql("COPY(SELECT preps::JSON AS preps, * EXCLUDE (preps) FROM 'test.parquet') TO 'test.parquet' (FORMAT PARQUET, OVERWRITE TRUE)")

In [14]:
# Example search inputs for construct_ingredient_query:
#   ingredients — ingredient tokens to search for
#   preps       — mapping of ingredient token -> list of preparation tokens
# NOTE(review): how the two are combined (AND/OR, required vs. optional) is
# decided inside construct_ingredient_query — confirm there.
ingredients = ['butter']
preps = {'butter': ['cube'], 'cream_cheese': ['cube']}

In [15]:
# Ingredient tokens that carry a prep constraint (iterating a dict yields its keys).
list(preps)

['butter', 'cream_cheese']

In [16]:
# Build the SQL text with the project helper from the inputs defined above,
# run it through DuckDB, and show the matches as a Polars DataFrame.
duckdb.sql(construct_ingredient_query('test.parquet', ingredients, preps)).pl()

preps,C0,title,ingredients,directions,link,source,NER,index,tokens,optionals,prep_lists
str,i64,str,list[str],list[str],str,str,str,i64,list[str],list[bool],list[str]
"""{""cream_cheese"": [""cube""], ""bu…",2,"""Creamy Corn""","[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", … ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""frozen corn"", ""cream cheese""…",2,"[""corn"", ""cream_cheese"", … ""pepper""]","[false, false, … false]","[""[""cube""]"", ""[""cube""]""]"


In [17]:
# Compare the same ingredient search with and without a prep constraint.
# Both runs share a single code path; only the banner text and the preps
# mapping differ. After the loop, ingredients / preps / sql / results hold the
# values of the last ("WITH PREPS") run, as they did when the two runs were
# written out separately.
ingredients = ['almond']
runs = (
    ("####### WITHOUT PREPS #########", {}),
    ("######## WITH PREPS ##########", {'almond': ['chop']}),
)
for banner, preps in runs:
    sql = construct_ingredient_query('test.parquet', ingredients, preps)
    results = duckdb.sql(sql).pl()

    # Frame the banner with a rule of matching width.
    rule = "#" * len(banner)
    print(rule)
    print(banner)
    print(rule)

    # One section per matching recipe: title, then one ingredient per line.
    for title, ingr in results['title', 'ingredients'].iter_rows():
        print()
        print(f"---- {title} ----")
        print("\n".join(ingr))

###############################
####### WITHOUT PREPS #########
###############################

---- Honey Almond Squares ----
1 Duncan Hines white pudding cake mix
1 (8 oz.) pkg. cream cheese (at room temperature)
1/4 c. alfalfa honey
3 to 4 oz. sliced almonds

---- Caramel Pie ----
16 oz. Cool Whip
2 c. coconut
3/4 margarine, melted
1 c. slivered almonds, chopped fine
1 can Eagle Brand milk
3 graham cracker crusts
8 oz. cream cheese
1 jar caramel topping

---- Almond Roco ----
1 lb. butter (not margarine)
2 c. granulated sugar
1/2 c. water
1/2 c. chopped almonds
3 chocolate candy bars
1/4 c. chopped almonds or as desired (for topping)

---- Crabmeat Au Gratin ----
1 can cream of shrimp soup
1/2 can milk (use soup can)
1 lb. faux crabmeat
4 c. celery, chopped
1/2 c. green pepper, chopped
2 pimentos, chopped
2 Tbsp. onions, chopped
1/3 c. slivered almonds
4 hard-cooked eggs, chopped
1 c. shredded sharp Cheddar cheese
buttered bread crumbs

---- Fresh Broccoli Salad ----
2 bunches fres