# | default_exp Testing Polars for EDA, migrating cleaned data to Vespa

In [None]:
# | hide
import adbc_driver_postgresql.dbapi
from datetime import datetime
from enum import Enum
import json
import polars as pl
import requests
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import vespa

1. Migrate data from Postgres to Vespa
   a. The cuisines need some data cleaning since there are typos
   The Postgres database can stay as the rawer form of the scrapes

In [None]:
# Postgres has to be up before any of the cells below are run.
# Read the connection credentials from the local secrets file.
postgres_key_path = "../secrets/postgres_login.json"
with open(postgres_key_path, "r") as fo:
    postgres_key = json.load(fo)

# Unpack in one go; a missing key still raises KeyError.
user, password, host = (
    postgres_key[field] for field in ("user", "password", "host")
)


In [None]:
# Smoke-test the PostgreSQL connection through ADBC.
uri = f"postgresql://{user}:{password}@{host}/mealeon"
# Open the connection as a context manager so it is closed once the check is
# done; the polars cells visible below only need `uri`, not `conn`.
with adbc_driver_postgresql.dbapi.connect(uri) as conn, conn.cursor() as cur:
    cur.execute("SELECT * FROM recipe_scrapes LIMIT 2")
    print(cur.fetchone())

('AfricanBites-3f1a4fc7e099375adc6f06fbf4389396c3ad5bdfe4661a1980f0ee1143fe0317', 'English', '629883', 'Smoked Spatchcock Turkey', 'AfricanBites', 'https://www.africanbites.com/smoked-spatchcock-turkey/', ['salt and pepper for seasoning', '1 15-pound turkey', '4 tablespoons your choice of turkey rub (Creole, Italian, poultry, or your choice of seasoning)'], 'https://www.africanbites.com/wp-content/uploads/2021/08/IMG_9551-Copy-650x650.jpg', "Enjoy fall-off-the-bone goodness with flavorful juiciness in every bite. Simple smoking delivers flavor penetrating deep into your holiday bird without drying it out. This is your go-to guide for a deliciously tender and juicy turkey that's perfect any time of year!", ['Remove the giblet package.', 'Use your handy kitchen shears to cut the left side of the backbone from the tail to the neck. Do the same to the right side of the backbone. (A sharp knife will work as well.)', 'Flip the turkey over and press on it to break the breast bone and flatten 

In [None]:
# Sanity check: how many distinct recipes did each source site contribute?

query = """
    SELECT
        origin
        , COUNT(DISTINCT mealeon_id) AS num_recipes
    FROM recipe_scrapes
    GROUP BY origin
"""

df = pl.read_database_uri(query=query, uri=uri, engine="adbc")

df.head(5)

origin,num_recipes
str,i64
"""AfricanBites""",1491
"""AllRecipes""",4781
"""Panlasang_Pinoy""",1938


In [None]:
# Second check: explode the cuisines array in SQL and count the distinct
# recipes tagged with each individual cuisine label.

query = """
    SELECT
        UNNEST(cuisines) AS cuisine
        , COUNT(DISTINCT mealeon_id) AS num_recipes
    FROM recipe_scrapes
    GROUP BY cuisine
"""

df = pl.read_database_uri(query=query, uri=uri, engine="adbc")

df

cuisine,num_recipes
str,i64
"""AFRICAN CARIBBEAN""",6
"""African""",448
"""African-American""",1
"""African/Carribean""",3
"""American""",638
…,…
"""World""",2
"""Zealander""",89
"""dinner""",1
"""soul""",9


In [None]:
# Lift the row limit so every cuisine label is printed, not just a preview.
with pl.Config() as cfg:
    cfg.set_tbl_rows(-1)
    print(df)

shape: (61, 2)
┌───────────────────┬─────────────┐
│ cuisine           ┆ num_recipes │
│ ---               ┆ ---         │
│ str               ┆ i64         │
╞═══════════════════╪═════════════╡
│ AFRICAN CARIBBEAN ┆ 6           │
│ African           ┆ 448         │
│ African-American  ┆ 1           │
│ African/Carribean ┆ 3           │
│ American          ┆ 638         │
│ Argentinian       ┆ 1           │
│ Asian             ┆ 29          │
│ Australian        ┆ 90          │
│ Brazilian         ┆ 4           │
│ British           ┆ 18          │
│ Cajun             ┆ 11          │
│ Canadian          ┆ 446         │
│ Caribbean         ┆ 119         │
│ Carribean         ┆ 3           │
│ Chinese           ┆ 331         │
│ Creole            ┆ 6           │
│ Cuban             ┆ 3           │
│ Danish            ┆ 2           │
│ English           ┆ 10          │
│ Ethiopian         ┆ 2           │
│ European          ┆ 8           │
│ Filipino          ┆ 1940        │
│ French     

In [None]:
# Raw scraped label -> cleaned label. Insertion order matches the order in
# which the labels were reviewed.
rename_cuisines = {
    # spelling / formatting variants of the same label
    **dict.fromkeys(("AFRICAN CARIBBEAN", "African/Carribean"), "African-Caribbean"),
    "Carribean": "Caribbean",
    "Southern": "US Southern",
    "soul": "Soul",
    # occasion tags that were scraped in place of a cuisine
    **dict.fromkeys(("thanksgiving", "dinner"), "American"),
    "New": "Fusion",
}

# Open questions:
# - should World and International fall under Fusion?
# - do we want supersets? e.g. British covers English, Irish, Scottish, Welsh;
#   US Southern covers Soul

In [None]:
# Look at the aggregate row for the odd "dinner" label.
df.filter(pl.col("cuisine").eq("dinner"))

cuisine,num_recipes
str,i64
"""dinner""",1


In [None]:
# Look at the aggregate row for the "World" label.
df.filter(pl.col("cuisine").eq("World"))

cuisine,num_recipes
str,i64
"""World""",2


In [None]:
# One row per (recipe, cuisine) pair, with the title kept so individual
# labels can be traced back to the recipes that carry them.
query = """
    SELECT
        mealeon_id
        , title
        , UNNEST(cuisines) AS cuisine
    FROM recipe_scrapes
"""

whole_df = pl.read_database_uri(query=query, uri=uri, engine="adbc")

whole_df.filter(pl.col("cuisine").eq("dinner"))

mealeon_id,title,cuisine
str,str,str
"""AfricanBites-d7711d944dfac2608…","""Smoked Beer Can Chicken""","""dinner"""


In [None]:
# title "Smoked Beer Can Chicken" should become American

In [None]:
# Which recipes carry the "World" label?
whole_df.filter(pl.col("cuisine").eq("World"))

mealeon_id,title,cuisine
str,str,str
"""AfricanBites-f6d10c8a7661af70d…","""How to Cook Jasmine Rice""","""World"""
"""AfricanBites-37edc9ea46a786a3b…","""How to Debone Chicken Thighs""","""World"""


In [None]:
# Which recipes carry the "thanksgiving" label?
whole_df.filter(pl.col("cuisine").eq("thanksgiving"))

mealeon_id,title,cuisine
str,str,str
"""AfricanBites-95b6d113f5e590a53…","""Smoked Turkey Legs""","""thanksgiving"""
"""AfricanBites-e94c35b5d5d62c1da…","""Smoked Turkey Breast""","""thanksgiving"""


In [None]:
# Which recipes carry the lowercase "soul" label?
whole_df.filter(pl.col("cuisine").eq("soul"))

mealeon_id,title,cuisine
str,str,str
"""AfricanBites-f3e0779d8b9fa3c85…","""Chitterlings (Chitlins)""","""soul"""
"""AfricanBites-00073465322187887…","""Hog Maw""","""soul"""
"""AfricanBites-1bc8aa91a96c82547…","""Crispy Pork Jowl""","""soul"""
"""AfricanBites-c75aa5fb1e5a28aed…","""Ham Casserole""","""soul"""
"""AfricanBites-0076e7f4c79efd1e1…","""Pickled Pigs’ Feet""","""soul"""
"""AfricanBites-37a94e9f4db8d1dc3…","""Sweet Potato Biscuits""","""soul"""
"""AfricanBites-6f81180e7903637b2…","""Chicken Corn Chowder""","""soul"""
"""AfricanBites-4e1d69c66f031e599…","""Hoppin John""","""soul"""
"""AfricanBites-8761b8303d6685612…","""Air-Fryer Pork Chops""","""soul"""


In [None]:
# Which recipes carry the bare "New" label?
whole_df.filter(pl.col("cuisine").eq("New"))

mealeon_id,title,cuisine
str,str,str
"""AllRecipes-af169286bdd939a9e5e…","""Kiwi Fruit Salsa""","""New"""
"""AllRecipes-3440eb9d648fa5bea9a…","""Yummy Pikelets""","""New"""
"""AllRecipes-11d30e76f14355834af…","""Shearers' Mince and Potato Hot…","""New"""
"""AllRecipes-e7642daa54b6e84c683…","""Dad's New Zealand Mince Stew""","""New"""
"""AllRecipes-7c91bff5430c2a2c412…","""Barbequed Thai Style Chicken""","""New"""
…,…,…
"""AllRecipes-1a4a1cafe5f2f5e9ee8…","""Keligun Chicken""","""New"""
"""AllRecipes-9f5d8a8347394ba3782…","""Skite Cake""","""New"""
"""AllRecipes-467c30863817f7dd199…","""Turkey Stir Fry with Lychees""","""New"""
"""AllRecipes-d72d9be6ebb2a8f01c3…","""Hamburger Sarah Style""","""New"""


In [None]:
# Print every recipe whose cuisine was recorded as the "Missing Cuisine" placeholder.
missing_cuisine_rows = whole_df.filter(pl.col("cuisine").eq("Missing Cuisine"))
with pl.Config(tbl_rows=-1):
    print(missing_cuisine_rows)

shape: (12, 3)
┌─────────────────────────────────┬──────────────────────────────┬─────────────────┐
│ mealeon_id                      ┆ title                        ┆ cuisine         │
│ ---                             ┆ ---                          ┆ ---             │
│ str                             ┆ str                          ┆ str             │
╞═════════════════════════════════╪══════════════════════════════╪═════════════════╡
│ AfricanBites-4f7db24458bd4cf96… ┆ Ground Beef Recipes          ┆ Missing Cuisine │
│ AfricanBites-011af919cbbc71c72… ┆ Oven-Roasted Corn on the Cob ┆ Missing Cuisine │
│ AfricanBites-bd03d540ad197d9c9… ┆ Beef Brisket (Slow Cooker)   ┆ Missing Cuisine │
│ AfricanBites-6b9823a01cc0514dd… ┆ Chicken Sharwama             ┆ Missing Cuisine │
│ AfricanBites-be3b1e83cfe0a6c94… ┆ Pizza Rolls                  ┆ Missing Cuisine │
│ AfricanBites-6fa7523526e58e30b… ┆ How To Boil Eggs             ┆ Missing Cuisine │
│ AfricanBites-dca52946f68474c6e… ┆ Steak Dinner M

In [None]:
# leave Missing Cuisine as is, could be used as test data?

# now that we've unpacked the cuisines, we actually want to fix some of these inconsistencies

In [None]:
# Raw scraped label -> cleaned label.
rename_cuisines = {
    **dict.fromkeys(("AFRICAN CARIBBEAN", "African/Carribean"), "African-Caribbean"),
    "Carribean": "Caribbean",
    "Southern": "US Southern",
    "soul": "Soul",
    **dict.fromkeys(("thanksgiving", "dinner"), "American"),
    "New": "Fusion",
}

# Keep the raw `cuisine` column and add the cleaned label next to it.
whole_df = whole_df.with_columns(
    pl.col("cuisine").replace(rename_cuisines).alias("replaced")
)

In [None]:
# Preview the frame now that `replaced` sits next to the raw `cuisine` column.
whole_df

mealeon_id,title,cuisine,replaced
str,str,str,str
"""AfricanBites-3f1a4fc7e099375ad…","""Smoked Spatchcock Turkey""","""Southern""","""US Southern"""
"""AfricanBites-ad9870f6689604624…","""How to Brine a Turkey""","""American""","""American"""
"""AfricanBites-9b119ba4403faa097…","""Refried Beans""","""Mexican""","""Mexican"""
"""AfricanBites-378e4cd9d9469919c…","""Lemon Blueberry Scones""","""American""","""American"""
"""AfricanBites-378e4cd9d9469919c…","""Lemon Blueberry Scones""","""British""","""British"""
…,…,…,…
"""AllRecipes-61175d60f820650b5ba…","""Indian Vegetarian Curried Caul…","""Indian""","""Indian"""
"""AllRecipes-d1a8c59c8b37c79b1be…","""Curry Orange Swai""","""Indian""","""Indian"""
"""AllRecipes-6936625d8919fa0d2aa…","""Fruited Tofu Curry Salad""","""Indian""","""Indian"""
"""AllRecipes-5a5d70c42381defe6c0…","""Taste of India Roasted Root Ve…","""Indian""","""Indian"""


In [None]:
# For every cleaned label: how many rows it covers and which raw labels
# were folded into it, largest groups first.
q = (
    whole_df.lazy()
    .group_by("replaced")
    .agg(pl.len().alias("recipe_count"), pl.col("cuisine", "title"))
    .with_columns(unique_cuisines=pl.col("cuisine").list.unique())
    .sort("recipe_count", descending=True)
)
temp = q.collect()

# Only the printing needs the unlimited row count.
with pl.Config(tbl_rows=-1):
    print(temp.select("replaced", "unique_cuisines"))

shape: (56, 2)
┌───────────────────┬─────────────────────────────────┐
│ replaced          ┆ unique_cuisines                 │
│ ---               ┆ ---                             │
│ str               ┆ list[str]                       │
╞═══════════════════╪═════════════════════════════════╡
│ Mexican           ┆ ["Mexican"]                     │
│ Filipino          ┆ ["Filipino"]                    │
│ Indian            ┆ ["Indian"]                      │
│ American          ┆ ["dinner", "American", "thanks… │
│ African           ┆ ["African"]                     │
│ Canadian          ┆ ["Canadian"]                    │
│ French            ┆ ["French"]                      │
│ Chinese           ┆ ["Chinese"]                     │
│ US Southern       ┆ ["Southern"]                    │
│ Fusion            ┆ ["Fusion", "New"]               │
│ Caribbean         ┆ ["Caribbean", "Carribean"]      │
│ Australian        ┆ ["Australian"]                  │
│ Zealander         ┆ ["Zealander

In [None]:
# want to see how many recipes have more than one cuisine
#
# `whole_df` was built with UNNEST, so it holds one row per (recipe, cuisine)
# pair and `replaced` is a plain string column; calling `.list.len()` on it
# raises a SchemaError. Count the non-null labels that share a `mealeon_id`
# instead and attach that count to every row of the recipe.

with pl.Config(tbl_rows = -1):
    q = (
        whole_df.lazy()
        .with_columns(
            pl.col("replaced").count().over("mealeon_id").alias("num_cuisines"),
        )
        .sort("num_cuisines", descending=True)
    )

    temp = q.collect()
    print(temp)

SchemaError: invalid series dtype: expected `List`, got `str` for series with name `replaced`

In [None]:
# Same count on the raw labels. `cuisine` is a string column (one row per
# recipe/cuisine pair), so `.list.len()` raises a SchemaError; count the
# non-null labels per `mealeon_id` with a window instead.
with pl.Config(tbl_rows = -1):
    q = (
        whole_df.lazy()
        .with_columns(
            pl.col("cuisine").count().over("mealeon_id").alias("num_cuisines"),
        )
        .sort("num_cuisines", descending=True)
    )

    temp = q.collect()
    print(temp)

SchemaError: invalid series dtype: expected `List`, got `str` for series with name `cuisine`

Unclear whether some recipes actually have multiple cuisines. The scraper should have allowed that, since the field is a list of strings, but let's check in Postgres.

In [None]:
# count the cuisines on each recipe (array length, no unnesting needed),
# then count how many recipes have each number of cuisines

query = """
    -- number of recipes for each cuisine count
    SELECT
        num_cuisines
        , COUNT(DISTINCT mealeon_id) AS num_recipes
    FROM (
        -- length of the cuisines array per recipe (NULL when cuisines is NULL)
        SELECT
            mealeon_id
            , CARDINALITY(cuisines) AS num_cuisines
        FROM recipe_scrapes
    ) cuisine_count
    GROUP BY num_cuisines
    ORDER BY num_cuisines DESC
"""
# NOTE(review): the inner GROUP BY mealeon_id was dropped. It only parsed
# because mealeon_id is presumably the primary key, in which case every group
# was a single row anyway - confirm against the table definition.

cuisine_counter_df = pl.read_database_uri(query, uri, engine="adbc")

cuisine_counter_df

num_cuisines,num_recipes
i32,i64
,1
4.0,3
3.0,107
2.0,202
1.0,7897


In [None]:
# pull the full rows of recipes that have more than one cuisine, or none at all

query = """
    -- keep only recipes with several cuisines or a NULL cuisines array
    SELECT
        *
    FROM (
        -- every column plus the length of the cuisines array
        SELECT
            *
            , CARDINALITY(cuisines) AS num_cuisines
        FROM recipe_scrapes
    ) cuisine_count
    WHERE num_cuisines > 1 OR num_cuisines IS NULL
"""
# NOTE(review): the inner GROUP BY mealeon_id was dropped. With SELECT * it was
# only valid because mealeon_id is presumably the primary key, so it never
# merged any rows - confirm against the table definition.

cuisine_counter_df = pl.read_database_uri(query, uri, engine="adbc")

cuisine_counter_df

mealeon_id,language,source_id,title,origin,url,ingredients,photo_url,description,steps,cuisines,num_cuisines
str,str,str,str,str,str,list[str],str,str,list[str],list[str],i32
"""AfricanBites-e94c35b5d5d62c1da…","""English""","""638607""","""Smoked Turkey Breast""","""AfricanBites""","""https://www.africanbites.com/s…","[""4 pound turkey breast"", ""salt, to taste"", ""seasoning of choice, Creole, poultry, blackened, or Italian seasoning""]","""https://www.africanbites.com/w…","""Moist, flavorful, and perfectl…","["" – Remove turkey breast from packaging and put it on a baking sheet."", "" – Let it brine for around 3-4 hours. ("", … "" When the turkey is done, remove it from the smoker and immediately pair it with the cranberry sauce and a side of mashed potatoes.""]","[""American"", ""Southern"", ""thanksgiving""]",3
"""AllRecipes-fc4aa0807dab7430516…","""English""","""260013""","""Chef John's Sausage Rolls""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""1 pound ground pork"", ""2 tablespoons finely minced onion"", … ""2 teaspoons Sesame seeds for garnish""]","""https://images.media-allrecipe…","""People often bake the sausage …","[""Place ground pork, onions, garlic, sage, bread crumbs, salt, pepper, coriander, thyme, cayenne pepper, and nutmeg in a mixing bowl. Mix with a fork until ingredients are evenly distributed, 2 to 3 minutes. Divide mixture in half and place each half on a length of plastic wrap. Use your damp fingers to shape each half into a cylinder, approximately the same length as the puff pastry. Roll up in the plastic and form into a round roll. Refrigerate until ready to use."", ""Whisk egg with water to make the egg wash."", … ""If time is tight, go ahead and just take some prepared sausage out of the casings, and use that, but by making your own, not only do you get to season it any way you want, but you also know exactly what you're eating.""]","[""Australian"", ""New"", ""Zealander""]",3
"""AfricanBites-9fbf399e09e4c6c1a…","""English""","""618164""","""Tembleque""","""AfricanBites""","""https://www.africanbites.com/t…","[""⅔ cup (130g) granulated sugar"", ""½ cup (80g) cornstarch"", … ""toasted coconut for garnish""]","""https://www.africanbites.com/w…","""An incredible coconut pudding …","[""Grease eight 4-ounce molds or one large 4-cup mold (ramekins, baking pans, custard cups, etc.) with a small spritz of baking spray or cooking oil."", ""In a large, heavy-bottomed saucepan, whisk the sugar, cornstarch, cinnamon, and salt."", … ""Garnish with ground cinnamon and toasted coconut flakes if desired.""]","[""Caribbean"", ""Puerto Rican""]",2
"""AfricanBites-7f035acc338d29038…","""English""","""567650""","""Callaloo Recipe""","""AfricanBites""","""https://www.africanbites.com/c…","[""1 1.5-pound bunch (680g) callaloo (or kale or collards)"", ""2 thick strips bacon, cut in pieces"", … ""cooking spray (or very little cooking oil)""]","""https://www.africanbites.com/w…","""A vibrant, healthy, and fresh …","[""Cut leaves and soft stems from the callaloo branches, then soak in a bowl of cold water for about 5-10 minutes or until finished with prep."", ""Proceed to slice the onions, mince the garlic, and dice the tomatoes. Set aside."", … ""Remove and serve with callaloo.""]","[""African"", ""Caribbean""]",2
"""AllRecipes-68f2730272cd2d6e336…","""English""","""45825""","""Ki See Ming""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""2 tablespoons butter"", ""1 large onion, sliced"", … ""2 tablespoons soy sauce""]","""https://images.media-allrecipe…","""This is a family recipe that c…","[""Melt butter in a large heavy skillet over medium heat. Saute onion until soft and translucent, but don't brown. Stir in celery, frozen beans, and cabbage. Add ground sirloin, and cook until the meat is evenly browned. Add soup mix, enough water to barely cover and soy sauce. Reduce heat, and simmer for 15 minutes.""]","[""Australian"", ""New"", ""Zealander""]",3
…,…,…,…,…,…,…,…,…,…,…,…
"""AfricanBites-c6b0899745ba2444d…","""English""","""701065""","""Apple Pecan Salad""","""AfricanBites""","""https://www.africanbites.com/a…","[""⅓ cup (80ml) olive oil"", ""¼ cup (60ml) balsamic vinegar "", … ""¼-⅓ cup (28-40g) sliced red onions""]","""https://www.africanbites.com/w…","""This fall salad is always a hi…","[""Whisk olive oil, balsamic vinegar, minced garlic, lemon juice, honey, and Dijon mustard until fully combined. Add salt and pepper to taste."", ""Adjust ingredients to taste: if it's too tart, add more honey; if it's too sweet, add more lemon juice or vinegar. Use immediately or store in the refrigerator, covered, for up to 3 days."", … ""Right before serving, pour the balsamic vinaigrette dressing over the salad and mix to combine. Adjust dressing to preference with more or less as needed. Enjoy!""]","[""American"", ""Southern""]",2
"""AfricanBites-0c544b4d3293bb96a…","""English""","""682016""","""Buffalo Chicken Mac and Cheese…","""AfricanBites""","""https://www.africanbites.com/b…","[""2 cups cooked chicken, chopped (or rotisserie chicken)"", ""8 ounces elbow macaroni"", … ""Green onion for garnish""]","""https://www.africanbites.com/w…","""Decadent cheese sauce smotheri…","[""Preheat the oven to 375°F (195℃)."", ""Cook macaroni according to the package directions, drain, and set aside until ready."", … ""Bake at 375F°/190℃ for 15-20 minutes or until bubbly and the top is slightly golden. Garnish with green onion, serve, and enjoy.""]","[""American"", ""Southern""]",2
"""AfricanBites-a3631ee1f02005f86…","""English""","""622357""","""Steak Kabobs""","""AfricanBites""","""https://www.africanbites.com/s…","[""2 pounds (907g) beef, cut into 1-1½ inches cubes"", ""1 large white or red onion, cut into chunks"", … ""3 tablespoons (30ml) or more extra virgin olive oil (sub with vegetable oil)""]","""https://www.africanbites.com/w…","""An easy grilled steak kabob re…","[""Start by cutting your beef into cubes or chunks about 1-1½ inches squared. You do not want to cut them too thin. This might dry out the steak. Repeat this process with the vegetables; you want them to be the same size as the beef. Set aside."", ""In a medium bowl, mix garlic, cumin, thyme, oregano, nutmeg, allspice, black pepper, pepper flakes, soy sauce, and olive oil. Reserve about a tablespoon for basting."", … ""Grill steak for 4-5 minutes per side or until the desired internal temperature has been reached. Baste frequently with reserved marinade. If cooking for a crowd, it's best to grill until the center of the steak registers about 140-145℉ (60-63℃) for medium doneness. Serve hot with couscous or a salad.""]","[""Mediterranean"", ""Middle East"", ""Turkish""]",3
"""AfricanBites-7e4987e60cc4514ed…","""English""","""699155""","""Brown Stewed Fish""","""AfricanBites""","""https://www.africanbites.com/b…","[""2 whole fish, cut into pieces (I used snapper)"", ""1 teaspoon (3g) fish bouillon (or chicken bouillon)"", … ""1-2 cups (240-475ml) chicken broth (or water)""]","""https://www.africanbites.com/w…","""Savory and perfectly spiced, l…","[""Wash the fish thoroughly, drain excess water, and dry the fish with a paper towel."", ""Cut it into pieces. If using them whole, make three diagonal cuts on each side, all the way to the bone, and squeeze some lemon all over them."", … ""Turn off the stove, remove the stew, and serve it with your favorite rice recipe.""]","[""AFRICAN CARIBBEAN"", ""Jamaican""]",2


So we know that there are recipes with more than one cuisine. Is the `replace` text overriding them? Need to make sure that the replacement text goes back into an array of strings.

In [None]:
# Raw scraped label -> cleaned label.
rename_cuisines = {
    "AFRICAN CARIBBEAN": "African-Caribbean",
    "African/Carribean": "African-Caribbean",
    "Carribean": "Caribbean",
    "Southern": "US Southern",
    "soul": "Soul",
    "thanksgiving": "American",
    "dinner": "American",
    "New": "Fusion",
}

# `whole_df` was built with UNNEST, so `cuisine` holds one string per row
# rather than a list and `.list.eval(...)` raises a SchemaError on it.
# Apply the mapping straight to the string column; the list-wise variant
# only makes sense on a frame that still has the `cuisines` array column.
whole_df = whole_df.with_columns(
    replaced_eval=pl.col("cuisine").replace(rename_cuisines)
)

SchemaError: invalid series dtype: expected `List`, got `str` for series with name `cuisine`

In [None]:
# Fold the one-row-per-label frame back into one row per recipe, gathering
# the cleaned labels into a list column.
q = (
    whole_df.lazy()
    .group_by("mealeon_id")
    .agg(cleaned_cuisines=pl.col("replaced"))
)
temp = q.collect()

# Only the printing needs the unlimited row count.
with pl.Config(tbl_rows=-1):
    print(temp)

shape: (8_209, 2)
┌─────────────────────────────────┬─────────────────────────────────┐
│ mealeon_id                      ┆ cleaned_cuisines                │
│ ---                             ┆ ---                             │
│ str                             ┆ list[str]                       │
╞═════════════════════════════════╪═════════════════════════════════╡
│ AllRecipes-7fc2a39e24dcc71c251… ┆ ["Mexican"]                     │
│ AllRecipes-9a5dc3fa2511284deeb… ┆ ["Mexican"]                     │
│ AfricanBites-6be46c6fab47f2c62… ┆ ["French"]                      │
│ AllRecipes-e4cb86b8f967dfc5b6b… ┆ ["Mexican"]                     │
│ AllRecipes-942e803d60308a3dd50… ┆ ["Indian"]                      │
│ AllRecipes-c71861a1837801491fb… ┆ ["Mexican"]                     │
│ Panlasang_Pinoy-ce66d4577616c2… ┆ ["Filipino"]                    │
│ AllRecipes-bf179cdfa520616ebe7… ┆ ["Mexican"]                     │
│ Panlasang_Pinoy-220bf948eed8f6… ┆ ["Filipino"]                    │
│ 

Multiple approaches
1. Pull the original data from the server again and join it with the relabeled cuisine dataframe
2. Go back and do a list operation on the original data <-

In [None]:
# pull all data, keeping `cuisines` as an array (one row per recipe) so the
# relabelling can be done element-wise inside each list

query = """
    SELECT
        *
    FROM recipe_scrapes
"""

all_data_df = pl.read_database_uri(query, uri, engine="adbc")

# Raw scraped label -> cleaned label.
# NOTE(review): "New" only shows up here next to "Australian" and "Zealander",
# so it looks like a fragment of "New Zealander" split by the scraper rather
# than a fusion cuisine - confirm before relying on the "Fusion" label.
rename_cuisines = {
    "AFRICAN CARIBBEAN": "African-Caribbean",
    "African/Carribean": "African-Caribbean",
    "Carribean": "Caribbean",
    "Southern": "US Southern",
    "soul": "Soul",
    "thanksgiving": "American",
    "dinner": "American",
    "New": "Fusion",
}

with pl.Config(tbl_rows = -1):
    q = (
        all_data_df.lazy()
        .with_columns(
            # Several raw labels map to the same target, so a recipe tagged
            # ["American", "Southern", "thanksgiving"] would otherwise end up
            # with "American" twice. Drop the repeats but keep the original
            # order of the labels.
            relabeled_cuisines = pl.col("cuisines").list.eval(
                pl.element().replace(rename_cuisines).unique(maintain_order=True)
            )
        )
    )

    temp = q.collect()
    print(temp)

shape: (8_210, 12)
┌───────────┬──────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ mealeon_i ┆ language ┆ source_id ┆ title     ┆ … ┆ descripti ┆ steps     ┆ cuisines  ┆ relabeled │
│ d         ┆ ---      ┆ ---       ┆ ---       ┆   ┆ on        ┆ ---       ┆ ---       ┆ _cuisines │
│ ---       ┆ str      ┆ str       ┆ str       ┆   ┆ ---       ┆ list[str] ┆ list[str] ┆ ---       │
│ str       ┆          ┆           ┆           ┆   ┆ str       ┆           ┆           ┆ list[str] │
╞═══════════╪══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ AfricanBi ┆ English  ┆ 629883    ┆ Smoked    ┆ … ┆ Enjoy fal ┆ ["Remove  ┆ ["Souther ┆ ["US Sout │
│ tes-3f1a4 ┆          ┆           ┆ Spatchcoc ┆   ┆ l-off-the ┆ the       ┆ n"]       ┆ hern"]    │
│ fc7e09937 ┆          ┆           ┆ k Turkey  ┆   ┆ -bone     ┆ giblet    ┆           ┆           │
│ 5ad…      ┆          ┆           ┆           ┆   ┆ goodne…   ┆ package

In [None]:
# Recipes that still have more than one cuisine after relabelling.
temp.filter(pl.col("relabeled_cuisines").list.len().gt(1))


mealeon_id,language,source_id,title,origin,url,ingredients,photo_url,description,steps,cuisines,relabeled_cuisines
str,str,str,str,str,str,list[str],str,str,list[str],list[str],list[str]
"""AfricanBites-378e4cd9d9469919c…","""English""","""612196""","""Lemon Blueberry Scones""","""AfricanBites""","""https://www.africanbites.com/l…","[""2½ cups (312.5 g) all-purpose flour"", ""¼ cup (50 g) granulated sugar"", … ""Zest of 1 lemon""]","""https://www.africanbites.com/w…","""These Lemon Blueberry Scones p…","[""Preheat the oven to 400°F/200℃."", ""In a large mixing bowl, whisk together flour, sugar, baking powder, and salt."", … ""Add more sugar or juice if needed to your own desired consistency. Drizzle over scones.""]","[""American"", ""British""]","[""American"", ""British""]"
"""AfricanBites-91fb63d2300b34ce5…","""English""","""690586""","""Teriyaki Shrimp""","""AfricanBites""","""https://www.africanbites.com/t…","[""1 pound (450g) raw jumbo shrimp, peeled and deveined (thawed if frozen)"", ""½ teaspoon (1g) grated ginger"", … ""1 teaspoon sesame seeds ""]","""https://www.africanbites.com/w…","""Quick and healthy with the per…","[""In a small mixing bowl, combine the garlic, ginger, brown sugar, honey, white wine, soy sauce, and black pepper. Mix well until all the ingredients combine. "", ""Place shrimp in a medium bowl, add about two tablespoons of the teriyaki mixture, and toss until it completely coats all the shrimp. Let it marinate in the refrigerator for about 15 minutes. "", … ""Serve over white rice and veggies garnished with green onions and sesame seeds if desired.""]","[""Asian"", ""Japanese""]","[""Asian"", ""Japanese""]"
"""AfricanBites-92ddcff0e9b591efe…","""English""","""640851""","""Roast Beef""","""AfricanBites""","""https://www.africanbites.com/r…","[""1 3-4-pound (1.5-2k) beef eye of round roast"", ""Salt and freshly ground pepper, to taste"", … ""salt and pepper, to taste ""]","""https://www.africanbites.com/w…","""Roast beef is a classic dish t…","[""Rinse roast, pat dry beef, and then season it with salt and pepper. "", ""Mix the olive oil or butter, thyme, paprika, rosemary, minced garlic, and cayenne pepper in a small bowl. Rub it all over the beef roast, covering every inch of it with marinade. Put it in a ziplock bag or cover it in a bowl. Refrigerate overnight. "", … ""Serve with mashed potatoes or make roast beef sandwiches.""]","[""American"", ""Southern""]","[""American"", ""US Southern""]"
"""AfricanBites-5f3d2dcb8e100eb8b…","""English""","""568672""","""Yellow Rice""","""AfricanBites""","""https://www.africanbites.com/s…","[""2-3 tablespoons (28-45ml) cooking oil (or butter)"", ""1 medium onion, chopped"", … ""½ cup (80g) raisins""]","""https://www.africanbites.com/w…","""Two quick and simple to make y…","[""Heat the oil or butter in a saucepan. Then add onions, garlic, ginger, paprika, curry pepper, cayenne pepper, and white pepper, and sauté for about a minute."", ""Pour the rice into the pan and stir for about 2 minutes."", … ""Fluff with a fork, transfer the rice to a bowl, and serve.""]","[""Indian"", ""South African""]","[""Indian"", ""South African""]"
"""AfricanBites-c737b5c22edefea6a…","""English""","""700309""","""7UP Biscuits""","""AfricanBites""","""https://www.africanbites.com/7…","[""3 cups 360g) all-purpose flour (if using self-rising flour or a baking mix, see notes)"", ""1½ tablespoons (21g) baking powder"", … ""⅓ cup (76g) unsalted butter, melted""]","""https://www.africanbites.com/w…","""These biscuits are totally tas…","[""Preheat oven to 425℉ (220℃)."", ""Whisk flour, baking powder, and salt in a large bowl. Omit this step if using self-rising flour or baking mix already containing these ingredients."", … ""Remove the biscuits from the oven and let rest until they have absorbed all the butter. Enjoy!""]","[""American"", ""Southern""]","[""American"", ""US Southern""]"
…,…,…,…,…,…,…,…,…,…,…,…
"""AllRecipes-1a4a1cafe5f2f5e9ee8…","""English""","""8977""","""Keligun Chicken""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""6 cut up chicken pieces"", ""1 cup soy sauce"", … ""⅛ cup chopped green onion for topping""]","""https://images.media-allrecipe…","""An island recipe from the Mari…","[""To Make Marinade: Combine the soy sauce, vinegar, garlic and ground black pepper. Mix together. Marinate chicken in refrigerator for 2 to 4 hours."", ""Remove chicken from refrigerator and dispose of marinade. Broil or barbecue chicken for 25 to 30 minutes or until cooked through and juices run clear. When chicken is cooked, remove bones and dice chicken meat."", … ""The nutrition data for this recipe includes information for the full amount of the marinade ingredients. Depending on marinating time, ingredients, cooking method, etc., the actual amount of the marinade consumed will vary.""]","[""Australian"", ""New"", ""Zealander""]","[""Australian"", ""Fusion"", ""Zealander""]"
"""AllRecipes-9f5d8a8347394ba3782…","""English""","""20952""","""Skite Cake""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""½ cup butter, room temperature"", ""½ cup white sugar"", … ""1 (16 ounce) container chocolate frosting""]","""https://images.media-allrecipe…","""One of my mum's best.""","[""Preheat oven to 350 degrees F (175 degrees C). Line a rimmed baking sheet or jellyroll pan with foil."", ""Beat 1/2 cup butter and sugar with an electric mixer in until light and fluffy. The mixture should be noticeably lighter in color. Add the room-temperature egg and beat well. Sift together the flour, cocoa powder, and baking powder. Gradually stir into butter mixture until fully combined. Spread batter into prepared pan."", … ""Instead of chocolate frosting, you could glaze the cake with chocolate ganache or a pourable chocolate icing.""]","[""Australian"", ""New"", ""Zealander""]","[""Australian"", ""Fusion"", ""Zealander""]"
"""AllRecipes-467c30863817f7dd199…","""English""","""90344""","""Turkey Stir Fry with Lychees""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""1 pound turkey meat, diced"", ""2 tablespoons oyster sauce, divided"", … ""1 bunch green onions, chopped""]","""https://images.media-allrecipe…","""Lychee...a typical oriental fr…","[""Place turkey in a bowl with 1 tablespoon oyster sauce, 1/2 the ginger, and 1 tablespoon Chinese cooking wine. Marinate 20 minutes."", ""Heat the oil and garlic in a wok over high heat. When garlic begins to brown, discard marinade from bowl and mix turkey into wok. Stir in remaining ginger and wine. Mix in remaining oyster sauce, lychees, chile peppers, and soy sauce. Reduce heat to low, cover, and continue cooking 5 minutes, or until turkey is done. Season with pepper, and garnish with cilantro and green onions to serve.""]","[""Australian"", ""New"", ""Zealander""]","[""Australian"", ""Fusion"", ""Zealander""]"
"""AllRecipes-d72d9be6ebb2a8f01c3…","""English""","""35062""","""Hamburger Sarah Style""","""AllRecipes""","""https://www.allrecipes.com/rec…","[""1 pound ground beef"", ""1 egg"", … ""¼ cup all-purpose flour, or as needed""]","""https://images.media-allrecipe…","""Australian Beef Hamburgers are…","[""In a medium bowl, combine the ground beef, egg, ketchup, barbeque sauce, Worcestershire sauce, and onion. If you are using the yeast extract spread, throw it in too. Mix together using your hands until well mixed. Add only as much flour as necessary to keep it from being too runny."", ""Heat a large skillet over medium heat. Form the burger mixture into 4 large patties. Place patties in the hot skillet, and fry until no longer pink, about 7 minutes per side depending on thickness. Serve on buns or with gravy."", ""These can also be made on an indoor or outdoor grill.""]","[""Australian"", ""New"", ""Zealander""]","[""Australian"", ""Fusion"", ""Zealander""]"


In [None]:
# Sanity check: list rows whose relabeled cuisine list is empty.
# The recorded output below shows only the header, i.e. no such rows.
temp.filter(
    pl.col("relabeled_cuisines").list.len() == 0
)

mealeon_id,language,source_id,title,origin,url,ingredients,photo_url,description,steps,cuisines,relabeled_cuisines
str,str,str,str,str,str,list[str],str,str,list[str],list[str],list[str]


In [None]:
# Same check against the original (pre-relabeling) cuisine list.
# The recorded output below shows only the header, i.e. no such rows.
temp.filter(
    pl.col("cuisines").list.len() == 0
)

mealeon_id,language,source_id,title,origin,url,ingredients,photo_url,description,steps,cuisines,relabeled_cuisines
str,str,str,str,str,str,list[str],str,str,list[str],list[str],list[str]


In [None]:
# Rows where the relabeled cuisine list is null rather than an empty list.
# The recorded output shows a recipe whose original `cuisines` is null as well.
temp.filter(
    pl.col("relabeled_cuisines").is_null()
)

mealeon_id,language,source_id,title,origin,url,ingredients,photo_url,description,steps,cuisines,relabeled_cuisines
str,str,str,str,str,str,list[str],str,str,list[str],list[str],list[str]
"""AllRecipes-e556569d6282f10623b…","""English""","""222014""","""""Pantry Raid"" Chicken Enchilad…","""AllRecipes""","""https://www.allrecipes.com/rec…","[""1 (15 ounce) can tomato sauce¼ cup water1 envelope taco seasoning mix1 ½ tablespoons chili powder1 tablespoon vegetable oil1 pound chicken breast tenderloins1 (15 ounce) can black beans, drained¼ cup cream cheese1 cup shredded Mexican-style cheese blend, or more to taste1 (7.5 ounce) package corn bread mix1 egg⅓ cup milk""]","""https://imagesvc.meredithcorp.…","""I made this recipe up one nigh…","[""Preheat the oven to 375 degrees F (190 degrees C). Grease a 9x9-inch baking dish.Mix tomato sauce, water, taco seasoning mix, and chili powder together in a saucepan; bring to a simmer over medium heat.Heat vegetable oil in a skillet over medium heat and brown chicken tenderloins on both sides, about 5 minutes per side. Pour tomato sauce mixture over the chicken, bring to a simmer, and cook over medium-low heat until chicken tenderloins are no longer pink inside, about 8 minutes. Transfer chicken to a bowl and shred; return shredded chicken to the sauce. Mix in black beans and cream cheese until thoroughly combined.Pour chicken mixture into prepared baking dish. Top with shredded Mexican cheese. Whisk corn bread mix, egg, and milk in a bowl, and spoon the batter over the chicken mix.Bake in the preheated oven until the casserole is bubbling and the corn bread topping is browned and set, about 30 minutes.""]",,


We have successfully cleaned up the cuisine labels. The next step is to migrate these recipes into Vespa so that embeddings can be created.

In [None]:
# convert postgres records into Vespa format
# example structure of a record in vespa format
# vespa_record = {
#                 "put": f"id:{doc_type}:{doc_type}::{['recipe']['source']}-{record['id']}",
#                 "fields": {
#                     "origin": "epicurious", # replace with ['recipe']['source']
#                     "id": "",
#                     "title": "",
#                     "ingredients": "",
#                     "steps": "",
#                     "cuisine": "",
#                 },
#             }


def to_vespa_record(record: dict, doc_type: str = "mealeon") -> dict:
    """Build a Vespa ``put`` operation from one cleaned recipe row.

    Args:
        record: Mapping keyed by the dataframe column names
            (``mealeon_id``, ``origin``, ``source_id``, ``title``,
            ``ingredients``, ``steps``, ``description``,
            ``relabeled_cuisines``).
        doc_type: Used as both the namespace and the document type in the
            Vespa document id.

    Returns:
        A dict with the ``put`` document id and the ``fields`` payload.

    Raises:
        KeyError: If ``record`` lacks one of the columns listed above.
    """
    # NOTE(review): the schema defined later in this notebook names the field
    # "cuisine" and has no "description" field -- confirm which side should change.
    # NOTE(review): `relabeled_cuisines` can be null for some rows -- confirm
    # how Vespa should receive those.
    return {
        "put": f"id:{doc_type}:{doc_type}::{record['mealeon_id']}",
        "fields": {
            # not sure if needed, can just join with postgres
            "origin": record["origin"],
            # not sure if needed, can just join with postgres or use mealeon_id for join
            "id": record["source_id"],
            "title": record["title"],
            "ingredients": record["ingredients"],
            "steps": record["steps"],
            "description": record["description"],
            "cuisines": record["relabeled_cuisines"],
        },
    }

In [None]:
# create sqlalchemy engine and session to import data
# the URL targets the "mealeon" database with the psycopg driver
postgres_url = f"postgresql+psycopg://{user}:{password}@{host}/mealeon"
engine = create_engine(postgres_url)

# session factory bound to the engine above
Session = sessionmaker(engine)

In [None]:
# load sqlalchemy MeaLeon into dataframe to do EDA and cleaning
# `pl.read_database` needs the SQL text as `query` and the live connection as
# `connection`; passing only the session raised a TypeError.
with Session.begin() as session:
    recipe_scrapes_df = pl.read_database(
        query="SELECT * FROM recipe_scrapes",
        connection=session,
    )

In [None]:
# load Edamam credentials from secrets file
with open("../secrets/edamam.json", "r") as f:
    cred = json.load(f)

app_id, app_key = cred["id"], cred["key"]

# ready-made query-string fragments, appended verbatim when the URL is built
app_id_s = f"&app_id={app_id}"
app_key_s = f"&app_key={app_key}"

In [None]:
# test recipe to look for
# dish_name is already percent-encoded (%20 is a space) because it is spliced
# directly into the request URL
dish_name = "buffalo%20wings"
# NOTE(review): cuisine_name is not referenced in the cells visible here
cuisine_name = "American"

In [None]:
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y_%H_%M_%S")

# lay out the query base
# api_base = "https://api.edamam.com/search?"

# v2 of edamam
api_base = "https://api.edamam.com/api/recipes/v2?type=public&beta=false"

# Edamam query
# what is the better way to handle this API ID and Key
q = f"{api_base}&q={dish_name}{app_id_s}{app_key_s}"

# kept as an alias: the old concatenation built exactly the same string
api_call = q

# Print the URL with the credentials masked so the app id/key do not end up
# in the saved notebook output.
print(f"{api_base}&q={dish_name}&app_id=<redacted>&app_key=<redacted>")


https://api.edamam.com/api/recipes/v2?type=public&beta=false&q=buffalo%20wings&app_id=abaa034e&app_key=251fdd6b808c90fa3b5863d9943692e5
https://api.edamam.com/api/recipes/v2?type=public&beta=false&q=buffalo%20wings&app_id=abaa034e&app_key=251fdd6b808c90fa3b5863d9943692e5


In [None]:
# Known-good URL layout, rebuilt from the loaded credentials so the API id/key
# are not committed in the notebook source.
working = (
    "https://api.edamam.com/api/recipes/v2?type=public&beta=false"
    f"&q=buffalo%20wings&app_id={app_id}&app_key={app_key}"
)

In [None]:
# Confirm the assembled query URL is identical to the known-good one.
q == working

True

In [None]:
# Always pass a timeout: without one, requests can wait indefinitely on an
# unresponsive server and hang the notebook kernel.
resp = requests.get(q, timeout=30)
resp

<Response [200]>

In [None]:
# Decode the JSON body. The recorded output shows the top-level keys
# 'from', 'to', 'count', '_links' and 'hits'.
resp_json = resp.json()
resp_json

{'from': 1,
 'to': 20,
 'count': 989,
 '_links': {'next': {'href': 'https://api.edamam.com/api/recipes/v2?q=buffalo%20wings&app_key=251fdd6b808c90fa3b5863d9943692e5&_cont=CHcVQBtNNQphDmgVQntAEX4BYEtxBQcARGxIBmEaY1x1BwoVX3dBUmIVZlNxDQBSETNAAmEQMVd0VQoCEWETBTAbYQZzVhFqX3cWQT1OcV9xBE4%3D&type=public&app_id=abaa034e&beta=false',
   'title': 'Next page'}},
 'hits': [{'recipe': {'uri': 'http://www.edamam.com/ontologies/edamam.owl#recipe_e2f7014774924ac19ed65b31dfe33667',
    'label': 'Vegan Cauliflower Buffalo Wings',
    'image': 'https://edamam-product-images.s3.amazonaws.com/web-img/2a9/2a9ce33a06ea51848de68717b7d1f578.png?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEAaCXVzLWVhc3QtMSJIMEYCIQCTE2CscCjxtL%2FiaFfXc5ndlQeB1p0n5ZUvp%2Bl6tetdSAIhAMF7VLKAzFzsNI7DCQVj2r5ASX0tHWYiKyanpz6HGWrwKsIFCMn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQABoMMTg3MDE3MTUwOTg2IgxLiUo%2FVIaJyjdeuKoqlgWbRTYLPl%2BqHABqwizKsNXUaDqbcznXEFzbtroEcn0PGtmKF3MxRBNslJfQctO0fFd%2BSmCWpwlf%2Bpw%2FkTcFvsHjYkFwz%2FRuGPsDuNZXZGQg2457ARvXIWRb5gK

In [None]:
# Inspect the first hit; its data sits under the 'recipe' key.
resp_json['hits'][0]

{'recipe': {'uri': 'http://www.edamam.com/ontologies/edamam.owl#recipe_e2f7014774924ac19ed65b31dfe33667',
  'label': 'Vegan Cauliflower Buffalo Wings',
  'image': 'https://edamam-product-images.s3.amazonaws.com/web-img/2a9/2a9ce33a06ea51848de68717b7d1f578.png?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEAaCXVzLWVhc3QtMSJIMEYCIQCTE2CscCjxtL%2FiaFfXc5ndlQeB1p0n5ZUvp%2Bl6tetdSAIhAMF7VLKAzFzsNI7DCQVj2r5ASX0tHWYiKyanpz6HGWrwKsIFCMn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQABoMMTg3MDE3MTUwOTg2IgxLiUo%2FVIaJyjdeuKoqlgWbRTYLPl%2BqHABqwizKsNXUaDqbcznXEFzbtroEcn0PGtmKF3MxRBNslJfQctO0fFd%2BSmCWpwlf%2Bpw%2FkTcFvsHjYkFwz%2FRuGPsDuNZXZGQg2457ARvXIWRb5gKL5sOJEHlk3zXG72rk%2B5MbHvxHTeIEDkmnlbngwotwZqU8hD6sEucThJbeYrf7YH2MiAAjexdBYqhELhWbIB44mnytPNK7Dhk9UsXPqlhHz8RN%2FbeSPjhPlWP0%2F1sauytFd8sBQULWOB5%2FoDHh6Il%2FN1dzhP7RJcVSJIn7utq90JrUcvR8bdRr4JBGD32TQ8uddcItFSBOhVBCctHRnN29qqeBOGOJs08zkvpCxVUeLOQiFCP7KxeHtCY%2FHR7V1kSRQmwsXMOYZrnKbmkLh8cnobGuhPO93W1we9mC6X%2BX%2BIIJG8sgZ8kNubvOJxDqnp3RX65rgSH5kHYIEbYEEBnkvpwiok5NKNK

In [None]:
# Titles of the first ten hits. Slicing (instead of indexing 0..9) avoids an
# IndexError when a page holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print(hit['recipe']['label'])

Vegan Cauliflower Buffalo Wings
Buffalo Wings Recipe | Grilling
Buffalo Wings
Basic Buffalo Wings Recipe
Basically Buffalo Wings
Extraordinary Everyday Baked Buffalo Wings
Next level buffalo wings
Air-Fryer Buffalo Wings
Buffalo Wings
Buffalo Wings


In [None]:
# Source site of the first ten hits. Slicing avoids an IndexError when a page
# holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print(hit['recipe']['source'])

Food52
Serious Eats
Simply Recipes
Food Republic
Bon Appetit
Chez Us
BBC Good Food
EatingWell
Delish
The Daily Meal


In [None]:
# Recipe URL of the first ten hits. Slicing avoids an IndexError when a page
# holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print(hit['recipe']['url'])

https://food52.com/recipes/54991-vegan-cauliflower-buffalo-wings
https://www.seriouseats.com/grilling-buffalo-wings-recipe
http://simplyrecipes.com/recipes/buffalo_wings/
http://www.foodrepublic.com/2011/09/12/basic-buffalo-wings-recipe
https://www.bonappetit.com/recipe/basically-buffalo-wings
http://chezus.com/2013/01/28/baked-buffalo-wings/
https://www.bbcgoodfood.com/recipes/next-level-buffalo-wings
https://www.eatingwell.com/recipe/274220/air-fryer-buffalo-wings
https://www.delish.com/cooking/recipe-ideas/a51133/classic-buffalo-wings-recipe
http://www.thedailymeal.com/buffalo-wings-recipe


In [None]:
# Health labels of the first ten hits. Slicing avoids an IndexError when a
# page holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print(hit['recipe']['healthLabels'])

['Mediterranean', 'Dairy-Free', 'Gluten-Free', 'Wheat-Free', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Pork-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free', 'No oil added', 'Sulfite-Free', 'Kosher']
['Gluten-Free', 'Wheat-Free', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free']
['Sugar-Conscious', 'Kidney-Friendly', 'Keto-Friendly', 'Gluten-Free', 'Wheat-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free', 'Sulfite-Free']
['Sugar-Conscious', 'Low Potassium', 'Kidney-Friendly', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesam

In [None]:
# Ingredient lines of the first ten hits. Slicing avoids an IndexError when a
# page holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print(hit['recipe']['ingredientLines'])

['1 small cauliflower', '1/2 cup chickpea flour', '3 tablespoons rice flour', '1/2 teaspoon red chili powder', '2/3 cup buffalo wing hot sauce', '1-2 scallions, sliced']
['3 pounds chicken wings (18 wings), cut up', '1 teaspoon cayenne pepper', '1 teaspoon ground black pepper', '1 teaspoon kosher salt', 'For the Sauce', '4 tablespoons unsalted butter', "1/2 cup hot sauce, preferably Frank's Louisiana Hot Sauce", '2 tablespoons Tabasco sauce or other hot sauce', '1 tablespoon dark brown sugar', '1 tablespoon honey', '2 teaspoons cider vinegar', '1/4 teaspoon cayenne pepper, plus more to taste']
['2 lbs chicken wings (about 12 wings)', '3 Tbsp butter, melted', "4 Tbsp bottled hot pepper sauce (like Crystal or Frank's Original)", '1 Tbsp paprika', '1/2 teaspoon salt', '1/2 teaspoon cayenne pepper', '1/4 teaspoon black pepper', 'Celery sticks (optional)', '1/2 cup sour cream', '1/2 cup crumbled blue cheese', '1/2 cup mayonnaise', '1 Tbsp white wine vinegar or white vinegar', '1 clove garli

In [None]:
# Title-cased cuisine types of the first ten hits. Slicing avoids an
# IndexError when a page holds fewer than ten results.
for hit in resp_json['hits'][:10]:
    print([s.title() for s in hit['recipe']['cuisineType']])

['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']


In [None]:
# find a hashing algorithm to create {source}-{hashed-url}
# remove https:// and www. if present, necessary?
# taken from keraion
from hashlib import sha256

def unique_name_from_str(string: str) -> str:
    """Return the SHA-256 hex digest of ``string`` encoded as UTF-8.

    The same input always produces the same 64-character hexadecimal id.
    """
    encoded = string.encode("utf8")
    digest = sha256(encoded)
    return digest.hexdigest()

In [None]:
# this gives ['fields']['id']
# Slicing (instead of indexing 0..9) avoids an IndexError when a page holds
# fewer than ten results.
for hit in resp_json['hits'][:10]:
    recipe = hit['recipe']
    print(f"{recipe['source'].replace(' ', '_')}-{unique_name_from_str(recipe['url'])}")

Food52-7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3
Serious_Eats-8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c
Simply_Recipes-6602b9373086110f05cf4db003389ef41a89f21ac65162272bc52ba09efc862f
Food_Republic-4a73cb9988dd6c8341f8fa8ec5c65cd221218085ec8409545722cc2ec3fb96ea
Bon_Appetit-316182ccfad731f0376900f95d966ada60134c52e48c140b458430f421f477a5
Chez_Us-acbd7407da22d78169f8475cf2e2be1d2d1a66e86551b0ad6b5054acfd8fe3c6
BBC_Good_Food-bb68506a8438676d2068881d4769529fa395ebf6a162a335385a93494ca1a086
EatingWell-6f957796baf4ff81061808502ceba6e9c705e6db3be4efdafc6428fd0f3ab8c7
Delish-679717846737446ea95da3489fcb16c3767235cfd74e4f19aa942dcf9a0d1321
The_Daily_Meal-c8083fefbbf3daba9b055a012d2bad3de4df338535aae1d1cc371001486e56ff


In [None]:
# example structure of a record in vespa format
# vespa_record = {
#                 "put": f"id:{doc_type}:{doc_type}::{['recipe']['source']}-{record['id']}",
#                 "fields": {
#                     "origin": "epicurious", # replace with ['recipe']['source']
#                     "id": "",
#                     "title": "",
#                     "ingredients": "",
#                     "steps": "",
#                     "cuisine": "",
#                 },
#             }

import unicodedata


def _clean_ingredient(line: str) -> str:
    """Blank out Unicode category "C" characters and strip leading bullets.

    Every character whose Unicode general category starts with "C" is replaced
    by a space, then leading ``*``, ``-`` and space characters are removed.
    """
    printable = "".join(
        ch if unicodedata.category(ch)[0] != "C" else " " for ch in line
    )
    return printable.lstrip("*- ")


vespa_edamam = []

# Slicing (instead of indexing 0..9) avoids an IndexError when the page holds
# fewer than ten results.
for hit in resp_json['hits'][:10]:
    recipe = hit['recipe']

    origin = recipe['source'].replace(' ', '_')
    rec_id = unique_name_from_str(recipe['url'])
    rec_title = recipe['label']

    # Clean each line on its own. The earlier join-on-"|"/split round trip
    # split any ingredient containing "|", dropped trailing empty lines and
    # turned an empty ingredient list into [''].
    rec_ingreds = [_clean_ingredient(line) for line in recipe['ingredientLines']]

    rec_cuisines = [s.title() for s in recipe['cuisineType']]

    vespa_record = {
        "put": f"id:mealeon:mealeon::{origin}-{rec_id}",
        "fields": {
            "origin": origin,
            "id": rec_id,
            "title": rec_title,
            "ingredients": rec_ingreds,
            # NOTE(review): the schema in this notebook declares `steps` as
            # array<string>; confirm Vespa accepts an empty string here.
            "steps": "",
            "cuisine": rec_cuisines,
        },
    }

    vespa_edamam.append(vespa_record)

print(vespa_edamam)

[{'put': 'id:mealeon:mealeon::Food52-7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3', 'fields': {'origin': 'Food52', 'id': '7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3', 'title': 'Vegan Cauliflower Buffalo Wings', 'ingredients': ['1 small cauliflower', '1/2 cup chickpea flour', '3 tablespoons rice flour', '1/2 teaspoon red chili powder', '2/3 cup buffalo wing hot sauce', '1-2 scallions, sliced'], 'steps': '', 'cuisine': ['American']}}, {'put': 'id:mealeon:mealeon::Serious_Eats-8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c', 'fields': {'origin': 'Serious_Eats', 'id': '8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c', 'title': 'Buffalo Wings Recipe | Grilling', 'ingredients': ['3 pounds chicken wings (18 wings), cut up', '1 teaspoon cayenne pepper', '1 teaspoon ground black pepper', '1 teaspoon kosher salt', 'For the Sauce', '4 tablespoons unsalted butter', "1/2 cup hot sauce, preferably Frank's Louisiana Hot Sauc

### Try mixing in PyVespa
Following documentation [here](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa.html)

In [None]:
from vespa.package import (
    ApplicationPackage,
    Field,
    Schema,
    Document,
    HNSW,
    RankProfile,
    Component,
    Parameter,
    FieldSet,
    GlobalPhaseRanking,
    Function,
    DocumentSummary,
    Summary
)

# Vespa application package "mealeon" holding a single schema of the same name.
# The embedding field, the semantic/fusion rank profiles and the embedder
# component are left commented out below.
package = ApplicationPackage(
    name="mealeon",
    schema=[
        Schema(
            name="mealeon",
            document=Document(
                fields=[
                    # NOTE(review): "set_language" presumably tells Vespa which
                    # language to use when processing the document -- confirm.
                    Field(
                        name="language", 
                        type="string", 
                        indexing=["summary", "index", "set_language"],
                        match=["word"]
                    ),
                    # NOTE(review): `bolding` is enabled on a field that is not
                    # indexed ("attribute" + "summary" only) -- confirm it has
                    # any effect.
                    Field(
                        name="id",
                        type="string",
                        indexing=["attribute", "summary"],
                        match=["word"],
                        bolding=True,
                    ),
                    # Title is indexed with BM25 enabled and returned in summaries.
                    Field(
                        name="title",
                        type="string",
                        indexing=["index", "summary"],
                        index="enable-bm25",
                    ), 
                    Field(
                        name="origin",
                        type="string",
                        indexing=["attribute", "summary"],
                        match=["word"],
                    ),                 
                    # Indexed only (no "summary"); the recorded query output in
                    # this notebook shows no ingredients in the hits.
                    Field(
                        name="ingredients",
                        type="array<string>",
                        indexing=["index"],
                        index="enable-bm25",
                        match=["word"],
                    ),
                    # Indexed only (no "summary"), same as ingredients.
                    Field(
                        name="steps",
                        type="array<string>",
                        indexing=["index"],
                        index="enable-bm25"
                    ),
                    # Note the singular field name "cuisine".
                    Field(
                        name="cuisine",
                        type="array<string>",
                        indexing=["index", "summary"],
                        index="enable-bm25",
                        match=["text"],
                    ),
                    # Field(
                    #     name="embedding",
                    #     type="tensor<float>(x[384])",
                    #     indexing=[
                    #         'input title . " " . input body',
                    #         "embed",
                    #         "index",
                    #         "attribute",
                    #     ],
                    #     ann=HNSW(distance_metric="angular"),
                    #     is_document_field=False,
                    # ),
                ]
            ),
            # The "default" fieldset groups title and ingredients.
            fieldsets=[
                FieldSet(
                    name="default", 
                    fields=["title", "ingredients"]
                )
            ],
            # Extra summary class that lists only the id field.
            document_summaries=[
                    DocumentSummary(
                    name="document-summary",
                    summary_fields=[
                        Summary("id")
                    ]
                ),
            ],
            # Three rank profiles, all scoring on title and ingredients only.
            rank_profiles=[
                RankProfile(
                    name="default",
                    first_phase="nativeRank(title, ingredients)"
                ),
                # `bm25sum` repeats the first-phase expression as a named function.
                RankProfile(
                    name="bm25",
                    inherits="default",
                    first_phase="bm25(title) + bm25(ingredients)",
                    # inputs=[("query(q)", "tensor<float>(x[384])")],
                    functions=[
                        Function(name="bm25sum", expression="bm25(title) + bm25(ingredients)")
                    ],
                ),
                # Sum of the BM25 and nativeRank scores of both fields.
                RankProfile(
                    name="combined", 
                    inherits="default",
                    first_phase="bm25(title) + bm25(ingredients) + nativeRank(title) + nativeRank(ingredients)",
                    functions=[
                        Function(name="bm25nativeRank",
                                 expression="bm25(title) + bm25(ingredients) + nativeRank(title) + nativeRank(ingredients)")
                    ]
                )
                # RankProfile(
                #     name="semantic",
                #     inputs=[("query(q)", "tensor<float>(x[384])")],
                #     first_phase="closeness(field, embedding)",
                # ),
                # RankProfile(
                #     name="fusion",
                #     inherits="bm25",
                #     inputs=[("query(q)", "tensor<float>(x[384])")],
                #     first_phase="closeness(field, embedding)",
                #     global_phase=GlobalPhaseRanking(
                #         expression="reciprocal_rank_fusion(bm25sum, closeness(field, embedding))",
                #         rerank_count=1000,
                #     ),
                # ),
            ],
        )
    ],
    # components=[
    #     Component(
    #         id="e5",
    #         type="hugging-face-embedder",
    #         parameters=[
    #             Parameter(
    #                 "transformer-model",
    #                 {
    #                     "url": "https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx"
    #                 },
    #             ),
    #             Parameter(
    #                 "tokenizer-model",
    #                 {
    #                     "url": "https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json"
    #                 },
    #             ),
    #         ],
    #     )
    # ],
)

In [None]:
# try mixing in PyVespa

from vespa.deployment import VespaDocker

# Deploy the application package to a Vespa Docker container; the application
# is served on port 8181 and the config server uses port 19081.
vespa_docker = VespaDocker(
    port=8181,
    cfgsrv_port=19081,
)
app = vespa_docker.deploy(application_package=package)


Waiting for configuration server, 0/300 seconds...
Waiting for configuration server, 5/300 seconds...
Waiting for configuration server, 10/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 0/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 5/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 10/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 15/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 20/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 25/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Application is up!
Finished de

In [None]:
!vespa feed ../data/processed/mealeon_vespa.json --target http://localhost:8181

{
  "feeder.operation.count": 34756,
  "feeder.seconds": 43.816,
  "feeder.ok.count": 34756,
  "feeder.ok.rate": 793.220,
  "feeder.error.count": 0,
  "feeder.inflight.count": 0,
  "http.request.count": 34756,
  "http.request.bytes": 26112962,
  "http.request.MBps": 0.596,
  "http.exception.count": 0,
  "http.response.count": 34756,
  "http.response.bytes": 5074376,
  "http.response.MBps": 0.116,
  "http.response.error.count": 0,
  "http.response.latency.millis.min": 11,
  "http.response.latency.millis.avg": 69,
  "http.response.latency.millis.max": 576,
  "http.response.code.counts": {
    "200": 34756
  }
}


In [None]:
from vespa.io import VespaResponse, VespaQueryResponse

# query should be recipe name?
    # WHERE title !contains {query}
# cuisine name should be in the WHERE filter clause of YQL
    # AND WHERE cuisine NOT IN {cuisines}
# how to penalize similar title?

# start with plain keyword search

with app.syncio(connections=1) as session:
    query = "Buffalo Wings"
    response: VespaQueryResponse = session.query(
        yql=f"select * from sources mealeon where (title contains '{query}') limit 5",
        query=query,
        ranking="bm25"
        # body={"input.query(q)": f"embed({query})"},
    )
    # Raise explicitly instead of using `assert`: assertions are stripped under
    # `python -O`, and the error now carries the status and response body.
    if not response.is_successful():
        raise RuntimeError(
            f"Vespa query failed with status {response.get_status_code()}: "
            f"{response.get_json()}"
        )

In [None]:
# Show the hits returned by the keyword query.
print(response.hits)

[{'id': 'id:mealeon:mealeon::epicurious-54a4697e6529d92b2c0279d3', 'relevance': 9.38931639154318, 'source': 'mealeon_content', 'fields': {'sddocname': 'mealeon', 'id': 'epicurious-54a4697e6529d92b2c0279d3', 'documentid': 'id:mealeon:mealeon::epicurious-54a4697e6529d92b2c0279d3', 'title': 'Buffalo Wings', 'origin': 'epicurious', 'cuisine': ['Missing Cuisine']}}, {'id': 'id:mealeon:mealeon::epicurious-54a42af319925f464b37f2a5', 'relevance': 8.446862631228765, 'source': 'mealeon_content', 'fields': {'sddocname': 'mealeon', 'id': 'epicurious-54a42af319925f464b37f2a5', 'documentid': 'id:mealeon:mealeon::epicurious-54a42af319925f464b37f2a5', 'title': 'Korean Buffalo Wings', 'origin': 'epicurious', 'cuisine': ['Missing Cuisine']}}]


In [None]:
# `next_resp` was never assigned, so this cell raised a NameError. Fetch the
# following page through the "next" link Edamam returns under `_links`.
next_href = resp_json.get('_links', {}).get('next', {}).get('href')
if next_href is None:
    # no further page advertised by the API
    next_resp = None
    next_resp_json = {}
else:
    next_resp = requests.get(next_href, timeout=30)
    next_resp.raise_for_status()
    next_resp_json = next_resp.json()
next_resp_json

NameError: name 'next_resp' is not defined

In [None]:
# actual results
# keep only the list of hits from the first Edamam response
results = resp_json['hits']
results

In [None]:
# | hide
# import locally: the notebook's visible import cell does not bring in nbdev
import nbdev

nbdev.nbdev_export()