# | default_exp Testing Edamam API request querying Vespa-friendly Epicurious

In [None]:
# | hide
from datetime import datetime
from enum import Enum
import json
from pydantic import BaseModel
# import vespa
import requests

Want to parse the edamam return
1. get results 1-10
   1. 'hits'[0:9]
2. for each result, pull
   1. 'recipe'['label'] should give title
   2. 'recipe'['source'] gives origin
   3. 'recipe'['url'] gives link
   4. 'recipe'['cautions'] for allergies?
      1. This seems insufficient, can parse ['ingredients']['foodCategory'] for each ingredient, but it seems not sufficient (ie, too general for gluten)
      2. ['recipe']['healthLabels'] is better, but it notes the inverse. Instead of saying what allergens the recipe has, it says whether the recipe is allergen-free
         1. Is dependent on source labeling correctly, but that's fine, we aren't using Edamam to scrape
   5. 'recipe'['ingredientLines'] for array<ingredients>
   6. 'recipe'['cuisineType'] for array<cuisines>
   7. does not have steps
3. Can use URL to follow up with scraper?
   1. Get origin's recipe ID
      1. Could just use the source and hashed URL with a hash to create a unique ID
   2. Get steps   
4. Could pull image for image to image comparison

In [None]:
# load Edamam credentials from secrets file
with open("../secrets/edamam.json","r") as f:
    cred = json.load(f)
    
app_id = cred["id"]
app_id_s = f"&app_id={app_id}"

app_key = cred["key"]
app_key_s = f"&app_key={app_key}"

In [None]:
# test recipe to look for
dish_name = "buffalo%20wings"
cuisine_name = "American"

In [None]:
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y_%H_%M_%S")

# lay out the query base
# api_base = "https://api.edamam.com/search?"

# v2 of edamam
api_base = "https://api.edamam.com/api/recipes/v2?type=public&beta=false"

# Edamam query
# what is the better way to handle this API ID and Key
q = f"{api_base}&q={dish_name}{app_id_s}{app_key_s}"
print(q)

api_call = api_base + f"&q={dish_name}" + app_id_s + app_key_s
print(api_call)


https://api.edamam.com/api/recipes/v2?type=public&beta=false&q=buffalo%20wings&app_id=abaa034e&app_key=251fdd6b808c90fa3b5863d9943692e5
https://api.edamam.com/api/recipes/v2?type=public&beta=false&q=buffalo%20wings&app_id=abaa034e&app_key=251fdd6b808c90fa3b5863d9943692e5


In [None]:
working = 'https://api.edamam.com/api/recipes/v2?type=public&beta=false&q=buffalo%20wings&app_id=abaa034e&app_key=251fdd6b808c90fa3b5863d9943692e5'

In [None]:
q == working

True

In [None]:
resp = requests.get(q)
resp

<Response [200]>

In [None]:
resp_json = resp.json()
resp_json

{'from': 1,
 'to': 20,
 'count': 989,
 '_links': {'next': {'href': 'https://api.edamam.com/api/recipes/v2?q=buffalo%20wings&app_key=251fdd6b808c90fa3b5863d9943692e5&_cont=CHcVQBtNNQphDmgVQntAEX4BYEtxBQcARGxIBmEaY1x1BwoVX3dBUmIVZlNxDQBSETNAAmEQMVd0VQoCEWETBTAbYQZzVhFqX3cWQT1OcV9xBE4%3D&type=public&app_id=abaa034e&beta=false',
   'title': 'Next page'}},
 'hits': [{'recipe': {'uri': 'http://www.edamam.com/ontologies/edamam.owl#recipe_e2f7014774924ac19ed65b31dfe33667',
    'label': 'Vegan Cauliflower Buffalo Wings',
    'image': 'https://edamam-product-images.s3.amazonaws.com/web-img/2a9/2a9ce33a06ea51848de68717b7d1f578.png?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEAaCXVzLWVhc3QtMSJIMEYCIQCTE2CscCjxtL%2FiaFfXc5ndlQeB1p0n5ZUvp%2Bl6tetdSAIhAMF7VLKAzFzsNI7DCQVj2r5ASX0tHWYiKyanpz6HGWrwKsIFCMn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQABoMMTg3MDE3MTUwOTg2IgxLiUo%2FVIaJyjdeuKoqlgWbRTYLPl%2BqHABqwizKsNXUaDqbcznXEFzbtroEcn0PGtmKF3MxRBNslJfQctO0fFd%2BSmCWpwlf%2Bpw%2FkTcFvsHjYkFwz%2FRuGPsDuNZXZGQg2457ARvXIWRb5gK

In [None]:
resp_json['hits'][0]

{'recipe': {'uri': 'http://www.edamam.com/ontologies/edamam.owl#recipe_e2f7014774924ac19ed65b31dfe33667',
  'label': 'Vegan Cauliflower Buffalo Wings',
  'image': 'https://edamam-product-images.s3.amazonaws.com/web-img/2a9/2a9ce33a06ea51848de68717b7d1f578.png?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEAaCXVzLWVhc3QtMSJIMEYCIQCTE2CscCjxtL%2FiaFfXc5ndlQeB1p0n5ZUvp%2Bl6tetdSAIhAMF7VLKAzFzsNI7DCQVj2r5ASX0tHWYiKyanpz6HGWrwKsIFCMn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQABoMMTg3MDE3MTUwOTg2IgxLiUo%2FVIaJyjdeuKoqlgWbRTYLPl%2BqHABqwizKsNXUaDqbcznXEFzbtroEcn0PGtmKF3MxRBNslJfQctO0fFd%2BSmCWpwlf%2Bpw%2FkTcFvsHjYkFwz%2FRuGPsDuNZXZGQg2457ARvXIWRb5gKL5sOJEHlk3zXG72rk%2B5MbHvxHTeIEDkmnlbngwotwZqU8hD6sEucThJbeYrf7YH2MiAAjexdBYqhELhWbIB44mnytPNK7Dhk9UsXPqlhHz8RN%2FbeSPjhPlWP0%2F1sauytFd8sBQULWOB5%2FoDHh6Il%2FN1dzhP7RJcVSJIn7utq90JrUcvR8bdRr4JBGD32TQ8uddcItFSBOhVBCctHRnN29qqeBOGOJs08zkvpCxVUeLOQiFCP7KxeHtCY%2FHR7V1kSRQmwsXMOYZrnKbmkLh8cnobGuhPO93W1we9mC6X%2BX%2BIIJG8sgZ8kNubvOJxDqnp3RX65rgSH5kHYIEbYEEBnkvpwiok5NKNK

In [None]:
for i in range(0,10):
    print(resp_json['hits'][i]['recipe']['label'])

Vegan Cauliflower Buffalo Wings
Buffalo Wings Recipe | Grilling
Buffalo Wings
Basic Buffalo Wings Recipe
Basically Buffalo Wings
Extraordinary Everyday Baked Buffalo Wings
Next level buffalo wings
Air-Fryer Buffalo Wings
Buffalo Wings
Buffalo Wings


In [None]:
for i in range(0,10):
    print(resp_json['hits'][i]['recipe']['source'])

Food52
Serious Eats
Simply Recipes
Food Republic
Bon Appetit
Chez Us
BBC Good Food
EatingWell
Delish
The Daily Meal


In [None]:
for i in range(0,10):
    print(resp_json['hits'][i]['recipe']['url'])

https://food52.com/recipes/54991-vegan-cauliflower-buffalo-wings
https://www.seriouseats.com/grilling-buffalo-wings-recipe
http://simplyrecipes.com/recipes/buffalo_wings/
http://www.foodrepublic.com/2011/09/12/basic-buffalo-wings-recipe
https://www.bonappetit.com/recipe/basically-buffalo-wings
http://chezus.com/2013/01/28/baked-buffalo-wings/
https://www.bbcgoodfood.com/recipes/next-level-buffalo-wings
https://www.eatingwell.com/recipe/274220/air-fryer-buffalo-wings
https://www.delish.com/cooking/recipe-ideas/a51133/classic-buffalo-wings-recipe
http://www.thedailymeal.com/buffalo-wings-recipe


In [None]:
for i in range(0,10):
    print(resp_json['hits'][i]['recipe']['healthLabels'])

['Mediterranean', 'Dairy-Free', 'Gluten-Free', 'Wheat-Free', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Pork-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free', 'No oil added', 'Sulfite-Free', 'Kosher']
['Gluten-Free', 'Wheat-Free', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free']
['Sugar-Conscious', 'Kidney-Friendly', 'Keto-Friendly', 'Gluten-Free', 'Wheat-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Sesame-Free', 'Lupine-Free', 'Mollusk-Free', 'Alcohol-Free', 'Sulfite-Free']
['Sugar-Conscious', 'Low Potassium', 'Kidney-Friendly', 'Egg-Free', 'Peanut-Free', 'Tree-Nut-Free', 'Soy-Free', 'Fish-Free', 'Shellfish-Free', 'Crustacean-Free', 'Celery-Free', 'Mustard-Free', 'Sesam

In [None]:
for i in range(0,10):
    print(resp_json['hits'][i]['recipe']['ingredientLines'])

['1 small cauliflower', '1/2 cup chickpea flour', '3 tablespoons rice flour', '1/2 teaspoon red chili powder', '2/3 cup buffalo wing hot sauce', '1-2 scallions, sliced']
['3 pounds chicken wings (18 wings), cut up', '1 teaspoon cayenne pepper', '1 teaspoon ground black pepper', '1 teaspoon kosher salt', 'For the Sauce', '4 tablespoons unsalted butter', "1/2 cup hot sauce, preferably Frank's Louisiana Hot Sauce", '2 tablespoons Tabasco sauce or other hot sauce', '1 tablespoon dark brown sugar', '1 tablespoon honey', '2 teaspoons cider vinegar', '1/4 teaspoon cayenne pepper, plus more to taste']
['2 lbs chicken wings (about 12 wings)', '3 Tbsp butter, melted', "4 Tbsp bottled hot pepper sauce (like Crystal or Frank's Original)", '1 Tbsp paprika', '1/2 teaspoon salt', '1/2 teaspoon cayenne pepper', '1/4 teaspoon black pepper', 'Celery sticks (optional)', '1/2 cup sour cream', '1/2 cup crumbled blue cheese', '1/2 cup mayonnaise', '1 Tbsp white wine vinegar or white vinegar', '1 clove garli

In [None]:
for i in range(0,10):
    print([s.title() for s in resp_json['hits'][i]['recipe']['cuisineType']])

['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']
['American']


In [None]:
# find a hashing algorithm to create {source}-{hashed-url}
# remove https:// and www. if present, necessary?
# taken from keraion
from hashlib import sha256

def unique_name_from_str(string: str) -> str:
    """
    Generates a unique id name
    """
    return sha256(string.encode("utf8")).hexdigest()

In [None]:
# this gives ['fields']['id']
for i in range(0,10):
    print(f"{resp_json['hits'][i]['recipe']['source'].replace(' ', '_')}-{unique_name_from_str(resp_json['hits'][i]['recipe']['url'])}")

Food52-7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3
Serious_Eats-8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c
Simply_Recipes-6602b9373086110f05cf4db003389ef41a89f21ac65162272bc52ba09efc862f
Food_Republic-4a73cb9988dd6c8341f8fa8ec5c65cd221218085ec8409545722cc2ec3fb96ea
Bon_Appetit-316182ccfad731f0376900f95d966ada60134c52e48c140b458430f421f477a5
Chez_Us-acbd7407da22d78169f8475cf2e2be1d2d1a66e86551b0ad6b5054acfd8fe3c6
BBC_Good_Food-bb68506a8438676d2068881d4769529fa395ebf6a162a335385a93494ca1a086
EatingWell-6f957796baf4ff81061808502ceba6e9c705e6db3be4efdafc6428fd0f3ab8c7
Delish-679717846737446ea95da3489fcb16c3767235cfd74e4f19aa942dcf9a0d1321
The_Daily_Meal-c8083fefbbf3daba9b055a012d2bad3de4df338535aae1d1cc371001486e56ff


In [None]:
# example structure of a record in vespa format
# vespa_record = {
#                 "put": f"id:{doc_type}:{doc_type}::{['recipe']['source']}-{record['id']}",
#                 "fields": {
#                     "origin": "epicurious", # replace with ['recipe']['source']
#                     "id": "",
#                     "title": "",
#                     "ingredients": "",
#                     "steps": "",
#                     "cuisine": "",
#                 },
#             }

import unicodedata

vespa_edamam = []

for i in range(0,10):
    origin = resp_json['hits'][i]['recipe']['source'].replace(' ', '_')
    rec_id = unique_name_from_str(resp_json['hits'][i]['recipe']['url'])
    rec_title = resp_json['hits'][i]['recipe']['label']
    
    rec_ingreds = [ingred.lstrip("*- ") for ingred in "".join(
                cha if unicodedata.category(cha)[0] != "C" else " "
                for sen in resp_json['hits'][i]['recipe']['ingredientLines']
                for cha in sen + "|"
            )
            .rstrip("|")
            .split("|")
            ]
    
    rec_cuisines = [s.title() for s in resp_json['hits'][i]['recipe']['cuisineType']]


    vespa_record = {
                "put": f"id:mealeon:mealeon::{origin}-{rec_id}",
                "fields": {
                    "origin": origin,
                    "id": rec_id,
                    "title": rec_title,
                    "ingredients": rec_ingreds,
                    "steps": "",
                    "cuisine": rec_cuisines,
                },
            }
    
    vespa_edamam.append(vespa_record)

print(vespa_edamam)

[{'put': 'id:mealeon:mealeon::Food52-7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3', 'fields': {'origin': 'Food52', 'id': '7600a8c806c686a1216aa850cde99e457605d15276498a0bf734e72c8d0982d3', 'title': 'Vegan Cauliflower Buffalo Wings', 'ingredients': ['1 small cauliflower', '1/2 cup chickpea flour', '3 tablespoons rice flour', '1/2 teaspoon red chili powder', '2/3 cup buffalo wing hot sauce', '1-2 scallions, sliced'], 'steps': '', 'cuisine': ['American']}}, {'put': 'id:mealeon:mealeon::Serious_Eats-8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c', 'fields': {'origin': 'Serious_Eats', 'id': '8fefce8fc109ffc82e2607819afa7a0e26f590f3af04cb8219aaff9fada9fd0c', 'title': 'Buffalo Wings Recipe | Grilling', 'ingredients': ['3 pounds chicken wings (18 wings), cut up', '1 teaspoon cayenne pepper', '1 teaspoon ground black pepper', '1 teaspoon kosher salt', 'For the Sauce', '4 tablespoons unsalted butter', "1/2 cup hot sauce, preferably Frank's Louisiana Hot Sauc

### Try mixing in PyVespa
Following documentation [here](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa.html)

In [None]:
from vespa.package import (
    ApplicationPackage,
    Field,
    Schema,
    Document,
    HNSW,
    RankProfile,
    Component,
    Parameter,
    FieldSet,
    GlobalPhaseRanking,
    Function,
    DocumentSummary,
    Summary
)

package = ApplicationPackage(
    name="mealeon",
    schema=[
        Schema(
            name="mealeon",
            document=Document(
                fields=[
                    Field(
                        name="language", 
                        type="string", 
                        indexing=["summary", "index", "set_language"],
                        match=["word"]
                    ),
                    Field(
                        name="id",
                        type="string",
                        indexing=["attribute", "summary"],
                        match=["word"],
                        bolding=True,
                    ),
                    Field(
                        name="title",
                        type="string",
                        indexing=["index", "summary"],
                        index="enable-bm25",
                    ), 
                    Field(
                        name="origin",
                        type="string",
                        indexing=["attribute", "summary"],
                        match=["word"],
                    ),                 
                    Field(
                        name="ingredients",
                        type="array<string>",
                        indexing=["index"],
                        index="enable-bm25",
                        match=["word"],
                    ),
                    Field(
                        name="steps",
                        type="array<string>",
                        indexing=["index"],
                        index="enable-bm25"
                    ),
                    Field(
                        name="cuisine",
                        type="array<string>",
                        indexing=["index", "summary"],
                        index="enable-bm25",
                        match=["text"],
                    ),
                    # Field(
                    #     name="embedding",
                    #     type="tensor<float>(x[384])",
                    #     indexing=[
                    #         'input title . " " . input body',
                    #         "embed",
                    #         "index",
                    #         "attribute",
                    #     ],
                    #     ann=HNSW(distance_metric="angular"),
                    #     is_document_field=False,
                    # ),
                ]
            ),
            fieldsets=[
                FieldSet(
                    name="default", 
                    fields=["title", "ingredients"]
                )
            ],
            document_summaries=[
                    DocumentSummary(
                    name="document-summary",
                    summary_fields=[
                        Summary("id")
                    ]
                ),
            ],
            rank_profiles=[
                RankProfile(
                    name="default",
                    first_phase="nativeRank(title, ingredients)"
                ),
                RankProfile(
                    name="bm25",
                    inherits="default",
                    first_phase="bm25(title) + bm25(ingredients)",
                    # inputs=[("query(q)", "tensor<float>(x[384])")],
                    functions=[
                        Function(name="bm25sum", expression="bm25(title) + bm25(ingredients)")
                    ],
                ),
                RankProfile(
                    name="combined", 
                    inherits="default",
                    first_phase="bm25(title) + bm25(ingredients) + nativeRank(title) + nativeRank(ingredients)",
                    functions=[
                        Function(name="bm25nativeRank",
                                 expression="bm25(title) + bm25(ingredients) + nativeRank(title) + nativeRank(ingredients)")
                    ]
                )
                # RankProfile(
                #     name="semantic",
                #     inputs=[("query(q)", "tensor<float>(x[384])")],
                #     first_phase="closeness(field, embedding)",
                # ),
                # RankProfile(
                #     name="fusion",
                #     inherits="bm25",
                #     inputs=[("query(q)", "tensor<float>(x[384])")],
                #     first_phase="closeness(field, embedding)",
                #     global_phase=GlobalPhaseRanking(
                #         expression="reciprocal_rank_fusion(bm25sum, closeness(field, embedding))",
                #         rerank_count=1000,
                #     ),
                # ),
            ],
        )
    ],
    # components=[
    #     Component(
    #         id="e5",
    #         type="hugging-face-embedder",
    #         parameters=[
    #             Parameter(
    #                 "transformer-model",
    #                 {
    #                     "url": "https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx"
    #                 },
    #             ),
    #             Parameter(
    #                 "tokenizer-model",
    #                 {
    #                     "url": "https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json"
    #                 },
    #             ),
    #         ],
    #     )
    # ],
)

In [None]:
# try mixing in PyVespa

from vespa.deployment import VespaDocker

vespa_docker = VespaDocker(port=8181,
                           cfgsrv_port=19081)
app = vespa_docker.deploy(application_package=package)


Waiting for configuration server, 0/300 seconds...
Waiting for configuration server, 5/300 seconds...
Waiting for configuration server, 10/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 0/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 5/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 10/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 15/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 20/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Waiting for application status, 25/300 seconds...
Using plain http against endpoint http://localhost:8181/ApplicationStatus
Application is up!
Finished de

In [None]:
!vespa feed ../data/processed/mealeon_vespa.json --target http://localhost:8181

{
  "feeder.operation.count": 34756,
  "feeder.seconds": 43.816,
  "feeder.ok.count": 34756,
  "feeder.ok.rate": 793.220,
  "feeder.error.count": 0,
  "feeder.inflight.count": 0,
  "http.request.count": 34756,
  "http.request.bytes": 26112962,
  "http.request.MBps": 0.596,
  "http.exception.count": 0,
  "http.response.count": 34756,
  "http.response.bytes": 5074376,
  "http.response.MBps": 0.116,
  "http.response.error.count": 0,
  "http.response.latency.millis.min": 11,
  "http.response.latency.millis.avg": 69,
  "http.response.latency.millis.max": 576,
  "http.response.code.counts": {
    "200": 34756
  }
}


In [None]:
from vespa.io import VespaResponse, VespaQueryResponse

# query should be recipe name?
    # WHERE title !contains {query}
# cuisine name should be in the WHERE filter clause of YQL
    # AND WHERE cuisine NOT IN {cuisines}
# how to penalize similar title?

# start with plain keyword search

with app.syncio(connections=1) as session:
    query = "Buffalo Wings"
    response: VespaQueryResponse = session.query(
        yql=f"select * from sources mealeon where (title contains '{query}') limit 5",
        query=query,
        ranking="bm25"
        # body={"input.query(q)": f"embed({query})"},
    )
    assert response.is_successful()

In [None]:
print(response.hits)

[{'id': 'id:mealeon:mealeon::epicurious-54a4697e6529d92b2c0279d3', 'relevance': 9.38931639154318, 'source': 'mealeon_content', 'fields': {'sddocname': 'mealeon', 'id': 'epicurious-54a4697e6529d92b2c0279d3', 'documentid': 'id:mealeon:mealeon::epicurious-54a4697e6529d92b2c0279d3', 'title': 'Buffalo Wings', 'origin': 'epicurious', 'cuisine': ['Missing Cuisine']}}, {'id': 'id:mealeon:mealeon::epicurious-54a42af319925f464b37f2a5', 'relevance': 8.446862631228765, 'source': 'mealeon_content', 'fields': {'sddocname': 'mealeon', 'id': 'epicurious-54a42af319925f464b37f2a5', 'documentid': 'id:mealeon:mealeon::epicurious-54a42af319925f464b37f2a5', 'title': 'Korean Buffalo Wings', 'origin': 'epicurious', 'cuisine': ['Missing Cuisine']}}]


In [None]:
next_resp_json = next_resp.json()
next_resp_json

NameError: name 'next_resp' is not defined

In [None]:
# actual results
results = resp_json['hits']
results

In [None]:
# | hide
nbdev.nbdev_export()