## Testing

In [1]:
import sys, os, json
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.scraper import get_recipe_data
from src.ingredients_parser import IngredientsParser
# Load the ground-truth data that the parser output is scored against.
# The path is relative to the notebook's working directory.
with open("../src/helper_files/ground_truth.json", encoding="utf-8") as gt_file:
    ground_truth = json.loads(gt_file.read())

# Evaluation recipes, ten per source site. The order matters: parsed results are
# later compared position-by-position with the entries of `ground_truth`.

# --- allrecipes.com ---
allrecipes_urls = [
    "https://www.allrecipes.com/recipe/21014/good-old-fashioned-pancakes/",
    "https://www.allrecipes.com/recipe/24074/alysias-basic-meat-lasagna/",
    "https://www.allrecipes.com/recipe/228293/curry-stand-chicken-tikka-masala-sauce/",
    "https://www.allrecipes.com/recipe/218091/classic-and-simple-meat-lasagna/",
    "https://www.allrecipes.com/recipe/238543/grandmas-cucumber-and-onion-salad/",
    "https://www.allrecipes.com/recipe/15375/fried-chicken-with-creamy-gravy/",
    "https://www.allrecipes.com/recipe/218288/garlic-spinach/",
    "https://www.allrecipes.com/recipe/57348/balsamic-glazed-carrots/",
    "https://www.allrecipes.com/recipe/254558/refreshing-oatmeal-drink-agua-de-avena/",
    "https://www.allrecipes.com/recipe/202975/potstickers-chinese-dumplings/",
]

# --- epicurious.com ---
epicurious_urls = [
    "https://www.epicurious.com/recipes/food/views/easy-fried-rice",
    "https://www.epicurious.com/recipes/food/views/ba-syn-easy-apple-cake",
    "https://www.epicurious.com/recipes/food/views/ba-syn-spicy-cashew-scallion-noodles",
    "https://www.epicurious.com/recipes/food/views/ba-syn-sweet-and-sour-cranberry-chicken-stir-fry",
    "https://www.epicurious.com/recipes/food/views/ba-syn-peach-blueberry-pie",
    "https://www.epicurious.com/recipes/food/views/tomato-galette",
    "https://www.epicurious.com/recipes/food/views/ba-syn-ginger-cardamom-zucchini-bread",
    "https://www.epicurious.com/recipes/food/views/white-bean-turkey-chili",
    "https://www.epicurious.com/recipes/food/views/thai-curry-puff-ga-ree-puff",
    "https://www.epicurious.com/recipes/food/views/steamed-winter-veggie-bowls",
]

# --- bonappetit.com ---
bonappetit_urls = [
    "https://www.bonappetit.com/recipe/kale-pesto-with-whole-wheat-pasta",
    "https://www.bonappetit.com/recipe/adult-spaghettios",
    "https://www.bonappetit.com/recipe/ham-cheese-and-onion-empanadas",
    "https://www.bonappetit.com/recipe/crispy-smashed-potatoes-with-walnut-dressing",
    "https://www.bonappetit.com/recipe/zucchini-lentil-fritters-with-lemony-yogurt",
    "https://www.bonappetit.com/recipe/grilled-chicken-skewers-with-toum-shish-taouk",
    "https://www.bonappetit.com/recipe/bas-best-apple-pie",
    "https://www.bonappetit.com/recipe/ba-best-lasagna",
    "https://www.bonappetit.com/recipe/bas-best-chocolate-chip-cookies",
    "https://www.bonappetit.com/recipe/spicy-salmon-bowl",
]

# Despite its name, `allrecipes_data` holds the URLs of all three sites, in the
# order allrecipes -> epicurious -> bonappetit. Unpacking builds a fresh list, so
# the per-site lists are left untouched.
allrecipes_data = [*allrecipes_urls, *epicurious_urls, *bonappetit_urls]

In [2]:
# Fetch every recipe page and parse its ingredient list. `parsed_components`
# ends up with one parse result per URL, in the same order as `allrecipes_data`.
parsed_components = []
for recipe_url in allrecipes_data:
    # Only the ingredients are needed here; title and directions are not used.
    title, ingredients, directions = get_recipe_data(recipe_url)
    parsed_components.append(IngredientsParser(ingredients).parse())

In [13]:
def _count_set_matches(gt_item, parsed_item, key):
    """Score one list-valued field (descriptors or preparation) of one ingredient.

    Returns a ``(correct, total)`` pair to be added to the running counters.
    """
    # `or ()` makes an explicit null value behave like a missing key instead of
    # raising TypeError inside set().
    expected = set(gt_item.get(key) or ())
    predicted = set(parsed_item.get(key) or ())

    if not expected and not predicted:
        # Neither side has any values → count as one correct match of size 1.
        return 1, 1

    # Only ground-truth values are counted, so extra predicted values are not
    # penalised (this includes the case where the ground truth is empty).
    # NOTE(review): confirm that ignoring spurious predictions is intended.
    return len(expected & predicted), len(expected)


def _ratio(correct, total):
    """Return ``correct / total``, or 1 when there was nothing to score."""
    return correct / total if total > 0 else 1


def performance(ground_truth, parsed_components):
    """Compare the parsed ingredients of one recipe with its ground truth.

    Items are paired by position. If the two sequences differ in length, the
    extra items are ignored by the pairing; name/quantity/unit accuracy is still
    divided by ``len(ground_truth)``, so missing parsed items count as wrong
    for those three metrics.

    Args:
        ground_truth: Sequence of dicts, one per ingredient, with the keys
            ``ingredient_name``, ``ingredient_quantity`` and
            ``measurement_unit`` and, optionally, the list-valued keys
            ``ingredient_descriptors`` and ``ingredient_preparation``.
        parsed_components: Sequence of dicts with the same keys, as produced
            by the parser.

    Returns:
        Dict with ``name_accuracy``, ``quantity_accuracy``, ``unit_accuracy``,
        ``descriptor_accuracy`` and ``preparation_accuracy``. A metric with
        nothing to score (e.g. empty ``ground_truth``) is reported as 1.

    Raises:
        KeyError: If an item lacks one of the three mandatory keys.
    """
    total_ingredients = len(ground_truth)
    name_hits = quantity_hits = unit_hits = 0
    descriptor_hits = descriptor_total = 0
    prep_hits = prep_total = 0

    for gt, ping in zip(ground_truth, parsed_components):
        # Ingredient-level accuracy: exact equality on the three scalar fields.
        name_hits += gt['ingredient_name'] == ping['ingredient_name']
        quantity_hits += gt['ingredient_quantity'] == ping['ingredient_quantity']
        unit_hits += gt['measurement_unit'] == ping['measurement_unit']

        # Descriptor scoring.
        correct, total = _count_set_matches(gt, ping, 'ingredient_descriptors')
        descriptor_hits += correct
        descriptor_total += total

        # Preparation scoring.
        correct, total = _count_set_matches(gt, ping, 'ingredient_preparation')
        prep_hits += correct
        prep_total += total

    # All five metrics share the same guard, so an empty recipe no longer
    # raises ZeroDivisionError for name/quantity/unit.
    return {
        "name_accuracy": _ratio(name_hits, total_ingredients),
        "quantity_accuracy": _ratio(quantity_hits, total_ingredients),
        "unit_accuracy": _ratio(unit_hits, total_ingredients),
        "descriptor_accuracy": _ratio(descriptor_hits, descriptor_total),
        "preparation_accuracy": _ratio(prep_hits, prep_total),
    }


In [None]:
# Average each per-recipe metric over all ground-truth recipes. Every recipe
# contributes one value per metric, so recipes weigh the same regardless of how
# many ingredients they contain.
num_recipes = len(ground_truth)  # ground_truth is a list of recipes

# Fail early with a readable message instead of a bare ZeroDivisionError or
# IndexError further down.
assert num_recipes > 0, "ground_truth contains no recipes to score"
assert len(parsed_components) >= num_recipes, (
    f"only {len(parsed_components)} parsed recipes for {num_recipes} "
    "ground-truth recipes; re-run the scraping cell"
)

# Reset the accumulator on every run so the cell can be re-executed safely.
all_scores = {
    "name_accuracy": 0,
    "quantity_accuracy": 0,
    "unit_accuracy": 0,
    "descriptor_accuracy": 0,
    "preparation_accuracy": 0
}

# Recipes are paired by position; zip stops after the last ground-truth recipe.
for gt_recipe, parsed_recipe in zip(ground_truth, parsed_components):
    result = performance(gt_recipe, parsed_recipe)

    for key in all_scores:
        all_scores[key] += result[key]

# Divide to get the average
for key in all_scores:
    all_scores[key] /= num_recipes

# Last expression of the cell, so the averaged scores are displayed.
all_scores


{'name_accuracy': 0.40603674569349624, 'quantity_accuracy': 0.9774850943615244, 'unit_accuracy': 0.8830749440617862, 'descriptor_accuracy': 0.6873709307261939, 'preparation_accuracy': 0.7793717639403074}
