**Ejercicio 11**


**Nombre:** Aarón Yumancela

**Web Scraping**

In [9]:
# Web scraping de recetas
import requests
from bs4 import BeautifulSoup
import json

HEADERS = {"User-Agent": "Mozilla/5.0"}

def _iter_jsonld_items(data):
    """Yield every dict node of a decoded JSON-LD payload, including ``@graph`` children."""
    nodes = data if isinstance(data, list) else [data]
    for node in nodes:
        if not isinstance(node, dict):
            continue
        yield node
        graph = node.get("@graph")
        if isinstance(graph, list):
            for child in graph:
                if isinstance(child, dict):
                    yield child


def _is_recipe(item):
    """Return True when the node's ``@type`` is, or includes, ``"Recipe"``."""
    t = item.get("@type")
    return t == "Recipe" or (isinstance(t, list) and "Recipe" in t)


def _instruction_texts(instructions):
    """Flatten a ``recipeInstructions`` value into a flat list of step texts."""
    if instructions is None:
        return []
    if isinstance(instructions, (str, dict)):
        # A single step/section instead of a list of them.
        instructions = [instructions]

    steps = []
    for step in instructions:
        if isinstance(step, str):
            steps.append(step)
        elif isinstance(step, dict):
            nested = step.get("itemListElement")
            if isinstance(nested, list):
                # Section-like node: its steps live one level down.
                steps.extend(_instruction_texts(nested))
            else:
                steps.append(step.get("text"))
    return steps


def get_recipe(url, timeout=10):
    """Download a recipe page and summarize its JSON-LD ``Recipe`` node.

    Args:
        url: Address of the recipe page.
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            cannot block the notebook indefinitely.

    Returns:
        A dict with the keys ``url``, ``puntaje``, ``descripcion``,
        ``prep_time``, ``cook_time``, ``total_time``, ``porciones``,
        ``ingredientes``, ``pasos`` and ``nutricion``, or ``None`` when no
        ``Recipe`` node is found in the page's ``application/ld+json`` scripts.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    recipe = None
    # Look for structured JSON-LD data
    for script in soup.find_all("script", type="application/ld+json"):
        raw = script.string
        if not raw:
            # Empty script tag: nothing to decode.
            continue
        try:
            data = json.loads(raw)
        except ValueError:
            # Malformed JSON in this tag; try the next one.
            continue

        recipe = next(
            (item for item in _iter_jsonld_items(data) if _is_recipe(item)),
            None,
        )
        if recipe is not None:
            break

    if recipe is None:
        return None

    # The rating may be absent or explicitly null; only a dict carries a value.
    rating = recipe.get("aggregateRating")
    puntaje = rating.get("ratingValue") if isinstance(rating, dict) else None

    return {
        "url": url,
        "puntaje": puntaje,
        "descripcion": recipe.get("description"),
        "prep_time": recipe.get("prepTime"),
        "cook_time": recipe.get("cookTime"),
        "total_time": recipe.get("totalTime"),
        "porciones": recipe.get("recipeYield"),
        "ingredientes": recipe.get("recipeIngredient", []),
        "pasos": _instruction_texts(recipe.get("recipeInstructions")),
        "nutricion": recipe.get("nutrition")
    }


In [10]:
# Recipe URLs to scrape
urls = [
    "https://www.allrecipes.com/hibachi-chicken-noodles-with-yum-yum-sauce-recipe-11885825",
    "https://www.allrecipes.com/grill-master-chicken-wing-recipe-11885412",
    "https://www.allrecipes.com/million-dollar-soup-recipe-11823470",
    "https://www.allrecipes.com/recipe/270770/garlic-noodles/",
    "https://www.allrecipes.com/chicken-cobbler-recipe-7966464",
    "https://www.allrecipes.com/recipe/9999/peanut-butter-bars-i/",
    "https://www.allrecipes.com/recipe/16354/easy-meatloaf/",
    "https://www.allrecipes.com/recipe/125658/chicken-enchiladas-with-creamy-green-chile-sauce/",
    "https://www.allrecipes.com/recipe/235997/unstuffed-cabbage-roll/",
    "https://www.allrecipes.com/recipe/13218/absolutely-ultimate-potato-soup/",
    "https://www.allrecipes.com/recipe/276647/instant-pot-chicken-and-dumplings/",
    "https://www.allrecipes.com/recipe/6776/pizza-dough-i/",
    "https://www.allrecipes.com/recipe/8500479/ground-pork-tacos-with-pineapple-salsa/",
    "https://www.allrecipes.com/recipe/240605/shipwreck-dinner/",
    "https://www.allrecipes.com/recipe/235794/eggplant-parmesan-casserole/",
    "https://www.allrecipes.com/recipe/155379/puerto-rican-breakfast-custard/",
    "https://www.allrecipes.com/recipe/272308/monkey-bread-from-scratch/",
    "https://www.allrecipes.com/bang-bang-salmon-recipe-8748368",
    "https://www.allrecipes.com/recipe/229088/apple-crisp-with-oat-topping/",
    "https://www.allrecipes.com/italian-sausage-beans-and-greens-recipe-11881660",
    "https://www.allrecipes.com/viral-pork-dumpling-lasagna-recipe-11883823",
    "https://www.allrecipes.com/slow-cooker-hoisin-chicken-recipe-11881916",
    "https://www.allrecipes.com/air-fryer-beef-kofta-kabobs-recipe-11714847",
    "https://www.allrecipes.com/mini-chicken-pot-pies-recipe-11857602",
    "https://www.allrecipes.com/recipe/22262/greek-honey-cake/"
]


recetas = []
# (url, reason) for every URL that produced no recipe, so that a shortfall
# in the total below can be traced back to specific pages.
fallidas = []

# Extract the recipe data for each URL
for url in urls:
    try:
        data = get_recipe(url)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole batch.
        fallidas.append((url, str(exc)))
        continue

    if data:
        recetas.append(data)
    else:
        fallidas.append((url, "sin datos JSON-LD de tipo Recipe"))

print("Total recetas:", len(recetas))
for url_fallida, motivo in fallidas:
    print("Sin receta:", url_fallida, "-", motivo)


Total recetas: 24


In [11]:
# Método alternativo de extracción
def get_recipe(url, timeout=10):
    """Download a page and return its first raw JSON-LD ``Recipe`` node.

    NOTE(review): this reuses the name ``get_recipe`` defined in an earlier
    cell. Running this cell rebinds the name, and this version returns the
    raw JSON-LD dict rather than the summarized dict the earlier one builds.
    Confirm which version later cells are meant to call.

    Args:
        url: Address of the recipe page.
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            cannot block the notebook indefinitely.

    Returns:
        The first JSON-LD dict whose ``@type`` is, or includes, ``"Recipe"``,
        or ``None`` when no such node exists.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, "html.parser")

    for s in soup.find_all("script", type="application/ld+json"):
        if not s.string:
            # Empty script tag: nothing to decode.
            continue
        try:
            # Decode once and reuse the result.
            data = json.loads(s.string)
        except ValueError:
            # Malformed JSON in this tag; try the next one.
            continue

        for item in (data if isinstance(data, list) else [data]):
            if not isinstance(item, dict):
                # Skip non-object entries without discarding the rest of the list.
                continue
            types = item.get("@type")
            if "Recipe" in (types if isinstance(types, list) else [types]):
                return item

    return None


In [12]:
# Print every scraped recipe as a plain-text report
separador = "=" * 50

# (label, key) pairs printed one per line, in this order, for each recipe.
campos = (
    ("URL:", "url"),
    ("PUNTAJE:", "puntaje"),
    ("DESCRIPCIÓN:", "descripcion"),
    ("PREP TIME:", "prep_time"),
    ("COOK TIME:", "cook_time"),
    ("TOTAL TIME:", "total_time"),
    ("PORCIONES:", "porciones"),
)

for i, r in enumerate(recetas, 1):
    print("\n" + separador)
    print(f"RECETA {i}")
    for etiqueta, clave in campos:
        print(etiqueta, r[clave])

    print("\nINGREDIENTES:")
    for ing in r["ingredientes"]:
        print("-", ing)

    print("\nPASOS:")
    for n, paso in enumerate(r["pasos"], 1):
        print(f"{n}. {paso}")

    print("\nNUTRICIÓN:")
    if not r["nutricion"]:
        # The header is still printed; there are simply no entries to list.
        continue
    for k, v in r["nutricion"].items():
        print(f"{k}: {v}")



RECETA 1
URL: https://www.allrecipes.com/hibachi-chicken-noodles-with-yum-yum-sauce-recipe-11885825
PUNTAJE: None
DESCRIPCIÓN: Hibachi Chicken Noodles with Yum Yum Sauce bring the bold, savory flavors of your favorite Japanese steakhouse right to your kitchen in just 40 minutes. Tender chicken, saucy noodles, and crisp broccoli are tossed together and finished with homemade Yum Yum sauce for a comforting, takeout-style meal the whole family will love.
PREP TIME: PT10M
COOK TIME: PT30M
TOTAL TIME: PT40M
PORCIONES: 4

INGREDIENTES:
- 1/3 cup mayonnaise
- 1 tablespoon Sriracha
- 1 tablespoon ketchup
- 1 tablespoon rice vinegar
- 1 teaspoon granulated sugar
- 1/2 teaspoon garlic powder
- 1/4 teaspoon salt
- 1/2 cup reduced-sodium soy sauce
- 2 tablespoons packed brown sugar
- 2 tablespoons toasted sesame oil
- 2 tablespoons water
- 4 cloves garlic, minced
- 2 tablespoons vegetable oil
- 3 cups broccoli florets
- 3 tablespoons butter
- 1 pound boneless, skinless chicken breasts, cut into 1