# Scraping de Recetas y Creación del DataFrame

In [1]:
import os
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm # Para barras de progreso
from IPython.display import display, HTML # Importación para renderizar HTML

In [2]:
def scrape_recipe_from_html(file_path):
    """
    Extrae la información de una receta desde un archivo HTML local
    Maneja errores si algunos campos no se encuentran
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    soup = BeautifulSoup(content, 'html.parser')
    recipe_data = {}

    # 1. Título
    title_tag = soup.find("meta", {"property": "og:title"})
    recipe_data['Titulo'] = title_tag["content"] if title_tag else "No encontrado"

    # 2. Resumen
    summary_p = soup.find("p", class_="article-subheading")
    recipe_data['resumen'] = summary_p.get_text(strip=True) if summary_p else "No encontrado"

    # 3. Valoración
    # Ajustado el selector para que coincida con el ejemplo de webcrawling
    score_div = soup.find("div", class_="comp mm-recipes-review-bar__rating mntl-text-block text-label-300")
    recipe_data['valoracion'] = score_div.text.strip() if score_div else "No encontrado"

    # 4. Tiempos y Porciones
    details_labels = soup.find_all("div", class_="mm-recipes-details__label")
    details_values = soup.find_all("div", class_="mm-recipes-details__value")
    
    # Valores por defecto
    recipe_data['tiempo_coccion'] = "No encontrado"
    recipe_data['porciones'] = "No encontrado"

    for label, value in zip(details_labels, details_values):
        label_text = label.text.strip().lower()
        if "total time" in label_text:
            recipe_data['tiempo_coccion'] = value.text.strip()
        elif "servings" in label_text:
            recipe_data['porciones'] = value.text.strip()

    # 5. Ingredientes
    ingredients_li = soup.find_all("li", class_="mm-recipes-structured-ingredients__list-item")
    ingredients_list = sorted(list(set([ing.text.strip() for ing in ingredients_li])))
    recipe_data['Ingredientes'] = ", ".join(ingredients_list)

    # 6. Preparación
    steps_li = soup.select("li.mntl-sc-block-group--LI p")
    preparation_steps = [step.get_text(" ", strip=True) for step in steps_li if step.get_text(strip=True)]
    recipe_data['preparacion'] = "\\n".join([f"Paso {i+1}: {step}" for i, step in enumerate(preparation_steps)])

    # 7. Factor Nutricional
    nutrition_rows = soup.select("tr.mm-recipes-nutrition-facts-summary__table-row")
    nutrition_facts = []
    for row in nutrition_rows:
        cols = row.find_all("td")
        if len(cols) == 2:
            value = cols[0].text.strip()
            label = cols[1].text.strip()
            nutrition_facts.append(f"{label}: {value}")
    recipe_data['Factor_nutricional'] = ", ".join(nutrition_facts)

    # 8. Imagen
    img_tag = soup.select_one("div.img-placeholder img")
    recipe_data['Imagen'] = img_tag.get("data-src") or img_tag.get("src") if img_tag else "No encontrada"

    return recipe_data

In [3]:
# --- Proceso Principal ---
data_folder = 'data'
if not os.path.exists(data_folder):
    print(f"Error: La carpeta '{data_folder}' no se encuentra")
else:
    html_files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.html')]
    
    if not html_files:
        print(f"No se encontraron archivos .html en la carpeta '{data_folder}'.")
    else:
        all_recipes = []
        print(f"Procesando {len(html_files)} archivos HTML")
        for file_path in tqdm(html_files, desc="Scrapeando recetas"):
            all_recipes.append(scrape_recipe_from_html(file_path))
        # Crear el DataFrame
        df_recipes = pd.DataFrame(all_recipes)
df_recipes

Procesando 102 archivos HTML


Scrapeando recetas: 100%|██████████| 102/102 [00:08<00:00, 12.50it/s]


Unnamed: 0,Titulo,resumen,valoracion,tiempo_coccion,porciones,Ingredientes,preparacion,Factor_nutricional,Imagen
0,Magic Cookie Bars,This magic bars recipe was one of my grandmoth...,4.8,55 mins,12,"1 (14 ounce) can sweetened condensed milk, 1 c...",Paso 1: Gather the ingredients. Preheat the ov...,"Calories: 384, Fat: 24g, Carbs: 40g, Protein: 6g",https://www.allrecipes.com/thmb/Wot6yWDwiCwTdC...
1,Vietnamese Stir-Fry,This is a tangy stir-fry with beef and green b...,4.3,2 hrs 55 mins,6,"1 (1 inch) piece fresh ginger root, minced, 1 ...","Paso 1: Whisk together the olive oil, 4 cloves...","Calories: 475, Fat: 34g, Carbs: 9g, Protein: 32g",https://www.allrecipes.com/thmb/ieYux00FV4WWua...
2,Rocky Road Squares,Reminds one of that yummy ice cream flavor.,4.5,No encontrado,32,"1 (14 ounce) can sweetened condensed milk, 1 ½...",Paso 1: Preheat oven to 350 degrees F (175 deg...,"Calories: 200, Fat: 13g, Carbs: 22g, Protein: 3g",https://www.allrecipes.com/thmb/DKLlmzWw8Gn9ja...
3,Filipino Pork Adobo,Pork in a tasty Adobo sauce is great smothered...,3.2,2 hrs 50 mins,6,"1 cup distilled white vinegar, 1 cup soy sauce...","Paso 1: Stir together the vinegar, soy sauce, ...","Calories: 337, Fat: 16g, Carbs: 14g, Protein: 35g",https://www.allrecipes.com/thmb/O3WG01q3Z3CEJh...
4,Squash and Coconut Milk Stew,"This is a Filipino dish, (Ginostoan Sitawan Ka...",4.4,1 hr 20 mins,4,"1 acorn squash, peeled and cut into 1-inch cu...",Paso 1: Melt butter in a large skillet over me...,"Calories: 450, Fat: 29g, Carbs: 30g, Protein: 24g",https://www.allrecipes.com/thmb/dtEVHUgtixv1TQ...
...,...,...,...,...,...,...,...,...,...
97,Pennsyltucky Pepper Stew,"A delightful stew! Os it a soup? I don't know,...",4.7,1 hr 10 mins,6,"1 Anaheim chile pepper, chopped, 1 fresh jal...",Paso 1: Place the beef in a skillet over mediu...,"Calories: 359, Fat: 16g, Carbs: 26g, Protein: 30g",https://www.allrecipes.com/thmb/2CJSURLtiZQSHH...
98,Crab and Swiss Omelet,A delicious crab omelet recipe made with Dunge...,4.4,25 mins,2,"1 cup cooked crabmeat, 1 cup shredded Swiss ch...",Paso 1: Melt butter in a large nonstick skille...,"Calories: 796, Fat: 57g, Carbs: 10g, Protein: 62g",https://www.allrecipes.com/thmb/zEhg1Hp5ES6S6G...
99,Shipwreck Stew,"This recipe is a wonderful, hearty meal for th...",3.9,5 hrs 20 mins,10,"2 (10.75 ounce) cans condensed tomato soup, 2...",Paso 1: Crumble the ground beef into a large s...,"Calories: 425, Fat: 12g, Carbs: 55g, Protein: 25g",https://www.allrecipes.com/thmb/7hfU6oPYNvGEFf...
100,Seven Layer Bars,"These 7 layer bars are easy to make, and very ...",4.7,1 hr,36,"1 (14 ounce) can sweetened condensed milk, 1 c...",Paso 1: Preheat the oven to 350 degrees F (175...,"Calories: 155, Fat: 10g, Carbs: 17g, Protein: 2g",https://imagesvc.meredithcorp.io/v3/mm/image?u...


In [4]:
# --- CÓDIGO DE VISUALIZACIÓN CON IMÁGENES ---
print("\nVisualización de las primeras 5 recetas:")
def path_to_image_html(path):
    """Convierte una URL de imagen en una etiqueta HTML <img>"""
    return f'<img src="{path}" width="5550">'
# Tomamos una copia de las 5 primeras filas solo para visualizar
df_visual = df_recipes.head().copy()
html_output = df_visual.style.format({
    'Imagen': path_to_image_html
}).to_html(escape=False)
# Mostramos la tabla HTML resultante
HTML(html_output)


Visualización de las primeras 5 recetas:


Unnamed: 0,Titulo,resumen,valoracion,tiempo_coccion,porciones,Ingredientes,preparacion,Factor_nutricional,Imagen
0,Magic Cookie Bars,This magic bars recipe was one of my grandmother's favorites. Easy to make for a crowd-pleasing sweet treat!,4.8,55 mins,12,"1 (14 ounce) can sweetened condensed milk, 1 cup chopped walnuts, 1 cup semisweet chocolate chips, 1 ½ cups graham cracker crumbs, 1 ⅓ cups flaked coconut, ½ cup butter or margarine, melted","Paso 1: Gather the ingredients. Preheat the oven to 350 degrees F (180 degrees C).\nPaso 2: Jen Causey / Food Styling: Chelsea Zimmer / Prop Styling: Hannah Greenwood\nPaso 3: Pour melted butter into a 9x13-inch dish. Sprinkle graham crumbs evenly over melted butter, followed by chopped nuts, then chocolate chips.\nPaso 4: Jen Causey / Food Styling: Chelsea Zimmer / Prop Styling: Hannah Greenwood\nPaso 5: Top with flaked coconut and pour condensed milk over all.\nPaso 6: Jen Causey / Food Styling: Chelsea Zimmer / Prop Styling: Hannah Greenwood\nPaso 7: Bake in the preheated oven for 25 minutes or until lightly browned on top. Cool for 15 minutes before cutting into finger-length bars.\nPaso 8: Jen Causey / Food Styling: Chelsea Zimmer / Prop Styling: Hannah Greenwood","Calories: 384, Fat: 24g, Carbs: 40g, Protein: 6g",
1,Vietnamese Stir-Fry,"This is a tangy stir-fry with beef and green beans. If you have an electric wok, they work perfect to cook this in! Add any extra veggies that you prefer!",4.3,2 hrs 55 mins,6,"1 (1 inch) piece fresh ginger root, minced, 1 dash sesame oil, 1 large onion, thinly sliced, 1 pinch red pepper flakes, or to taste, 1 tablespoon chopped fresh Thai basil, 1 tablespoon chopped fresh mint, 1 tablespoon vegetable oil, 2 cloves garlic, minced, 2 cups frozen whole green beans, partially thawed, 2 pounds sirloin tip, thinly sliced, 2 tablespoons lime juice, 3 green onions, cut into 2 inch pieces, 4 cloves garlic, minced, ¼ cup chopped fresh cilantro, ¼ cup fish sauce, ¼ cup olive oil, ¼ cup reduced-sodium soy sauce, ½ cup reduced-sodium beef broth, ½ teaspoon ground black pepper","Paso 1: Whisk together the olive oil, 4 cloves of garlic, ginger, fish sauce, soy sauce, and sesame oil in a bowl, and pour into a resealable plastic bag. Add the beef sirloin tip, coat with the marinade, squeeze out excess air, and seal the bag. Marinate in the refrigerator for 2 hours. Remove the beef sirloin tip from the marinade, and shake off excess. Discard the remaining marinade.\nPaso 2: Heat vegetable oil in a large skillet over medium-high heat and stir in the beef. Cook and stir until the beef is evenly browned, and no longer pink. Place beef on a plate and set aside. Reduce heat to medium, adding more vegetable oil to the skillet if needed. Stir in 2 cloves of garlic, green onion, and onion; cook and stir until the onion has softened and turned translucent, about 5 minutes. Stir in green beans, beef broth, lime juice, basil, mint, red pepper flakes and pepper. Return beef sirloin to skillet and toss to combine. Remove from heat and toss in cilantro.","Calories: 475, Fat: 34g, Carbs: 9g, Protein: 32g",
2,Rocky Road Squares,Reminds one of that yummy ice cream flavor.,4.5,No encontrado,32,"1 (14 ounce) can sweetened condensed milk, 1 ½ cups chopped walnuts, 1 ½ cups flaked coconut, 1 ½ cups graham cracker crumbs, 1 ½ cups miniature marshmallows, 2 (1 ounce) squares semisweet chocolate, 2 cups semisweet chocolate chips, ½ cup butter","Paso 1: Preheat oven to 350 degrees F (175 degrees C).\nPaso 2: In a 9 x 13 inch baking pan melt the butter and sprinkle the graham cracker crumbs over the melted butter. Mix together and press onto bottom of pan.\nPaso 3: Layer coconut, nuts, chocolate chips and marshmallows over crust. Drizzle the condensed milk evenly over all.\nPaso 4: Bake at 350 degrees F (175 degrees C) for 25 to 30 minutes or until golden brown. Remove from oven and drizzle with the melted chocolate. Cool completely and cut into squares.","Calories: 200, Fat: 13g, Carbs: 22g, Protein: 3g",
3,Filipino Pork Adobo,Pork in a tasty Adobo sauce is great smothered over white jasmine rice!,3.2,2 hrs 50 mins,6,"1 cup distilled white vinegar, 1 cup soy sauce, 1 pound small green beans, trimmed (Optional), 1 tablespoon minced garlic, 1 teaspoon fresh-ground black pepper, 2 ½ pounds lean pork, cut into 1 inch cubes, 3 bay leaves, ½ cup ketchup","Paso 1: Stir together the vinegar, soy sauce, ketchup, garlic, and bay leaves in a large saucepan. Add the cubed pork, and bring to a boil over high heat. Reduce heat to medium-low, cover, and simmer until the pork is tender, about 2 1/2 hours. Stir occasionally. If using the green beans, add them during the last half hour of cooking.","Calories: 337, Fat: 16g, Carbs: 14g, Protein: 35g",
4,Squash and Coconut Milk Stew,"This is a Filipino dish, (Ginostoan Sitawan Kalsbass) that is made with acorn squash, green beans, tofu, shrimp, ginger, garlic, and onion. My MIL makes it on the soupy side because you serve it over rice, but I like to mash the squash because I don't like rice.",4.4,1 hr 20 mins,4,"1 acorn squash, peeled and cut into 1-inch cubes, 1 (1 inch) piece fresh ginger, minced, 1 (14 ounce) can coconut milk, 1 (14 ounce) package extra-firm tofu, cut into 1/2-inch cubes, 1 clove garlic, minced, 1 small onion, chopped, 1 tablespoon butter, 2 tablespoons white sugar, 8 ounces cooked shrimp, peeled and deveined, 8 ounces green beans, cut into 3-inch pieces, Salt and pepper to taste","Paso 1: Melt butter in a large skillet over medium heat. Add ginger, garlic, and onion. Cook until garlic begins to brown, about 5 min.\nPaso 2: Add squash, coconut milk, and green beans to skillet. Bring to a boil over high heat, then reduce heat to medium, cover, and simmer for 30 minutes until squash is tender, stirring occasionally. Stir in shrimp and tofu, then season to taste with salt, pepper, and sugar.","Calories: 450, Fat: 29g, Carbs: 30g, Protein: 24g",


In [5]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# Cargar el modelo de Sentence Transformers
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
# Crear una columna de texto combinado para generar los embeddings.
# Esta combinación captura la esencia de cada receta
df_recipes['texto_para_embedding'] = df_recipes['Titulo'] + ". " + \
                                      df_recipes['resumen'] + ". Ingredientes: " + \
                                      df_recipes['Ingredientes'] + ". " + \
                                      df_recipes['valoracion'] + ". " + \
                                      df_recipes['tiempo_coccion'] + ". " + \
                                      df_recipes['porciones'] + ". " + \
                                      df_recipes['preparacion'] + ". " + \
                                      df_recipes['Factor_nutricional']

In [18]:
# Generar los embeddings para todas las recetas
recipe_embeddings = model.encode(
    df_recipes['texto_para_embedding'].tolist(),
    convert_to_numpy=True,
    show_progress_bar=True
)

Batches: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]


In [20]:
# Añadir los embeddings al DataFrame
df_recipes['embedding'] = list(recipe_embeddings)
print(f"\\nEmbeddings generados. Dimensión del vector: {recipe_embeddings.shape[1]}")

\nEmbeddings generados. Dimensión del vector: 384


In [21]:
# Construir el índice FAISS para búsqueda vectorial
embedding_dim = recipe_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)

In [22]:
# Añadir los vectores al índice
index.add(recipe_embeddings)
print(f"Índice FAISS construido. Total de recetas indexadas: {index.ntotal}")

Índice FAISS construido. Total de recetas indexadas: 102


In [None]:
# Generación de respuesta con Gemini y gestión de la conversación
import google.generativeai as genai
GEMINI_API_KEY = "API"
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-2.5-flash')

In [None]:

def recommend_recipes(query, top_k=5):
    """
    Busca en el índice FAISS las recetas más relevantes para una consulta
    y las formatea como contexto para el LLM.
    """
    # 1. Codificar la consulta del usuario
    query_embedding = model.encode(query, convert_to_numpy=True).reshape(1, -1)
    
    # 2. Buscar en FAISS
    distances, indices = index.search(query_embedding, top_k)
    
    # 3. Recuperar las recetas del DataFrame
    retrieved_recipes = df_recipes.iloc[indices[0]]
    
    # 4. Formatear el contexto
    context = ""
    for i, (idx, recipe) in enumerate(retrieved_recipes.iterrows()):
        context += f"--- Receta Candidata {i+1} ---\\n"
        context += f"Título: {recipe['Titulo']}\\n"
        context += f"Resumen: {recipe['resumen']}\\n"
        context += f"Ingredientes: {recipe['Ingredientes']}\\n"
        context += f"Porciones: {recipe['porciones']}\\n"
        context += f"Tiempo Total: {recipe['tiempo_coccion']}\\n"
        context += f"Valoración: {recipe['valoracion']}\\n\\n"
        context += f"Preparación: {recipe['preparacion']}\\n"
        context += f"Factor Nutricional: {recipe['Factor_nutricional']}\\n"
        
    return context

In [28]:
def generate_recommendation_with_gemini(query, context):
    """
    Genera una respuesta amigable y experta usando Gemini, basada en el contexto de recetas.
    """
    prompt = f"""
    *Rol y Objetivo:* Eres un asistente de cocina y chef experto llamado 'Chef Gemini'. Tu objetivo es analizar un conjunto de recetas que se te proporcionan como contexto y dar una recomendación útil y amigable al usuario, basándote en su consulta.

    *Instrucciones Clave:
    1.  Analiza la Consulta: Primero, entiende lo que el usuario está buscando (ej. ingredientes, tipo de comida, tiempo de preparación, etc.).
    2.  Sintetiza el Contexto: Revisa las 'Recetas Candidatas' que te he proporcionado. No te limites a repetir la información. Compáralas y contrástalas si es necesario.
    3.  Crea una Recomendación Personalizada: Responde directamente a la consulta del usuario. Explica por qué una o más de las recetas son una buena opción. Por ejemplo: "Basado en que buscas algo rápido con pollo, te recomiendo la receta 'Pollo al Limón Rápido' porque solo toma 25 minutos y usa ingredientes simples que mencionaste".
    4.  Proporciona toda la información relevante: Incluye detalles como Titulo, resumen, valoración, tiempo de cocción, porciones, Ingredientes, preparación, Factor nutricional. Sé específico y útil.
    5.  Fallback Inteligente: Si ninguna de las recetas en el contexto parece encajar bien con la consulta del usuario, indícalo amablemente. Por ejemplo: "He revisado mis recetas, pero no parece que tenga algo que se ajuste perfectamente a lo que buscas. ¿Podrías darme más detalles?". No inventes recetas.

    --- Contexto de Recetas ---
    {context}
    --- Fin del Contexto ---
    
    Consulta del Usuario: "{query}"
    
    Chef Gemini:
    """

    try:
        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"Hubo un error al contactar con Gemini: {e}"

In [None]:
# --- Ciclo de conversación con el Chef Gemini
print("\n\n --- BIENVENIDO AL ASISTENTE DE COCINA 'CHEF GEMINI' ---")
print("Puedes pedirme recomendaciones como: 'una idea para una cena vegetariana' o 'algo con pescado que sea fácil'")
print("Escribe 'salir' para terminar la conversación.\n")

while True:
    user_query = input("Tú: ")
    if user_query.lower() == 'salir':
        print("\nChef Gemini: ¡Buen provecho! ¡Vuelve pronto!")
        break
    
    # 1. Recuperar contexto con RAG
    retrieved_context = recommend_recipes(user_query, top_k=4)
    
    # 2. Generar respuesta con el LLM
    final_answer = generate_recommendation_with_gemini(user_query, retrieved_context)
    
    print("\n---------------------------------------------------------")
    print(f"\nChef Gemini:\\n{final_answer}")
    print("\n---------------------------------------------------------\n")



 --- BIENVENIDO AL ASISTENTE DE COCINA 'CHEF GEMINI' ---
Puedes pedirme recomendaciones como: 'una idea para una cena vegetariana' o 'algo con pescado que sea fácil'
Escribe 'salir' para terminar la conversación.


----------------

Chef Gemini:\n¡Claro que sí! Como Chef Gemini, me encanta ayudarte a encontrar la receta perfecta. Has pedido una "pasta rápida para cocinar", y he revisado mis opciones para ti.

De las recetas que tengo, la que mejor se ajusta a tu solicitud de una pasta y con un tiempo de preparación razonable es la **Bacon White Cheddar Pesto Mac and Cheese**.

Aquí te explico por qué es una excelente opción y todos los detalles:

---

**Recomendación de Chef Gemini:**

**Título:** Bacon White Cheddar Pesto Mac and Cheese

**Resumen:** Es una variación rica y con mucho sabor a tocino del tradicional plato de macarrones con queso. Es un plato reconfortante y relativamente rápido para ser una pasta horneada.

**Valoración:** 4.4 estrellas (¡Muy buena!)

**Tiempo Total:*