In [63]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
import pandas as pd

In [64]:
# Page to scrape; kept in one place so the target recipe is easy to change.
RECIPE_URL = "https://www.allrecipes.com/recipe/256100/nutella-pastry-christmas-tree/"

# Resolve geckodriver through webdriver_manager and hand the result to the
# Firefox service, then open the recipe page in the new browser session.
geckodriver_path = GeckoDriverManager().install()
firefox_service = Service(geckodriver_path)
driver = webdriver.Firefox(service=firefox_service)
driver.get(RECIPE_URL)

In [65]:
 
# Scrape one recipe page (already opened in `driver`) into a one-row DataFrame.
#
# Names left in the notebook namespace for later cells: titulo, prep_time,
# cook_time, total_time, servings, ingredientes, pasos, df.
#
# The browser is always closed when this cell finishes, even if scraping
# fails, so a failed run does not leave a Firefox process behind.

# Maximum number of seconds to wait for the recipe heading to appear.
PAGE_LOAD_TIMEOUT_S = 10


def _first_text(parent, xpath, default=''):
    """Return the stripped text of the first node matching ``xpath``.

    Uses ``find_elements`` (which returns an empty list when nothing matches)
    instead of ``find_element`` wrapped in a bare ``except``, so only the
    "not found" case is tolerated and real errors still surface.

    Args:
        parent: WebDriver or WebElement to search from.
        xpath: XPath expression evaluated against ``parent``.
        default: Value returned when nothing matches.

    Returns:
        The stripped ``.text`` of the first match, or ``default``.
    """
    matches = parent.find_elements(By.XPATH, xpath)
    return matches[0].text.strip() if matches else default


try:
    # Title: wait explicitly for the heading so scraping does not start before
    # it exists. Raises selenium's TimeoutException if it never shows up.
    titulo_xpath = '//h1[@class="article-heading text-headline-400"]'
    titulo_element = WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(
        EC.presence_of_element_located((By.XPATH, titulo_xpath))
    )
    titulo = titulo_element.text.strip()

    # Times and servings: each value is the sibling <div> of its label.
    # A field that is absent from the page yields '' instead of raising.
    prep_time_xpath = '//div[contains(text(),"Prep Time:")]/following-sibling::div'
    cook_time_xpath = '//div[contains(text(),"Cook Time:")]/following-sibling::div'
    total_time_xpath = '//div[contains(text(),"Total Time:")]/following-sibling::div'
    servings_xpath = '//div[contains(text(),"Servings:")]/following-sibling::div'

    prep_time = _first_text(driver, prep_time_xpath)
    cook_time = _first_text(driver, cook_time_xpath)
    total_time = _first_text(driver, total_time_xpath)
    servings = _first_text(driver, servings_xpath)

    # Ingredients: one <li> per ingredient, split into quantity / unit / name.
    ingredients_xpath = '//ul[@class="mm-recipes-structured-ingredients__list"]/li'
    ingredient_elements = driver.find_elements(By.XPATH, ingredients_xpath)

    ingredientes = []
    for item in ingredient_elements:
        cantidad = _first_text(item, './/span[@data-ingredient-quantity="true"]')
        unidad = _first_text(item, './/span[@data-ingredient-unit="true"]')
        ingrediente = _first_text(item, './/span[@data-ingredient-name="true"]')
        # Join only the parts that are present so a missing unit does not
        # leave a double space in the middle of the line.
        partes = [parte for parte in (cantidad, unidad, ingrediente) if parte]
        ingredientes.append(' '.join(partes))

    # Preparation steps: paragraphs inside the steps list, skipping captions.
    steps_xpath = '//ol[@id="mntl-sc-block_1-0"]/li/p[not(contains(@class, "figure-article-caption"))]'
    step_elements = driver.find_elements(By.XPATH, steps_xpath)

    # Drop empty paragraphs BEFORE numbering so step numbers stay consecutive.
    step_texts = [step.text.strip() for step in step_elements]
    pasos = [
        f"Paso {numero}: {texto}"
        for numero, texto in enumerate(filter(None, step_texts), 1)
    ]
finally:
    # Close the browser whether or not scraping succeeded.
    driver.quit()

# Build the one-row DataFrame; multi-valued fields are newline-joined strings.
df = pd.DataFrame([{
    'titulo': titulo,
    'tiempo_preparacion': prep_time,
    'tiempo_cocina': cook_time,
    'tiempo_total': total_time,
    'porciones': servings,
    'ingredientes': '\n'.join(ingredientes),
    'pasos': '\n'.join(pasos)
}])

print(df)

                               titulo tiempo_preparacion tiempo_cocina  \
0  Nutella Puff Pastry Christmas Tree            35 mins       15 mins   

  tiempo_total porciones                                       ingredientes  \
0      55 mins         8  1 (17.5 ounce) package frozen puff pastry, tha...   

                                               pasos  
0  Paso 1: Preheat the oven to 375 degrees F (190...  


In [66]:
# Write the scraped recipe to CSV without the index column, then echo a
# confirmation message followed by the DataFrame contents.
csv_path = 'recetas.csv'
df.to_csv(csv_path, encoding='utf-8-sig', index=False)

# One print call; sep="\n" reproduces the line breaks of separate prints.
print(
    "El archivo CSV ha sido creado exitosamente.",
    "\nContenido del DataFrame:",
    df,
    sep="\n",
)

El archivo CSV ha sido creado exitosamente.

Contenido del DataFrame:
                               titulo tiempo_preparacion tiempo_cocina  \
0  Nutella Puff Pastry Christmas Tree            35 mins       15 mins   

  tiempo_total porciones                                       ingredientes  \
0      55 mins         8  1 (17.5 ounce) package frozen puff pastry, tha...   

                                               pasos  
0  Paso 1: Preheat the oven to 375 degrees F (190...  
