In [None]:
!pip install selenium pandas

import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Configure Chrome to run without a visible window. The flags are applied in
# the order listed.
chrome_options = Options()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

# Start the browser session that the rest of the script drives.
driver = webdriver.Chrome(options=chrome_options)

# Keyword to search. This is the single place to change the topic: the value
# is URL-encoded into the query string below, so multi-word terms and special
# characters produce a valid URL.
search_term = "chicken"
search_url = f"https://www.allrecipes.com/search?q={quote_plus(search_term)}"

try:
    # Load search page
    driver.get(search_url)
    time.sleep(5)  # wait for JS to load

    # Get recipe links: keep only anchors whose href points at a recipe page.
    # NOTE(review): the CSS selector is tied to the site's current markup;
    # confirm it still matches if zero links are found.
    candidate_hrefs = (
        elem.get_attribute("href")
        for elem in driver.find_elements(By.CSS_SELECTOR, "a.card__titleLink")
    )
    # dict.fromkeys removes duplicates while preserving first-seen order, so
    # the scrape order (and the CSV row order) is the same on every run.
    recipe_links = list(
        dict.fromkeys(
            href for href in candidate_hrefs if href and "/recipe/" in href
        )
    )
except Exception:
    # Without the link list nothing later can run, so shut the browser down
    # before propagating the error rather than leaking the Chrome process.
    driver.quit()
    raise

print(f"🔗 Found {len(recipe_links)} recipes. Scraping each...")

# Scrape recipe content: visit each collected link and record its title,
# ingredient list, instruction steps and URL.
recipes_data = []
try:
    # At most the first 1000 links are visited.
    for position, url in enumerate(recipe_links[:1000], start=1):
        try:
            print(f"➡️  Scraping {position}: {url}")
            driver.get(url)
            time.sleep(3)  # fixed pause after navigation before reading the page

            title = driver.find_element(By.TAG_NAME, "h1").text.strip()

            # NOTE(review): both selectors below depend on the site's markup;
            # if they stop matching, the lists are silently empty.
            ingredients = [
                ing.text.strip()
                for ing in driver.find_elements(
                    By.CSS_SELECTOR, "span.ingredients-item-name"
                )
            ]

            instructions = [
                step.text.strip()
                for step in driver.find_elements(
                    By.CSS_SELECTOR,
                    "li.subcontainer.instructions-section-item p",
                )
            ]

            recipes_data.append({
                "Title": title,
                "Ingredients": ingredients,
                "Instructions": instructions,
                "URL": url,
            })

        except Exception as e:
            # Best-effort crawl: report the failing page and move on to the
            # next one instead of aborting the whole run.
            print(f"❌ Failed at {url}: {e}")
finally:
    # Always release the browser, even when the crawl is interrupted or an
    # unexpected error escapes the loop.
    driver.quit()

# Save to CSV. The file name is derived from the search term; characters that
# are not letters or digits are replaced with "_" so the name stays a single
# valid path component.
safe_term = "".join(ch if ch.isalnum() else "_" for ch in search_term)
output_path = f"{safe_term}_recipes.csv"

# Ingredients and Instructions are Python lists, so each of those CSV cells
# holds the list's string representation.
df = pd.DataFrame(recipes_data)
df.to_csv(output_path, index=False)

print(f"✅ Done. Saved recipes to {output_path}")
