In [None]:
pip install requests beautifulsoup4 pandas


In [None]:
!pip install selenium


In [None]:
!pip install webdriver-manager


In [None]:
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin


In [None]:
import sys
# NOTE(review): sys.path is Python's *module* search path, so this entry does not
# make the chromedriver executable discoverable by Selenium. It looks like a
# leftover from a copied setup snippet — confirm whether it can be removed.
sys.path.insert(0, '/usr/lib/chromium-browser/chromedriver')

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import csv


In [None]:
# Chrome configuration for the shared, module-level browser session.
options = Options()
for _chrome_flag in (
    '--headless',
    '--disable-gpu',
    '--no-sandbox',
    '--disable-dev-shm-usage',
):
    options.add_argument(_chrome_flag)

# Global browser instance; get_recipe_links_selenium and parse_recipe_selenium
# drive it through `driver.get(...)`.
driver = webdriver.Chrome(options=options)


In [None]:
def get_recipe_links_selenium(url):
    """Open a listing page in the shared browser and collect its recipe links.

    Args:
        url: Address of the listing page to load with the module-level ``driver``.

    Returns:
        list: The ``href`` value of every ``a.card__titleLink`` element that
        carries one.
    """
    driver.get(url)
    time.sleep(2)  # fixed pause giving the page time to render
    page = BeautifulSoup(driver.page_source, 'html.parser')

    hrefs = []
    for anchor in page.select('a.card__titleLink'):
        if 'href' in anchor.attrs:
            hrefs.append(anchor['href'])
    return hrefs

def parse_recipe_selenium(recipe_url):
    """Load one recipe page in the shared browser and extract its main fields.

    Args:
        recipe_url: Address of the recipe page to load with the module-level
            ``driver``.

    Returns:
        dict | None: A dict with the keys ``title`` (``'No title'`` when no
        heading matched), ``ingredients`` (list of str), ``directions``
        (list of str) and ``url``; ``None`` if anything raised while loading
        or parsing (the error is printed).
    """
    try:
        driver.get(recipe_url)
        time.sleep(2)  # fixed pause giving the page time to render
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # A CSS selector matches an <h1> carrying both classes regardless of
        # their order or of additional classes. find(class_='a b') would only
        # match when the class attribute is exactly the string 'a b'.
        title = soup.select_one('h1.headline.heading-content')
        ingredients = [i.get_text(strip=True) for i in soup.select('span.ingredients-item-name')]
        directions = [d.get_text(strip=True) for d in soup.select('li.subcontainer.instructions-section-item p')]

        return {
            'title': title.text.strip() if title else 'No title',
            'ingredients': ingredients,
            'directions': directions,
            'url': recipe_url
        }
    except Exception as e:
        # Best-effort scraping: report the failure and let the caller skip this recipe.
        print(f"Error: {e}")
        return None


In [None]:
def scrape_allrecipes_selenium(pages=1):
    """Walk the chicken listing pages and parse every recipe linked from them.

    NOTE: a later cell in this notebook defines a function with the same name,
    which replaces this one once that cell has been executed.

    Args:
        pages: Number of listing pages to visit, starting from page 1.

    Returns:
        list: The dicts returned by ``parse_recipe_selenium`` for every recipe
        that parsed successfully.
    """
    listing_prefix = "https://www.allrecipes.com/recipes/201/meat-and-poultry/chicken/?page="
    collected = []

    for page_number in range(1, pages + 1):
        print(f"Scraping page {page_number}")
        recipe_urls = get_recipe_links_selenium(f"{listing_prefix}{page_number}")
        print(f"Found {len(recipe_urls)} recipes")

        for recipe_url in recipe_urls:
            parsed = parse_recipe_selenium(recipe_url)
            if parsed:
                collected.append(parsed)
            # Pause between recipe pages, whether or not parsing succeeded.
            time.sleep(1)

    return collected

def save_to_csv(recipes, filename='chicken_recipes.csv'):
    """Write parsed recipes to a CSV file, flattening the list-valued fields.

    Args:
        recipes: Iterable of dicts with the keys ``title``, ``ingredients``
            (list of str), ``directions`` (list of str) and ``url``.
        filename: Path of the CSV file to create or overwrite.
    """
    columns = ['title', 'ingredients', 'directions', 'url']

    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        sheet = csv.DictWriter(out_file, fieldnames=columns)
        sheet.writeheader()
        # Lazily flatten each recipe: ingredients joined by "; ", directions by a space.
        sheet.writerows(
            {
                'title': entry['title'],
                'ingredients': '; '.join(entry['ingredients']),
                'directions': ' '.join(entry['directions']),
                'url': entry['url'],
            }
            for entry in recipes
        )


In [None]:
def scrape_allrecipes_selenium(pages=1):
    """Scrape chicken recipes from allrecipes.com using a dedicated headless Chrome session.

    For every listing page the cookie-consent banner is dismissed when it shows
    up, the recipe links are collected, and each recipe page is visited to read
    its title, ingredients and directions. The browser is always shut down
    before returning, even if an error escapes the loop.

    Args:
        pages: Number of listing pages to walk, starting at page 1. Values
            below 1 return an empty list without starting a browser.

    Returns:
        list[dict]: One dict per scraped recipe with the keys ``title``,
        ``ingredients`` (joined with "; "), ``directions`` (joined with ". ")
        and ``url``. Recipes that fail to load or parse are reported and skipped.
    """
    # Nothing requested: skip the costly driver download / browser start-up.
    if pages < 1:
        return []

    # Function-scope imports keep this cell independent of the import cell.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException, WebDriverException
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager
    import time

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    base_url = "https://www.allrecipes.com/recipes/201/meat-and-poultry/chicken/?page="
    all_recipes = []

    try:
        for page in range(1, pages + 1):
            print(f"Scraping page {page}")
            driver.get(base_url + str(page))
            time.sleep(3)  # wait for JS to load

            # Close cookie popup if present
            try:
                consent_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
                )
                consent_button.click()
            except TimeoutException:
                # No banner appeared within the wait window; nothing to dismiss.
                pass
            except WebDriverException as e:
                # Dismissing the banner is best-effort; keep scraping but say why it failed.
                print(f"Could not dismiss cookie banner: {e}")

            recipe_links = driver.find_elements(By.CSS_SELECTOR, "a.comp.card__titleLink")

            # Read the hrefs into plain strings before navigating away from the listing.
            hrefs = (link.get_attribute("href") for link in recipe_links)
            urls = [href for href in hrefs if href]
            print(f"Found {len(urls)} recipes")

            for url in urls:
                try:
                    driver.get(url)
                    time.sleep(2)

                    title = driver.find_element(By.CSS_SELECTOR, "h1").text.strip()

                    ingredients_elements = driver.find_elements(By.CSS_SELECTOR, "span.ingredients-item-name")
                    ingredients = [elem.text.strip() for elem in ingredients_elements]

                    directions_elements = driver.find_elements(
                        By.CSS_SELECTOR,
                        "li.subcontainer.instructions-section-item > div.section-body",
                    )
                    directions = [elem.text.strip() for elem in directions_elements]

                    all_recipes.append({
                        "title": title,
                        "ingredients": "; ".join(ingredients),
                        "directions": ". ".join(directions),
                        "url": url
                    })

                except Exception as e:
                    # A single broken recipe page must not abort the whole run.
                    print(f"Failed to scrape {url}: {e}")
                    continue
    finally:
        # Always release the browser process, including on unexpected errors.
        driver.quit()

    return all_recipes


In [None]:
def save_to_csv(data, filename='chicken_recipes.csv'):
    """Dump scraped records to a CSV file via pandas.

    Args:
        data: Anything ``pandas.DataFrame`` accepts, e.g. a list of dicts.
        filename: Path of the CSV file to create or overwrite.
    """
    import pandas as pd

    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(data).to_csv(filename, index=False)


In [None]:
import requests
from bs4 import BeautifulSoup
import time
import csv

# Browser-like request headers sent with every listing-page request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/114.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    # "br" is deliberately not advertised: requests can only decode Brotli
    # when an optional brotli package is installed, and an undecodable body
    # would silently yield zero parsed recipe cards.
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Referer": "https://www.google.com/",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

# Listing URL template; "{}" is filled with the 1-based page number.
BASE_URL = "https://www.allrecipes.com/recipes/201/meat-and-poultry/chicken/?page={}"

def scrape_recipes(num_pages=20, timeout=30):
    """Collect recipe titles and links from the chicken listing pages.

    Args:
        num_pages: Number of listing pages to request, starting at page 1.
        timeout: Seconds to wait for each HTTP response before the request is
            abandoned and reported as a failed page.

    Returns:
        list[dict]: One ``{"title": ..., "url": ...}`` dict per recipe card that
        has both a title and a link target. Pages that fail to download are
        reported and skipped.
    """
    recipes = []

    # The context manager closes the session's pooled connections when done.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        for page in range(1, num_pages + 1):
            print(f"Scraping page {page}...")
            url = BASE_URL.format(page)
            try:
                # Without a timeout a stalled server would block the run indefinitely.
                response = session.get(url, timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as e:
                print(f"Error fetching page {page}: {e}")
            else:
                soup = BeautifulSoup(response.text, "html.parser")

                # Two card layouts are supported; the second is the fallback.
                cards = soup.select("article.fixed-recipe-card") or soup.select("div.card__detailsContainer")
                print(f"Found {len(cards)} recipes on page {page}")

                for card in cards:
                    title_tag = card.select_one("h3.fixed-recipe-card__h3") or card.select_one("h3.card__title")
                    link_tag = card.select_one("a.fixed-recipe-card__title-link") or card.select_one("a.card__titleLink")
                    link = link_tag.get("href") if link_tag else None

                    # Skip cards without a usable link instead of storing url=None.
                    if title_tag and link:
                        recipes.append({"title": title_tag.get_text(strip=True), "url": link})

            # Pause after every page, including failed ones, to avoid hammering the site.
            time.sleep(3)

    return recipes

def save_to_csv(recipes, filename="chicken_recipes.csv"):
    """Write title/url records to a CSV file and report how many were saved.

    Args:
        recipes: Sequence of dicts containing the keys ``title`` and ``url``.
        filename: Path of the CSV file to create or overwrite.
    """
    columns = ["title", "url"]

    with open(filename, "w", newline="", encoding="utf-8") as out_file:
        sheet = csv.DictWriter(out_file, fieldnames=columns)
        sheet.writeheader()
        for entry in recipes:
            sheet.writerow(entry)

    total = len(recipes)
    print(f"Saved {total} recipes to {filename}")

# Script-style entry point: fetch 20 listing pages, keep the result in the
# module-level `recipes` variable, and write it with save_to_csv's default filename.
if __name__ == "__main__":
    recipes = scrape_recipes(num_pages=20)
    save_to_csv(recipes)


In [None]:
pip install python-allrecipes==0.3.1


In [None]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_tasty_recipes(pages=1, output_file="tasty_recipes.csv"):
    """Scrape up to ten chicken recipes from Tasty's topic page into a CSV file.

    Args:
        pages: Currently unused — only the single topic page is fetched. Kept
            so existing calls that pass it keep working.
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        None. The CSV gets the columns ``title``, ``ingredients`` and
        ``instructions`` (list values joined with " | "). If the topic page
        cannot be loaded, a message is printed and no file is written.
    """
    from urllib.parse import urljoin

    base_url = "https://tasty.co/topic/chicken"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    request_timeout = 30  # seconds; prevents a stalled connection from hanging the cell

    recipe_data = []

    print("Scraping Tasty.co...")

    try:
        response = requests.get(base_url, headers=headers, timeout=request_timeout)
    except requests.RequestException:
        response = None
    if response is None or response.status_code != 200:
        print(f"Failed to load page: {base_url}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    # Compound class selector: an <a> carrying BOTH classes. With a space
    # between them the selector would look for an <analyt-unit-tap> element
    # nested inside the link and never match. [href] guarantees the attribute
    # read below exists.
    links = soup.select('a.feed-item.analyt-unit-tap[href]')[:10]  # Top 10 recipes on the page

    for link in links:
        # urljoin copes with both site-relative and already-absolute hrefs.
        recipe_url = urljoin("https://tasty.co", link['href'])
        print(f"Fetching: {recipe_url}")
        try:
            recipe_resp = requests.get(recipe_url, headers=headers, timeout=request_timeout)
        except requests.RequestException:
            recipe_resp = None
        if recipe_resp is None or recipe_resp.status_code != 200:
            print(f"Failed to fetch: {recipe_url}")
            continue

        recipe_soup = BeautifulSoup(recipe_resp.content, 'html.parser')

        title_tag = recipe_soup.find('h1', class_='recipe-name')
        ingredients = recipe_soup.select('li.ingredient')
        steps = recipe_soup.select('li.preparation-step')

        recipe_data.append({
            'title': title_tag.text.strip() if title_tag else "N/A",
            'ingredients': " | ".join(i.text.strip() for i in ingredients),
            'instructions': " | ".join(s.text.strip() for s in steps)
        })

    # Save to CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=["title", "ingredients", "instructions"])
        writer.writeheader()
        writer.writerows(recipe_data)

    print(f"\nSaved {len(recipe_data)} recipes to {output_file}")

# Run the scraper with its defaults; this performs live HTTP requests and
# writes the results to "tasty_recipes.csv" (the default output_file).
scrape_tasty_recipes()


In [None]:
import requests
import csv

def _mealdb_ingredient_lines(meal):
    """Return "<measure> <ingredient>" strings for the populated ingredient slots of one meal."""
    lines = []
    for slot in range(1, 21):
        ingredient = (meal.get(f"strIngredient{slot}") or "").strip()
        if not ingredient:
            continue
        # The measure can be missing or None even when the ingredient is set;
        # calling .strip() on it directly would raise AttributeError.
        measure = (meal.get(f"strMeasure{slot}") or "").strip()
        lines.append(f"{measure} {ingredient}" if measure else ingredient)
    return lines


def get_mealdb_all_recipes(max_limit=10000, output_csv="mealdb_all_recipes.csv"):
    """Download recipes from TheMealDB letter by letter and save them as CSV.

    The search endpoint is queried once for every first letter a-z until
    ``max_limit`` recipes have been collected.

    Args:
        max_limit: Maximum number of recipes to collect. With a value of 0 or
            less no request is made and only the CSV header is written.
        output_csv: Path of the CSV file to create or overwrite.

    Returns:
        None. The CSV gets the columns ``title``, ``ingredients`` (joined with
        " | ") and ``instructions``. Letters whose request fails are reported
        and skipped.
    """
    recipes = []

    for letter in map(chr, range(ord('a'), ord('z') + 1)):
        # Checking before each request avoids fetching once the limit is reached.
        if len(recipes) >= max_limit:
            break

        url = f"https://www.themealdb.com/api/json/v1/1/search.php?f={letter}"
        try:
            # The timeout keeps one stalled request from hanging the whole run.
            response = requests.get(url, timeout=30)
        except requests.RequestException:
            print(f"Failed to fetch letter '{letter}'")
            continue
        if response.status_code != 200:
            print(f"Failed to fetch letter '{letter}'")
            continue

        try:
            data = response.json()
        except ValueError:
            # Body was not valid JSON; treat it like any other failed letter.
            print(f"Failed to fetch letter '{letter}'")
            continue

        meals = data.get("meals")
        if not meals:
            continue

        for meal in meals:
            if len(recipes) >= max_limit:
                break
            recipes.append({
                "title": meal.get("strMeal"),
                "ingredients": " | ".join(_mealdb_ingredient_lines(meal)),
                "instructions": meal.get("strInstructions")
            })

    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title", "ingredients", "instructions"])
        writer.writeheader()
        writer.writerows(recipes)

    print(f"Saved {len(recipes)} recipes to '{output_csv}'")

# Run to collect up to 10,000 recipes (or fewer if not available).
# Performs live HTTP requests and writes "mealdb_all_recipes.csv" (the default output_csv).
get_mealdb_all_recipes(max_limit=10000)
