In [2]:
import requests
from bs4 import BeautifulSoup
import json

# Example recipe URL
url = 'https://www.allrecipes.com/recipe/24074/alysias-basic-meat-lasagna/'

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

def scrape_recipe(url):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the title element and safely get the text
    title_element = soup.find('h1', class_='headline heading-content')
    # Check if title_element was found before calling get_text()
    title = title_element.get_text(strip=True) if title_element else "Title not found"

    # Ingredients
    ingredients = [ing.get_text(strip=True) for ing in soup.select('span.ingredients-item-name')]

    # Instructions
    instructions = [step.get_text(strip=True) for step in soup.select('li.subcontainer.instructions-section-item')]

    # Nutrition Info (if present)
    nutrition_section = soup.find('div', class_='partial recipe-nutrition-section')
    nutrition = nutrition_section.get_text(strip=True) if nutrition_section else "Not available"

    return {
        'title': title,
        'ingredients': ingredients,
        'instructions': instructions,
        'nutrition': nutrition
    }

recipe_data = scrape_recipe(url)
print(json.dumps(recipe_data, indent=2))

{
  "title": "Title not found",
  "ingredients": [],
  "instructions": [],
  "nutrition": "Not available"
}


In [3]:
import requests
from bs4 import BeautifulSoup
import json

def scrape_allrecipes_recipe(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to retrieve page: Status code {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Recipe Title
    title_tag = soup.find('h1', class_='headline heading-content')
    title = title_tag.get_text(strip=True) if title_tag else 'N/A'

    # Ingredients
    ingredients = []
    ingredient_tags = soup.select('span.ingredients-item-name')
    for tag in ingredient_tags:
        ingredients.append(tag.get_text(strip=True))

    # Instructions
    instructions = []
    instruction_tags = soup.select('li.subcontainer.instructions-section-item div.section-body')
    for tag in instruction_tags:
        instructions.append(tag.get_text(strip=True))

    # Nutrition
    nutrition_tag = soup.find('div', class_='partial recipe-nutrition-section')
    nutrition = nutrition_tag.get_text(strip=True) if nutrition_tag else 'N/A'

    # Result Dictionary
    recipe = {
        'title': title,
        'ingredients': ingredients,
        'instructions': instructions,
        'nutrition': nutrition
    }

    return recipe

# Test with a sample AllRecipes URL
recipe_url = 'https://www.allrecipes.com/recipe/24074/alysias-basic-meat-lasagna/'
data = scrape_allrecipes_recipe(recipe_url)

# Output as JSON
print(json.dumps(data, indent=2))


{
  "title": "N/A",
  "ingredients": [],
  "instructions": [],
  "nutrition": "N/A"
}


In [4]:
pip install requests beautifulsoup4




In [5]:
import requests
from bs4 import BeautifulSoup
import json
import time

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

def get_recipe_links(search_term, max_results=5):
    search_url = f'https://www.allrecipes.com/search/results/?search={search_term}'
    response = requests.get(search_url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    recipe_links = []
    for link_tag in soup.select('a.card__titleLink'):
        href = link_tag.get('href')
        if href and href.startswith('https://www.allrecipes.com/recipe/'):
            recipe_links.append(href)
        if len(recipe_links) >= max_results:
            break
    return recipe_links

def scrape_recipe(url):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    title_tag = soup.find('h1', class_='headline heading-content')
    title = title_tag.get_text(strip=True) if title_tag else 'N/A'

    ingredients = [tag.get_text(strip=True) for tag in soup.select('span.ingredients-item-name')]

    instructions = [tag.get_text(strip=True)
                    for tag in soup.select('li.subcontainer.instructions-section-item div.section-body')]

    nutrition_tag = soup.find('div', class_='partial recipe-nutrition-section')
    nutrition = nutrition_tag.get_text(strip=True) if nutrition_tag else 'N/A'

    return {
        'title': title,
        'url': url,
        'ingredients': ingredients,
        'instructions': instructions,
        'nutrition': nutrition
    }

def main():
    search_term = "chicken"
    recipe_urls = get_recipe_links(search_term, max_results=5)

    all_recipes = []
    for url in recipe_urls:
        print(f"Scraping: {url}")
        recipe_data = scrape_recipe(url)
        all_recipes.append(recipe_data)
        time.sleep(2)  # be polite with a delay

    # Save or print result
    print(json.dumps(all_recipes, indent=2))

if __name__ == '__main__':
    main()


[]


In [6]:
import requests
from bs4 import BeautifulSoup
import json
import time
import pandas as pd

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

def get_recipe_links(search_term, max_results=5):
    search_url = f'https://www.allrecipes.com/search/results/?search={search_term}'
    response = requests.get(search_url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    recipe_links = []
    for link_tag in soup.select('a.card__titleLink'):
        href = link_tag.get('href')
        if href and href.startswith('https://www.allrecipes.com/recipe/'):
            recipe_links.append(href)
        if len(recipe_links) >= max_results:
            break
    return recipe_links

def scrape_recipe(url):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    title_tag = soup.find('h1', class_='headline heading-content')
    title = title_tag.get_text(strip=True) if title_tag else 'N/A'

    ingredients = [tag.get_text(strip=True) for tag in soup.select('span.ingredients-item-name')]
    instructions = [tag.get_text(strip=True)
                    for tag in soup.select('li.subcontainer.instructions-section-item div.section-body')]

    nutrition_tag = soup.find('div', class_='partial recipe-nutrition-section')
    nutrition = nutrition_tag.get_text(strip=True) if nutrition_tag else 'N/A'

    return {
        'title': title,
        'url': url,
        'ingredients': '; '.join(ingredients),
        'instructions': ' '.join(instructions),
        'nutrition': nutrition
    }

def main():
    search_term = "chicken"
    recipe_urls = get_recipe_links(search_term, max_results=5)

    all_recipes = []
    for url in recipe_urls:
        print(f"Scraping: {url}")
        recipe_data = scrape_recipe(url)
        all_recipes.append(recipe_data)
        time.sleep(2)  # Be polite

    # Convert to DataFrame
    df = pd.DataFrame(all_recipes)

    # Save to CSV and Excel
    df.to_csv('chicken_recipes.csv', index=False)
    df.to_excel('chicken_recipes.xlsx', index=False)

    print("✅ Data saved to 'chicken_recipes.csv' and 'chicken_recipes.xlsx'.")

if __name__ == '__main__':
    main()


✅ Data saved to 'chicken_recipes.csv' and 'chicken_recipes.xlsx'.
