In [1]:
# IMPORT LIBRARIES
import requests
from bs4 import BeautifulSoup
import json

# Function to scrape a recipe from food.com
def scrape_recipe(url, timeout=30):
    """Download one recipe page and pull out its title, ingredients, directions and cooking time.

    Every field is located through hard-coded tag/class selectors. When a
    selector matches nothing the field falls back to a placeholder
    ('No title found', an empty list, or 'Unknown') instead of raising.

    Args:
        url: Address of the recipe page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A dict with the keys "title" (str), "ingredients" (list of str),
        "directions" (list of str) and "cooking_time" (str).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Request the webpage (bounded by ``timeout`` so one dead host cannot
    # stall a whole batch of URLs).
    response = requests.get(url, timeout=timeout)

    # Use BeautifulSoup to parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the title
    # NOTE(review): the 'svelte-...' class names look build-generated and may
    # change when the site is redeployed -- confirm they are still current.
    title_tag = soup.find('h1', class_='svelte-1muv3s8')
    title = title_tag.text.strip() if title_tag else 'No title found'

    # Extract the ingredients
    ingredients_list = []
    ingredients_ul = soup.find('ul', class_='ingredient-list')
    if ingredients_ul:
        for li in ingredients_ul.find_all('li', style='display: contents'):
            quantity_span = li.find('span', class_='ingredient-quantity')
            text_span = li.find('span', class_='ingredient-text')

            # Join the quantity fragments without separators and normalise
            # the fraction slash (U+2044) to a plain '/'.
            quantity = ''.join(quantity_span.stripped_strings) if quantity_span else ''
            quantity = quantity.replace('"/"', '/').replace("\u2044", "/")

            # Join the text fragments with single spaces and drop double quotes.
            if text_span:
                text = ' '.join(text_span.stripped_strings).replace('"', '').strip()
            else:
                text = ''

            # Strip so a missing quantity does not leave a leading space, and
            # skip items where neither part was found instead of storing a
            # blank entry.
            combined_text = f"{quantity} {text}".strip()
            if combined_text:
                ingredients_list.append(combined_text)

    # Extract the directions
    directions_list = []
    directions_ul = soup.find('ul', class_='direction-list')
    if directions_ul:
        directions_list = [
            li.text.strip()
            for li in directions_ul.find_all('li', class_='direction')
        ]

    # Extract the cooking time: the first <dd> carrying these classes.
    # NOTE(review): assumes the first matching facts value is the cooking
    # time -- verify against the page markup.
    cooking_time_tag = soup.find('dd', class_='facts__value svelte-1dqq0pw')
    cooking_time = cooking_time_tag.text.strip() if cooking_time_tag else 'Unknown'

    # Create recipe data
    return {
        "title": title,
        "ingredients": ingredients_list,
        "directions": directions_list,
        "cooking_time": cooking_time,
    }

# Function to read URLs from a text file
def read_urls(file_path):
    """Read a text file and return its non-blank lines as a list of URLs.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping (blank separator lines, trailing newlines at the end of
    the file) are dropped so that callers never receive an empty URL.

    Args:
        file_path: Path of the text file, one URL per line.

    Returns:
        A list of stripped, non-empty strings in file order.
    """
    with open(file_path, 'r') as file:
        # Iterate the file lazily instead of materialising readlines().
        stripped_lines = (line.strip() for line in file)
        return [url for url in stripped_lines if url]

# Main function to scrape multiple recipes and save them to a single JSON file
def scrape_multiple_recipes(file_path, output_file):
    """Scrape every URL listed in ``file_path`` and write the results to ``output_file``.

    The URLs are obtained with ``read_urls`` and each one is passed to
    ``scrape_recipe`` in file order. The resulting list of recipe dicts is
    serialised as a single JSON array indented by four spaces.

    Args:
        file_path: Path of the text file containing the URLs.
        output_file: Path of the JSON file to create or overwrite.
    """
    scraped = [scrape_recipe(link) for link in read_urls(file_path)]

    # Persist the complete batch in one write.
    with open(output_file, 'w') as destination:
        json.dump(scraped, destination, indent=4)

# Input: text file listing the recipe URLs to scrape (read by read_urls).
urls_file_path = '/kaggle/input/receipe/receipe.txt'  # Update this to the path of your text file with URLs
# Output: JSON file that receives the list of all scraped recipes.
output_json_file = '/kaggle/working/recipes.json'

# Run the scrape: fetches every listed URL and writes the combined JSON file.
scrape_multiple_recipes(urls_file_path, output_json_file)


In [4]:
import json

def process_directions(directions):
    """Split recipe directions into single sentences and number them.

    Each direction is split on '. ' (period followed by a space). Every
    fragment is trimmed of surrounding whitespace and of leading/trailing
    periods; fragments that are empty after trimming are discarded so that
    no blank steps are numbered.

    Args:
        directions: Iterable of direction strings.

    Returns:
        A list of strings of the form "<n>. <sentence>", numbered from 1
        across all directions in order.
    """
    processed_directions = []
    for direction in directions:
        for fragment in direction.split('. '):
            # Trim whitespace first so a period followed by a trailing
            # space/newline is still removed, then trim again in case
            # removing the periods exposed more whitespace.
            sentence = fragment.strip().strip('.').strip()
            # Test emptiness after cleaning: whitespace-only or dot-only
            # fragments must not become empty numbered steps.
            if sentence:
                processed_directions.append(sentence)
    # Add numbering
    return [
        f"{number}. {sentence}"
        for number, sentence in enumerate(processed_directions, start=1)
    ]

def process_recipes(input_json_path, output_json_path):
    """Rewrite the 'directions' of every recipe in a JSON file and save the result.

    The input file is expected to hold a JSON list of recipe objects. For
    each recipe a shallow copy is built whose 'directions' value is replaced
    by the output of ``process_directions``; the loaded recipe objects are
    not mutated. The new list is written as JSON indented by four spaces.

    Args:
        input_json_path: Path of the JSON file to read.
        output_json_path: Path of the JSON file to create or overwrite.
    """
    with open(input_json_path, 'r') as source:
        loaded = json.load(source)

    # Build shallow copies with only the 'directions' entry swapped out.
    rewritten = [
        {**entry, 'directions': process_directions(entry['directions'])}
        for entry in loaded
    ]

    with open(output_json_path, 'w') as destination:
        json.dump(rewritten, destination, indent=4)

# Paths to input and output JSON files
# NOTE(review): the input is 'recipes1000.json' from a separate input
# directory, not the '/kaggle/working/recipes.json' path used elsewhere in
# this file -- confirm this is the intended source.
input_json_path = '/kaggle/input/newrecip/recipes1000.json'
output_json_path = '/kaggle/working/processed_recipes.json'

# Process the recipes: split and number each recipe's directions, then save.
process_recipes(input_json_path, output_json_path)
