In [55]:
import requests
from bs4 import BeautifulSoup
import csv
import time  

def get_recipe_links(main_url):
    response = requests.get(main_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    recipe_links = []
    for link in soup.find_all('a', href=True):
        if '/recipes/' in link['href']:
            absolute_url = f"https://www.gordonramsay.com{link['href']}"
            recipe_links.append(absolute_url)
    
    return list(set(recipe_links)) 

def scrape_recipe(url):
    print(f"Scraping URL: {url}") 
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    ingredients = []
    ingredients_section = soup.find('ul') 
    if ingredients_section:
        for item in ingredients_section.find_all('li'):
            ingredients.append(item.get_text(strip=True))

    instructions = []
    instructions_section = soup.find('ol')  
    if instructions_section:
        for item in instructions_section.find_all('li'):
            instructions.append(item.get_text(strip=True))

    return {
        'url': url,
        'ingredients': ingredients,
        'instructions': instructions,
    }


if __name__ == "__main__":
    main_url = 'https://www.gordonramsay.com/gr/recipes/'
    recipe_links = get_recipe_links(main_url)

    recipes_data = []
    for recipe_link in recipe_links:
        try:
            recipe_data = scrape_recipe(recipe_link)
            recipes_data.append(recipe_data)
            time.sleep(1) 
        except requests.exceptions.RequestException as e:
            print(f"Error while scraping {recipe_link}: {e}")

    
    with open('GRrecipes.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['url', 'ingredients', 'instructions']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for recipe in recipes_data:
            writer.writerow({
                'url': recipe['url'],
                'ingredients': ', '.join(recipe['ingredients']),
                'instructions': ' '.join(recipe['instructions']),
            })

    print("Recipes scraped and saved to recipes.csv")


Scraping URL: https://www.gordonramsay.com/gr/recipes/category/chicken
Scraping URL: https://www.gordonramsay.com/gr/recipes/category/game
Scraping URL: https://www.gordonramsay.com/gr/recipes/category/seafood
Scraping URL: https://www.gordonramsay.com/gr/recipes/roastedrootvegetables/
Scraping URL: https://www.gordonramsay.comhttps://www.gordonramsay.com/gr/recipes/category/fit-food
Error while scraping https://www.gordonramsay.comhttps://www.gordonramsay.com/gr/recipes/category/fit-food: HTTPSConnectionPool(host='www.gordonramsay.comhttps', port=443): Max retries exceeded with url: //www.gordonramsay.com/gr/recipes/category/fit-food (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000173A4FF62E0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Scraping URL: https://www.gordonramsay.com/gr/recipes/category/fit-food
Scraping URL: https://www.gordonramsay.com/gr/recipes/category/store-cupboard-staples
Scraping URL: https://www.g

In [58]:
import requests
from bs4 import BeautifulSoup

def get_recipe_links(main_url):
    response = requests.get(main_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    GRrecipe_links = []
    for link in soup.find_all('a', href=True):
        if '/recipes/' in link['href']:
            # Construct absolute URL
            absolute_url = f"https://www.gordonramsay.com{link['href']}"
            GRrecipe_links.append(absolute_url)
    
    return list(set(GRrecipe_links))  # Return unique links

if __name__ == "__main__":
    main_url = 'https://www.gordonramsay.com/gr/recipes/'
    GRrecipe_links = get_recipe_links(main_url)

    for url in recipe_links:
        print(url)
    
    with open('GRrecipe_links.txt', 'w', encoding='utf-8') as f:
        for url in recipe_links:
            f.write(url + '\n')

    print(f"Total recipe links collected: {len(GRrecipe_links)}")


https://www.gordonramsay.com/gr/recipes/category/chicken
https://www.gordonramsay.com/gr/recipes/category/game
https://www.gordonramsay.com/gr/recipes/category/seafood
https://www.gordonramsay.com/gr/recipes/roastedrootvegetables/
https://www.gordonramsay.comhttps://www.gordonramsay.com/gr/recipes/category/fit-food
https://www.gordonramsay.com/gr/recipes/category/fit-food
https://www.gordonramsay.com/gr/recipes/category/store-cupboard-staples
https://www.gordonramsay.com/gr/recipes/category/summer
https://www.gordonramsay.com/gr/recipes/category/comfort-food
https://www.gordonramsay.com/gr/recipes/category/pancake-day
https://www.gordonramsay.com/gr/recipes/category/vegan
https://www.gordonramsay.com/gr/recipes/
https://www.gordonramsay.com/gr/recipes/category/beef
https://www.gordonramsay.com/gr/recipes/chickenthighswithbacongravy/
https://www.gordonramsay.com/gr/recipes/prsteaksandwich/
https://www.gordonramsay.com/gr/recipes/category/bbq
https://www.gordonramsay.com/gr/recipes/categ

In [63]:
import requests
from bs4 import BeautifulSoup

def get_recipe_links(base_url):
    recipe_links = []
    start = 0  # Starting index for pagination
    max_iterations = 100  # Prevent infinite loops by limiting iterations
    iterations = 0

    while True:
        if iterations >= max_iterations:
            print("Reached maximum iterations. Exiting to prevent infinite loop.")
            break

        current_url = f"{base_url}?start={start}"
        try:
            response = requests.get(current_url)
            response.raise_for_status()  
            soup = BeautifulSoup(response.text, 'html.parser')
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {current_url}: {e}")
            break  # Exit if there was an error

        found_links = False
        for link in soup.find_all('a', href=True):
            if '/recipes/' in link['href']:
                absolute_url = f"https://www.gordonramsay.com{link['href']}"
                if absolute_url not in recipe_links:  #Avoid duplicates
                    recipe_links.append(absolute_url)
                    found_links = True

        print(f"Fetched {len(recipe_links)} links from {current_url}")

        #Increment start for the next load
        start += 16  #16 recipes per load
        iterations += 1  

        if not found_links:
            print("No new links found. Exiting.")
            break

    return recipe_links  


if __name__ == "__main__":
    base_url = 'https://www.gordonramsay.com/gr/recipes/'
    recipe_links = get_recipe_links(base_url)

    for url in recipe_links:
        print(url)

    with open('recipe_links.txt', 'w', encoding='utf-8') as f:
        for url in recipe_links:
            f.write(url + '\n')

    print(f"Total recipe links collected: {len(recipe_links)}")


Fetched 55 links from https://www.gordonramsay.com/gr/recipes/?start=0
Fetched 73 links from https://www.gordonramsay.com/gr/recipes/?start=16
Fetched 91 links from https://www.gordonramsay.com/gr/recipes/?start=32
Fetched 109 links from https://www.gordonramsay.com/gr/recipes/?start=48
Fetched 127 links from https://www.gordonramsay.com/gr/recipes/?start=64
Fetched 145 links from https://www.gordonramsay.com/gr/recipes/?start=80
Fetched 163 links from https://www.gordonramsay.com/gr/recipes/?start=96
Fetched 181 links from https://www.gordonramsay.com/gr/recipes/?start=112
Fetched 199 links from https://www.gordonramsay.com/gr/recipes/?start=128
Fetched 217 links from https://www.gordonramsay.com/gr/recipes/?start=144
Fetched 234 links from https://www.gordonramsay.com/gr/recipes/?start=160
Fetched 235 links from https://www.gordonramsay.com/gr/recipes/?start=176
Fetched 236 links from https://www.gordonramsay.com/gr/recipes/?start=192
Fetched 237 links from https://www.gordonramsay.c

In [78]:
import requests
from bs4 import BeautifulSoup

def scrape_recipe(url):
    try:
        # Simulate a browser request with headers LOOK INTO IT MORE 
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
        }
L
        response = requests.get(url, headers=headers)
        response.raise_for_status()  
        soup = BeautifulSoup(response.content, 'html.parser')

        title_tag = soup.find('div', class_='hero-title-recipe')
        title = title_tag.h2.text.strip() if title_tag else "No title found"

        ingredients_list = []
        ingredients_section = soup.find('ul', class_='recipe-ingredients')
        if not ingredients_section:
            ingredients_section = soup.find('ul', class_='recipe-division')
        if ingredients_section:
            ingredients = ingredients_section.find_all('li')
            for li in ingredients:
                ingredients_list.append(li.text.strip())

        instructions_list = []
        instructions_section = soup.find('ol', class_='recipe-steps')
        if not instructions_section:
            instructions_section = soup.find('ol', class_='recipe-division')  
        if instructions_section:
            instructions = instructions_section.find_all('li')
            for li in instructions:
                instructions_list.append(li.text.strip())

        return {
            'title': title,
            'ingredients': ingredients_list,
            'instructions': instructions_list
        }
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

urls = [
    "https://www.gordonramsay.com/gr/recipes/pork-stuffed-with-manchego-and-membrillo/",
"https://www.gordonramsay.com/gr/recipes/pork-stuffed-with-manchego-and-membrillo/",
"https://www.gordonramsay.com/gr/recipes/category/pork",
"https://www.gordonramsay.com/gr/recipes/roast-beef-with-caramelised-onion-gravy/",
"https://www.gordonramsay.com/gr/recipes/category/beef",
"https://www.gordonramsay.com/gr/recipes/chickenthighswithbacongravy/",
"https://www.gordonramsay.com/gr/recipes/category/chicken",
"https://www.gordonramsay.com/gr/recipes/category/thanksgiving",
"https://www.gordonramsay.com/gr/recipes/category/christmas",
"https://www.gordonramsay.com/gr/recipes/stuffed-lamb-with-spinach-and-pine-nuts/",
"https://www.gordonramsay.com/gr/recipes/category/lamb",
"https://www.gordonramsay.com/gr/recipes/pan-seared-scallops-with-butternut-squash-puree-and-pomegranate-quince-slaw/",
"https://www.gordonramsay.com/gr/recipes/category/seafood",
"https://www.gordonramsay.com/gr/recipes/roastedrootvegetables/",
"https://www.gordonramsay.com/gr/recipes/category/vegetarian",
"https://www.gordonramsay.com/gr/recipes/miso-maple-braised-leeks/",
"https://www.gordonramsay.com/gr/recipes/everythingdip/",
"https://www.gordonramsay.com/gr/recipes/category/summer",
"https://www.gordonramsay.com/gr/recipes/category/autumn",
"https://www.gordonramsay.com/gr/recipes/category/comfort-food",
"https://www.gordonramsay.com/gr/recipes/crispbutterleafsalad/",
"https://www.gordonramsay.com/gr/recipes/category/salads",
"https://www.gordonramsay.com/gr/recipes/pannacottawithpomegranateglaze/",
"https://www.gordonramsay.com/gr/recipes/category/chocolate",
"https://www.gordonramsay.com/gr/recipes/prsteaksandwich/",
"https://www.gordonramsay.com/gr/recipes/crawfishomelette/",
"https://www.gordonramsay.com/gr/recipes/category/egg",
"https://www.gordonramsay.com/gr/recipes/roast-turkey-with-lemon-parsley-and-garlic/",
"https://www.gordonramsay.com/gr/recipes/mexicantlayuda/",
"https://www.gordonramsay.com/gr/recipes/sheetpanfocacciapizza/",
"https://www.gordonramsay.com/gr/recipes/appleberryjamfrenchtoast/",
"https://www.gordonramsay.com/gr/recipes/category/dessert",
"https://www.gordonramsay.com/gr/recipes/category/breakfast",
"https://www.gordonramsay.com/gr/recipes/category/2020-holiday",
"https://www.gordonramsay.com/gr/recipes/category/bbq",
"https://www.gordonramsay.com/gr/recipes/category/drinks",
"https://www.gordonramsay.com/gr/recipes/category/duck",
"https://www.gordonramsay.com/gr/recipes/category/easter",
"https://www.gordonramsay.com/gr/recipes/category/fish",
"https://www.gordonramsay.com/gr/recipes/category/fit-food",
"https://www.gordonramsay.com/gr/recipes/category/game",
"https://www.gordonramsay.com/gr/recipes/category/kids",
"https://www.gordonramsay.com/gr/recipes/category/pancake-day",
"https://www.gordonramsay.com/gr/recipes/category/pasta",
"https://www.gordonramsay.com/gr/recipes/category/pie",
"https://www.gordonramsay.com/gr/recipes/category/store-cupboard-staples",
"https://www.gordonramsay.com/gr/recipes/category/valentines-day",
"https://www.gordonramsay.com/gr/recipes/category/vegan",
"https://www.gordonramsay.com/gr/recipes/category/wellington",
"https://www.gordonramsay.com/gr/recipes/reindeerburger/",
"https://www.gordonramsay.com/gr/recipes/pimento-dip/",
"https://www.gordonramsay.com/gr/recipes/texassteaktacos/",
"https://www.gordonramsay.com/gr/recipes/portuguesebifana/",
"https://www.gordonramsay.com/gr/recipes/lobsterblt/",
"https://www.gordonramsay.com/gr/recipes/mushroomtoast/",
"https://www.gordonramsay.com/gr/recipes/rootvegetablehashbrown/",
"https://www.gordonramsay.com/gr/recipes/veganblt/",
"https://www.gordonramsay.com/gr/recipes/smokedlaketroutmelt/",
"https://www.gordonramsay.com/gr/recipes/oaxacanchilaquiles/",
"https://www.gordonramsay.com/gr/recipes/picklebrinedfriedchicken/",
"https://www.gordonramsay.com/gr/recipes/brietoastwithberries/",
"https://www.gordonramsay.com/gr/recipes/4thofjulytrifle/",
"https://www.gordonramsay.com/gr/recipes/smoky-mountain-breakfast-from-scrambled/",
"https://www.gordonramsay.com/gr/recipes/puertoricanpegao/",
"https://www.gordonramsay.com/gr/recipes/croatiatruffleeggs/",
"https://www.gordonramsay.com/gr/recipes/lobsteromelettefromscrambled/",
"https://www.gordonramsay.com/gr/recipes/pistachiorhubarblayercake/",
"https://www.gordonramsay.com/gr/recipes/portugueseporkandeggs/",
"https://www.gordonramsay.com/gr/recipes/texassteakandeggs/",
"https://www.gordonramsay.com/gr/recipes/oniontatinburger/",
"https://www.gordonramsay.com/gr/recipes/stickyhoisinporkribs/",
"https://www.gordonramsay.com/gr/recipes/avocadotoastwithchorizo/",
"https://www.gordonramsay.com/gr/recipes/baconjamtoast/",
"https://www.gordonramsay.com/gr/recipes/beetcappelletti/",
"https://www.gordonramsay.com/gr/recipes/ricottagnudi/",
"https://www.gordonramsay.com/gr/recipes/ndujachicken/",
"https://www.gordonramsay.com/gr/recipes/honeycombcake/",
"https://www.gordonramsay.com/gr/recipes/easterhotcrossbuns/",
"https://www.gordonramsay.com/gr/recipes/lambshoulderwithspringvegetables/",
"https://www.gordonramsay.com/gr/recipes/vegansteakdinner/",
"https://www.gordonramsay.com/gr/recipes/sesamecrustedshrimp/",
"https://www.gordonramsay.com/gr/recipes/marinatedbeetsalad/",
"https://www.gordonramsay.com/gr/recipes/tortillasoup/",
"https://www.gordonramsay.com/gr/recipes/kingcakestackwithmapleglaze/",
"https://www.gordonramsay.com/gr/recipes/pavlovawithraspberrycurd/",
"https://www.gordonramsay.com/gr/recipes/porkdumplings/",
"https://www.gordonramsay.com/gr/recipes/bakedchickenwings/",
"https://www.gordonramsay.com/gr/recipes/candiedchickpeasnackmix/",
"https://www.gordonramsay.com/gr/recipes/frenchlentilswithlabneh/",
"https://www.gordonramsay.com/gr/recipes/harissatoast/",
"https://www.gordonramsay.com/gr/recipes/barleyrisotto/",
"https://www.gordonramsay.com/gr/recipes/buttermilkbiscuit/",
"https://www.gordonramsay.com/gr/recipes/mojochickenquinoabowl/",
"https://www.gordonramsay.com/gr/recipes/gingerbreadbundtcake/",
"https://www.gordonramsay.com/gr/recipes/holidaystuffing/",
"https://www.gordonramsay.com/gr/recipes/stickytoffee-pudding/",
"https://www.gordonramsay.com/gr/recipes/cacioepepepotatoes/",
"https://www.gordonramsay.com/gr/recipes/turkey-wellington/",
"https://www.gordonramsay.com/gr/recipes/turkey-breast-and-gravy/",
"https://www.gordonramsay.com/gr/recipes/potato-and-butternut-squash-gratin"

]

# Loop through URLs
recipes = []
for url in urls:
    recipe_data = scrape_recipe(url)
    if recipe_data:
        recipes.append(recipe_data)

for recipe in recipes:
    print(f"Title: {recipe['title']}")
    print("Ingredients:")
    for ingredient in recipe['ingredients']:
        print(f"- {ingredient}")
    print("Instructions:")
    for instruction in recipe['instructions']:
        print(f"1. {instruction}")
    print()


Error scraping https://www.gordonramsay.com/gr/recipes/potato-and-butternut-squash-gratin: 404 Client Error: Not Found for url: https://www.gordonramsay.com/gr/recipes/potato-and-butternut-squash-gratin
Title: Stuffed Pork Tenderloin Recipe
Ingredients:
- 1kg pork loin roasting joint, skin scored
- 175g Manchego cheese, finely sliced
- 150g membrillo
- 2 sage sprigs
- Olive oil, for drizzling
- 1 head of garlic, cut in half horizontally
- 1 thyme sprig
- 200ml medium-dry sherry
- Sea salt and freshly ground pepper
Instructions:
1. Preheat the oven to 220°C/Gas 7.
1. Lay the joint, skin side down, on a chopping board and cut three-quarters of the way into the flesh lengthways from top to bottom. Open the meat out to form a long rectangle and season with salt and pepper.
1. Arrange the slices of cheese and the membrillo along the centre of the meat. Scatter the leaves from 1 sage sprig on top, and roll the meat up to enclose the stuffing. Tie at 3cm intervals along the length of the meat

In [79]:
import csv
csv_file_path = 'gordon_ramsay_recipes1.csv'

csv_header = ['Title', 'Ingredients', 'Instructions']

with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=csv_header)
    writer.writeheader()
    
    for recipe in recipes:
        writer.writerow({
            'Title': recipe['title'],
            'Ingredients': ', '.join(recipe['ingredients']),
            'Instructions': '. '.join(recipe['instructions'])
        })

print(f"Recipes saved to {csv_file_path}")

Recipes saved to gordon_ramsay_recipes1.csv
