In [5]:
import requests
from bs4 import BeautifulSoup

def get_recipe_links(base_url):
    recipe_links = []
    page = 1  # Starting page number
    max_pages = 50  # Prevent infinite loops by limiting the number of pages
    iterations = 0

    while True:
        if iterations >= max_pages:
            print("Reached maximum pages. Exiting to prevent infinite loop.")
            break

        current_url = f"{base_url}?page={page}"
        try:
            response = requests.get(current_url)
            response.raise_for_status()  
            soup = BeautifulSoup(response.text, 'html.parser')
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {current_url}: {e}")
            break  # Exit if there was an error

        found_links = False
        # Find all recipe links
        for link in soup.find_all('a', href=True):
            if '/recipes/' in link['href']:
                absolute_url = f"https://www.hellofresh.co.uk{link['href']}"
                if absolute_url not in recipe_links:  # Avoid duplicates
                    recipe_links.append(absolute_url)
                    found_links = True

        print(f"Fetched {len(recipe_links)} links from {current_url}")

        # Increment page for the next load
        page += 1  
        iterations += 1  

        # Check if we found any new links
        if not found_links:
            print("No new links found. Exiting.")
            break

    return recipe_links  

if __name__ == "__main__":
    base_url = 'https://www.hellofresh.co.uk/recipes/most-popular-recipes'
    recipe_links = get_recipe_links(base_url)

    for url in recipe_links:
        print(url)

    # Save the links to a file
    with open('recipe_links.txt', 'w', encoding='utf-8') as f:
        for url in recipe_links:
            f.write(url + '\n')

    print(f"Total recipe links collected: {len(recipe_links)}")


Fetched 76 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=1
Fetched 77 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=2
Fetched 78 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=3
Fetched 79 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=4
Fetched 80 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=5
Fetched 81 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=6
Fetched 82 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=7
Fetched 83 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=8
Fetched 84 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=9
Fetched 85 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=10
Fetched 86 links from https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=11
Fetched 87 links from https://www.hellofr

In [1]:
import requests
from bs4 import BeautifulSoup
 
 
url = 'https://www.hellofresh.co.uk/recipes/most-popular-recipes?page=1'
reqs = requests.get(url)
soup = BeautifulSoup(reqs.text, 'html.parser')
 
urls = []
for link in soup.find_all('a'):
    print(link.get('href'))

/recipes/most-popular-recipes?page=1#
/plans?page=1
/gift?page=1
/login?page=1
/recipes/most-popular-recipes?page=1#
/
/recipes
/recipes/british-recipes
/recipes/asian-recipes
/recipes/french-recipes
/recipes/thai-recipes
/recipes/italian-recipes
/recipes/japanese-recipes
/recipes/mexican-recipes
/recipes/chinese-recipes
/recipes/greek-recipes
/recipes/african-recipes
/plans?c=C9-A3M15&discount_comm_id=7432d65e-367e-40e4-9656-67af4ccbf27c
https://www.hellofresh.co.uk/recipes/thai-style-pulled-chicken-bao-666853e1c42deafdb5a3470e
https://www.hellofresh.co.uk/recipes/yellow-thai-style-chicken-noodles-666ae656c2b46cc6d5537630
https://www.hellofresh.co.uk/recipes/spiced-tomato-veggie-mince-ragu-666ae654c2b46cc6d553762a
https://www.hellofresh.co.uk/recipes/sweet-and-sticky-chicken-udon-noodles-666853fb83f11ad6c3c639bc
/recipes/middle-eastern-recipes
/recipes/mediterranean-recipes
/recipes/american-recipes
/recipes/korean-recipes
/recipes/indian-recipes
/recipes/turkish-recipes
/recipes/germ

In [3]:
import requests
from bs4 import BeautifulSoup

# Initial request to the HelloFresh recipe page
url = 'https://www.hellofresh.co.uk/recipes'
response = requests.get(url)

# Parse the page content
soup = BeautifulSoup(response.content, 'html.parser')

# Find all recipe links
recipe_links = []
for link in soup.find_all('a', {'data-testid': 'zest-link'}):
    href = link.get('href')
    if href:
        full_link = f'{href}'
        recipe_links.append(full_link)

# Print or save the recipe links
for recipe_link in recipe_links:
    print(recipe_link)


/recipes?gad_source=1&gbraid=0AAAAADifOCNxaxJR23PoLugvHN1ZSsR_J&gclid=EAIaIQobChMIj-jN0djviAMVrJtQBh3FSgcFEAAYASAFEgJDFvD_BwE&utm_content=act_paidsearch_seanonbrand&utm_medium=cpc&utm_source=google#
/plans?gad_source=1&gbraid=0AAAAADifOCNxaxJR23PoLugvHN1ZSsR_J&gclid=EAIaIQobChMIj-jN0djviAMVrJtQBh3FSgcFEAAYASAFEgJDFvD_BwE&utm_content=act_paidsearch_seanonbrand&utm_medium=cpc&utm_source=google
/gift?gad_source=1&gbraid=0AAAAADifOCNxaxJR23PoLugvHN1ZSsR_J&gclid=EAIaIQobChMIj-jN0djviAMVrJtQBh3FSgcFEAAYASAFEgJDFvD_BwE&utm_content=act_paidsearch_seanonbrand&utm_medium=cpc&utm_source=google
/recipes?gad_source=1&gbraid=0AAAAADifOCNxaxJR23PoLugvHN1ZSsR_J&gclid=EAIaIQobChMIj-jN0djviAMVrJtQBh3FSgcFEAAYASAFEgJDFvD_BwE&utm_content=act_paidsearch_seanonbrand&utm_medium=cpc&utm_source=google#
/recipes/british-recipes
/recipes/asian-recipes
/recipes/french-recipes
/recipes/thai-recipes
/recipes/italian-recipes
/recipes/japanese-recipes
/recipes/mexican-recipes
/recipes/chinese-recipes
/recipes/greek-r