In [1]:
import json
import os
import time
from functools import wraps
from typing import List, Dict

import requests
from bs4 import BeautifulSoup

# Category names the scraper works with: the default set handed to get_links_from_category()
# when no category is requested, and the whitelist get_category_from_recipe() checks against.
# NOTE(review): "fruehstueck" is transliterated while "getränke" keeps its umlaut - confirm
# both spellings match the class names used in the page markup.
allowed_categories = ["vorspeise", "hauptgang", "dessert", "fruehstueck", "snacks", "brote", "getränke"]

In [2]:
def timeit(func):
    """
    Decorator that reports how long every call to the wrapped function takes.
    :param func: callable to wrap
    :return: wrapper that calls ``func``, prints the elapsed time (measured with
             ``time.perf_counter``) together with the call arguments, and hands
             back the result of ``func`` unchanged
    """

    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        started = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f'Function {func.__name__}{args} {kwargs} Took {elapsed:.4f} seconds')
        return outcome

    return timeit_wrapper

In [3]:
def get_page_content(link: str, timeout: float = 10.0) -> "BeautifulSoup | None":
    """
    Download a page and parse it with BeautifulSoup's 'html.parser'.
    :param link: URL of the page to fetch
    :param timeout: seconds to wait for the server before requests gives up
    :return: parsed_homepage: the parsed page as a BeautifulSoup object (not a plain
             str), or None when the response status code is not 200
    :raises requests.exceptions.RequestException: network errors (including timeouts)
             raised by requests are not caught here and propagate to the caller
    """
    # Without an explicit timeout requests.get() can wait forever on a stalled server.
    homepage = requests.get(link, timeout=timeout)
    if homepage.status_code != 200:
        print(f"Can't reach {link} with status code 200.")
        return None
    return BeautifulSoup(homepage.text, 'html.parser')

In [4]:
get_page_content('https://storage.googleapis.com/www.selinaschoice.ch/index.html')

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<link href="css\style.css" rel="stylesheet"/>
<script src="js\scritp.js"></script>
<link href="img\icon_phone.png" rel="icon"/>
<link href="img\icon_phone.png" rel="apple-touch-icon" sizes="76x76"/>
<link href="img\icon_phone.png" rel="apple-touch-icon" sizes="120x120"/>
<link href="img\icon_phone.png" rel="apple-touch-icon" sizes="152x152"/>
<title>Selinas Choice</title>
</head>
<body>
<!-- App -->
<div class="app">
<header>
<a href="index.html">
<img alt="brand" class="brand" src="img\brand.png"/>
</a>
<h1 class="titel">Selina's Choice</h1>
</header>
<div class="scroll-abstandhalter">
</div>
<div class="nav-feed">
<nav>
<button class="btn" id="nav_vorspeise"><a href="#vorspeise">Vorspeise</a></button>
<button class="btn" id="nav_hauptgang"><a href="#hauptgang">Hauptgang</a></button>
<button clas

In [5]:
def get_links_from_category(category: List) -> List:
    """
    Collect the recipe links listed on the index page for the given categories.
    :param category: class name(s) of the recipe containers to look for; a list of
                     names, or a single name as str (it is passed straight to
                     BeautifulSoup's ``class_`` filter)
    :return: recipes: list of the ``href`` values found, in page order; empty when the
             index page could not be loaded
    """
    main_page = get_page_content('https://storage.googleapis.com/www.selinaschoice.ch/index.html')
    if main_page is None:
        # get_page_content returns None when the request did not come back with status 200.
        return []

    recipes = []
    # Look up the <div> elements that carry one of the requested category classes.
    for tag in main_page.find_all("div", class_=category):
        anchor = tag.a
        href = anchor.get('href') if anchor is not None else None
        if href:
            # Containers without a usable link are skipped instead of crashing
            # on a missing <a> or storing None in the result.
            recipes.append(href)
    return recipes

In [6]:
get_links_from_category(["vorspeise", "hauptgang"])

['https://shortcutapp.io/n/MjgzMzNlMzRkZWZmMGRiYzVjMzkxNDM2OWU0ZDA1MmQz',
 'https://shortcutapp.io/n/ZDBlMzhkNjRkZTliMDg0NGQ4NmRmNjNiM2U0ZGE1ODVj',
 'https://shortcutapp.io/n/ODQwMjlmODVmYWViM2Q0NmIxMDcwNzNjZDVlMGQ4NTg2',
 'https://shortcutapp.io/n/OTVmYmU0Y2I4MWYwMmY0ZTI3YTE4MGJkZTQ4ZTczYjRj',
 'https://shortcutapp.io/n/MWRmNjhhOWQ2ZjgzNzFhYzYxZDAzMTMwZTYzMzQxZGRi',
 'https://shortcutapp.io/n/MDg4OWJiZTU0NmEzMzRmNzQ4ZTMyMGNjMzMxM2IxMGNl',
 'https://shortcutapp.io/n/ZGNlZGM3NDQ1MzU2MTk4ZGJjODdkY2E4Nzc4YzA1ZDIx',
 'https://shortcutapp.io/n/YzVjNDhmZGFkNDMyNTMxYzFkYjk3ZWRhMjg0MmM4NDUx',
 'https://shortcutapp.io/n/MmNhZDIxZjcyNDg3NGY0ZGM3NzRlODNkNmQyOGVhMzQ0',
 'https://shortcutapp.io/n/NGE0MDg2Nzg2NjQyZjZjMzIzZTMyNjhlYWFjMDQzZmRl',
 'https://shortcutapp.io/n/MjZmNTYyNTI2YWQxMDBlMTc1MWUwODE4MmM0YTdkNTFj',
 'https://shortcutapp.io/n/YTEzNTRiNTFiOTJkOGM5N2FkZWMzNDJiNGY0YjcwZDhl',
 'https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5',
 'https://shortcutapp.io/n/Yzc4MzhhM2U

In [7]:
def get_links_to_scrape(categories=None) -> List:
    """
    Fetch recipe links, optionally restricted to certain categories.
    :param categories: None (default) for every entry of ``allowed_categories``,
                       otherwise a str or List of categories
    :return: recipes: List of recipe links
    """
    wanted = allowed_categories if categories is None else categories
    return get_links_from_category(wanted)

In [8]:
get_links_to_scrape("hauptgang")

['https://shortcutapp.io/n/YzVjNDhmZGFkNDMyNTMxYzFkYjk3ZWRhMjg0MmM4NDUx',
 'https://shortcutapp.io/n/MmNhZDIxZjcyNDg3NGY0ZGM3NzRlODNkNmQyOGVhMzQ0',
 'https://shortcutapp.io/n/NGE0MDg2Nzg2NjQyZjZjMzIzZTMyNjhlYWFjMDQzZmRl',
 'https://shortcutapp.io/n/MjZmNTYyNTI2YWQxMDBlMTc1MWUwODE4MmM0YTdkNTFj',
 'https://shortcutapp.io/n/YTEzNTRiNTFiOTJkOGM5N2FkZWMzNDJiNGY0YjcwZDhl',
 'https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5',
 'https://shortcutapp.io/n/Yzc4MzhhM2UyMGIzNzAwMDM3MmQ2ZTg3YzU4NjIwMDEx',
 'https://shortcutapp.io/n/ZmQxN2M1ZGNlN2JhNjAwYTgwNDkyZTlkZWI1NzA5NmZm',
 'https://shortcutapp.io/n/MzhhY2E0OWE3ZWUwMWU1ODcwMGQ1YzA2YTQ3OGUzOGJk',
 'https://shortcutapp.io/n/Y2VmMGMxNTY1ZTU0NzFhMWU2OGE3Mjc3MTYyZWQxNGYy',
 'https://shortcutapp.io/n/MjYyNDJhMTJjMTNhMGJhNTU3NjM0NmU0M2YzNTg0YWYz',
 'https://shortcutapp.io/n/NzNiNDgwMzIzMTQwZmQ5ZjBkNDQ3NzdhYzY4N2NjOTZi',
 'https://shortcutapp.io/n/MjE3YmFjNDI3N2ZmYTdlYzY0N2NlZjMyN2Q2YzVhMmJl',
 'https://shortcutapp.io/n/OGZmNTk5NTV

In [9]:
def get_category_from_recipe(link: str) -> "str | None":
    """
    Look up which allowed category a recipe belongs to, based on its entry on the index page.
    :param link: str, recipe URL exactly as it appears in an ``href`` on the index page
    :return recipe_class: str, or None when the index page or the recipe entry cannot be
            found, or when the class is not one of ``allowed_categories``
    """
    main_page = get_page_content('https://storage.googleapis.com/www.selinaschoice.ch/index.html#hauptgang')
    if main_page is None:
        # get_page_content returns None when the request did not come back with status 200.
        return None

    anchor = main_page.find("a", href=link)
    if anchor is None or anchor.parent is None:
        print(f"Can't find recipe {link} on the main page.")
        return None

    # The first class of the enclosing element is skipped; whatever follows is joined
    # into the category name. A parent without any class yields an empty name.
    parent_classes = anchor.parent.get("class") or []
    recipe_class = "".join(parent_classes[1:])
    if recipe_class in allowed_categories:
        return recipe_class

    print(f"The class from recipe: {link} is {recipe_class} and not in {allowed_categories}.")
    return None

In [10]:
get_category_from_recipe('https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5')


'hauptgang'

In [11]:
def get_recipe_title(link: str) -> str:
    """
    Read a recipe's title from its own page.
    :param link: str, URL of the recipe page
    :return: title: str, text of the page's first <h1> element
    """
    return get_page_content(link).h1.text

In [12]:
get_recipe_title('https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5')

'Indisches Butterchickencurry'

In [13]:
def get_recipe_img_path(link: str) -> str:
    """
    Find the image that the index page shows for a recipe.
    :param link: str, recipe URL as used in the ``href`` of its entry on the index page
    :return img_path: str, ``src`` attribute of the <img> inside that entry's <a> element
    """
    index_page = get_page_content('https://storage.googleapis.com/www.selinaschoice.ch/index.html#hauptgang')
    recipe_anchor = index_page.find("a", href=link)
    return recipe_anchor.find("img").get("src")

In [14]:
get_recipe_img_path("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

'img\\hauptgang\\indisches_butterchickencurry.jpg'

In [15]:
def get_prep_time(link: str) -> str:
    """
    Read the preparation time from a recipe page.
    :param link: str, URL of the recipe page
    :return prep_time: str, text of the <time> element marked ``itemprop="prepTime"``
    """
    recipe_page = get_page_content(link)
    time_tag = recipe_page.find('time', itemprop="prepTime")
    return time_tag.text

In [16]:
get_prep_time("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

'30 minutes'

In [17]:
def get_serves(link: str) -> str:
    """
    Read the number of servings from a recipe page.
    :param link: str, URL of the recipe page
    :return serves: str, text of the second <li> on the page with its first
            8 characters removed
    """
    list_items = get_page_content(link).find_all('li')
    serves_text = list_items[1].text
    # The raw text looks like "serves: 4 persons"; the slice drops that leading label.
    return serves_text[8:]

In [18]:
get_serves("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

'4 persons'

In [19]:
def get_difficulty(link: str) -> str:
    """
    Read the difficulty rating from a recipe page.
    :param link: str, URL of the recipe page
    :return difficulty: str, text of the third <li> on the page with its first
            12 characters removed
    """
    list_items = get_page_content(link).find_all('li')
    difficulty_text = list_items[2].text
    # The raw text looks like "difficulty: easy <emoji>"; the slice drops that leading label.
    return difficulty_text[12:]

In [20]:
get_difficulty("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

'easy 🥳'

In [21]:
def get_ingredients(link: str) -> List[Dict]:
    """
    Read the ingredient list from a recipe page.
    :param link: str, URL of the recipe page
    :return: List[Dict], one ``{"quantity": ..., "name": ...}`` dict per <li> inside the
             page's ``<section class="ingredients">``, in page order
    """
    ingredients_section = get_page_content(link).find("section", class_="ingredients")
    return [
        {
            "quantity": item.find('div', class_='quantity').text,
            "name": item.find('div', class_='name').text,
        }
        for item in ingredients_section.find_all('li')
    ]

In [22]:
get_ingredients("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

[{'quantity': '3', 'name': 'Hähnchenbrustfilets'},
 {'quantity': '1', 'name': 'Zwiebel'},
 {'quantity': '1', 'name': 'Knoblauchzehe'},
 {'quantity': '300 g', 'name': 'Gemüse deiner Wahl'},
 {'quantity': '300 ml', 'name': 'Kokosmilch'},
 {'quantity': '150 ml', 'name': 'passierte Tomaten'},
 {'quantity': '4 EL', 'name': 'Currypulver'},
 {'quantity': '1 EL', 'name': 'Butter'},
 {'quantity': '', 'name': 'Salz & Pfeffer'}]

In [23]:
def get_instructions(link: str) -> List[Dict]:
    """
    Read the preparation steps from a recipe page.
    :param link: str, URL of the recipe page
    :return: List[Dict], one ``{"step": ..., "instruction": ...}`` dict per <li> inside the
             page's ``<section class="instructions">``, in page order; "step" is the <h3>
             text of the item's ``details`` div, "instruction" the text of its ``text`` div
    """
    steps_section = get_page_content(link).find('section', class_='instructions')
    return [
        {
            "step": item.find('div', class_='details').h3.text,
            "instruction": item.find('div', class_='text').text,
        }
        for item in steps_section.find_all('li')
    ]

In [24]:
get_instructions("https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5")

[{'step': 'Step 1',
  'instruction': 'Hähnchen und das gewünschte Gemüse, sowie Zwiebeln & Knoblauch in Stücke schneiden.'},
 {'step': 'Step 2',
  'instruction': 'In einer Bratpfanne den Butter schmelzen und die Hähnchenstücke anbraten. Nicht ganz durchbraten und dann zur Seite legen.'},
 {'step': 'Step 3',
  'instruction': 'Zwiebeln, Knoblauch und das Gemüse anbraten und anschliessend die Kokosmilch und passierte Tomaten hinzugeben.'},
 {'step': 'Step 4',
  'instruction': 'Currypulver, Salz und Pfeffer hinzugeben und das ganze ca. 10min köcheln lassen (je nach Gemüse). Anschliessend die Hähnchenstückebeigeben und nochmals ca. 5min köcheln lassen.'},
 {'step': 'Step 5',
  'instruction': 'Die Sauce abschmecken und anschliessend mit Reis oder Naan-Brote servieren und geniessen.'}]

In [25]:
def get_parsed_recipe(link: str) -> Dict:
    """
    Gather everything known about one recipe into a single dict.
    :param link: str, URL of the recipe page (a single link, not a list)
    :return: recipe_dict: Dict with the keys "title", "recipe_class", "img_path",
             "prep_time", "serves", "difficulty", "ingredients" and "instructions"
    """
    # NOTE: every helper below calls get_page_content() itself, so building one recipe
    # downloads the recipe page and the index page several times.
    return {
        "title": get_recipe_title(link),
        "recipe_class": get_category_from_recipe(link),
        "img_path": get_recipe_img_path(link),
        "prep_time": get_prep_time(link),
        "serves": get_serves(link),
        "difficulty": get_difficulty(link),
        "ingredients": get_ingredients(link),
        "instructions": get_instructions(link),
    }

In [26]:
get_parsed_recipe('https://shortcutapp.io/n/MGEzNzRmYzQ1MjkzYTlhMjY0YTE2NjE4MjgwMGEzOTY5')

{'title': 'Indisches Butterchickencurry',
 'recipe_class': 'hauptgang',
 'img_path': 'img\\hauptgang\\indisches_butterchickencurry.jpg',
 'prep_time': '30 minutes',
 'serves': '4 persons',
 'difficulty': 'easy 🥳',
 'ingredients': [{'quantity': '3', 'name': 'Hähnchenbrustfilets'},
  {'quantity': '1', 'name': 'Zwiebel'},
  {'quantity': '1', 'name': 'Knoblauchzehe'},
  {'quantity': '300 g', 'name': 'Gemüse deiner Wahl'},
  {'quantity': '300 ml', 'name': 'Kokosmilch'},
  {'quantity': '150 ml', 'name': 'passierte Tomaten'},
  {'quantity': '4 EL', 'name': 'Currypulver'},
  {'quantity': '1 EL', 'name': 'Butter'},
  {'quantity': '', 'name': 'Salz & Pfeffer'}],
 'instructions': [{'step': 'Step 1',
   'instruction': 'Hähnchen und das gewünschte Gemüse, sowie Zwiebeln & Knoblauch in Stücke schneiden.'},
  {'step': 'Step 2',
   'instruction': 'In einer Bratpfanne den Butter schmelzen und die Hähnchenstücke anbraten. Nicht ganz durchbraten und dann zur Seite legen.'},
  {'step': 'Step 3',
   'instr

In [27]:
def backup_recipe(recipe: Dict) -> None:
    """
    Write a recipe dict as pretty-printed UTF-8 JSON to ``<title>.json`` in the
    current working directory.
    :param recipe: Dict to store; must contain a "title" key whose value is a str
    :return: None
    """
    # The title is scraped text: keep path separators out of the file name so it can
    # neither point into another directory nor fail on a missing sub-folder.
    safe_title = recipe["title"].replace("/", "_").replace("\\", "_")
    filename = safe_title + ".json"
    # Serialise before opening, so a non-serialisable recipe does not leave an empty file.
    json_object = json.dumps(recipe, indent=4, ensure_ascii=False)
    with open(filename, "w", encoding="utf-8") as outfile:
        outfile.write(json_object)


# To-Do
- Get the category and save it to the dict
- Write the JSON to a specific folder with the right naming

In [32]:
# Create the 'hauptgang' directory inside ../recipes.
# NOTE: os.chdir() changes the working directory of the whole kernel, so every relative
# path opened after this cell resolves against ../recipes.

os.chdir("../recipes")
dirName = 'hauptgang'
try:
    # Create the target directory; os.mkdir raises FileExistsError if the path already exists
    os.mkdir(dirName)
except FileExistsError:
    # An existing directory is not treated as an error - just report it and carry on.
    print("Directory ", dirName, " already exists")