In [1]:
pip install requests beautifulsoup4 pandas


Note: you may need to restart the kernel to use updated packages.


In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the target page
URL = "http://quotes.toscrape.com/"

# HTTP headers passed to requests.get by fetch_page (browser-style User-Agent)
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
}

def fetch_page(url, timeout=10):
    """Fetch a page and return its HTML text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The response body as text when the server answers with status 200;
        otherwise ``None`` (after printing the error).
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        # Connection failures and timeouts are reported the same way as bad
        # status codes, so callers only ever have to check for None.
        print(f"Fehler: {exc}")
        return None

    if response.status_code != 200:
        print(f"Fehler: {response.status_code}")
        return None
    return response.text

def parse_page(html):
    """Extract quotes, authors, and tags from a page's HTML.

    Args:
        html: HTML markup to parse.

    Returns:
        A list with one dict per element matching ``.quote``. Each dict has
        the keys ``"Quote"``, ``"Author"`` and ``"Tags"``; the tags are joined
        into a single string separated by ``", "``.
    """
    document = BeautifulSoup(html, "html.parser")

    def to_record(block):
        # Build the row for one ``.quote`` element; all texts are stripped of
        # surrounding whitespace.
        return {
            "Quote": block.select_one(".text").text.strip(),
            "Author": block.select_one(".author").text.strip(),
            "Tags": ", ".join(node.text.strip() for node in block.select(".tag")),
        }

    return [to_record(block) for block in document.select(".quote")]

def save_to_csv(data, filename="quotes.csv"):
    """Write the records to a CSV file and print a confirmation.

    Args:
        data: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        filename: Destination path of the CSV file.
    """
    # index=False keeps the DataFrame's row index out of the file.
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Daten gespeichert in {filename}")

# Workflow: fetch the page, extract the quotes, write them to CSV.
if __name__ == "__main__":
    # Fetch the HTML page
    html_content = fetch_page(URL)
    # fetch_page returns None when the request fails, so the remaining steps
    # are skipped in that case.
    if html_content:
        # Extract the data
        quotes_data = parse_page(html_content)
        # Save the data
        save_to_csv(quotes_data)


Daten gespeichert in quotes.csv


In [11]:
# Load quotes.csv back into a DataFrame and print its first rows.
df = pd.read_csv("quotes.csv")
print(df.head())


                                               Quote           Author  \
0  “The world as we have created it is a process ...  Albert Einstein   
1  “It is our choices, Harry, that show what we t...     J.K. Rowling   
2  “There are only two ways to live your life. On...  Albert Einstein   
3  “The person, be it gentleman or lady, who has ...      Jane Austen   
4  “Imperfection is beauty, madness is genius and...   Marilyn Monroe   

                                           Tags  
0        change, deep-thoughts, thinking, world  
1                            abilities, choices  
2  inspirational, life, live, miracle, miracles  
3              aliteracy, books, classic, humor  
4                    be-yourself, inspirational  


# Web-Scraper für Zitate

Ein einfacher Web-Scraper, der Zitate, Autoren und Tags von der Website [Quotes to Scrape](http://quotes.toscrape.com) sammelt und in einer CSV-Datei speichert.

## Anforderungen

- Python 3.x
- requests
- beautifulsoup4
- pandas

## Installation

1. Installiere die benötigten Python-Bibliotheken:
   ```bash
   pip install requests beautifulsoup4 pandas
