In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the paginated table at BASE_URL page by page, collect every
# <tr class="team"> row into a list of dicts, and save the result as data.csv.

BASE_URL = "https://www.scrapethissite.com/pages/forms/"

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _cell_text(row, css_class) -> str:
    """Return the stripped text of the <td> carrying *css_class* inside *row*.

    Raises:
        ValueError: if *row* contains no such cell. This replaces the opaque
            ``AttributeError`` on ``None`` with a message naming the cell.
    """
    cell = row.find("td", class_=css_class)
    if cell is None:
        raise ValueError(f"Row is missing expected <td class={css_class!r}> cell")
    return cell.text.strip()


def _parse_team_row(row) -> dict:
    """Convert one <tr class="team"> element into a dict of typed column values."""
    # The OT-losses cell may hold non-numeric (e.g. empty) text; fall back to 0.
    ot_losses_raw = _cell_text(row, "ot-losses")
    return {
        "team_name": _cell_text(row, "name"),
        "year": int(_cell_text(row, "year")),
        "wins": int(_cell_text(row, "wins")),
        "losses": int(_cell_text(row, "losses")),
        "ot_losses": int(ot_losses_raw) if ot_losses_raw.isdigit() else 0,
        "win_percent": float(_cell_text(row, "pct")),
        "goals_for": int(_cell_text(row, "gf")),
        "goals_against": int(_cell_text(row, "ga")),
    }


all_rows = []
page = 1

# A Session reuses the underlying connection across the page requests.
with requests.Session() as session:
    while True:
        print(f"Scraping page {page}...")
        try:
            response = session.get(
                BASE_URL,
                params={"page_num": page},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Stop cleanly (like the non-200 branch) so rows collected so far
            # are still written to disk instead of being lost to a traceback.
            print(f"Request failed ({exc}). Stopping.")
            break

        if response.status_code != 200:
            print("Error loading page. Stopping.")
            break

        soup = BeautifulSoup(response.text, "html.parser")

        # Extract all rows of class "team"; a page without any marks the end
        # of the pagination.
        rows = soup.find_all("tr", class_="team")
        if not rows:
            print("No more data found. Finished scraping.")
            break

        all_rows.extend(_parse_team_row(row) for row in rows)
        page += 1

print(f"Total rows scraped: {len(all_rows)}")

df = pd.DataFrame(all_rows)
df.to_csv("data.csv", index=False)

print("Saved as data.csv")

import pandas as pd

# Load the scraped table back from disk and print two small reports:
# the team with the most wins in selected seasons, and the number of
# distinct teams in selected seasons.
df = pd.read_csv("data.csv")

# Both year lists are defined up front at top level, so the second loop does
# not depend on the first loop having executed at least once.
years_to_check = [1990, 2000, 2010]
years_to_count = [1991, 2001, 2011]

# Team with the most wins per season.
for year in years_to_check:
    season = df[df["year"] == year]
    if season.empty:
        # No rows for this year: indexing .values[0] below would raise
        # IndexError, so report the gap and move on.
        print(f"{year}: keine Daten vorhanden")
        continue
    best_team = season.sort_values("wins", ascending=False).head(1)
    team_name = best_team["team_name"].values[0]
    wins = best_team["wins"].values[0]
    print(f"{year}: {team_name} mit {wins} Siegen")

# Number of distinct team names per season (0 when the year is absent).
for year in years_to_count:
    teams = df[df["year"] == year]["team_name"].nunique()
    print(f"{year}: {teams} Teams teilgenommen")




Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
No more data found. Finished scraping.
Total rows scraped: 582
Saved as data.csv
1990: Chicago Blackhawks mit 49 Siegen
2000: Colorado Avalanche mit 52 Siegen
2010: Vancouver Canucks mit 54 Siegen
1991: 22 Teams teilgenommen
2001: 30 Teams teilgenommen
2011: 30 Teams teilgenommen
