In [41]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the paginated team table from scrapethissite.com page by page and
# export every team row to a single Excel workbook.

# Base URL (the page number is appended) and total number of pages to fetch
base_url = "https://www.scrapethissite.com/pages/forms/?page_num="
num_pages = 24

# One list of stripped cell strings per team row, accumulated across all pages
all_teams = []

# Column headers; filled once from the first page that yields header cells
world_table_titles = []

# Reuse a single HTTP session so the connection is kept alive between pages
# instead of being re-established for each of the requests.
with requests.Session() as session:
    for page_number in range(1, num_pages + 1):
        url = base_url + str(page_number)

        # Only the HTTP call can raise RequestException, so keep the try
        # body limited to it; a failed page is reported and skipped.
        try:
            response = session.get(url, timeout=10)
            response.raise_for_status()  # Raise for 4xx/5xx status codes
        except requests.exceptions.RequestException as e:
            print(f"Error fetching page {page_number}: {e}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        # Find table and header row (either may be absent on a bad page)
        table = soup.find('table')
        if not table:
            print(f"Warning: Table not found on page {page_number}")
            continue

        titles = table.find('tr')
        if not titles:
            print(f"Warning: Titles row not found on page {page_number}")
            continue

        # Read the header from the row's actual cells rather than from all
        # of its children: direct iteration also yields the whitespace text
        # nodes between cells, and filtering on blank text would drop a
        # genuinely empty header cell and misalign the columns.
        if not world_table_titles:
            world_table_titles = [
                cell.text.strip() for cell in titles.find_all(['th', 'td'])
            ]

        # Collect the text of every cell in each team row
        for row in table.find_all('tr', class_="team"):
            individual_row_data = [data.text.strip() for data in row.find_all('td')]

            # A row whose width differs from the header would make the
            # DataFrame constructor raise and lose the whole scrape.
            if world_table_titles and len(individual_row_data) != len(world_table_titles):
                print(f"Warning: Skipping malformed row on page {page_number}")
                continue

            all_teams.append(individual_row_data)

# Create pandas DataFrame with extracted data
if all_teams:  # Check if any data was scraped
    # Fall back to default integer column labels if no header was captured
    df = pd.DataFrame(all_teams, columns=world_table_titles or None)

    # Specify filename for Excel output (adjust as needed)
    filename = "NHL_teams.xlsx"

    try:
        df.to_excel(filename, index=False)  # Save DataFrame to Excel, exclude index
        print(f"Team data saved to '{filename}'.")
    except PermissionError as e:
        # Reported by the OS when the target file cannot be written
        print(f"Error saving to '{filename}': {e}")
        print("Make sure the file is not already open or in use.")

else:
    print("No teams found on any pages.")


Team data saved to 'NHL_teams.xlsx'.
