In [None]:
import os
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Scrape ESPN's league-wide NBA standings for several seasons and collect
# one record per (season, team) into `all_team_stats`, then build `df`.
headers = {'User-Agent': 'Mozilla/5.0'}
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever
STAT_COLUMNS = ['Wins', 'Losses', 'PPG', 'OPP_PPG']
all_team_stats = []


def _parse_standings(soup, season):
    """Extract per-team records from a parsed standings page.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of one season's standings page.
    season : int
        Season label stored in every returned record.

    Returns
    -------
    list[dict] | None
        One dict per team with keys 'Season', 'Team', 'Wins', 'Losses',
        'PPG', 'OPP_PPG' (stat values are still raw strings), or ``None``
        when the page contains fewer than two <table> elements.
    """
    tables = soup.find_all('table')
    if len(tables) < 2:
        return None

    # The first table is read for team names and the second for stats;
    # rows are paired positionally. [1:] drops each table's first row.
    name_rows = tables[0].find_all('tr')[1:]
    stat_rows = tables[1].find_all('tr')[1:]

    records = []
    for name_row, stat_row in zip(name_rows, stat_rows):
        name_cells = name_row.find_all('td')
        stat_cells = stat_row.find_all('td')

        # Skip rows that cannot hold a team name or the stat cells read below.
        if len(name_cells) == 0 or len(stat_cells) < 10:
            continue

        # Extract team name from <span class="hide-mobile"><a>...</a></span>
        team_tag = name_cells[0].select_one('span.hide-mobile a')
        team_name = team_tag.text.strip() if team_tag else ''
        if not team_name:
            continue

        # NOTE(review): indices 0, 1, 8, 9 assume the page's column order is
        # W, L, ..., PPG, OPP PPG — confirm if the page layout changes.
        records.append({
            'Season': season,
            'Team': team_name,
            'Wins': stat_cells[0].text.strip(),
            'Losses': stat_cells[1].text.strip(),
            'PPG': stat_cells[8].text.strip(),
            'OPP_PPG': stat_cells[9].text.strip()
        })
    return records


# Loop through seasons 2022 to 2025
for season in range(2022, 2026):
    print(f"Scraping season {season}...")

    url = f'https://www.espn.com/nba/standings/_/season/{season}/group/league'
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort scrape: report the failure and move on to the next season.
        print(f"⚠️ Request failed for season {season}: {exc}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    season_records = _parse_standings(soup, season)

    # Skip if table structure is missing
    if season_records is None:
        print(f"⚠️ Tables not found for season {season}")
        continue

    all_team_stats.extend(season_records)

# Build DataFrame. Naming the columns explicitly keeps the frame well-formed
# (and the sort below valid) even when nothing was scraped.
df = pd.DataFrame(all_team_stats, columns=['Season', 'Team'] + STAT_COLUMNS)

# Convert numeric columns; unparseable values become NaN.
for col in STAT_COLUMNS:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Sort once, after conversion (the sort keys are not among the converted columns).
df = df.sort_values(by=['Team', 'Season']).reset_index(drop=True)

# Show full results
pd.set_option('display.max_rows', None)
print(df)

Scraping season 2022...
Scraping season 2023...
Scraping season 2024...
Scraping season 2025...
     Season                    Team  Wins  Losses    PPG  OPP_PPG
0      2022           Atlanta Hawks    43      39  113.9    112.4
1      2023           Atlanta Hawks    41      41  118.4    118.1
2      2024           Atlanta Hawks    36      46  118.3    120.5
3      2025           Atlanta Hawks    40      42  118.2    119.3
4      2022          Boston Celtics    51      31  111.8    104.5
5      2023          Boston Celtics    57      25  117.9    111.4
6      2024          Boston Celtics    64      18  120.6    109.2
7      2025          Boston Celtics    61      21  116.3    107.2
8      2022           Brooklyn Nets    44      38  112.9    112.1
9      2023           Brooklyn Nets    45      37  113.4    112.5
10     2024           Brooklyn Nets    32      50  110.4    113.3
11     2025           Brooklyn Nets    26      56  105.1    112.2
12     2022       Charlotte Hornets    43     

In [6]:
from sqlalchemy import create_engine
!pip install pymysql

Collecting pymysql
  Downloading PyMySQL-1.1.1-py3-none-any.whl.metadata (4.4 kB)
Downloading PyMySQL-1.1.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymysql
Successfully installed pymysql-1.1.1


In [None]:
# Load variables from a local .env file into the process environment so the
# os.getenv() calls below can see them. By default load_dotenv() does not
# override variables that are already set in the environment.
load_dotenv()

# PostgreSQL connection settings, read from the environment.
host = os.getenv("PG_HOST")
user = os.getenv("PG_USER")
password = os.getenv("PG_PASSWORD")
# PG_PORT is optional; fall back to 5432 when it is unset or empty.
port = int(os.getenv("PG_PORT") or 5432)
database = os.getenv("PG_DATABASE")

# Fail fast with a clear message rather than building a connection URL that
# contains the literal text "None".
_required_settings = {
    "PG_HOST": host,
    "PG_USER": user,
    "PG_PASSWORD": password,
    "PG_DATABASE": database,
}
_missing_settings = [name for name, value in _required_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

In [16]:
# Percent-encode the credentials before placing them in the URL: characters
# such as '@', '/', ':' or '%' in a user name or password would otherwise be
# read as URL delimiters. safe="" makes quote() encode '/' as well.
encoded_user = quote(str(user), safe="")
encoded_password = quote(str(password), safe="")
pg_conn_str = (
    f'postgresql+psycopg2://{encoded_user}:{encoded_password}'
    f'@{host}:{port}/{database}'
)

# Create engine
engine = create_engine(pg_conn_str)

# Load the scraped standings into the target table. if_exists="replace"
# drops and recreates the table on every run, so re-running this cell does
# not append duplicate rows.
df.to_sql("web_scraped_standings", engine, if_exists="replace", index=False)

120