# How to Use This Notebook

1. Run **Cell 1** first.
   - This installs the required library (`google-colab-selenium`) for the current Colab session.
   - You only need to run this once per session; rerun it whenever the runtime restarts.

2. Run **Cell 2** next.
   - This is the program that loads the ESPN stats page, clicks **Show More** until all rows are visible, extracts the tables, and saves the results as a CSV file.

3. Find the output CSV in Colab:
   - On the **left-hand sidebar**, click the **Files** tab (folder icon).
   - If you don’t see it, expand the left sidebar until the Files tab appears.
   - You should see a CSV file created with a name like:
     `nba_player_stats_2025-2026.csv`

4. Download the CSV to your computer:
   - Click the **three dots (⋮)** beside the CSV file name.
   - Click **Download**.
   - The file will download to your local machine for your own use.

## Notes
- Wait until **Cell 2** prints its confirmation message (for example, `Saved nba_player_stats_2025-2026.csv with 318 rows`) before downloading.
- If you restart the runtime, you’ll need to rerun **Cell 1** before running **Cell 2** again.

In [1]:
%%capture
!pip install google-colab-selenium


In [2]:
import time
import pandas as pd
from io import StringIO

import google_colab_selenium as gs
from selenium.webdriver.common.by import By

# Initialize headless Chrome via helper
# NOTE(review): no options are passed here, so whether the browser is headless
# is decided by google_colab_selenium's defaults — confirm if it matters.
driver = gs.Chrome()

# Target URL
# end_year fills the /season/ segment of the URL and also builds the
# "<end_year-1>-<end_year>" span used in the CSV filename.
end_year = 2026
url = f"https://www.espn.com/nba/stats/player/_/season/{end_year}/seasontype/2"
filename = f"nba_player_stats_{end_year-1}-{end_year}.csv"

# Open page
driver.get(url)
# Fixed 3-second pause before touching the page. This is a plain delay, not a
# readiness check (presumably it gives the page time to finish rendering).
time.sleep(3)

# -------------------------
# Robust Show More Loader
# -------------------------
def click_all_show_more(driver, timeout=40):
    """Keep clicking the page's "Show More" link until it is gone.

    Each pass looks up the load-more links, scrolls the last one into the
    middle of the viewport, clicks it, and then pauses before the next pass.
    The loop stops when no link is found, when more than ``timeout`` seconds
    have passed since the call started, or when any step raises (the
    exception is printed, not re-raised).

    Args:
        driver: Selenium WebDriver used to query and click the page.
        timeout: Overall time budget in seconds, checked at the start of
            every pass.

    Returns:
        None.
    """
    link_xpath = (
        "//div[contains(@class,'loadMore')]"
        "//a[contains(@class,'loadMore__link')]"
    )
    began = time.time()

    while not (time.time() - began > timeout):
        try:
            links = driver.find_elements(By.XPATH, link_xpath)
            if not links:
                # Nothing left to expand.
                return

            target = links[-1]
            driver.execute_script(
                "arguments[0].scrollIntoView({block:'center'});", target
            )
            time.sleep(1)

            try:
                target.click()
            except Exception:
                # The native click failed; trigger the click from JavaScript.
                driver.execute_script("arguments[0].click();", target)

            time.sleep(2)
        except Exception as err:
            print("Show More loop ended:", err)
            return

    # Only reached when the time budget ran out before the link disappeared.
    print("Reached timeout waiting for all data to load.")


# Run the Show More loader
# Uses the function's default 40-second time budget.
click_all_show_more(driver)

# -------------------------
# Extract Tables
# -------------------------
# Snapshot the current HTML, then close the browser; everything below works
# on the captured string only.
html = driver.page_source
driver.quit()

# read_html returns one DataFrame per <table> it finds; the HTML string is
# wrapped in StringIO so pandas receives a file-like object.
tables = pd.read_html(StringIO(html))
if len(tables) < 2:
    raise RuntimeError(f"Expected >=2 tables, found {len(tables)}")

# NOTE(review): assumes the first table holds the rank/name columns and the
# second holds the stat columns for the same rows — confirm against the page.
players = tables[0]
stats = tables[1]

# Drop 'RK' rank column if present
if "RK" in players.columns:
    players = players.drop(columns=["RK"])

# Merge player and stat tables
# The two frames are placed side by side and paired purely by row position
# (both indexes are reset first). Nothing here checks that they have the same
# number of rows; with unequal lengths pandas pads the shorter one with NaN.
df = pd.concat(
    [
        players.reset_index(drop=True),
        stats.reset_index(drop=True)
    ],
    axis=1
)

# -------------------------
# Split Name into Name + Team
# -------------------------
# Team abbreviations that may be appended directly to the player name in the
# scraped "Name" cell.
TEAM_SLUGS = {
    "ATL", "BOS", "BKN", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GS",
    "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NO", "NY",
    "OKC", "ORL", "PHI", "PHX", "POR", "SAC", "SA", "TOR", "UTAH", "WSH"
}

# Longest slug first, ties alphabetical: a fixed lookup order so the result
# never depends on set iteration order.
_TEAM_SLUGS_BY_LENGTH = tuple(
    sorted(TEAM_SLUGS, key=lambda slug: (-len(slug), slug))
)


def _trailing_team(text):
    """Return the known team slug that ``text`` ends with, or None."""
    return next(
        (slug for slug in _TEAM_SLUGS_BY_LENGTH if text.endswith(slug)),
        None,
    )


def split_name_and_team(raw_name: str):
    """Split a combined "<player name><team>" cell into name and team.

    The value is converted with ``str()`` and stripped, then a trailing team
    slug from ``TEAM_SLUGS`` is removed. Cells that list several slugs
    separated by "/" (such as "Luka DoncicDAL/LAL") are handled too: every
    trailing "/"-separated slug is removed from the name and the slugs are
    returned joined by "/" in their original order.

    Args:
        raw_name: Cell content; any value is accepted and stringified.

    Returns:
        A ``(player_name, team)`` tuple. ``team`` is a single slug, several
        slugs joined by "/", or ``None`` when the text does not end with a
        known slug (in which case ``player_name`` is the stripped input).
    """
    raw_name = str(raw_name).strip()

    remainder = raw_name
    teams = []  # collected right-to-left, reversed before returning
    slug = _trailing_team(remainder)
    while slug is not None:
        teams.append(slug)
        remainder = remainder[:-len(slug)]
        if not remainder.endswith("/"):
            break
        # A "/" counts as a team separator only when another known slug sits
        # directly in front of it; otherwise it is left as part of the name.
        slug = _trailing_team(remainder[:-1])
        if slug is not None:
            remainder = remainder[:-1]

    if not teams:
        return raw_name, None

    teams.reverse()
    return remainder.strip(), "/".join(teams)


# split_name_and_team returns a (name, team) tuple for each row;
# .apply(pd.Series) expands those tuples into two columns, which overwrite
# "Name" and fill "Team".
df[["Name", "Team"]] = (
    df["Name"]
    .apply(split_name_and_team)
    .apply(pd.Series)
)

# -------------------------
# Reorder columns: Name, Team, then the rest
# -------------------------
# The remaining columns keep their current relative order.
cols = list(df.columns)
new_order = ["Name", "Team"] + [c for c in cols if c not in ("Name", "Team")]
df = df[new_order]

# -------------------------
# Save CSV
# -------------------------
# index=False keeps the DataFrame's row index out of the file. filename is a
# relative path, so the CSV lands in the current working directory.
df.to_csv(filename, index=False)
print(f"Saved {filename} with {len(df)} rows")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Saved nba_player_stats_2025-2026.csv with 318 rows
