In [None]:
!pip install git+https://github.com/swar/nba_api.git

from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2
from nba_api.stats.static import players, teams
import pandas as pd
import time

# Step 1: Look up Steph Curry's player ID and the Warriors' team ID
# from nba_api's static player/team lists.
steph = [p for p in players.get_players() if p['full_name'] == 'Stephen Curry'][0]
warriors = [t for t in teams.get_teams() if t['abbreviation'] == 'GSW'][0]

steph_id = steph['id']
warriors_id = warriors['id']

# Step 2: Define seasons as "YYYY-YY" strings; range(2023, 2025) yields
# "2023-24" and "2024-25".
seasons = [f"{year}-{str(year+1)[-2:]}" for year in range(2023, 2025)]

# Pause applied after every box-score request, whether it succeeded or failed.
REQUEST_DELAY_SECONDS = 1.2

# Step 3: Initialize storage
all_box_scores = []  # one pd.Series per game (Curry's line or a placeholder)
all_games = []       # one games DataFrame per season

# Step 4: Loop over seasons
for season in seasons:
    try:
        # Get all GSW regular season games
        gamefinder = leaguegamefinder.LeagueGameFinder(
            team_id_nullable=warriors_id,
            season_nullable=season,
            season_type_nullable='Regular Season'
        )
        games = gamefinder.get_data_frames()[0]
        # .copy() makes the column subset an independent frame, so adding
        # SEASON below does not trigger pandas' SettingWithCopyWarning.
        games = games[['GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL']].copy()
        games['SEASON'] = season
        all_games.append(games)

        # Fetch box score for each game
        for _idx, row in games.iterrows():
            game_id = row['GAME_ID']
            try:
                boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
                # The first frame returned by the endpoint is filtered down to Curry's row.
                box_df = boxscore.get_data_frames()[0]
                curry_stats = box_df[box_df['PLAYER_ID'] == steph_id]

                if not curry_stats.empty:
                    player_row = curry_stats.iloc[0].copy()
                else:
                    # Curry has no row in this box score: record a zero-stat
                    # placeholder. Only the keys below are set; every other
                    # box-score column is missing (NaN) for these rows.
                    player_row = pd.Series({
                        'PLAYER_ID': steph_id,
                        'PLAYER_NAME': 'Stephen Curry',
                        'MIN': 0,
                        'PTS': 0,
                        'REB': 0,
                        'AST': 0,
                        'GAME_ID': game_id
                    })

                # Add metadata from the team game log
                player_row['SEASON'] = season
                player_row['GAME_DATE'] = row['GAME_DATE']
                player_row['MATCHUP'] = row['MATCHUP']
                player_row['WL'] = row['WL']

                all_box_scores.append(player_row)
            except Exception as e:
                # Best effort: report the failed game and move on to the next one.
                print(f"Box score fetch failed for game {game_id}: {e}")
            finally:
                # Throttle after failures too, so a failing request is not
                # immediately followed by another one.
                time.sleep(REQUEST_DELAY_SECONDS)

    except Exception as e:
        # Best effort: a failed season lookup is reported and skipped.
        print(f"Error processing season {season}: {e}")

# Final DataFrame with all data. Each Series carries its own name (the row
# label from its box score, or none for placeholders), which would otherwise
# become a meaningless mixed index; replace it with a clean 0..n-1 range.
final_df = pd.DataFrame(all_box_scores).reset_index(drop=True)
final_df.to_csv("steph_all_boxscores.csv", index=False)

print(f"Total games collected: {len(final_df)}")

Collecting git+https://github.com/swar/nba_api.git
  Cloning https://github.com/swar/nba_api.git to /tmp/pip-req-build-pkrmf52d
  Running command git clone --filter=blob:none --quiet https://github.com/swar/nba_api.git /tmp/pip-req-build-pkrmf52d
  Resolved https://github.com/swar/nba_api.git to commit 66d9dafbb675252e57e4c69ff22c1d95c86a6360
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: nba_api
  Building wheel for nba_api (pyproject.toml) ... [?25l[?25hdone
  Created wheel for nba_api: filename=nba_api-1.9.0-py3-none-any.whl size=285293 sha256=5aea579dbbc50ff3b85feb19a7747c28df637f0a64d027664f41b695c94dea50
  Stored in directory: /tmp/pip-ephem-wheel-cache-kh4mehxi/wheels/b9/77/54/a5cdb7fda77554a348b84d0cbb068fe434780e0c04085337b4
Successfully built nba_api
Installing collected packages: nba_api
Successfully installed n

In [None]:
# Display the per-game frame built by the fetch cell above (rich notebook
# repr; long frames are shown truncated).
final_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,MIN,PTS,REB,AST,GAME_ID,SEASON,GAME_DATE,MATCHUP,...,FTM,FTA,FT_PCT,OREB,DREB,STL,BLK,TO,PF,PLUS_MINUS
Unnamed 0,201939,Stephen Curry,0,0.0,0.0,0.0,0022301198,2023-24,2024-04-14,GSW vs. UTA,...,,,,,,,,,,
17,201939,Stephen Curry,32:24,33.0,4.0,5.0,0022301182,2023-24,2024-04-12,GSW vs. NOP,...,2.0,2.0,1.00,0.0,4.0,1.0,0.0,7.0,2.0,-8.0
4,201939,Stephen Curry,36:22,22.0,7.0,8.0,0022301169,2023-24,2024-04-11,GSW @ POR,...,1.0,1.0,1.00,0.0,7.0,1.0,2.0,2.0,0.0,7.0
4,201939,Stephen Curry,32:19,23.0,7.0,8.0,0022301155,2023-24,2024-04-09,GSW @ LAL,...,3.0,4.0,0.75,0.0,7.0,1.0,2.0,1.0,2.0,13.0
Unnamed 1,201939,Stephen Curry,0,0.0,0.0,0.0,0022301142,2023-24,2024-04-07,GSW vs. UTA,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Unnamed 18,201939,Stephen Curry,0,0.0,0.0,0.0,0022400126,2024-25,2024-10-30,GSW vs. NOP,...,,,,,,,,,,
Unnamed 19,201939,Stephen Curry,0,0.0,0.0,0.0,0022400116,2024-25,2024-10-29,GSW vs. NOP,...,,,,,,,,,,
17,201939,Stephen Curry,26:42,18.0,4.0,6.0,0022400101,2024-25,2024-10-27,GSW vs. LAC,...,2.0,2.0,1.00,0.0,4.0,2.0,1.0,6.0,1.0,2.0
4,201939,Stephen Curry,27:25,20.0,3.0,4.0,0022400084,2024-25,2024-10-25,GSW @ UTA,...,2.0,2.0,1.00,0.0,3.0,2.0,0.0,3.0,3.0,22.0


In [None]:
# Count rows per TEAM_ID within each season. The placeholder rows created by
# the fetch cell have no TEAM_ID value, and value_counts() skips missing
# values by default, so those rows are not counted here.
(
    final_df
    .groupby('SEASON')['TEAM_ID']
    .value_counts()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
SEASON,TEAM_ID,Unnamed: 2_level_1
2023-24,1610613000.0,74
2024-25,1610613000.0,70


In [None]:
# Write the current final_df to CSV without the index column.
final_df.to_csv("steph_present.csv", index=False)

In [None]:
# Count rows per TEAM_ID within each season of `df`.
# NOTE(review): `df` is not assigned in any cell visible in this notebook, so
# this cell raises NameError on a fresh kernel. Presumably it held box scores
# from an earlier run of the fetch cell with a different season range —
# confirm and load/define it explicitly.
df.groupby('SEASON')['TEAM_ID'].value_counts()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
SEASON,TEAM_ID,Unnamed: 2_level_1
2009-10,1610613000.0,82
2010-11,1610613000.0,74
2011-12,1610613000.0,35
2012-13,1610613000.0,81
2013-14,1610613000.0,80
2014-15,1610613000.0,82
2015-16,1610613000.0,82


In [None]:
# Write final_df to CSV without the index column.
# NOTE(review): the fetch cell visible in this notebook builds final_df for
# seasons 2023-24 and 2024-25 only, while this file name suggests seasons up
# to 2022-23. Presumably this was written after running the fetch cell with a
# different season range — confirm before re-running, or this file will be
# overwritten with the wrong seasons.
final_df.to_csv("steph_till_22-23.csv", index=False)

In [None]:
import pandas as pd

def _label_conference(table, conference):
    """Return a copy of ``table`` with its first column named 'Team' and a 'Conference' column added."""
    labeled = table.copy()
    # Assign a fresh column list instead of writing into ``columns.values``:
    # an Index is meant to be immutable, and mutating its backing array in
    # place is not a supported way to rename a column.
    labeled.columns = ['Team', *labeled.columns[1:]]
    labeled['Conference'] = conference
    return labeled


def get_standings(season_end_year):
    """Scrape one season's conference standings from basketball-reference.com.

    The first two tables parsed from the season's standings page are treated
    as the East and West tables, in that order. Rows are returned as parsed;
    nothing is filtered here, so non-team rows present in the page's tables
    (e.g. the division header rows seen for older seasons) are kept.

    Parameters
    ----------
    season_end_year : int
        Calendar year in which the season ends (e.g. 2024 for "2023-24").

    Returns
    -------
    pandas.DataFrame
        Columns 'Team', 'W', 'L', 'W/L%', 'Conference', 'Season', with East
        rows first, then West rows, and a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If fewer than two tables are parsed from the page.
    KeyError
        If an expected column is missing from the parsed tables.
    Any exception raised by ``pd.read_html`` propagates unchanged.
    """
    url = f"https://www.basketball-reference.com/leagues/NBA_{season_end_year}_standings.html"
    tables = pd.read_html(url)

    east = _label_conference(tables[0], 'East')
    west = _label_conference(tables[1], 'West')

    df = pd.concat([east, west], ignore_index=True)
    # Season label in "YYYY-YY" form, e.g. 2024 -> "2023-24".
    df['Season'] = f"{season_end_year - 1}-{str(season_end_year)[-2:]}"
    return df[['Team', 'W', 'L', 'W/L%', 'Conference', 'Season']]

# Scrape standings for every season ending in 2010 through 2025
# (i.e. 2009-10 through 2024-25). A season that fails to scrape is
# reported and skipped rather than aborting the whole run.
all_standings = []
for year in range(2010, 2026):
    print(f"Scraping {year}...")
    try:
        season_df = get_standings(year)
    except Exception as e:
        print(f"Failed for {year}: {e}")
    else:
        all_standings.append(season_df)

# Stack the per-season frames into a single table with a fresh index.
df_standings = pd.concat(all_standings, ignore_index=True)

Scraping 2010...
Scraping 2011...
Scraping 2012...
Scraping 2013...
Scraping 2014...
Scraping 2015...
Scraping 2016...
Scraping 2017...
Scraping 2018...
Scraping 2019...
Scraping 2020...
Scraping 2021...
Scraping 2022...
Scraping 2023...
Scraping 2024...
Scraping 2025...


In [None]:
# Display the combined standings table (rich notebook repr; long frames are
# shown truncated).
df_standings

Unnamed: 0,Team,W,L,W/L%,Conference,Season
0,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,East,2009-10
1,Boston Celtics*,50,32,.610,East,2009-10
2,Toronto Raptors,40,42,.488,East,2009-10
3,New York Knicks,29,53,.354,East,2009-10
4,Philadelphia 76ers,27,55,.329,East,2009-10
...,...,...,...,...,...,...
511,Phoenix Suns (11),36,46,0.439,West,2024-25
512,Portland Trail Blazers (12),36,46,0.439,West,2024-25
513,San Antonio Spurs (13),34,48,0.415,West,2024-25
514,New Orleans Pelicans (14),21,61,0.256,West,2024-25


In [None]:
# Remove the division header rows that were parsed as if they were teams:
# keep only rows whose 'Team' value is not one of the six division labels.
df_standings = df_standings[
    ~df_standings['Team'].isin([
        'Atlantic Division',
        'Pacific Division',
        'Central Division',
        'Southeast Division',
        'Northwest Division',
        'Southwest Division',
    ])
]

In [None]:
# Sanity check: number of rows per conference within each season.
(
    df_standings
    .groupby('Season')['Conference']
    .value_counts()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
Season,Conference,Unnamed: 2_level_1
2009-10,East,15
2009-10,West,15
2010-11,East,15
2010-11,West,15
2011-12,East,15
2011-12,West,15
2012-13,East,15
2012-13,West,15
2013-14,East,15
2013-14,West,15


In [None]:
# Write the current df_standings to CSV without the index column.
df_standings.to_csv("FINAL_nba_standings.csv", index=False)