In [None]:
#this script will scrape the data for every player for every game for the provided season. 

In [None]:
#this is likely the endpoint we will use:
from nba_api.stats.endpoints import leaguegamefinder
from sqlalchemy import create_engine
from requests.exceptions import ReadTimeout
import pandas as pd
import random
import time 
import json

#create our engine for pushing to sql database
from dotenv import load_dotenv
import os

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

engine = create_engine(DATABASE_URL)

#define seasons to gather data for 
seasons = ['2022-23', '2023-24', '2024-25']

for season in seasons:
    try:
        # Get all games for a season
        game_finder = leaguegamefinder.LeagueGameFinder(
            season_nullable=season,  # Format: YYYY-YY
            league_id_nullable='00',    # 00 is NBA
            season_type_nullable='Regular Season'  # or 'Playoffs'
        )
        
        # Convert to DataFrame
        games_df = game_finder.get_data_frames()[0]
        
        #make team ID the index, and sort the dataframe based on ID and gameDate
        games_df = games_df.set_index('TEAM_ID')
        games_df_sorted = games_df.sort_values(['TEAM_ID', 'GAME_DATE'], ascending=[True, False])
    
        #once we've created the dataframe, we want to push that to a SQL database
        filename = f"{season}_historic_game_data"
        games_df_sorted.to_sql(filename, engine, if_exists='replace', index=True)
        print(f"Created and stored dataframe for {season} season")
        print()
        time.sleep(round(random.uniform(3, 6), 1))

    #handle timeout/api rejection errors
    except (ReadTimeout, json.decoder.JSONDecodeError) as e:
        print(f"Error for {season}: {e} - retrying after 60 seconds")
        time.sleep(60)
        continue

print("Finished data gathering for specified seasons")

Created and stored dataframe for 2022-23 season

Created and stored dataframe for 2023-24 season

Created and stored dataframe for 2024-25 season

Finished data gathering for specified seasons


In [11]:
games_df_sorted

Unnamed: 0_level_0,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1610612737,22024,ATL,Atlanta Hawks,0022400656,2025-01-27,ATL @ MIN,L,241,92,34,...,0.667,14,37,51,24,10,3,21,20,-8.0
1610612737,22024,ATL,Atlanta Hawks,0022400639,2025-01-25,ATL vs. TOR,L,238,94,33,...,0.680,8,35,43,28,14,5,22,20,-23.0
1610612737,22024,ATL,Atlanta Hawks,0022400623,2025-01-23,ATL vs. TOR,L,240,119,42,...,0.733,9,27,36,30,11,2,14,21,-3.0
1610612737,22024,ATL,Atlanta Hawks,0022400612,2025-01-22,ATL vs. DET,L,241,104,39,...,0.769,15,32,47,25,14,3,13,14,-10.0
1610612737,22024,ATL,Atlanta Hawks,0022400602,2025-01-20,ATL @ NYK,L,239,110,40,...,0.773,16,25,41,23,10,6,23,21,-9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1610612766,22024,CHA,Charlotte Hornets,0022400132,2024-11-01,CHA vs. BOS,L,241,109,42,...,0.500,12,30,42,20,5,5,14,26,-15.0
1610612766,22024,CHA,Charlotte Hornets,0022400117,2024-10-30,CHA vs. TOR,W,239,138,49,...,0.800,14,24,38,31,9,8,15,24,5.0
1610612766,22024,CHA,Charlotte Hornets,0022400088,2024-10-26,CHA vs. MIA,L,241,106,39,...,0.714,15,31,46,20,6,5,17,24,-8.0
1610612766,22024,CHA,Charlotte Hornets,0022400079,2024-10-25,CHA @ ATL,L,241,120,37,...,0.806,22,31,53,22,7,4,17,28,-5.0


In [None]:
#Below is the iteration we used to create the script

In [1]:
#this is likely the endpoint we will use:
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

# Get all games for a season
game_finder = leaguegamefinder.LeagueGameFinder(
    season_nullable='2023-24',  # Format: YYYY-YY
    league_id_nullable='00',    # 00 is NBA
    season_type_nullable='Regular Season'  # or 'Playoffs'
)

# Convert to DataFrame
games_df = game_finder.get_data_frames()[0]
pd.set_option('display.max_rows', )

games_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612744,GSW,Golden State Warriors,0022301198,2024-04-14,GSW vs. UTA,W,239,123,...,0.826,12,30,42,35,10,6,9,14,7.0
1,22023,1610612747,LAL,Los Angeles Lakers,0022301195,2024-04-14,LAL @ NOP,W,241,124,...,0.944,7,32,39,32,12,3,14,13,16.0
2,22023,1610612756,PHX,Phoenix Suns,0022301194,2024-04-14,PHX @ MIN,W,244,125,...,0.833,13,19,32,28,13,3,12,24,19.0
3,22023,1610612749,MIL,Milwaukee Bucks,0022301191,2024-04-14,MIL @ ORL,L,238,88,...,0.913,7,27,34,16,10,4,17,18,-25.0
4,22023,1610612754,IND,Indiana Pacers,0022301188,2024-04-14,IND vs. ATL,W,239,157,...,0.800,12,36,48,41,10,6,14,24,42.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,22023,1610612764,WAS,Washington Wizards,0022300064,2023-10-25,WAS @ IND,L,238,120,...,0.793,11,34,45,28,6,9,14,11,-23.0
2456,22023,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24,LAL @ DEN,L,239,107,...,0.750,13,31,44,23,5,4,11,18,-12.0
2457,22023,1610612743,DEN,Denver Nuggets,0022300061,2023-10-24,DEN vs. LAL,W,240,119,...,0.750,9,33,42,29,9,6,11,15,12.0
2458,22023,1610612756,PHX,Phoenix Suns,0022300062,2023-10-24,PHX @ GSW,W,241,108,...,0.765,17,43,60,23,5,7,19,22,4.0


In [5]:
import pandas as pd

pd.set_option('display.max_rows', 2460)
games_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612744,GSW,Golden State Warriors,22301198,2024-04-14,GSW vs. UTA,W,239,123,...,0.826,12,30,42,35,10,6,9,14,7.0
1,22023,1610612747,LAL,Los Angeles Lakers,22301195,2024-04-14,LAL @ NOP,W,241,124,...,0.944,7,32,39,32,12,3,14,13,16.0
2,22023,1610612756,PHX,Phoenix Suns,22301194,2024-04-14,PHX @ MIN,W,244,125,...,0.833,13,19,32,28,13,3,12,24,19.0
3,22023,1610612749,MIL,Milwaukee Bucks,22301191,2024-04-14,MIL @ ORL,L,238,88,...,0.913,7,27,34,16,10,4,17,18,-25.0
4,22023,1610612754,IND,Indiana Pacers,22301188,2024-04-14,IND vs. ATL,W,239,157,...,0.8,12,36,48,41,10,6,14,24,42.0
5,22023,1610612741,CHI,Chicago Bulls,22301190,2024-04-14,CHI @ NYK,L,265,119,...,0.813,15,29,44,25,15,3,12,21,-1.0
6,22023,1610612742,DAL,Dallas Mavericks,22301196,2024-04-14,DAL @ OKC,L,241,86,...,0.824,12,29,41,19,8,2,14,15,-49.0
7,22023,1610612750,MIN,Minnesota Timberwolves,22301194,2024-04-14,MIN vs. PHX,L,241,106,...,0.806,12,24,36,22,7,5,24,20,-19.0
8,22023,1610612761,TOR,Toronto Raptors,22301189,2024-04-14,TOR @ MIA,L,240,103,...,0.857,13,28,41,23,6,3,19,21,-15.0
9,22023,1610612758,SAC,Sacramento Kings,22301200,2024-04-14,SAC vs. POR,W,241,121,...,0.815,13,38,51,29,11,6,14,19,39.0


In [15]:
games_df = games_df.set_index('TEAM_ID')


KeyError: "None of ['TEAM_ID'] are in the columns"

In [24]:
games_df.loc[1610612744]

Unnamed: 0_level_0,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1610612744,22023,GSW,Golden State Warriors,22301198,2024-04-14,GSW vs. UTA,W,239,123,45,...,0.826,12,30,42,35,10,6,9,14,7.0
1610612744,22023,GSW,Golden State Warriors,22301182,2024-04-12,GSW vs. NOP,L,240,109,40,...,0.762,11,42,53,27,6,5,16,15,-5.0
1610612744,22023,GSW,Golden State Warriors,22301169,2024-04-11,GSW @ POR,W,240,100,36,...,0.8,10,36,46,24,8,13,16,18,8.0
1610612744,22023,GSW,Golden State Warriors,22301155,2024-04-09,GSW @ LAL,W,241,134,47,...,0.667,4,38,42,37,3,12,15,17,14.0
1610612744,22023,GSW,Golden State Warriors,22301142,2024-04-07,GSW vs. UTA,W,241,118,49,...,1.0,11,36,47,32,9,8,13,19,8.0
1610612744,22023,GSW,Golden State Warriors,22301097,2024-04-05,GSW @ DAL,L,239,106,41,...,0.667,11,33,44,27,12,5,15,19,-2.0
1610612744,22023,GSW,Golden State Warriors,22301113,2024-04-04,GSW @ HOU,W,241,133,47,...,0.88,12,31,43,29,6,2,18,23,23.0
1610612744,22023,GSW,Golden State Warriors,22300589,2024-04-02,GSW vs. DAL,W,242,104,41,...,1.0,11,41,52,29,9,4,17,18,4.0
1610612744,22023,GSW,Golden State Warriors,22301085,2024-03-31,GSW @ SAS,W,241,117,45,...,0.846,12,29,41,34,15,3,15,14,4.0
1610612744,22023,GSW,Golden State Warriors,22301062,2024-03-29,GSW @ CHA,W,240,115,48,...,0.889,11,39,50,35,7,5,12,19,18.0


In [36]:
games_df_sorted = games_df.sort_values(['TEAM_ID', 'GAME_DATE'], ascending=[True, False])

In [38]:
games_df_sorted

Unnamed: 0_level_0,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1610612737,22023,ATL,Atlanta Hawks,22301188,2024-04-14,ATL @ IND,L,241,115,39,...,0.926,9,23,32,25,6,5,15,12,-42.0
1610612737,22023,ATL,Atlanta Hawks,22301178,2024-04-12,ATL @ MIN,L,240,106,40,...,0.68,9,31,40,23,4,1,14,25,-3.0
1610612737,22023,ATL,Atlanta Hawks,22301159,2024-04-10,ATL vs. CHA,L,240,114,43,...,0.625,7,31,38,35,7,2,16,20,-1.0
1610612737,22023,ATL,Atlanta Hawks,22301147,2024-04-09,ATL vs. MIA,L,292,111,45,...,0.786,17,42,59,28,13,2,15,23,-6.0
1610612737,22023,ATL,Atlanta Hawks,22301130,2024-04-06,ATL @ DEN,L,240,110,37,...,0.828,11,27,38,30,8,2,16,18,-32.0
1610612737,22023,ATL,Atlanta Hawks,22301124,2024-04-04,ATL @ DAL,L,239,95,34,...,0.667,11,45,56,21,7,2,21,22,-14.0
1610612737,22023,ATL,Atlanta Hawks,22301104,2024-04-03,ATL vs. DET,W,240,121,49,...,0.667,6,42,48,36,9,8,16,13,8.0
1610612737,22023,ATL,Atlanta Hawks,22301091,2024-04-01,ATL @ CHI,W,240,113,41,...,0.667,8,37,45,29,4,5,14,21,12.0
1610612737,22023,ATL,Atlanta Hawks,22301076,2024-03-30,ATL vs. MIL,L,239,113,42,...,0.833,9,30,39,24,5,3,11,26,-9.0
1610612737,22023,ATL,Atlanta Hawks,22301060,2024-03-28,ATL vs. BOS,W,265,123,51,...,0.6,17,36,53,24,5,5,11,17,1.0


In [None]:
#this is likely the endpoint we will use:
from nba_api.stats.endpoints import leaguegamefinder
from sqlalchemy import create_engine
from requests.exceptions import ReadTimeout
import pandas as pd
import random
import time 
import json

#create our engine for pushing to sql database
from dotenv import load_dotenv
import os

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

engine = create_engine(DATABASE_URL)

#define seasons to gather data for 
seasons = ['2022-23', '2023-24', '2024-25']

for season in seasons:
    try:
        # Get all games for a season
        game_finder = leaguegamefinder.LeagueGameFinder(
            season_nullable=season,  # Format: YYYY-YY
            league_id_nullable='00',    # 00 is NBA
            season_type_nullable='Regular Season'  # or 'Playoffs'
        )
        
        # Convert to DataFrame
        games_df = game_finder.get_data_frames()[0]
        
        #make team ID the index, and sort the dataframe based on ID and gameDate
        games_df = games_df.set_index('TEAM_ID')
        games_df_sorted = games_df.sort_values(['TEAM_ID', 'GAME_DATE'], ascending=[True, False])
    
        #once we've created the dataframe, we want to push that to a SQL database
        filename = f"{season}_historic_game_data"
        games_df_sorted.to_sql(filename, engine, if_exists='replace', index=False)
        print(f"Created and stored dataframe for {season} season")
        print()
        time.sleep(round(random.uniform(3, 6), 1))

    #handle timeout/api rejection errors
    except (ReadTimeout, json.decoder.JSONDecodeError) as e:
        print(f"Error for {season}: {e} - retrying after 60 seconds")
        time.sleep(60)
        continue

print("Finished data gathering for specified seasons")

Created and stored dataframe for 2022-23 season

Created and stored dataframe for 2023-24 season

Created and stored dataframe for 2024-25 season

Finished data gathering for specified seasons


In [51]:
#set display options ot max size 
pd.set_option('display.max_rows', 2460)
pd.set_option('display.max_rows', 55)

games_df_sorted

Unnamed: 0_level_0,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1610612737,22024,ATL,Atlanta Hawks,0022400639,2025-01-25,ATL vs. TOR,L,238,94,33,...,0.680,8,35,43,28,14,5,22,20,-23.0
1610612737,22024,ATL,Atlanta Hawks,0022400623,2025-01-23,ATL vs. TOR,L,240,119,42,...,0.733,9,27,36,30,11,2,14,21,-3.0
1610612737,22024,ATL,Atlanta Hawks,0022400612,2025-01-22,ATL vs. DET,L,241,104,39,...,0.769,15,32,47,25,14,3,13,14,-10.0
1610612737,22024,ATL,Atlanta Hawks,0022400602,2025-01-20,ATL @ NYK,L,239,110,40,...,0.773,16,25,41,23,10,6,23,21,-9.0
1610612737,22024,ATL,Atlanta Hawks,0022400587,2025-01-18,ATL @ BOS,W,265,119,44,...,0.786,13,42,55,27,9,10,17,17,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1610612766,22024,CHA,Charlotte Hornets,0022400132,2024-11-01,CHA vs. BOS,L,241,109,42,...,0.500,12,30,42,20,5,5,14,26,-15.0
1610612766,22024,CHA,Charlotte Hornets,0022400117,2024-10-30,CHA vs. TOR,W,239,138,49,...,0.800,14,24,38,31,9,8,15,24,5.0
1610612766,22024,CHA,Charlotte Hornets,0022400088,2024-10-26,CHA vs. MIA,L,241,106,39,...,0.714,15,31,46,20,6,5,17,24,-8.0
1610612766,22024,CHA,Charlotte Hornets,0022400079,2024-10-25,CHA @ ATL,L,241,120,37,...,0.806,22,31,53,22,7,4,17,28,-5.0
