# === Import Libraries ===

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import time # TIME PROCESSING
import os # LOADING & EXPORTING DATA
import datetime
from kaggle_secrets import UserSecretsClient

!pip install nba_api
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import teamgamelog, playergamelog, commonplayoffseries, commonteamroster

Collecting nba_api


  Downloading nba_api-1.10.0-py3-none-any.whl.metadata (5.8 kB)






Downloading nba_api-1.10.0-py3-none-any.whl (285 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/285.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m204.8/285.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:01[0m

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.3/285.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h

Installing collected packages: nba_api


Successfully installed nba_api-1.10.0


# === Set up Kaggle credentials ===

In [2]:
KAGGLE_DIR = '/root/.kaggle'
if not os.path.exists(KAGGLE_DIR):
    os.mkdir(KAGGLE_DIR)

user_secrets = UserSecretsClient()
secret_value_key = user_secrets.get_secret("kaggle_key")
secret_value_user = user_secrets.get_secret("kaggle_username")

with open('/root/.kaggle/kaggle.json', 'w') as fid:
    fid.writelines(f'{{"username":"{secret_value_user}","key":"{secret_value_key}"}}')

!chmod 600 /root/.kaggle/kaggle.json

# === Download and Save Static Data (Teams and Players) ===

In [3]:
dict_players = players.get_players()
df_players = pd.DataFrame(dict_players)
df_players.to_csv('/kaggle/working/NBA_PLAYERS.csv', index=False)

dict_teams = teams.get_teams()
df_teams = pd.DataFrame(dict_teams)
df_teams.to_csv('/kaggle/working/NBA_TEAMS.csv', index=False)

# === Load existing NBA games dataset ===

In [4]:
nba_games = r'/kaggle/input/nba-data/NBA_GAMES.csv'
df_games = pd.read_csv(nba_games)
# Convert GAME_DATE to datetime for easier comparison and filtering
df_games['GAME_DATE_REAL'] = pd.to_datetime(df_games['GAME_DATE'])
df_games.sample(5)

  df_games['GAME_DATE_REAL'] = pd.to_datetime(df_games['GAME_DATE'])


Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,GAME_DATE_REAL,DATE_ADDED
2000,1610612752,22400797,"FEB 21, 2025",NYK @ CLE,L,37.0,19.0,0.661,240,42,...,23,32,25,8,3,16,12,105,2025-02-21,2025-05-04
1956,1610612751,22400284,"NOV 25, 2024",BKN @ GSW,W,8.0,10.0,0.444,240,41,...,35,42,23,6,3,11,18,128,2024-11-25,2025-05-04
1284,1610612743,22400484,"JAN 04, 2025",DEN @ SAS,W,20.0,14.0,0.588,265,47,...,44,58,32,11,7,7,17,122,2025-01-04,2025-05-04
447,1610612752,42400304,"MAY 27, 2025",NYK @ IND,L,1.0,3.0,0.25,240,38,...,31,44,17,7,4,17,27,121,2025-05-27,2025-05-30
17,1610612748,42400104,"APR 28, 2025",MIA vs. CLE,L,0.0,4.0,0.0,240,32,...,30,45,17,4,2,19,18,83,2025-04-28,2025-06-24


# === Get the latest game date for each team ===

In [5]:
# Step 1: Get the latest game date for each team
list_team_max_date = df_games[['Team_ID','GAME_DATE_REAL']]\
                        .groupby('Team_ID')\
                        .max()\
                        .sort_values('GAME_DATE_REAL',ascending = False)\
                        .reset_index()
# Create Data for API
list_team_max_date['GAME_DATE_FORMAT'] = list_team_max_date['GAME_DATE_REAL'].dt.strftime("%m/%d/%Y")

# Show Data 
list_team_max_date.sample(5)

Unnamed: 0,Team_ID,GAME_DATE_REAL,GAME_DATE_FORMAT
14,1610612748,2025-04-28,04/28/2025
23,1610612756,2025-04-13,04/13/2025
20,1610612759,2025-04-13,04/13/2025
17,1610612758,2025-04-13,04/13/2025
13,1610612753,2025-04-29,04/29/2025


# === Fetch new playoff game logs per team ===

In [6]:
# Step 2: Download new game logs (e.g., Playoffs) after the latest known date per team
df_games_new = pd.DataFrame()

for idx, row in list_team_max_date.iterrows():
    team_id = row['Team_ID']
    team_game_max_dt = row['GAME_DATE_FORMAT']
    
    # Query new game logs for each team starting from the latest known date
    # Extract DataFrame from API response
    df_temp_team_game_new = teamgamelog.TeamGameLog(
        team_id = team_id
        , season='2024-25'
        , season_type_all_star = 'Playoffs' #'Regular Season|Playoffs'
        , date_from_nullable =  team_game_max_dt #'05/07/2025'
    ).get_data_frames()[0]

    if not df_temp_team_game_new.empty:
        # Append new game logs to the cumulative DataFrame
        df_games_new = pd.concat([df_temp_team_game_new,df_games_new], ignore_index=True)
        print('Done - ' ,df_teams[df_teams['id'] == team_id]['full_name'].values[0])

    time.sleep(15)

Done -  Oklahoma City Thunder


Done -  Indiana Pacers


Done -  New York Knicks


Done -  Minnesota Timberwolves


Done -  Denver Nuggets


Done -  Boston Celtics


Done -  Golden State Warriors


Done -  Cleveland Cavaliers


Done -  Houston Rockets


Done -  Los Angeles Clippers


Done -  Detroit Pistons


Done -  Los Angeles Lakers


Done -  Milwaukee Bucks


Done -  Orlando Magic


Done -  Miami Heat


Done -  Memphis Grizzlies


# Add date and merge with historical games

In [7]:
str_today_date = datetime.date.today().strftime('%Y-%m-%d')

# Step 3: Parse new GAME_DATE values into datetime
df_games_new['GAME_DATE_REAL'] = pd.to_datetime(df_games_new['GAME_DATE'])
df_games_new['DATE_ADDED'] = str_today_date


df_games_full = pd.concat([df_games_new,df_games], ignore_index= True).reset_index(drop=True)
df_games_full.to_csv('/kaggle/working/NBA_GAMES.csv', index=False)

  df_games_new['GAME_DATE_REAL'] = pd.to_datetime(df_games_new['GAME_DATE'])


# === Load existing player games data ===

In [8]:
# READ OLD DATA
PLAYER_GAMES_FILE = '/kaggle/input/nba-data/NBA_PLAYER_GAMES.csv'
df_player_games = pd.read_csv(PLAYER_GAMES_FILE)
#"Apr 28, 2025
df_player_games['GAME_DATE_REAL'] = pd.to_datetime(
    df_player_games['GAME_DATE'], format='%b %d, %Y'
)
display(df_player_games.sample())

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,GAME_DATE_REAL,DATE_ADDED
22787,22024,1630283,22401177,"Apr 11, 2025",NOP vs. MIA,L,21,2,6,0.333,...,1,1,0,0,2,5,-12,2,2025-04-11,2025-05-04


# === Get latest game date per player ===

In [9]:
df_player_games_max = df_player_games[['Player_ID','GAME_DATE_REAL']]\
                        .groupby('Player_ID')\
                        .max()\
                        .sort_values('GAME_DATE_REAL', ascending = False)\
                        .reset_index()

df_player_games_max['GAME_DATE_FORMAT'] = df_player_games_max['GAME_DATE_REAL'].dt.strftime("%m/%d/%Y")
df_player_games_max

Unnamed: 0,Player_ID,GAME_DATE_REAL,GAME_DATE_FORMAT
0,1642277,2025-06-22,06/22/2025
1,1630169,2025-06-22,06/22/2025
2,1628983,2025-06-22,06/22/2025
3,1641717,2025-06-22,06/22/2025
4,1628396,2025-06-22,06/22/2025
...,...,...,...
561,1642434,2024-11-06,11/06/2024
562,1630543,2024-11-01,11/01/2024
563,1630618,2024-11-01,11/01/2024
564,1641707,2024-10-28,10/28/2024


# === Get teams from playoff bracket and fetch player rosters ===

In [10]:
df_playoff = commonplayoffseries.CommonPlayoffSeries().get_data_frames()[0]

df_playoff_teams = pd.concat(
    [
        df_playoff['HOME_TEAM_ID']
        , df_playoff['VISITOR_TEAM_ID']

    ]
    , ignore_index = True
    )\
    .drop_duplicates()\
    .reset_index(drop=True)

df_playoff_players = pd.DataFrame()

for team in df_playoff_teams:
    temp_players = commonteamroster.CommonTeamRoster(team_id = team).get_data_frames()[0][['TeamID', 'PLAYER_ID']]
    df_playoff_players = pd.concat([temp_players,df_playoff_players])
    time.sleep(15)

# === Get new player game logs ===

In [11]:
df_new_player_df = pd.DataFrame()

for player_id in df_playoff_players['PLAYER_ID']:
    row = df_player_games_max[df_player_games_max['Player_ID'] == player_id]
    try:
        if row.empty:
            continue
        temp_player_id = row['Player_ID'].values[0]
        temp_player_GAME_DATE_FORMAT = row['GAME_DATE_FORMAT'].values[0]
            
        temp_player_name = df_players[df_players['id'] == temp_player_id]['full_name'].values[0]
    
        
        temp_player_df = playergamelog.PlayerGameLog(
                                    player_id=temp_player_id
                                       , season='2024-25'
                                       , season_type_all_star = 'Playoffs'
                                       , date_from_nullable = temp_player_GAME_DATE_FORMAT
                                   ).get_data_frames()[0]
    
        if not temp_player_df.empty:
            # Append new game logs to the cumulative DataFrame
            df_new_player_df = pd.concat([temp_player_df,df_new_player_df], ignore_index=True)
            print(f'Data Loaded for - {temp_player_name}')
        time.sleep(15)
    except:
        print('FAILED -----', row)

# Step 3: Parse new GAME_DATE values into datetime
df_new_player_df['GAME_DATE_REAL'] = pd.to_datetime(df_new_player_df['GAME_DATE'])
df_new_player_df['DATE_ADDED'] = str_today_date


df_player_games_full = pd.concat([df_new_player_df,df_player_games], ignore_index= True).reset_index(drop=True)
df_player_games_full.to_csv('/kaggle/working/NBA_PLAYER_GAMES.csv', index=False)

Data Loaded for - James Harden


Data Loaded for - Kawhi Leonard


Data Loaded for - Kris Dunn


Data Loaded for - Bogdan Bogdanović


Data Loaded for - Jordan Miller


Data Loaded for - Cam Christie


Data Loaded for - Drew Eubanks


Data Loaded for - Kobe Brown


Data Loaded for - Norman Powell


Data Loaded for - Ben Simmons


Data Loaded for - Nicolas Batum


Data Loaded for - Ivica Zubac


Data Loaded for - Derrick Jones Jr.


Data Loaded for - Patty Mills


Data Loaded for - Christian Braun


Data Loaded for - Michael Porter Jr.


Data Loaded for - Julian Strawther


Data Loaded for - Russell Westbrook


Data Loaded for - Hunter Tyson


Data Loaded for - DeAndre Jordan


Data Loaded for - Peyton Watson


Data Loaded for - Nikola Jokić


Data Loaded for - Zeke Nnaji


Data Loaded for - Jalen Pickett


Data Loaded for - Jamal Murray


Data Loaded for - Vlatko Čančar


Data Loaded for - Aaron Gordon


Data Loaded for - Terrence Shannon Jr.


Data Loaded for - Donte DiVincenzo


Data Loaded for - Jaden McDaniels


Data Loaded for - Rob Dillingham


Data Loaded for - Anthony Edwards


Data Loaded for - Josh Minott


Data Loaded for - Nickeil Alexander-Walker


Data Loaded for - Mike Conley


Data Loaded for - Naz Reid


Data Loaded for - Jaylen Clark


Data Loaded for - Rudy Gobert


Data Loaded for - Julius Randle


Data Loaded for - Leonard Miller


Data Loaded for - Luka Garza


Data Loaded for - Jarred Vanderbilt


Data Loaded for - Dalton Knecht


Data Loaded for - Gabe Vincent


Data Loaded for - Bronny James


Data Loaded for - Jaxson Hayes


Data Loaded for - Maxi Kleber


Data Loaded for - Austin Reaves


Data Loaded for - Dorian Finney-Smith


Data Loaded for - Shake Milton


Data Loaded for - LeBron James


Data Loaded for - Alex Len


Data Loaded for - Rui Hachimura


Data Loaded for - Jordan Goodwin


Data Loaded for - Luka Dončić


Data Loaded for - Gary Payton II


Data Loaded for - Jonathan Kuminga


Data Loaded for - Brandin Podziemski


Data Loaded for - Moses Moody


Data Loaded for - Kevon Looney


Data Loaded for - Buddy Hield


Data Loaded for - Jimmy Butler III


Data Loaded for - Braxton Key


Data Loaded for - Gui Santos


Data Loaded for - Quinten Post


Data Loaded for - Draymond Green


Data Loaded for - Stephen Curry


Data Loaded for - Kevin Knox II


Data Loaded for - Trayce Jackson-Davis


Data Loaded for - Pat Spencer


Data Loaded for - Aaron Holiday


Data Loaded for - Amen Thompson


Data Loaded for - Jock Landale


Data Loaded for - Jalen Green


Data Loaded for - Fred VanVleet


Data Loaded for - Cam Whitmore


Data Loaded for - Dillon Brooks


Data Loaded for - Jabari Smith Jr.


Data Loaded for - Steven Adams


Data Loaded for - Reed Sheppard


Data Loaded for - Tari Eason


Data Loaded for - Nate Williams


Data Loaded for - Alperen Sengun


Data Loaded for - Jeff Green


Data Loaded for - Scotty Pippen Jr.


Data Loaded for - Kentavious Caldwell-Pope


Data Loaded for - Vince Williams Jr.


Data Loaded for - Santi Aldama


Data Loaded for - Lamar Stevens


Data Loaded for - Luke Kennard


Data Loaded for - Ja Morant


Data Loaded for - Jaren Jackson Jr.


Data Loaded for - Zach Edey


Data Loaded for - Jay Huff


Data Loaded for - Marvin Bagley III


Data Loaded for - John Konchar


Data Loaded for - Cole Anthony


Data Loaded for - Shai Gilgeous-Alexander


Data Loaded for - Dillon Jones


Data Loaded for - Luguentz Dort


Data Loaded for - Jaylin Williams


Data Loaded for - Chet Holmgren


Data Loaded for - Jalen Williams


Data Loaded for - Alex Caruso


Data Loaded for - Isaiah Joe


Data Loaded for - Ousmane Dieng


Data Loaded for - Aaron Wiggins


Data Loaded for - Cason Wallace


Data Loaded for - Ajay Mitchell


Data Loaded for - Kenrich Williams


Data Loaded for - Isaiah Hartenstein


Data Loaded for - Damian Lillard


Data Loaded for - Jericho Sims


Data Loaded for - Kevin Porter Jr.


Data Loaded for - Gary Trent Jr.


Data Loaded for - Chris Livingston


Data Loaded for - Bobby Portis


Data Loaded for - Brook Lopez


Data Loaded for - Taurean Prince


Data Loaded for - Ryan Rollins


Data Loaded for - Kyle Kuzma


Data Loaded for - AJ Green


Data Loaded for - Pat Connaughton


Data Loaded for - Giannis Antetokounmpo


Data Loaded for - Andre Jackson Jr.


Data Loaded for - Tyrese Haliburton


Data Loaded for - Bennedict Mathurin


Data Loaded for - Obi Toppin


Data Loaded for - Andrew Nembhard


Data Loaded for - Thomas Bryant


Data Loaded for - Jarace Walker


Data Loaded for - T.J. McConnell


Data Loaded for - Johnny Furphy


Data Loaded for - Tony Bradley


Data Loaded for - James Johnson


Data Loaded for - Aaron Nesmith


Data Loaded for - Ben Sheppard


Data Loaded for - Myles Turner


Data Loaded for - Pascal Siakam


Data Loaded for - Jalen Duren


Data Loaded for - Ronald Holland II


Data Loaded for - Cade Cunningham


Data Loaded for - Malik Beasley


Data Loaded for - Paul Reed


Data Loaded for - Tim Hardaway Jr.


Data Loaded for - Ausar Thompson


Data Loaded for - Tobias Harris


Data Loaded for - Dennis Schröder


Data Loaded for - Isaiah Stewart


Data Loaded for - Delon Wright


Data Loaded for - P.J. Tucker


Data Loaded for - Cameron Payne


Data Loaded for - Miles McBride


Data Loaded for - Josh Hart


Data Loaded for - Pacome Dadiet


Data Loaded for - Precious Achiuwa


Data Loaded for - OG Anunoby


Data Loaded for - Jalen Brunson


Data Loaded for - Tyler Kolek


Data Loaded for - Mitchell Robinson


Data Loaded for - Mikal Bridges


Data Loaded for - Karl-Anthony Towns


Data Loaded for - Landry Shamet


Data Loaded for - Ariel Hukporti


Data Loaded for - Anthony Black


Data Loaded for - Jonathan Isaac


Data Loaded for - Caleb Houstan


Data Loaded for - Desmond Bane


Data Loaded for - Paolo Banchero


Data Loaded for - Cory Joseph


Data Loaded for - Jett Howard


Data Loaded for - Gary Harris


Data Loaded for - Franz Wagner


Data Loaded for - Tristan da Silva


Data Loaded for - Wendell Carter Jr.


Data Loaded for - Goga Bitadze


Data Loaded for - Jayson Tatum


Data Loaded for - Jrue Holiday


Data Loaded for - Jaylen Brown


Data Loaded for - Kristaps Porziņģis


Data Loaded for - Derrick White


Data Loaded for - Payton Pritchard


Data Loaded for - Torrey Craig


Data Loaded for - JD Davison


Data Loaded for - Xavier Tillman


Data Loaded for - Jordan Walsh


Data Loaded for - Sam Hauser


Data Loaded for - Luke Kornet


Data Loaded for - Al Horford


Data Loaded for - Baylor Scheierman


Data Loaded for - Neemias Queta


Data Loaded for - Nikola Jović


Data Loaded for - Kel'el Ware


Data Loaded for - Pelle Larsson


Data Loaded for - Jaime Jaquez Jr.


Data Loaded for - Bam Adebayo


Data Loaded for - Tyler Herro


Data Loaded for - Keshad Johnson


Data Loaded for - Alec Burks


Data Loaded for - Kyle Anderson


Data Loaded for - Andrew Wiggins


Data Loaded for - Haywood Highsmith


Data Loaded for - Davion Mitchell


Data Loaded for - Duncan Robinson


Data Loaded for - Max Strus


Data Loaded for - Ty Jerome


Data Loaded for - Evan Mobley


Data Loaded for - Sam Merrill


Data Loaded for - Javonte Green


Data Loaded for - Craig Porter Jr.


Data Loaded for - Darius Garland


Data Loaded for - De'Andre Hunter


Data Loaded for - Tristan Thompson


Data Loaded for - Chuma Okeke


Data Loaded for - Jaylon Tyson


Data Loaded for - Jarrett Allen


Data Loaded for - Dean Wade


Data Loaded for - Isaac Okoro


FAILED -----     Player_ID GAME_DATE_REAL GAME_DATE_FORMAT
97    1628378     2025-05-13       05/13/2025


  df_new_player_df['GAME_DATE_REAL'] = pd.to_datetime(df_new_player_df['GAME_DATE'])


# === Create metadata for Kaggle Dataset Versioning ===

In [None]:
with open('/kaggle/working/dataset-metadata.json', 'w') as json_fid:
    json_fid.write('{\n  "title": "Update NBA_DATA",\n  "id": "salikhussaini49/nba-data",\n  "licenses": [{"name": "CC0-1.0"}]}')

!kaggle datasets version -m new