In [None]:
#Optional: suppress warnings (the original motivation was a NumPy "version out of range" warning)
#NOTE(review): these filters are installed before the remaining imports run -- presumably so that
#warnings emitted while importing are hidden too; keep this ordering unless confirmed otherwise.
import warnings
#Warning is the base class of every warning category, so this filter hides ALL warnings in the notebook
warnings.filterwarnings("ignore", category=Warning)
#FutureWarning is a subclass of Warning, so it is already covered by the filter above;
#this line is redundant while that filter is in place
warnings.simplefilter(action='ignore', category=FutureWarning)
#Libraries for Data
import json
import requests 
from requests.exceptions import ReadTimeout
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#Specific NBA Libraries
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamestimatedmetrics

In [None]:
#Help with API Timeouts
from nba_api.stats.endpoints import commonplayerinfo
#Custom HTTP request headers. This dict is passed as `headers=` to the PlayerCareerStats
#request in the career-stats download loop below.
#NOTE(review): the dict is an empty placeholder until it is filled in -- confirm how nba_api
#treats an empty headers dict before relying on it.
headers = {
    #Put your own request headers here as 'Header-Name': 'value' pairs
}

In [None]:
#Display setting: None removes the column limit, so every column of a DataFrame is shown
pd.options.display.max_columns = None
#To return to the pandas default column limit, run: pd.reset_option('display.max_columns')

In [None]:
#Get all teams in NBA
#The static module is imported here under an alias because the last line of this cell rebinds
#the name `teams` to a DataFrame. Calling `teams.get_teams()` directly would therefore fail
#with an AttributeError as soon as this cell is executed a second time.
from nba_api.stats.static import teams as nba_static_teams
nba_teams = nba_static_teams.get_teams()
#`teams` stays the DataFrame name because the cells below use it that way
teams = pd.DataFrame(nba_teams)

In [None]:
#Show the teams DataFrame built in the previous cell as this cell's output
teams

In [None]:
#Get all players in every teams' roster
#One CommonTeamRoster request is made per team id; the first data frame of each response
#is collected and all of them are stacked into a single table at the end.
rosters = []
#The loop variable is named `team_id` rather than `id` so the builtin id() is not
#overwritten for the rest of the kernel session.
for team_id in teams['id']:
    roster = commonteamroster.CommonTeamRoster(team_id = team_id)
    rosters.append(roster.get_data_frames()[0])
    #Pause between requests
    time.sleep(1)
all_rosters = pd.concat(rosters, ignore_index=True)

In [None]:
#Download the career stats table of every rostered player, keyed by player id.
#Players whose request fails for any reason are collected in `failed` for a later retry.
playerCareerStats, failed = {}, []
for pid in all_rosters['PLAYER_ID']:
    try:
        #Request and table extraction both stay inside the try, so an error in either marks the player as failed
        career_df = playercareerstats.PlayerCareerStats(headers = headers, player_id = pid).get_data_frames()[0]
    except ReadTimeout:
        print(f"Timed out, appended {pid}")
        failed.append(pid)
    except Exception as err:
        print(f"Other error for {pid}: {err}")
        failed.append(pid)
    else:
        playerCareerStats[pid] = career_df
    #Pause between requests, whether or not this one succeeded
    time.sleep(1)

In [None]:
#Retry every failed request, round by round, until the failed list is empty
#or MAX_RETRY_ROUNDS rounds have been used up.
#The cap keeps the cell from looping forever when a player fails on every attempt.
MAX_RETRY_ROUNDS = 5
retry_round = 0
while len(failed) > 0 and retry_round < MAX_RETRY_ROUNDS:
    retry_round += 1
    #Work on a snapshot so ids that fail again can be appended to `failed` for the next round
    current = failed.copy()
    failed.clear()
    for pid in current:
        try:
            #Same request as the first attempt, including the custom headers
            career = playercareerstats.PlayerCareerStats(player_id = pid, headers = headers)
            playerCareerStats[pid] = career.get_data_frames()[0]
        except ReadTimeout:
            print(f"Timed out, appended {pid}")
            failed.append(pid)
        except Exception as e:
            print(f"Other error for {pid}: {e}")
            failed.append(pid)
        #Pause between requests
        time.sleep(1)
#Anything still listed here never succeeded; report it instead of retrying indefinitely
if failed:
    print(f"Giving up on {len(failed)} player(s) after {retry_round} retry round(s): {failed}")

In [None]:
#Add team name and abbreviation to rosters df
#Build the lookup table with a non-mutating rename: an in-place rename on a column slice
#is what triggers pandas' SettingWithCopyWarning.
teams_short = teams[['id', 'full_name', 'abbreviation']].rename(columns={"id": "TeamID"})
#Do SQL type join
#Team columns left over from an earlier run of this cell are dropped first, so re-running
#the cell does not produce duplicated full_name_x / full_name_y style columns.
all_rosters = pd.merge(
    all_rosters.drop(columns=['full_name', 'abbreviation'], errors='ignore'),
    teams_short,
    on="TeamID",
    how="left",
)

In [None]:
#Snapshot the roster table to a CSV file for record keeping
all_rosters.to_csv(path_or_buf='current_rosters.csv')

In [None]:
#Stack every player's career table into one big data frame
#The per-player frames are taken in the dict's insertion order
statsList = list(playerCareerStats.values())
all_player_stats = pd.concat(statsList, ignore_index = True)

In [None]:
#Add player name to career data
#Keep one name row per PLAYER_ID so the left join cannot multiply stat rows
player_names = all_rosters[['PLAYER', 'PLAYER_ID']].drop_duplicates(subset='PLAYER_ID')
#A PLAYER column left over from an earlier run of this cell is dropped first, so re-running
#the cell does not produce PLAYER_x / PLAYER_y columns.
all_player_stats = pd.merge(
    all_player_stats.drop(columns=['PLAYER'], errors='ignore'),
    player_names,
    on="PLAYER_ID",
    how="left",
)

In [None]:
#Split the career data by season: the three past seasons go into one frame, the current season into another
season_ids = all_player_stats['SEASON_ID']
is_past_season = season_ids.isin(['2022-23','2023-24','2024-25'])
is_current_season = season_ids.isin(['2025-26'])
all_player_stats_2022_25 = all_player_stats.loc[is_past_season]
all_player_stats_current = all_player_stats.loc[is_current_season]

In [None]:
#Write both player-stat tables to their own CSV files for record keeping
for csv_name, stats_frame in (
    ('player_stats_2022-25.csv', all_player_stats_2022_25),
    ('player_stats_current.csv', all_player_stats_current),
):
    stats_frame.to_csv(csv_name)

In [None]:
#Team Data for the last three seasons
def _fetch_team_metrics(season):
    """Return the first TeamEstimatedMetrics data frame for `season`, tagged with a SEASON column.

    Parameters
    ----------
    season : str
        Season label passed to the endpoint, e.g. '2022-23'.

    Returns
    -------
    pandas.DataFrame
        The first data frame of the response with an added 'SEASON' column set to `season`.
    """
    metrics = teamestimatedmetrics.TeamEstimatedMetrics(season = season).get_data_frames()[0]
    #Record which season each row belongs to, so rows stay distinguishable after concatenation
    metrics['SEASON'] = season
    return metrics

seasons = []
for season_label in ['2022-23', '2023-24', '2024-25']:
    seasons.append(_fetch_team_metrics(season_label))
    #Pause between requests, as the other request loops in this notebook do
    time.sleep(1)
#Keep the per-season names available for any later cell that refers to them
team_metrics_2022, team_metrics_2023, team_metrics_2024 = seasons
#Aggregate
last_three_seasons_team = pd.concat(seasons, ignore_index = True)

In [None]:
#Team Data for current season
#No season argument is passed, so the endpoint's own default season is used.
#NOTE(review): confirm that this default matches the '2025-26' season used for the player data.
current_metrics_frames = teamestimatedmetrics.TeamEstimatedMetrics().get_data_frames()
team_metrics_current = current_metrics_frames[0]

In [None]:
#Write both team-metric tables to their own CSV files for record keeping
for csv_name, metrics_frame in (
    ('team_metrics_2022-25.csv', last_three_seasons_team),
    ('team_metrics_current.csv', team_metrics_current),
):
    metrics_frame.to_csv(csv_name)