### Import

In [4]:
import os
import random
import time
import pandas as pd
from tqdm import tqdm
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players

# Get all player

In [5]:
active_player = players.get_active_players()

# all_player = players.get_players()
# print(all_player)

# Dictionary to store player id with full name as key
player_ids_by_name = {player['full_name']: player['id'] for player in active_player}
# print(player_ids_by_name)
print(f'Amount of players: {len(player_ids_by_name)}')

Amount of players: 544


### Access all player stats 

In [6]:
# Initialize list to hold each player's career DataFrame
player_df = []

# Limit retries to a maximum of 5 attempts per player
max_attempts = 5 

# Error counter
error_count = 0

# Loop over the player IDs
for full_name, player_id in tqdm(player_ids_by_name.items(), desc="Fetching Player Stats", unit="player"):
    # Debugging output: show which player is being processed
    # print(f"Fetching data for {full_name} (ID: {player_id})")

    success = False
    attempts = 0

    while not success and attempts < max_attempts:    
        try:
            career = playercareerstats.PlayerCareerStats(player_id=player_id)
            career_df = career.get_data_frames()[0]  
            # Add the player name as a column
            career_df['PlayerName'] = full_name  
            player_df.append(career_df)

            # Random waiting time
            waiting_time = random.randint(2, 5)
            time.sleep(waiting_time)

            success = True

        except Exception as e:
            sleep_time = random.randint(30, 35)
            #print(f"Attempt {attempts} failed. Sleeping for {sleep_time} seconds before retrying...")
            time.sleep(sleep_time)


            # print(f"Could not fetch data for {full_name} (ID: {player_id}): {e}")

            # # Convert e to string
            # error_message = str(e)

            # if "timeout=" in error_message:
            #     start_idx = error_message.find("timeout=") + len("timeout=")
            #     end_idx = error_message.find(")", start_idx)
            #     timeoutvalue = int(error_message[start_idx:end_idx])

            #     if timeoutvalue == 0:
            #         sleep_time = random.randint(30, 35)
            #         time.sleep(sleep_time)
            #     else:
            #         # Update timeout
            #         timeout += timeoutvalue
            
            # else:     
            #     # For unexpected errors, break out to avoid infinite loop  
            #     error_count += 1    
            #     print(f"Unexpected error for {full_name} (ID: {player_id}): {e}")
            #     break  

        # Increment attempt count and sleep if not successful
        attempts += 1
        
        # if not success:
        #     sleep_time = random.randint(30, 35)
        #     #print(f"Attempt {attempts} failed. Sleeping for {sleep_time} seconds before retrying...")
        #     time.sleep(sleep_time)


# Concatenate all individual player DataFrames into one large DataFrame
player_df = pd.concat(player_df, ignore_index=True)

Fetching Player Stats:   0%|          | 0/544 [00:00<?, ?player/s]

Fetching Player Stats: 100%|██████████| 544/544 [33:49<00:00,  3.73s/player]
  player_df = pd.concat(player_df, ignore_index=True)


In [None]:
print(player_df.count())
player_df

PLAYER_ID            3369
SEASON_ID            3369
LEAGUE_ID            3369
TEAM_ID              3369
TEAM_ABBREVIATION    3369
PLAYER_AGE           3369
GP                   3369
GS                   3369
MIN                  3369
FGM                  3369
FGA                  3369
FG_PCT               3369
FG3M                 3369
FG3A                 3369
FG3_PCT              3369
FTM                  3369
FTA                  3369
FT_PCT               3369
OREB                 3369
DREB                 3369
REB                  3369
AST                  3369
STL                  3369
BLK                  3369
TOV                  3369
PF                   3369
PTS                  3369
PlayerName           3369
dtype: int64


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PlayerName
0,1630173,2020-21,00,1610612748,MIA,21.0,61,4,737.0,124,...,73,135,208,29,20,28,43,91,304,Precious Achiuwa
1,1630173,2021-22,00,1610612761,TOR,22.0,73,28,1725.0,265,...,146,327,473,82,37,41,84,151,664,Precious Achiuwa
2,1630173,2022-23,00,1610612761,TOR,23.0,55,12,1141.0,196,...,100,228,328,50,31,30,59,102,508,Precious Achiuwa
3,1630173,2023-24,00,1610612761,TOR,24.0,25,0,437.0,78,...,50,86,136,44,16,12,29,40,193,Precious Achiuwa
4,1630173,2023-24,00,1610612752,NYK,24.0,49,18,1187.0,157,...,141,210,351,53,30,56,54,103,372,Precious Achiuwa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3364,203967,2022-23,00,1610612756,PHX,29.0,37,12,533.0,76,...,39,103,142,57,13,5,36,69,215,Dario Šarić
3365,203967,2022-23,00,1610612760,OKC,29.0,20,0,273.0,51,...,14,51,65,17,7,2,19,30,147,Dario Šarić
3366,203967,2022-23,00,0,TOT,29.0,57,12,806.0,127,...,53,154,207,74,20,7,55,99,362,Dario Šarić
3367,203967,2023-24,00,1610612744,GSW,30.0,64,9,1098.0,181,...,73,210,283,144,31,10,78,112,515,Dario Šarić


### Save as CSV

In [8]:
# Define filename
filename = 'nba_player_log.csv'

# Get the current working directory
cwd = os.getcwd()

# Get the parent directory
parent_dir = os.path.dirname(cwd)

# Join data file path
data_file_path = os.path.join(parent_dir, 'data')

# Get a list of all ifc files from ifc file path
all_files = os.listdir(data_file_path)
print(f'all data files: {all_files}')
      
# Define full path including filename
file_path = os.path.join(data_file_path, filename)
print(f'new file path: {file_path}')

# Write the DataFrame to the file
player_df.to_csv(file_path, index=False)

all data files: ['nba_game_log_2019-2021.csv', 'nba_game_log_2022-2024.csv', 'nba_game_log_2025.csv', 'nba_historical_games.csv', 'preprocessed_nba_game_log.csv']
new file path: c:\Users\Markus\Documents\Git\GitHub\SportBet\data\nba_player_log.csv
