# Notebook cell In [1]:
from nba_api.stats.endpoints import leaguegamelog, boxscoretraditionalv2
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

# Presumably the starting calendar year of the season to fetch; the season
# string built in fetch_multiple_seasons has the form f"{year}-{yy}", so 2023
# would correspond to "2023-24".
YEAR = 2023

# The constants below are handed to leaguegamelog.LeagueGameLog by
# fetch_season_game_logs, with the season string slotted in between
# PLAYER_OR_TEAM and SEASON_TYPE.
# NOTE(review): the meanings given here are inferred from the names and
# values — confirm against the nba_api LeagueGameLog parameter reference.
COUNTER = 1  # presumably a paging/counter parameter required by the endpoint
DIRECTION = "ASC"  # presumably ascending sort direction
LEAGUE = "00"  # league identifier string expected by the endpoint
PLAYER_OR_TEAM = "T"  # presumably "T" requests team-level (not player) logs
SEASON_TYPE = "Regular Season"
SORTER = "DATE"  # presumably the field the endpoint sorts by

def fetch_season_game_logs(season, timeout=30, max_attempts=3, retry_delay=2.0):
    """Fetch one season's team game log and tag every row as home or away.

    The request is retried on network-level failures (for example the
    ``ReadTimeout`` that stats.nba.com produces when it is slow to answer)
    instead of aborting the whole run on the first hiccup.

    Args:
        season: Season string such as ``'2023-24'``.
        timeout: Seconds to wait for the server before a request is abandoned.
        max_attempts: Total number of tries; values below 1 are treated as 1.
        retry_delay: Base pause in seconds between tries. The pause grows
            linearly with the attempt number.

    Returns:
        A DataFrame with the columns ``TEAM_ID``, ``TEAM_NAME``, ``GAME_ID``,
        ``MATCHUP``, ``WL``, ``GAME_DATE``, ``HOME/AWAY`` (1 for the home
        team, 0 for the away team) and ``TEAM_2_HOME/AWAY`` (the opposite
        flag, describing the opponent).

    Raises:
        OSError: The network error from the final attempt. The exceptions
            raised by ``requests`` (including ``ReadTimeout``) derive from
            ``OSError``, which is why that is the type caught for retries.
    """
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            # Keyword arguments keep the call correct even if the endpoint's
            # parameter order changes between nba_api releases.
            gamelog = leaguegamelog.LeagueGameLog(
                counter=COUNTER,
                direction=DIRECTION,
                league_id=LEAGUE,
                player_or_team_abbreviation=PLAYER_OR_TEAM,
                season=season,
                season_type_all_star=SEASON_TYPE,
                sorter=SORTER,
                timeout=timeout,
            )
            data = gamelog.get_data_frames()[0]
        except OSError:
            if attempt == attempts:
                raise
            time.sleep(retry_delay * attempt)
        else:
            break

    columns_to_keep = ["TEAM_ID", "TEAM_NAME", "GAME_ID", "MATCHUP", "WL", "GAME_DATE"]
    filtered_data = data[columns_to_keep].copy()

    # A MATCHUP containing '@' (e.g. "BOS @ NYK") marks the away team's row.
    is_away = filtered_data['MATCHUP'].str.contains('@', regex=False)
    filtered_data['HOME/AWAY'] = (~is_away).astype('int64')
    filtered_data['TEAM_2_HOME/AWAY'] = 1 - filtered_data['HOME/AWAY']

    return filtered_data

def _add_opponent_columns(games):
    """Return a copy of *games* with TEAM_2_ID / TEAM_2_NAME attached.

    Opponents are matched inside each GAME_ID group rather than by row
    parity, so a game that is missing one team's row cannot shift the pairing
    of every game after it. Rows whose game does not have exactly two team
    rows get a missing opponent instead of a wrong one.
    """
    games = games.copy()
    rows_per_game = games['GAME_ID'].map(games['GAME_ID'].value_counts())
    has_both_teams = rows_per_game == 2
    by_game = games.groupby('GAME_ID', sort=False)

    for source, target in (('TEAM_ID', 'TEAM_2_ID'), ('TEAM_NAME', 'TEAM_2_NAME')):
        # Reversing a two-row group hands each team the other team's value.
        opponent = by_game[source].transform(lambda col: col.to_numpy()[::-1])
        # Only mask when needed so the integer dtype survives the normal case.
        if not has_both_teams.all():
            opponent = opponent.where(has_both_teams)
        games[target] = opponent

    return games


def fetch_multiple_seasons(start_year, end_year):
    """Fetch team game logs for a range of seasons and pair up opponents.

    Args:
        start_year: First season's starting year (2023 means '2023-24').
        end_year: Last season's starting year, inclusive.

    Returns:
        A DataFrame sorted by GAME_ID with one row per team per game and the
        columns GAME_ID, GAME_DATE, MATCHUP, TEAM_NAME, TEAM_ID, TEAM_2_NAME,
        TEAM_2_ID, WL, HOME/AWAY and TEAM_2_HOME/AWAY.

    Raises:
        ValueError: If ``start_year`` is greater than ``end_year``, because
            there would be no season to fetch.
    """
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be greater than end_year ({end_year})"
        )

    # Season strings look like '2010-11': start year plus the last two digits
    # of the following year.
    season_frames = [
        fetch_season_game_logs(f"{year}-{str(year + 1)[-2:]}")
        for year in range(start_year, end_year + 1)
    ]

    combined_data = pd.concat(season_frames, ignore_index=True)
    # A stable sort keeps the two rows of a game in a reproducible order.
    combined_data = combined_data.sort_values(by='GAME_ID', kind='mergesort')
    combined_data = combined_data.reset_index(drop=True)

    combined_data = _add_opponent_columns(combined_data)

    columns_order = [
        "GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID",
        "TEAM_2_NAME", "TEAM_2_ID", "WL", "HOME/AWAY", "TEAM_2_HOME/AWAY",
    ]
    return combined_data[columns_order]

def fetch_player_stats(game_id):
    """Best-effort download of the per-player box score for one game.

    Any failure is reported on stdout and converted into an empty DataFrame,
    so a single bad game never interrupts a batch of downloads.

    Args:
        game_id: Identifier of the game whose box score is requested.

    Returns:
        The player-stats DataFrame with its ``GAME_ID`` column set to
        ``game_id``, or an empty DataFrame when the fetch failed.
    """
    try:
        endpoint = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
        stats_frame = endpoint.player_stats.get_data_frame()
        stats_frame['GAME_ID'] = game_id
    except Exception as e:
        print(f"An error occurred while fetching data for GAME_ID {game_id}: {e}")
        return pd.DataFrame()
    return stats_frame

# Fetch game logs for the configured season (YEAR is the season's start year)
df = fetch_multiple_seasons(YEAR, YEAR)

# Subset the dataframe to include only the first ten rows
df_subset = df.head(10)

# The game log holds one row per team, so every GAME_ID appears twice.
# De-duplicate before fetching: otherwise each box score is downloaded twice
# and every player row ends up duplicated in the concatenated result.
game_ids = df_subset['GAME_ID'].drop_duplicates().tolist()

# List to store player stats dataframes
all_player_stats = []

# Using ThreadPoolExecutor for parallel requests; perf_counter is the
# monotonic clock intended for measuring elapsed intervals.
start_time = time.perf_counter()
with ThreadPoolExecutor(max_workers=10) as executor:
    # One future per distinct game
    futures = [executor.submit(fetch_player_stats, game_id) for game_id in game_ids]

    for future in as_completed(futures):
        try:
            player_stats = future.result()
        except Exception as e:
            # fetch_player_stats already handles its own errors; this only
            # guards against something unexpected escaping the worker.
            print(f"An error occurred: {e}")
        else:
            if not player_stats.empty:
                all_player_stats.append(player_stats)

# Concatenate all player stats into a single dataframe
if all_player_stats:
    player_stats_df = pd.concat(all_player_stats, ignore_index=True)
else:
    player_stats_df = pd.DataFrame()

# Display the player statistics dataframe
print(player_stats_df)

# Example to merge player stats with the original dataframe based on GAME_ID
if not player_stats_df.empty:
    df_merged = df_subset.merge(player_stats_df, on='GAME_ID', how='left')
    # Display the merged dataframe
    print(df_merged)
else:
    print("No player statistics were fetched.")
end_time = time.perf_counter()
print(f"Time taken: {end_time - start_time} seconds")


# Output of the run above (uncaught exception):
# ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)