In [1]:
import pandas as pd 
import numpy as np 

from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats.endpoints import BoxScoreAdvancedV3
from nba_api.stats.endpoints.teamestimatedmetrics import TeamEstimatedMetrics
from nba_api.stats.library.parameters import LeagueID

In [2]:
# Get team ID for the Toronto Raptors
team_dict = teams.get_teams()
raptors = [team for team in team_dict if team['abbreviation'] == 'TOR'][0]
raptors_id = raptors['id']

# Get team ID for matchup
opp_abbreviation = 'BOS'
opponent = [team for team in team_dict if team['abbreviation'] == opp_abbreviation][0]
opp_id = opponent['id']

# Set season
chosen_season='2023-24'

# Set game type
game_type = 'Regular Season'

In [3]:
# Get all of the Raptors games
raptors_game_log = teamgamelog.TeamGameLog(
    team_id=raptors_id,
    season=chosen_season,
    season_type_all_star = game_type,
    league_id_nullable = ''
)

# Access the game log data
raptors_games_data = raptors_game_log.team_game_log.get_dict()['data']

# Create a DataFrame with labeled columns
columns = raptors_game_log.expected_data['TeamGameLog']
raptors_df = pd.DataFrame(raptors_games_data, columns=columns)


# Print the head of the DataFrame
print(raptors_df.head())

      Team_ID     Game_ID     GAME_DATE      MATCHUP WL   W   L  W_PCT  MIN  \
0  1610612761  0022300923  MAR 09, 2024    TOR @ POR  L  23  41  0.359  265   
1  1610612761  0022300905  MAR 07, 2024    TOR @ PHX  L  23  40  0.365  240   
2  1610612761  0022300890  MAR 05, 2024  TOR vs. NOP  L  23  39  0.371  240   
3  1610612761  0022300875  MAR 03, 2024  TOR vs. CHA  W  23  38  0.377  240   
4  1610612761  0022300860  MAR 01, 2024  TOR vs. GSW  L  22  38  0.367  240   

   FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV  PF  PTS  
0   43  ...   0.842     9    34   43   29    9    6   20  18  118  
1   43  ...   0.733     9    27   36   30    5    2    9  20  113  
2   36  ...   0.591    11    34   45   21    6    7   11  11   98  
3   42  ...   0.778    11    34   45   27    8    6   14  16  111  
4   41  ...   0.647    11    36   47   31    9    2   14  14  105  

[5 rows x 27 columns]


In [4]:
# List of columns to drop
columns_to_drop = ['Team_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'W', 'L', 'W_PCT', 'MIN']

# Drop the specified columns
raptors_df = raptors_df.drop(columns=columns_to_drop)

# Sort by 
raptors_df = raptors_df.sort_values('Game_ID', ascending=True)

# Rename column to merge later
raptors_df = raptors_df.rename(columns={'Game_ID': 'gameId'})

# Calculate the rolling average for each stat over the previous 5 games
rolling_average_stats = raptors_df.iloc[:, 7:].rolling(window=5, min_periods=1).mean()

# Calculate the overall season average for each stat
season_average_stats = raptors_df.iloc[:, 7:].mean()

# Fill NaN values with the overall season average
rolling_average_stats = rolling_average_stats.fillna(season_average_stats)

# Combine the rolling average stats with the original DataFrame
raptors_df = pd.concat([raptors_df, rolling_average_stats], axis=1)

# Rename the new columns with the 'last 5' prefix
new_column_names = [f'last_5_{col}' for col in rolling_average_stats.columns]
raptors_df.columns = list(raptors_df.columns[:-len(rolling_average_stats.columns)]) + new_column_names

print(raptors_df.head())

        gameId  FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  ...  \
42  0022301216   47   93   0.505     6    32    0.188   16   21   0.762  ...   
43  0022301207   37   86   0.430    14    37    0.378   15   17   0.882  ...   
0   0022300923   43   91   0.473    16    43    0.372   16   19   0.842  ...   
1   0022300905   43   94   0.457    16    44    0.364   11   15   0.733  ...   
2   0022300890   36   93   0.387    13    44    0.295   13   22   0.591  ...   

    last_5_FT_PCT  last_5_OREB  last_5_DREB  last_5_REB  last_5_AST  \
42       0.762000    14.000000    28.000000   42.000000        32.0   
43       0.822000    11.000000    33.000000   44.000000        30.5   
0        0.828667    10.333333    33.333333   43.666667        30.0   
1        0.804750    10.000000    31.750000   41.750000        30.0   
2        0.762000    10.200000    32.200000   42.400000        28.2   

    last_5_STL  last_5_BLK  last_5_TOV  last_5_PF  last_5_PTS  
42    7.000000    2.000000  

In [5]:
# Function to get advanced stats for a specific game
def get_advanced_stats(game_id):
    boxscore_advanced = BoxScoreAdvancedV3(game_id)
    team_stats = boxscore_advanced.team_stats.get_data_frame()
    return team_stats

# Function to iterate through games and create a DataFrame
def create_advanced_stats_dataframe(game_ids):
    team_stats_list = []

    for game_id in game_ids:
        team_stats = get_advanced_stats(game_id)
        team_stats_list.append(team_stats)

    all_team_stats = pd.concat(team_stats_list, ignore_index=True)

    return all_team_stats

# Extract the list of game IDs from the 'Game_ID' column of 'raptors_df'
game_ids_list = raptors_df['gameId'].tolist()

team_adv_stats_df = create_advanced_stats_dataframe(game_ids_list)


In [6]:
print(team_adv_stats_df.head())

       gameId      teamId   teamCity       teamName teamTricode teamSlug  \
0  0022301216  1610612766  Charlotte        Hornets         CHA  hornets   
1  0022301216  1610612761    Toronto        Raptors         TOR  raptors   
2  0022301207  1610612761    Toronto        Raptors         TOR  raptors   
3  0022301207  1610612748      Miami           Heat         MIA     heat   
4  0022300923  1610612757   Portland  Trail Blazers         POR  blazers   

  minutes  estimatedOffensiveRating  offensiveRating  \
0  240:00                     122.9            121.4   
1  240:00                     113.5            117.2   
2  240:00                     103.5            105.1   
3  240:00                     116.2            114.3   
4  265:00                     120.1            119.6   

   estimatedDefensiveRating  ...  turnoverRatio  effectiveFieldGoalPercentage  \
0                     113.5  ...           16.3                         0.639   
1                     122.9  ...           1

In [7]:
# Extract the raptors advanced stats
raptors_adv_team_stats = team_adv_stats_df[team_adv_stats_df['teamTricode'] == 'TOR']

# Drop columns
columns_to_drop = ['teamId', 'teamCity', 'teamName', 'teamTricode', 'teamSlug', 'minutes']
raptors_adv_team_stats = raptors_adv_team_stats.drop(columns=columns_to_drop)

# Rename columns
raptors_adv_team_stats = raptors_adv_team_stats.rename(columns=lambda x: 'TOR_' + x if x != 'gameId' else x)
print(raptors_adv_team_stats.head())

       gameId  TOR_estimatedOffensiveRating  TOR_offensiveRating  \
1  0022301216                         113.5                117.2   
2  0022301207                         103.5                105.1   
5  0022300923                         106.9                111.3   
7  0022300905                         112.3                114.1   
8  0022300890                          94.5                 93.3   

   TOR_estimatedDefensiveRating  TOR_defensiveRating  TOR_estimatedNetRating  \
1                         122.9                121.4                    -9.5   
2                         116.2                114.3                   -12.7   
5                         120.1                119.6                   -13.2   
7                         119.0                121.2                    -6.7   
8                         133.9                135.0                   -39.3   

   TOR_netRating  TOR_assistPercentage  TOR_assistToTurnover  TOR_assistRatio  \
1           -4.3             

In [8]:
# Extract the opponents advanced stats
opposing_adv_team_stats = team_adv_stats_df[team_adv_stats_df['teamTricode'] != 'TOR']

# Drop columns from 'opposing_team_stats'
columns_to_drop = ['teamId', 'teamCity', 'teamName', 'teamTricode', 'teamSlug', 'minutes']
opposing_adv_team_stats = opposing_adv_team_stats.drop(columns=columns_to_drop)

opposing_adv_team_stats = opposing_adv_team_stats.rename(columns=lambda x: 'OPP_' + x if x != 'gameId' else x)
print(opposing_adv_team_stats.head())

       gameId  OPP_estimatedOffensiveRating  OPP_offensiveRating  \
0  0022301216                         122.9                121.4   
3  0022301207                         116.2                114.3   
4  0022300923                         120.1                119.6   
6  0022300905                         119.0                121.2   
9  0022300890                         133.9                135.0   

   OPP_estimatedDefensiveRating  OPP_defensiveRating  OPP_estimatedNetRating  \
0                         113.5                117.2                     9.5   
3                         103.5                105.1                    12.7   
4                         106.9                111.3                    13.2   
6                         112.3                114.1                     6.7   
9                          94.5                 93.3                    39.3   

   OPP_netRating  OPP_assistPercentage  OPP_assistToTurnover  OPP_assistRatio  \
0            4.3             

In [9]:
# Merge DataFrames based on the common 'gameId' column
combined_adv_stats = pd.merge(raptors_adv_team_stats, opposing_adv_team_stats, on='gameId')
print(combined_adv_stats)


        gameId  TOR_estimatedOffensiveRating  TOR_offensiveRating  \
0   0022301216                         113.5                117.2   
1   0022301207                         103.5                105.1   
2   0022300923                         106.9                111.3   
3   0022300905                         112.3                114.1   
4   0022300890                          94.5                 93.3   
..         ...                           ...                  ...   
59  0022300069                          94.1                 96.0   
60  0022300054                         101.6                104.0   
61  0022300046                         126.2                130.1   
62  0022300038                         103.2                104.9   
63  0022300031                         103.8                106.1   

    TOR_estimatedDefensiveRating  TOR_defensiveRating  TOR_estimatedNetRating  \
0                          122.9                121.4                    -9.5   
1        

In [11]:
game_data = pd.merge(raptors_df, combined_adv_stats, on='gameId')
game_data = game_data.drop(columns='gameId')
print(game_data)

game_data.to_csv('game_data.csv', index=False)


    FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  OREB  ...  \
0    47   93   0.505     6    32    0.188   16   21   0.762    14  ...   
1    37   86   0.430    14    37    0.378   15   17   0.882     8  ...   
2    43   91   0.473    16    43    0.372   16   19   0.842     9  ...   
3    43   94   0.457    16    44    0.364   11   15   0.733     9  ...   
4    36   93   0.387    13    44    0.295   13   22   0.591    11  ...   
..  ...  ...     ...   ...   ...      ...  ...  ...     ...   ...  ...   
59   36   90   0.400    14    35    0.400   11   16   0.688     8  ...   
60   39  101   0.386    13    36    0.361   12   19   0.632    15  ...   
61   44   83   0.530    13    39    0.333   20   27   0.741    13  ...   
62   38   75   0.507    14    28    0.500   17   22   0.773     5  ...   
63   42   87   0.483    12    33    0.364    9   14   0.643     5  ...   

    OPP_turnoverRatio  OPP_effectiveFieldGoalPercentage  \
0                16.3                             0.