In [12]:
import pandas as pd 
import numpy as np 

from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats.endpoints import BoxScoreAdvancedV3
from nba_api.stats.endpoints.teamestimatedmetrics import TeamEstimatedMetrics
from nba_api.stats.library.parameters import LeagueID

In [13]:
# Get team ID for the Toronto Raptors
team_dict = teams.get_teams()
raptors = [team for team in team_dict if team['abbreviation'] == 'TOR'][0]
raptors_id = raptors['id']

# Get team ID for matchup
opp_abbreviation = 'BOS'
opponent = [team for team in team_dict if team['abbreviation'] == opp_abbreviation][0]
opp_id = opponent['id']

# Set season
chosen_season='2022-23'

# Set game type
game_type = 'Regular Season'

In [14]:
# Get all of the Raptors games
raptors_game_log = teamgamelog.TeamGameLog(
    team_id=raptors_id,
    season=chosen_season,
    season_type_all_star = game_type,
    league_id_nullable = ''
)

# Access the game log data
raptors_games_data = raptors_game_log.team_game_log.get_dict()['data']

# Create a DataFrame with labeled columns
columns = raptors_game_log.expected_data['TeamGameLog']
raptors_df = pd.DataFrame(raptors_games_data, columns=columns)


# Print the head of the DataFrame
print(raptors_df.head())

      Team_ID     Game_ID     GAME_DATE      MATCHUP WL   W   L  W_PCT  MIN  \
0  1610612761  0022300923  MAR 09, 2024    TOR @ POR  L  23  41  0.359  265   
1  1610612761  0022300905  MAR 07, 2024    TOR @ PHX  L  23  40  0.365  240   
2  1610612761  0022300890  MAR 05, 2024  TOR vs. NOP  L  23  39  0.371  240   
3  1610612761  0022300875  MAR 03, 2024  TOR vs. CHA  W  23  38  0.377  240   
4  1610612761  0022300860  MAR 01, 2024  TOR vs. GSW  L  22  38  0.367  240   

   FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV  PF  PTS  
0   43  ...   0.842     9    34   43   29    9    6   20  18  118  
1   43  ...   0.733     9    27   36   30    5    2    9  20  113  
2   36  ...   0.591    11    34   45   21    6    7   11  11   98  
3   42  ...   0.778    11    34   45   27    8    6   14  16  111  
4   41  ...   0.647    11    36   47   31    9    2   14  14  105  

[5 rows x 27 columns]


In [15]:
# List of columns to drop
columns_to_drop = ['Team_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'W', 'L', 'W_PCT', 'MIN']

# Drop the specified columns
raptors_df = raptors_df.drop(columns=columns_to_drop)

# Sort by 
raptors_df = raptors_df.sort_values('Game_ID', ascending=True)

# Rename column to merge later
raptors_df = raptors_df.rename(columns={'Game_ID': 'gameId'})

# Calculate the rolling average for each stat over the previous 5 games
rolling_average_stats = raptors_df.iloc[:, 7:].rolling(window=5, min_periods=1).mean()

# Calculate the overall season average for each stat
season_average_stats = raptors_df.iloc[:, 7:].mean()

# Fill NaN values with the overall season average
rolling_average_stats = rolling_average_stats.fillna(season_average_stats)

# Combine the rolling average stats with the original DataFrame
raptors_df = pd.concat([raptors_df, rolling_average_stats], axis=1)

# Rename the new columns with the 'last 5' prefix
new_column_names = [f'last_5_{col}' for col in rolling_average_stats.columns]
raptors_df.columns = list(raptors_df.columns[:-len(rolling_average_stats.columns)]) + new_column_names

print(raptors_df.head())

        gameId  FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  ...  \
52  0022300031   42   87   0.483    12    33    0.364    9   14   0.643  ...   
50  0022300038   38   75   0.507    14    28    0.500   17   22   0.773  ...   
48  0022300046   44   83   0.530    13    39    0.333   20   27   0.741  ...   
46  0022300054   39  101   0.386    13    36    0.361   12   19   0.632  ...   
63  0022300069   36   90   0.400    14    35    0.400   11   16   0.688  ...   

    last_5_FT_PCT  last_5_OREB  last_5_DREB  last_5_REB  last_5_AST  \
52        0.64300     5.000000        36.00   41.000000       29.00   
50        0.70800     5.000000        34.50   39.500000       29.00   
48        0.71900     7.666667        32.00   39.666667       30.00   
46        0.69725     9.500000        31.75   41.250000       29.25   
63        0.69540     9.200000        33.20   42.400000       28.80   

    last_5_STL  last_5_BLK  last_5_TOV  last_5_PF  last_5_PTS  
52        4.00    1.000000  

In [16]:
# Function to get advanced stats for a specific game
def get_advanced_stats(game_id):
    boxscore_advanced = BoxScoreAdvancedV3(game_id)
    team_stats = boxscore_advanced.team_stats.get_data_frame()
    return team_stats

# Function to iterate through games and create a DataFrame
def create_advanced_stats_dataframe(game_ids):
    team_stats_list = []

    for game_id in game_ids:
        team_stats = get_advanced_stats(game_id)
        team_stats_list.append(team_stats)

    all_team_stats = pd.concat(team_stats_list, ignore_index=True)

    return all_team_stats

# Extract the list of game IDs from the 'Game_ID' column of 'raptors_df'
game_ids_list = raptors_df['gameId'].tolist()

team_adv_stats_df = create_advanced_stats_dataframe(game_ids_list)


In [17]:
print(team_adv_stats_df.head())

       gameId      teamId teamCity teamName teamTricode teamSlug minutes  \
0  0022300031  1610612761  Toronto  Raptors         TOR  raptors  240:00   
1  0022300031  1610612738   Boston  Celtics         BOS  celtics  240:00   
2  0022300038  1610612753  Orlando    Magic         ORL    magic  240:00   
3  0022300038  1610612761  Toronto  Raptors         TOR  raptors  240:00   
4  0022300046  1610612761  Toronto  Raptors         TOR  raptors  240:00   

   estimatedOffensiveRating  offensiveRating  estimatedDefensiveRating  ...  \
0                     103.8            106.1                     109.1  ...   
1                     109.1            110.2                     103.8  ...   
2                     118.0            124.8                     103.2  ...   
3                     103.2            104.9                     118.0  ...   
4                     126.2            130.1                     114.5  ...   

   turnoverRatio  effectiveFieldGoalPercentage  trueShootingPercenta

In [18]:
# Extract the raptors advanced stats
raptors_adv_team_stats = team_adv_stats_df[team_adv_stats_df['teamTricode'] == 'TOR']

# Drop columns
columns_to_drop = ['teamId', 'teamCity', 'teamName', 'teamTricode', 'teamSlug', 'minutes']
raptors_adv_team_stats = raptors_adv_team_stats.drop(columns=columns_to_drop)

# Rename columns
raptors_adv_team_stats = raptors_adv_team_stats.rename(columns=lambda x: 'TOR_' + x if x != 'gameId' else x)
print(raptors_adv_team_stats.head())

       gameId  TOR_estimatedOffensiveRating  TOR_offensiveRating  \
0  0022300031                         103.8                106.1   
3  0022300038                         103.2                104.9   
4  0022300046                         126.2                130.1   
7  0022300054                         101.6                104.0   
8  0022300069                          94.1                 96.0   

   TOR_estimatedDefensiveRating  TOR_defensiveRating  TOR_estimatedNetRating  \
0                         109.1                110.2                    -5.3   
3                         118.0                124.8                   -14.8   
4                         114.5                116.1                    11.7   
7                         112.3                115.0                   -10.6   
8                          87.3                 93.1                     6.8   

   TOR_netRating  TOR_assistPercentage  TOR_assistToTurnover  TOR_assistRatio  \
0           -4.1             

In [19]:
# Extract the opponents advanced stats
opposing_adv_team_stats = team_adv_stats_df[team_adv_stats_df['teamTricode'] != 'TOR']

# Drop columns from 'opposing_team_stats'
columns_to_drop = ['teamId', 'teamCity', 'teamName', 'teamTricode', 'teamSlug', 'minutes']
opposing_adv_team_stats = opposing_adv_team_stats.drop(columns=columns_to_drop)

opposing_adv_team_stats = opposing_adv_team_stats.rename(columns=lambda x: 'OPP_' + x if x != 'gameId' else x)
print(opposing_adv_team_stats.head())

       gameId  OPP_estimatedOffensiveRating  OPP_offensiveRating  \
1  0022300031                         109.1                110.2   
2  0022300038                         118.0                124.8   
5  0022300046                         114.5                116.1   
6  0022300054                         112.3                115.0   
9  0022300069                          87.3                 93.1   

   OPP_estimatedDefensiveRating  OPP_defensiveRating  OPP_estimatedNetRating  \
1                         103.8                106.1                     5.3   
2                         103.2                104.9                    14.8   
5                         126.2                130.1                   -11.7   
6                         101.6                104.0                    10.6   
9                          94.1                 96.0                    -6.8   

   OPP_netRating  OPP_assistPercentage  OPP_assistToTurnover  OPP_assistRatio  \
1            4.1             

In [20]:
# Merge DataFrames based on the common 'gameId' column
combined_adv_stats = pd.merge(raptors_adv_team_stats, opposing_adv_team_stats, on='gameId')
print(combined_adv_stats)


        gameId  TOR_estimatedOffensiveRating  TOR_offensiveRating  \
0   0022300031                         103.8                106.1   
1   0022300038                         103.2                104.9   
2   0022300046                         126.2                130.1   
3   0022300054                         101.6                104.0   
4   0022300069                          94.1                 96.0   
..         ...                           ...                  ...   
59  0022300890                          94.5                 93.3   
60  0022300905                         112.3                114.1   
61  0022300923                         106.9                111.3   
62  0022301207                         103.5                105.1   
63  0022301216                         113.5                117.2   

    TOR_estimatedDefensiveRating  TOR_defensiveRating  TOR_estimatedNetRating  \
0                          109.1                110.2                    -5.3   
1        

In [21]:
game_data = pd.merge(raptors_df, combined_adv_stats, on='gameId')
game_data = game_data.drop(columns='gameId')
print(game_data)

game_data.to_csv('game_data.csv', index=False)


    FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  OREB  ...  \
0    42   87   0.483    12    33    0.364    9   14   0.643     5  ...   
1    38   75   0.507    14    28    0.500   17   22   0.773     5  ...   
2    44   83   0.530    13    39    0.333   20   27   0.741    13  ...   
3    39  101   0.386    13    36    0.361   12   19   0.632    15  ...   
4    36   90   0.400    14    35    0.400   11   16   0.688     8  ...   
..  ...  ...     ...   ...   ...      ...  ...  ...     ...   ...  ...   
59   36   93   0.387    13    44    0.295   13   22   0.591    11  ...   
60   43   94   0.457    16    44    0.364   11   15   0.733     9  ...   
61   43   91   0.473    16    43    0.372   16   19   0.842     9  ...   
62   37   86   0.430    14    37    0.378   15   17   0.882     8  ...   
63   47   93   0.505     6    32    0.188   16   21   0.762    14  ...   

    OPP_turnoverRatio  OPP_effectiveFieldGoalPercentage  \
0                11.2                             0.