# Training Module

## Import Libraries

In [29]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import commonallplayers
from nba_api.stats.endpoints import playergamelogs
from unidecode import unidecode

## Load Data

In [126]:
# Grab the active players from the 2024-25 season
all_players_2024 = commonallplayers.CommonAllPlayers(season='2024-25').get_data_frames()[0]
active_players = all_players_2024[all_players_2024['ROSTERSTATUS'] == 1]
active_player_ids = list(active_players['PERSON_ID'])


In [132]:
active_players.head()

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,PLAYER_SLUG,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,TEAM_SLUG,GAMES_PLAYED_FLAG,OTHERLEAGUE_EXPERIENCE_CH
10,1630173,"Achiuwa, Precious",Precious Achiuwa,1,2020,2024,precious_achiuwa,precious_achiuwa,1610612752,New York,Knicks,NYK,knicks,knicks,Y,0
22,203500,"Adams, Steven",Steven Adams,1,2013,2024,steven_adams,steven_adams,1610612745,Houston,Rockets,HOU,rockets,rockets,Y,0
24,1628389,"Adebayo, Bam",Bam Adebayo,1,2017,2024,bam_adebayo,bam_adebayo,1610612748,Miami,Heat,MIA,heat,heat,Y,0
29,1630534,"Agbaji, Ochai",Ochai Agbaji,1,2022,2024,ochai_agbaji,ochai_agbaji,1610612761,Toronto,Raptors,TOR,raptors,raptors,Y,0
41,1630583,"Aldama, Santi",Santi Aldama,1,2021,2024,santi_aldama,santi_aldama,1610612763,Memphis,Grizzlies,MEM,grizzlies,grizzlies,Y,1


In [127]:
def create_active_player_stats(player_ids):
    active_player_stats = {'PLAYER_ID': [], 'FG_PCT': [], 'FT_PCT': [], '3PM': [], 'PTS': [], 'REB': [], 'AST': [], 'STL': [], 'BLK': [], 'TOV': []}
    for player_id in player_ids:
        career = playercareerstats.PlayerCareerStats(player_id=player_id).get_data_frames()[0]
        career = career[career['SEASON_ID'].isin(['2022-23', '2023-24'])]
        # Aggregate the stats for the player
        active_player_stats['PLAYER_ID'].append(player_id)
        active_player_stats['FG_PCT'].append(career['FG_PCT'].mean())
        active_player_stats['FT_PCT'].append(career['FT_PCT'].mean())
        active_player_stats['3PM'].append(career['FG3M'].mean())
        active_player_stats['PTS'].append(career['PTS'].mean())
        active_player_stats['REB'].append(career['REB'].mean())
        active_player_stats['AST'].append(career['AST'].mean())
        active_player_stats['STL'].append(career['STL'].mean())
        active_player_stats['BLK'].append(career['BLK'].mean())
        active_player_stats['TOV'].append(career['TOV'].mean())
    return pd.DataFrame(active_player_stats)

In [128]:
active_player_stats = create_active_player_stats(active_player_ids)

In [133]:
active_player_stats['PLAYER_NAME'] = active_player_stats['PLAYER_ID'].apply(lambda x: active_players[active_players['PERSON_ID'] == x]['DISPLAY_FIRST_LAST'].values[0])

In [136]:
# Move the last column to the front
cols = list(active_player_stats)
cols.insert(0, cols.pop(cols.index('PLAYER_NAME')))
active_player_stats = active_player_stats.loc[:, cols]

In [138]:
active_player_stats.to_csv('data/active_player_stats.csv', index=False)

## 9-Cat Ranking

In [30]:
active_player_stats = pd.read_csv('../data/active_player_stats.csv')
active_player_stats.head()

Unnamed: 0,PLAYER_NAME,PLAYER_ID,FG_PCT,FT_PCT,3PM,PTS,REB,AST,STL,BLK,TOV
0,Precious Achiuwa,1630173,0.4925,0.633,20.25,409.5,325.5,61.0,30.75,41.5,56.25
1,Steven Adams,203500,0.597,0.364,0.0,361.0,485.0,97.0,36.0,46.0,79.0
2,Bam Adebayo,1628389,0.5305,0.7805,8.0,1448.0,712.5,259.0,84.5,63.5,174.5
3,Ochai Agbaji,1630534,0.41375,0.7085,51.25,344.25,138.25,58.25,27.5,25.75,42.25
4,Santi Aldama,1630583,0.4525,0.6855,100.0,675.0,361.5,117.5,44.0,51.0,64.5


In [31]:
def gen_z_score(row, pop_stats):
    """Compute the z-score for a given player"""
    z_score = 0
    for col in pop_stats.keys():
        z_score += (row[col] - pop_stats[col]['mean']) / pop_stats[col]['sd']
    return z_score

    

In [32]:
def gen_agg_z_score(player_stats):
    # Cat 1: Points
    player_ranked_by_pts = player_stats.sort_values(by='PTS', ascending=False).iloc[:200]
    pop_mean_pts = player_ranked_by_pts['PTS'].mean()
    pop_sd_pts = player_ranked_by_pts['PTS'].std()
    # Cat 2: FG %
    player_ranked_by_fg_pct = player_stats.sort_values(by='FG_PCT', ascending=False).iloc[:200]
    pop_mean_fg_pct = player_ranked_by_fg_pct['FG_PCT'].mean()
    pop_sd_fg_pct = player_ranked_by_fg_pct['FG_PCT'].std()
    # Cat 3: FT %
    player_ranked_by_ft_pct = player_stats.sort_values(by='FT_PCT', ascending=False).iloc[:200]
    pop_mean_ft_pct = player_ranked_by_ft_pct['FT_PCT'].mean()
    pop_sd_ft_pct = player_ranked_by_ft_pct['FT_PCT'].std()
    # Cat 4: Threes
    player_ranked_by_threes = player_stats.sort_values(by='3PM', ascending=False).iloc[:200]
    pop_mean_threes = player_ranked_by_threes['3PM'].mean()
    pop_sd_threes = player_ranked_by_threes['3PM'].std()
    # Cat 5: Assists
    player_ranked_by_asts = player_stats.sort_values(by='AST', ascending=False).iloc[:200]
    pop_mean_asts = player_ranked_by_asts['AST'].mean()
    pop_sd_asts = player_ranked_by_asts['AST'].std()
    # Cat 6: Rebounds
    player_ranked_by_rebs = player_stats.sort_values(by='REB', ascending=False).iloc[:200]
    pop_mean_rebs = player_ranked_by_rebs['REB'].mean()
    pop_sd_rebs = player_ranked_by_rebs['REB'].std()
    # Cat 7: Steals
    player_ranked_by_stls = player_stats.sort_values(by='STL', ascending=False).iloc[:200]
    pop_mean_stls = player_ranked_by_stls['STL'].mean()
    pop_sd_stls = player_ranked_by_stls['STL'].std()
    # Cat 8: Blocks
    player_ranked_by_blks = player_stats.sort_values(by='BLK', ascending=False).iloc[:200]
    pop_mean_blks = player_ranked_by_blks['BLK'].mean()
    pop_sd_blks = player_ranked_by_blks['BLK'].std()
    # Cat 9: TOs
    player_ranked_by_tovs = player_stats.sort_values(by='TOV', ascending=False).iloc[:200]
    pop_mean_tovs = player_ranked_by_tovs['TOV'].mean()
    pop_sd_tovs = player_ranked_by_tovs['TOV'].std()
    
    # Add the population stats to a dictionary
    pop_stats = {'PTS': {'mean': pop_mean_pts, 'sd': pop_sd_pts},
                 'FG_PCT': {'mean': pop_mean_fg_pct, 'sd': pop_sd_fg_pct},
                 'FT_PCT': {'mean': pop_mean_ft_pct, 'sd': pop_sd_ft_pct},
                 '3PM': {'mean': pop_mean_threes, 'sd': pop_sd_threes},
                 'AST': {'mean': pop_mean_asts, 'sd': pop_sd_asts},
                 'REB': {'mean': pop_mean_rebs, 'sd': pop_sd_rebs},
                 'STL': {'mean': pop_mean_stls, 'sd': pop_sd_stls},
                 'BLK': {'mean': pop_mean_blks, 'sd': pop_sd_blks},
                 'TOV': {'mean': pop_mean_tovs, 'sd': pop_sd_tovs}}
    # Compute the aggregate z-score
    player_stats['Z_SCORE'] = player_stats.apply(lambda row: gen_z_score(row, pop_stats), axis=1)
    return player_stats

In [33]:
# Generate the aggregate z-score for each player
active_player_stats_w_z_score = gen_agg_z_score(active_player_stats)

In [34]:
active_player_stats.head()

Unnamed: 0,PLAYER_NAME,PLAYER_ID,FG_PCT,FT_PCT,3PM,PTS,REB,AST,STL,BLK,TOV,Z_SCORE
0,Precious Achiuwa,1630173,0.4925,0.633,20.25,409.5,325.5,61.0,30.75,41.5,56.25,-13.469413
1,Steven Adams,203500,0.597,0.364,0.0,361.0,485.0,97.0,36.0,46.0,79.0,-16.022628
2,Bam Adebayo,1628389,0.5305,0.7805,8.0,1448.0,712.5,259.0,84.5,63.5,174.5,2.699556
3,Ochai Agbaji,1630534,0.41375,0.7085,51.25,344.25,138.25,58.25,27.5,25.75,42.25,-14.701338
4,Santi Aldama,1630583,0.4525,0.6855,100.0,675.0,361.5,117.5,44.0,51.0,64.5,-8.770833


In [38]:
# Sort the players by z-score in descending order
active_player_stats_w_z_score = active_player_stats_w_z_score.sort_values(by='Z_SCORE', ascending=False)
active_player_stats_w_z_score.to_csv('../data/active_player_stats_w_z_score.csv', index=False)

In [39]:
active_player_stats_w_z_score.iloc[:200]

Unnamed: 0,PLAYER_NAME,PLAYER_ID,FG_PCT,FT_PCT,3PM,PTS,REB,AST,STL,BLK,TOV,Z_SCORE
232,Nikola Jokić,203999,0.60750,0.81950,70.00,1887.50,896.50,693.00,97.50,57.50,242.00,13.684092
489,Victor Wembanyama,1641705,0.46500,0.79600,128.00,1522.00,755.00,274.00,88.00,254.00,260.00,12.797552
113,Luka Dončić,1629029,0.49150,0.76400,234.50,2254.00,608.00,607.50,94.50,35.50,259.00,12.128554
153,Shai Gilgeous-Alexander,1628983,0.52250,0.88950,76.50,2194.50,372.00,418.00,131.00,66.00,177.00,10.041760
128,Anthony Edwards,1630162,0.46000,0.79600,201.50,1997.50,444.00,377.50,113.00,50.00,250.00,9.368886
...,...,...,...,...,...,...,...,...,...,...,...,...
27,RJ Barrett,1629628,0.47625,0.72875,75.25,942.75,249.50,146.75,23.25,14.75,104.00,-9.142815
195,Talen Horton-Tucker,1629659,0.40750,0.77850,59.00,607.00,165.50,211.50,43.00,24.00,95.50,-9.166685
41,Bol Bol,1629626,0.58100,0.77400,26.00,428.50,271.00,43.00,20.00,55.00,65.50,-9.375760
358,Mason Plumlee,203486,0.66125,0.68000,0.00,489.50,408.50,136.25,26.50,27.75,67.50,-9.377169


## Player Game Logs

In [2]:
test_log = playergamelogs.PlayerGameLogs(player_id_nullable=203507).get_data_frames()[0]
test_log.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC
0,2016-17,203507,Giannis Antetokounmpo,Giannis,1610612749,MIL,Milwaukee Bucks,21601207,2017-04-10T00:00:00,MIL vs. CHA,...,1,79,75,5,66,1,1,67,2,36:27
1,2016-17,203507,Giannis Antetokounmpo,Giannis,1610612749,MIL,Milwaukee Bucks,21601191,2017-04-08T00:00:00,MIL @ PHI,...,32,13,54,28,34,1,4,38,1,40:17
2,2016-17,203507,Giannis Antetokounmpo,Giannis,1610612749,MIL,Milwaukee Bucks,21601175,2017-04-06T00:00:00,MIL @ IND,...,13,13,30,73,56,33,4,54,1,35:17
3,2016-17,203507,Giannis Antetokounmpo,Giannis,1610612749,MIL,Milwaukee Bucks,21601159,2017-04-04T00:00:00,MIL @ OKC,...,13,46,73,79,71,1,4,71,1,24:26
4,2016-17,203507,Giannis Antetokounmpo,Giannis,1610612749,MIL,Milwaukee Bucks,21601144,2017-04-02T00:00:00,MIL vs. DAL,...,13,6,12,41,2,1,4,2,1,39:32


In [25]:
# Grab the game logs for the top 300 players
active_players = pd.read_csv('../data/active_player_stats_w_z_score.csv')
# Convert all player names to ASCII
active_players['PLAYER_NAME'] = active_players['PLAYER_NAME'].apply(lambda x: unidecode(x))
top_300_players = active_players.iloc[:300]
player_ids = list(top_300_players['PLAYER_ID'])

0               Nikola Jokić
1          Victor Wembanyama
2                Luka Dončić
3    Shai Gilgeous-Alexander
4            Anthony Edwards
Name: PLAYER_NAME, dtype: object


NameError: name 'unidecode' is not defined

In [4]:
# Group all the game logs for the top 300 players into a single DataFrame

player_logs = pd.DataFrame()
for player_id in player_ids:
    # Get player name
    player_name = active_players[active_players['PLAYER_ID'] == player_id]['PLAYER_NAME'].values[0]
    with open('../data/log_file.txt', 'a') as f:
        try:
            f.write(f'Getting logs for player {player_name} ({player_id})\n')
            logs_2022_23 = playergamelogs.PlayerGameLogs(player_id_nullable=player_id, season_nullable='2022-23').get_data_frames()[0]
            logs_2023_24 = playergamelogs.PlayerGameLogs(player_id_nullable=player_id, season_nullable='2023-24').get_data_frames()[0]
            if len(logs_2022_23) == 0:
                f.write(f'No logs for {player_name} ({player_id}) in 2022-23\n')
                player_logs = pd.concat([player_logs, logs_2023_24])
            elif len(logs_2023_24) == 0:
                f.write(f'No logs for {player_name} ({player_id}) in 2023-24\n')
                player_logs = pd.concat([player_logs, logs_2022_23])
            else:
                player_logs = pd.concat([player_logs, logs_2022_23, logs_2023_24])
            f.write(f'Player logs shape: {player_logs.shape}\n')
            f.write('\n')
        except ValueError:
            f.write(f'Error for player {player_name} ({player_id})\n')
            f.write('\n')

In [5]:
len(player_logs['PLAYER_ID'].unique())

288

In [6]:
player_logs.shape

(34619, 69)

In [7]:
# Save the player logs to a CSV
player_logs.to_csv('../data/player_logs.csv', index=False)

In [8]:
player_logs.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC
0,2022-23,203999,Nikola Jokić,Nikola,1610612743,DEN,Denver Nuggets,22201213,2023-04-08T00:00:00,DEN @ UTA,...,17,69,69,57,66,1,30,67,1,26:38
1,2022-23,203999,Nikola Jokić,Nikola,1610612743,DEN,Denver Nuggets,22201183,2023-04-04T00:00:00,DEN @ HOU,...,5,56,59,69,63,1,30,61,1,24:41
2,2022-23,203999,Nikola Jokić,Nikola,1610612743,DEN,Denver Nuggets,22201126,2023-03-27T00:00:00,DEN vs. PHI,...,37,25,32,35,12,1,1,16,1,34:57
3,2022-23,203999,Nikola Jokić,Nikola,1610612743,DEN,Denver Nuggets,22201109,2023-03-25T00:00:00,DEN vs. MIL,...,17,15,13,14,35,1,30,33,1,31:53
4,2022-23,203999,Nikola Jokić,Nikola,1610612743,DEN,Denver Nuggets,22201083,2023-03-22T00:00:00,DEN @ WAS,...,5,25,13,4,20,1,30,16,1,33:36


In [9]:
player_logs = pd.read_csv('../data/player_logs.csv')

In [11]:
player_logs.shape

(34619, 69)

## Training

In [30]:
# Import the dataset
player_logs_df = pd.read_csv('../data/player_logs.csv')
player_logs_df['PLAYER_NAME'] = player_logs_df['PLAYER_NAME'].apply(lambda x: unidecode(x))

In [31]:
# Convert the 'GAME_DATE' column to a datetime object
player_logs_df['GAME_DATE'] = pd.to_datetime(player_logs_df['GAME_DATE'])

# Create a new column "IS_HEAD_TO_HEAD" to indicate if the game was a head-to-head matchup
player_logs_df['IS_HEAD_2_HEAD'] = player_logs_df['GAME_DATE'].diff(-1).dt.days == 1
player_logs_df['IS_HEAD_2_HEAD'] = player_logs_df['IS_HEAD_2_HEAD'].apply(lambda x: 1 if x else 0)

# Ensure the last row is set to 0 as there's no next game to compare
player_logs_df.iloc[-1, player_logs_df.columns.get_loc('IS_HEAD_2_HEAD')] = 0

# Create a new column "IS_HOME_GAME" to indicate if the game was played at home
player_logs_df["IS_HOME_GAME"] = player_logs_df["MATCHUP"].apply(lambda x: 1 if "vs." in x else 0)

# Create a new column "OPPONENT" to indicate the opposing team
player_logs_df["OPPONENT"] = player_logs_df["MATCHUP"].apply(lambda x: x.split(" ")[2])

In [32]:
# Columns for the target output
target_columns = ["MIN", "PTS", "FG_PCT", "FT_PCT", "FG3M", "AST", "REB", "BLK", "STL", "TOV"]

# Columns for the input features
input_columns = ["PLAYER_NAME", "OPPONENT", "IS_HOME_GAME", "IS_HEAD_2_HEAD"]

# Select only the necessary columns from the dataset
data = player_logs_df[input_columns + target_columns].copy()
data.head()

Unnamed: 0,PLAYER_NAME,OPPONENT,IS_HOME_GAME,IS_HEAD_2_HEAD,MIN,PTS,FG_PCT,FT_PCT,FG3M,AST,REB,BLK,STL,TOV
0,Nikola Jokic,UTA,0,0,26.633333,6,0.4,1.0,0,10,10,0,2,4
1,Nikola Jokic,HOU,0,0,24.683333,14,0.538,0.0,0,4,10,3,2,8
2,Nikola Jokic,PHI,1,0,34.956667,25,0.727,0.818,0,12,17,2,0,3
3,Nikola Jokic,MIL,1,0,31.883333,31,0.5,0.909,1,11,6,0,1,3
4,Nikola Jokic,WAS,0,0,33.6,31,0.75,1.0,2,7,12,0,3,2


In [33]:
# Encode 'PLAYER_NAME' and 'TEAM_ABBREVIATION'
label_encoders = {
    "PLAYER_NAME": LabelEncoder(),
    "OPPONENT": LabelEncoder()
}
print(data.head())
data["PLAYER_NAME"] = label_encoders["PLAYER_NAME"].fit_transform(data["PLAYER_NAME"])
data["OPPONENT"] = label_encoders["OPPONENT"].fit_transform(data["OPPONENT"])

# Save the label encoders
joblib.dump(label_encoders, '../models/label_encoders.pkl')
print(data.head())

    PLAYER_NAME OPPONENT  IS_HOME_GAME  IS_HEAD_2_HEAD        MIN  PTS  \
0  Nikola Jokic      UTA             0               0  26.633333    6   
1  Nikola Jokic      HOU             0               0  24.683333   14   
2  Nikola Jokic      PHI             1               0  34.956667   25   
3  Nikola Jokic      MIL             1               0  31.883333   31   
4  Nikola Jokic      WAS             0               0  33.600000   31   

   FG_PCT  FT_PCT  FG3M  AST  REB  BLK  STL  TOV  
0   0.400   1.000     0   10   10    0    2    4  
1   0.538   0.000     0    4   10    3    2    8  
2   0.727   0.818     0   12   17    2    0    3  
3   0.500   0.909     1   11    6    0    1    3  
4   0.750   1.000     2    7   12    0    3    2  
   PLAYER_NAME  OPPONENT  IS_HOME_GAME  IS_HEAD_2_HEAD        MIN  PTS  \
0          223        28             0               0  26.633333    6   
1          223        10             0               0  24.683333   14   
2          223        22   

In [34]:
scaler = StandardScaler()

print(data.head())
# Apply scaling to the input features excluding categorical encoded columns
data[target_columns] = scaler.fit_transform(data[target_columns])
print(data.head())

   PLAYER_NAME  OPPONENT  IS_HOME_GAME  IS_HEAD_2_HEAD        MIN  PTS  \
0          223        28             0               0  26.633333    6   
1          223        10             0               0  24.683333   14   
2          223        22             1               0  34.956667   25   
3          223        16             1               0  31.883333   31   
4          223        29             0               0  33.600000   31   

   FG_PCT  FT_PCT  FG3M  AST  REB  BLK  STL  TOV  
0   0.400   1.000     0   10   10    0    2    4  
1   0.538   0.000     0    4   10    3    2    8  
2   0.727   0.818     0   12   17    2    0    3  
3   0.500   0.909     1   11    6    0    1    3  
4   0.750   1.000     2    7   12    0    3    2  
   PLAYER_NAME  OPPONENT  IS_HOME_GAME  IS_HEAD_2_HEAD       MIN       PTS  \
0          223        28             0               0  0.013180 -0.808177   
1          223        10             0               0 -0.194092  0.058527   
2          223 

In [35]:
# Split data into features (X) and target (y)
X = data.drop(columns=target_columns)
y = data[target_columns]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the training and test sets
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (27695, 4)
X_test shape: (6924, 4)
y_train shape: (27695, 10)
y_test shape: (6924, 10)


In [36]:


# Initialize the XGBoost regressor with a basic configuration
xgb_model = XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)

# Wrap XGBoost in MultiOutputRegressor to handle multiple target variables
multi_xgb_model = MultiOutputRegressor(xgb_model)

# Train the model on the training data
multi_xgb_model.fit(X_train, y_train)

# Save the model
joblib.dump(multi_xgb_model, '../models/nba_player_stats_model.pkl')

# Predict on the test set
y_pred = multi_xgb_model.predict(X_test)

# Calculate Mean Squared Error for each output
mse_per_category = mean_squared_error(y_test, y_pred, multioutput='raw_values')

# Display the MSE for each stat category
mse_dict = dict(zip(target_columns, mse_per_category))
mse_dict

{'MIN': 0.6606037408992004,
 'PTS': 0.6476881942100526,
 'FG_PCT': 0.9626093982734096,
 'FT_PCT': 0.8921970694667479,
 'FG3M': 0.7569417239709983,
 'AST': 0.6247238582169226,
 'REB': 0.6769746361741462,
 'BLK': 0.7797525992810671,
 'STL': 0.9647761698362477,
 'TOV': 0.8222933135603229}

## Testing

In [37]:
# Import the model

model = joblib.load('../models/nba_player_stats_model.pkl')

PLAYER_NAME = "Nikola Jokic"
OPPONENT = "GSW"
IS_HOME_GAME = 1
IS_HEAD_2_HEAD = 1

# Load the label encoders
label_encoders = joblib.load('../models/label_encoders.pkl')

# Obtain the player and opponent label
player_name_label = label_encoders["PLAYER_NAME"].transform([PLAYER_NAME])[0]
player_name_label

223

In [22]:
label_encoders

{'PLAYER_NAME': LabelEncoder(), 'OPPONENT': LabelEncoder()}