In [32]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
import os
project_folder = '/content/drive/MyDrive/nba-project'
os.chdir(project_folder)
models_folder = os.path.join(os.getcwd(), 'ml_models_v2')
os.getcwd()

'/content/drive/MyDrive/nba-project'

In [34]:
from datetime import datetime, date
import re
import pickle

import pandas as pd
from sklearn.pipeline import Pipeline
from sqlalchemy import create_engine

import custom_modules.basketball_reference_rodrixx as brr
import custom_modules.postprocessing_lib_rodrixx as post
import custom_modules.preprocessing_lib_rodrixx as prep

In [35]:
season = 2025

In [36]:
getter = brr.BasketballReferenceGetter()
raw_df = getter.extract_player_stats_multiple(season, mvp = False, advanced = True, ranks = True)

In [37]:
cols_tot_rank = [col for col in raw_df.columns if '_tot' in col or '_rank' in col]
cols_to_drop = ['G', 'GS', 'GT', 'Tm', 'FG_tot', '3PA_tot', '2PA_tot', 'FGA_rank_tot', 'Tm', 'Pos', 'Age', 'G', 'GS', 'FGA_pg', 'FG%', '3P_pg', '3PA_pg', '3P%', '2PA_pg', '2P%', 'eFG%', 'FT%', 'ORB_pg', 'DRB_pg', 'PF_pg', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'OBPM', 'DBPM']
cols_to_drop += cols_tot_rank
cols_to_drop.append('Trp-Dbl') if 'Trp-Dbl' in raw_df.columns else None

In [38]:
pipe_prep = Pipeline(steps = [
    ('DropPlayersMultiTeams', prep.DropPlayersMultiTeams()),
    ('SetIndex', prep.SetIndex()),
    ('DropColumns', prep.DropColumns(cols_to_drop)),
    ('DropPlayers', prep.DropPlayers()),
])

pre_df = pipe_prep.fit_transform(raw_df)

In [50]:
pre_df.columns

Index(['MP_pg', 'FG_pg', '2P_pg', 'FT_pg', 'FTA_pg', 'TRB_pg', 'AST_pg',
       'STL_pg', 'BLK_pg', 'TOV_pg', 'PTS_pg', '%GS', 'Season', 'PER', 'USG%',
       'OWS', 'DWS', 'WS', 'WS/48', 'BPM', 'VORP', '%W', '%G'],
      dtype='object')

In [39]:
models = os.listdir(models_folder)

predictions_list = []

for file in models:
    with open(os.path.join(models_folder, file), 'rb') as obj:
        file_content = obj.read()
        model = pickle.loads(file_content)
        prediction = model.predict(pre_df)
        model_type = re.match('^model_(.+)\.pkl$', os.path.basename(obj.name)).group(1)
        prediction_series = pd.Series(prediction, index = pre_df.index, name = f'PredShare_{model_type}')
        predictions_list.append(prediction_series)

In [40]:
prediction_df = pd.concat(predictions_list, axis = 1)
games_played_series = pre_df['%G']

In [41]:
post_df = post.get_processed_prediction(prediction_df, games_played_series, num_contenders = 15, max_votes = 1000)
post_df['Datetime'] = date.today()

 0.10292169 0.07448129 0.07342443 0.07025608 0.05225153 0.04810936
 0.04022973 0.03686342 0.03463839]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_results.iloc[:num_contenders, col_index_adj] = df_results.iloc[:num_contenders, col_index_noadj]
 0.05855548 0.05632604 0.05010479 0.04339134 0.04012795 0.03930902
 0.03812753 0.03288722 0.03239442]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_results.iloc[:num_contenders, col_index_adj] = df_results.iloc[:num_contenders, col_index_noadj]
 0.13869916 0.12431848 0.12389711 0.12354662 0.11649648 0.11369224
 0.10732012 0.10268358 0.09319097]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_results.iloc[:num_contenders, col_index_adj] = df_results.iloc[:num_contenders, col_index_noadj]


In [42]:
final_df = pd.concat([post_df, pre_df], axis = 1)
final_df = pd.concat([final_df, pipe_prep['DropColumns'].drop_df], axis = 1)
final_df.reset_index().drop(columns=['Season']).reset_index(drop=True)
final_df.columns = map(post.format_column_name, final_df.columns)

ValueError: cannot insert Season, already exists

In [48]:
list(final_df.columns)

['%G',
 'PredShare_xgbv2',
 'PredShare_lgbmv2',
 'PredShare_rfv2',
 'PredShare_xgbv2_Adj',
 'PredVotes_xgbv2',
 'PredRank_xgbv2',
 'PredShare_lgbmv2_Adj',
 'PredVotes_lgbmv2',
 'PredRank_lgbmv2',
 'PredShare_rfv2_Adj',
 'PredVotes_rfv2',
 'PredRank_rfv2',
 'Datetime',
 'MP_pg',
 'FG_pg',
 '2P_pg',
 'FT_pg',
 'FTA_pg',
 'TRB_pg',
 'AST_pg',
 'STL_pg',
 'BLK_pg',
 'TOV_pg',
 'PTS_pg',
 '%GS',
 'Season',
 'PER',
 'USG%',
 'OWS',
 'DWS',
 'WS',
 'WS/48',
 'BPM',
 'VORP',
 '%W',
 '%G',
 'G',
 'GS',
 'GT',
 'Tm',
 'FG_tot',
 '3PA_tot',
 '2PA_tot',
 'FGA_rank_tot',
 'Tm',
 'Pos',
 'Age',
 'G',
 'GS',
 'FGA_pg',
 'FG%',
 '3P_pg',
 '3PA_pg',
 '3P%',
 '2PA_pg',
 '2P%',
 'eFG%',
 'FT%',
 'ORB_pg',
 'DRB_pg',
 'PF_pg',
 'TS%',
 '3PAr',
 'FTr',
 'ORB%',
 'DRB%',
 'TRB%',
 'AST%',
 'STL%',
 'BLK%',
 'TOV%',
 'OBPM',
 'DBPM',
 'MP_rank_pg',
 'FG_rank_pg',
 'FGA_rank_pg',
 'FG%_rank',
 '3P_rank_pg',
 '3PA_rank_pg',
 '3P%_rank',
 '2P_rank_pg',
 '2PA_rank_pg',
 '2P%_rank',
 'eFG%_rank',
 'FT_rank_pg',
 