In [1]:
# Mount Google Drive at /content/drive so files stored there are reachable
# through the Colab filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
# Make the project folder on the mounted Drive the working directory.
# NOTE(review): presumably this is also what makes `custom_modules` importable
# in the import cell below — confirm.
project_folder = '/content/drive/MyDrive/nba-project'
os.chdir(project_folder)
# Folder holding the pickled models, resolved relative to the working directory.
models_folder = os.path.join(os.getcwd(), 'ml_models_v2')
# Echo the working directory as the cell output.
os.getcwd()

'/content/drive/MyDrive/nba-project'

In [3]:
!pip install cloudscraper



In [4]:
from datetime import datetime, date
import re
import pickle

import pandas as pd
from sklearn.pipeline import Pipeline
from sqlalchemy import create_engine

import custom_modules.basketball_reference_rodrixx as brr
import custom_modules.postprocessing_lib_rodrixx as post
import custom_modules.preprocessing_lib_rodrixx as prep

In [5]:
# Run configuration.
# Season whose player stats are fetched by the getter call below.
season = 2026
# Vote-count scale for the MVP results
# (assumed to be the `max_votes` given to post-processing — TODO confirm).
mvp_max_votes = 1000

In [6]:
# Fetch the player statistics for `season` through the project's
# Basketball Reference getter. Flag meanings are assumed from their names
# (no MVP data, include advanced stats and ranks) — see the module to confirm.
getter = brr.BasketballReferenceGetter()
raw_df = getter.extract_player_stats_multiple(
    season,
    mvp=False,
    advanced=True,
    ranks=True,
)

In [7]:
# Columns whose name contains '_tot' or '_rank' are all dropped.
cols_tot_rank = [
    name
    for name in raw_df.columns
    if any(tag in name for tag in ('_tot', '_rank'))
]

# Explicit list of columns that are removed before prediction.
cols_to_drop = [
    'G', 'GS', 'GT', 'Tm', 'Pos', 'Age',
    'FGA_pg', 'FG%', '3P_pg', '3PA_pg', '3P%', '2PA_pg', '2P%', 'eFG%', 'FT%',
    'ORB_pg', 'DRB_pg', 'PF_pg',
    'TS%', '3PAr', 'FTr',
    'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
    'OBPM', 'DBPM',
]
cols_to_drop.extend(cols_tot_rank)

# 'Trp-Dbl' is not always present in the scraped frame; drop it only if it is.
if 'Trp-Dbl' in raw_df.columns:
    cols_to_drop.append('Trp-Dbl')

In [8]:
# Preprocessing chain built from the project's transformers; each step is
# named after the transformer it wraps. The 'DropColumns' step receives the
# column list assembled in the previous cell.
pipe_prep = Pipeline(
    steps=[
        ('DropPlayersMultiTeams', prep.DropPlayersMultiTeams()),
        ('SetIndex', prep.SetIndex()),
        ('DropColumns', prep.DropColumns(cols_to_drop)),
        ('DropPlayers', prep.DropPlayers()),
    ]
)

# Fit and apply the chain to the scraped stats in one go.
pre_df = pipe_prep.fit_transform(raw_df)

In [9]:
# Show the columns that survive preprocessing, i.e. what is passed to
# `model.predict` in the next cell.
pre_df.columns

Index(['MP_pg', 'FG_pg', '2P_pg', 'FT_pg', 'FTA_pg', 'TRB_pg', 'AST_pg',
       'STL_pg', 'BLK_pg', 'TOV_pg', 'PTS_pg', '%GS', 'PER', 'USG%', 'OWS',
       'DWS', 'WS', 'WS/48', 'BPM', 'VORP', '%W', '%G'],
      dtype='object')

In [10]:
# Run every pickled model found in `models_folder` against the preprocessed
# frame and collect one prediction Series per model, named
# 'PredShare_<model type>' where <model type> comes from the file name
# 'model_<model type>.pkl'.

# Raw string: '\.' in a normal string literal is an invalid escape sequence
# and triggers a SyntaxWarning/DeprecationWarning.
model_name_pattern = re.compile(r'^model_(.+)\.pkl$')

# os.listdir returns entries in arbitrary order; sort so the order of the
# prediction columns is reproducible between runs.
models = sorted(os.listdir(models_folder))

predictions_list = []

for file in models:
    name_match = model_name_pattern.match(file)
    if name_match is None:
        # Not a model file (stray file or sub-folder): skip it instead of
        # unpickling it and then failing on the name lookup.
        continue
    model_type = name_match.group(1)

    with open(os.path.join(models_folder, file), 'rb') as obj:
        # NOTE(security): unpickling executes arbitrary code; only load model
        # files from a trusted location.
        model = pickle.load(obj)

    prediction = model.predict(pre_df)
    prediction_series = pd.Series(prediction, index=pre_df.index, name=f'PredShare_{model_type}')
    predictions_list.append(prediction_series)

# Fail here with a clear message rather than later inside pd.concat.
if not predictions_list:
    raise FileNotFoundError(f"No 'model_<type>.pkl' files found in {models_folder}")

  model_type = re.match('^model_(.+)\.pkl$', os.path.basename(obj.name)).group(1)


In [11]:
# One column per model, aligned on the player index shared with `pre_df`.
prediction_df = pd.concat(predictions_list, axis='columns')

# '%G' column of the preprocessed stats, handed to post-processing below.
games_played_series = pre_df['%G']

In [12]:
# Post-process the raw model predictions with the project helper.
# `max_votes` now comes from the `mvp_max_votes` constant in the config cell
# instead of a duplicated literal, so the value is changed in one place only.
post_df = post.get_processed_prediction(
    prediction_df,
    games_played_series,
    num_contenders=15,
    max_votes=mvp_max_votes,
)

# Stamp every row with the run date (a `date`, despite the column name).
post_df['Datetime'] = date.today()

In [13]:
# Assemble the final table side by side on the shared index:
# post-processed predictions, the model input features, and the columns that
# the 'DropColumns' step set aside (exposed as its `drop_df` attribute).
final_df = pd.concat([post_df, pre_df], axis=1)
final_df = pd.concat([final_df, pipe_prep['DropColumns'].drop_df], axis=1)

# NOTE(review): a `reset_index().drop(columns=['Season']).reset_index(drop=True)`
# chain used to sit here, but its result was never assigned, so it only built
# and discarded a copy of the frame. It is removed to keep `final_df` exactly
# as later cells already receive it. If flattening the index and dropping
# 'Season' is really wanted, assign the result back to `final_df` and check
# the consumers of `final_df`.

# Normalise column labels with the project formatter; build a concrete list
# rather than handing pandas a lazy `map` object.
final_df.columns = [post.format_column_name(col) for col in final_df.columns]

In [14]:
# Sanity check: list every column label that occurs more than once, each
# reported once and in order of appearance. An empty list means the labels
# are unique. Uses pandas' own duplicate detection instead of calling
# `list.count` for each label.
final_df.columns[final_df.columns.duplicated()].unique().to_list()

[]