In [1]:
import preprocessing_lib_rodrixx as prep
import postprocessing_lib_rodrixx as post

import pandas as pd
import numpy as np

import os

from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings('ignore')

import mlflow

In [2]:
# Folder holding the pickled stats frames, resolved against the current working directory,
# so the notebook must be launched from the directory that contains 'dataframes'.
dataframes_path = os.path.join(os.getcwd(), 'dataframes')

In [3]:
# Load the two pickled stats frames: the 2022 season (scored further down) and,
# going by the file name, the 1980-2021 seasons.
# NOTE(review): unpickling can execute arbitrary code; only load these files if they
# come from a trusted source.
df_2022 = pd.read_pickle(os.path.join(dataframes_path, 'stats_2022.pkl'))
df_total = pd.read_pickle(os.path.join(dataframes_path, 'stats_1980_to_2021.pkl'))

In [4]:
# Mean number of players per season with a positive MVP vote share (1980-2021 frame)
(
    df_total
    .loc[df_total['Share'] > 0]
    .groupby('Season')
    .size()
    .mean()
)

16.666666666666668

In [5]:
# Sum of Share per season, averaged over seasons; rows need a positive Share.
# NOTE(review): season 1980 is excluded here; the reason is not shown in this notebook.
(
    df_total
    .loc[lambda frame: frame['Share'].gt(0) & frame['Season'].ne(1980)]
    .groupby('Season')['Share']
    .sum()
    .mean()
)

2.60170731707317

In [6]:
# Per-season mean of the MaxVotes column for seasons after 2017,
# restricted to rows with a positive Share.
(
    df_total
    .loc[lambda frame: frame['Share'].gt(0) & frame['Season'].gt(2017)]
    .groupby('Season')['MaxVotes']
    .mean()
)

Season
2018    1010
2019    1010
2020    1010
2021    1010
Name: MaxVotes, dtype: int32

In [7]:
# Columns removed by the DropColumns step of the cleaning pipeline
cols_to_drop = [
    'Rk',
    'GT',
    'FG_tot',
    '3PA_tot',
    '2PA_tot',
    'FGA_rank_tot',
    'Tm',
    'Pos',
]

# Columns handed to the OutlierFilter step
cols_to_filter = [
    'PER',
    'WS/48',
    'BPM',
    'USG%',
]

# Categorical columns; note that both also appear in cols_to_drop
cols_to_ohe = [
    'Pos',
    'Tm',
]

In [8]:
# Cleaning pipeline that reshapes a raw stats frame into the layout the model expects.
# Step names must stay stable: a later cell looks a step up by name
# (pipe_clean['DropPlayers']).
pipe_clean = Pipeline([
    ('DropPlayersMultiTeams', prep.DropPlayersMultiTeams()),
    ('OutlierFilter', prep.OutlierFilter(col_to_filter = cols_to_filter, q = 0.0005)),
    ('SetIndex', prep.SetIndex()),
    ('DropColumns', prep.DropColumns(cols_to_drop)),
    ('DropPlayers', prep.DropPlayers()),
])

In [9]:
# Fit the cleaning pipeline on the 2022 frame and transform it in one pass.
# NOTE(review): this rebinds df_2022 to the cleaned frame, so re-running the cell feeds
# already-cleaned data back into the pipeline; reload the pickle before re-running.
df_2022 = pipe_clean.fit_transform(df_2022)

In [10]:
# MLflow model URI that pins one specific logged run by its ID.
# NOTE(review): presumably resolved against the active MLflow tracking location;
# confirm that run is reachable from this environment.
logged_model = 'runs:/713eebe2475b41048ab8b75030057f58/model'

# Load the logged model through MLflow's generic pyfunc interface.
model = mlflow.pyfunc.load_model(logged_model)

In [11]:
# Score the cleaned 2022 frame with the loaded model.
prediction = model.predict(df_2022)


In [12]:
# Wrap the raw predictions in a Series aligned to df_2022's index, then post-process it
# together with the players_list_ attribute of the fitted DropPlayers step.
# NOTE(review): num_contenders = 13 is hard-coded; its rationale is not shown here
# (the historical average computed earlier in this notebook is about 16.7). Confirm.
df_results = post.get_processed_prediction(
    pd.Series(prediction, name = 'PredShare', index = df_2022.index),
    pipe_clean['DropPlayers'].players_list_,
    num_contenders = 13,
)

In [13]:
# Display up to the first 20 rows of the processed results.
df_results.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Player,PredShare,PredShare_Adj,PredVotes,PredRank
Rk,Season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
290,2022,Nikola Jokić,0.558789,0.569,575,1
12,2022,Giannis Antetokounmpo,0.371695,0.378,382,2
162,2022,Joel Embiid,0.355384,0.361,365,3
141,2022,Luka Dončić,0.351305,0.357,361,4
602,2022,Trae Young,0.181908,0.185,187,5
390,2022,Ja Morant,0.134281,0.137,138,6
526,2022,Jayson Tatum,0.131112,0.134,135,7
274,2022,LeBron James,0.107958,0.11,111,8
195,2022,Rudy Gobert,0.091816,0.093,94,9
59,2022,Devin Booker,0.082172,0.083,84,10
