In [1]:
from datetime import datetime, date
import os
import re
import pickle
import random

import pandas as pd
from sklearn.pipeline import Pipeline
import mlflow

import utils.postprocessing_lib_rodrixx as post
import utils.preprocessing_lib_rodrixx as prep

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Folder holding the pickled dataframes, resolved relative to the current working directory.
dfs_path = os.path.join(os.getcwd(), 'dataframes')
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df = pd.read_pickle(os.path.join(dfs_path, 'stats_1982_to_2022.pkl'))
# Keep only the rows whose 'Season' equals 2022; this subset is what gets preprocessed and scored below.
df_2022 = df[df['Season'] == 2022]

In [3]:
# MLflow model URIs in the `runs:/<run_id>/model` form; they are passed to mlflow.pyfunc.load_model below.
# NOTE(review): presumably rf = random forest, xgb = XGBoost, ens = ensemble — confirm against the MLflow runs.
path_rf = 'runs:/ca376189801e48cd905ced1f13f73600/model'
path_xgb = 'runs:/f217bfcb4a094f648e661a3400747eaa/model'
path_ens = 'runs:/ab01e06711554c1d8e7695357f2d9714/model'

In [4]:
# Load each logged model through MLflow's generic pyfunc interface.
# NOTE(review): only model_rf is used in the cells visible here; model_xgb and model_ens may be used elsewhere.
model_rf = mlflow.pyfunc.load_model(path_rf)
model_xgb = mlflow.pyfunc.load_model(path_xgb)
model_ens =  mlflow.pyfunc.load_model(path_ens)

In [5]:
# Column handed to the prep.OHE step of the preprocessing pipeline.
col_to_ohe = 'Pos'

# Every column of the 2022 frame whose name contains the '_tot' marker.
cols_tot = list(filter(lambda name: '_tot' in name, df_2022.columns))

# Columns removed by the DropColumns step: a fixed list followed by all '_tot' columns.
cols_to_drop = ['Rk', 'G', 'GS', 'GT', 'Tm', 'Votes', 'MaxVotes', 'Share', *cols_tot]

In [6]:
# Preprocessing pipeline applied to the season data before prediction; steps run in the listed order.
# The transformers come from the project's preprocessing module, so their exact behaviour is not visible here.
# The step name 'DropPlayers' matters: it is looked up later (pipe_prep['DropPlayers']) to read `players_list`.
pipe_prep = Pipeline(steps = [
    ('DropPlayersMultiTeams', prep.DropPlayersMultiTeams()),
    ('SetIndex', prep.SetIndex()),
    ('OHE', prep.OHE(col_to_ohe)),
    ('DropColumns', prep.DropColumns(cols_to_drop)),
    ('DropPlayers', prep.DropPlayers()),
])

In [7]:
# Fit the preprocessing pipeline on the 2022 rows and transform them in the same call.
# NOTE(review): the pipeline is fitted on the frame being scored — assumes the steps learn nothing
# that must match the training data; confirm against the training notebook.
pre_df = pipe_prep.fit_transform(df_2022)

In [8]:
# Score the preprocessed 2022 frame with the rf model.
# NOTE(review): presumably one predicted vote share per row of pre_df — confirm the output shape.
prediction_rf = model_rf.predict(pre_df)

In [9]:
# Wrap the raw predictions in a Series that carries pre_df's index, so they can be aligned with player names.
prediction_rf_series = pd.Series(prediction_rf, index = pre_df.index, name = 'PredShare_rf')

In [10]:
# Column-wise concat (aligned on index) of the `players_list` kept by the fitted 'DropPlayers' step
# and the predictions, giving one row per player with 'Player' and 'PredShare_rf' columns.
prediction_rf_df = pd.concat([pipe_prep['DropPlayers'].players_list, prediction_rf_series], axis = 1)

In [11]:
# Rank players from highest to lowest predicted share.
prediction_rf_df = prediction_rf_df.sort_values(by = 'PredShare_rf', ascending = False)

In [26]:
def voting_simulator(shares_df, num_voters = 101, num_contenders = 30, points = (10, 7, 5, 3, 1), share_col = 'PredShare_rf', seed = None):
    """Simulate ballots in which each voter ranks distinct players, drawn at random
    with probability proportional to their predicted share.

    Parameters
    ----------
    shares_df : pd.DataFrame
        Must contain a 'Player' column and the `share_col` column.
        NOTE: as in the original implementation, this frame is modified in place:
        it is sorted by `share_col` (descending) and gains/overwrites the column
        f'{share_col}_adj', which holds the share for the top `num_contenders`
        rows and 0 for everyone else. Later cells read that column.
    num_voters : int
        Number of simulated ballots.
    num_contenders : int
        Only the `num_contenders` highest-share rows can receive votes.
    points : sequence of int
        Points awarded for each ballot slot, in the order the slots are filled.
    share_col : str
        Name of the column holding the predicted share used as sampling weight.
    seed : int or None
        When given, a private `random.Random(seed)` is used so the result is
        reproducible; when None the module-level `random` state is used, so an
        external `random.seed(...)` still applies.

    Returns
    -------
    pd.DataFrame
        One row per (voter, ballot slot) with columns 'Voter', 'Player', 'Points'.

    Raises
    ------
    ValueError
        If there are voters but fewer distinct positively-weighted player names
        than ballot slots, so a full ballot cannot be filled.
    """
    points = tuple(points)
    adj_col = f'{share_col}_adj'

    # Kept side effect: callers rely on the input frame being sorted and annotated.
    shares_df.sort_values(share_col, ascending = False, inplace = True)

    # Build the adjusted weights from the *argument* (not a notebook global) and by
    # column name rather than hard-coded column positions. Negative predictions are
    # clipped because sampling weights must be non-negative.
    adjusted = shares_df[share_col].astype(float).clip(lower = 0).to_numpy(copy = True)
    adjusted[num_contenders:] = 0.0
    shares_df[adj_col] = adjusted

    # Zero-weight (and NaN-weight) rows can never be drawn, so drop them up front
    # instead of copying the whole frame for every voter.
    contenders = [
        (name, weight)
        for name, weight in zip(shares_df['Player'].to_list(), adjusted.tolist())
        if weight > 0
    ]
    names = [name for name, _ in contenders]
    base_weights = [weight for _, weight in contenders]

    if num_voters > 0 and len(set(names)) < len(points):
        raise ValueError(
            f'Need at least {len(points)} distinct players with a positive share '
            f'to fill a ballot, got {len(set(names))}'
        )

    rng = random.Random(seed) if seed is not None else random

    votes_list = []
    for voter in range(num_voters):
        weights = list(base_weights)
        for score in points:
            # random.choices only uses relative weights, so no rescaling is needed.
            player = rng.choices(names, weights = weights)[0]
            votes_list.append([voter, player, score])
            # A voter cannot list the same name twice: zero out every row with that name.
            weights = [0.0 if name == player else weight for name, weight in zip(names, weights)]

    return pd.DataFrame(votes_list, columns = ['Voter', 'Player', 'Points'])


In [27]:
# Seed the module-level RNG that the simulation draws from (via random.choices),
# so the simulated ballots are reproducible after a kernel restart.
random.seed(42)
# NOTE: voting_simulator also sorts prediction_rf_df in place and adds the 'PredShare_rf_adj' column to it.
votes_df = voting_simulator(prediction_rf_df)

In [22]:
# Total simulated points per player, highest first.
(
    votes_df
    .groupby('Player')['Points']
    .sum()
    .sort_values(ascending = False)
)

Player
Nikola Jokić             332
Giannis Antetokounmpo    300
Joel Embiid              255
Ja Morant                231
Luka Dončić              208
Rudy Gobert              185
LeBron James             179
Kevin Durant             118
Chris Paul               103
Devin Booker              89
Jayson Tatum              73
Karl-Anthony Towns        66
Trae Young                60
Robert Williams           51
Joe Johnson               36
Kyrie Irving              32
Stephen Curry             32
Anthony Davis             30
Brandon Clarke            29
Jimmy Butler              28
James Harden              27
JaVale McGee              23
Donovan Mitchell          23
Pascal Siakam             22
Dejounte Murray           20
Jared Harper              18
Jordan Bell               17
Deandre Ayton             15
DeMar DeRozan             14
Jaden Springer            10
Name: Points, dtype: int64

In [25]:
# Show the first 30 rows of the frame, which is sorted by 'PredShare_rf' in descending order.
# The 'PredShare_rf_adj' column in the output only exists once voting_simulator has been run on this frame.
prediction_rf_df.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Player,PredShare_rf,PredShare_rf_adj
Rk,Season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
290,2022,Nikola Jokić,0.457057,0.457057
12,2022,Giannis Antetokounmpo,0.317009,0.317009
162,2022,Joel Embiid,0.287215,0.287215
141,2022,Luka Dončić,0.256457,0.256457
390,2022,Ja Morant,0.211957,0.211957
274,2022,LeBron James,0.179085,0.179085
195,2022,Rudy Gobert,0.170852,0.170852
154,2022,Kevin Durant,0.142675,0.142675
602,2022,Trae Young,0.11069,0.11069
526,2022,Jayson Tatum,0.093665,0.093665


In [15]:
# Scratch check: draw a single player with probability proportional to 'PredShare_rf_adj'.
# Depends on hidden state — the 'PredShare_rf_adj' column is added by voting_simulator, so that
# function must have been called on prediction_rf_df before this cell runs.
random.choices(prediction_rf_df['Player'].to_list(), weights = prediction_rf_df['PredShare_rf_adj'].to_list())

['LeBron James']