In [2]:
import pandas as pd 
import itertools
import deans_factors
%load_ext autoreload
%autoreload 2

### 1. Load data from season

In [3]:
season = pd.read_csv('lakers_season_18_19_absolute.csv')

### 2. Get roster from respective season

In [4]:
# Read every roster entry, then keep only the 'Player' column of the
# rows whose 'Season' is 18_19.
roster = (
    pd.read_csv('../lakers_rosters.csv', sep=';')
    .loc[lambda all_rosters: all_rosters['Season'] == '18_19', 'Player']
)

### 3. Get active roster (max 40 games missed in season) and all the possible lineups

In [5]:
active_roster = deans_factors.get_active_roster(season,roster)

In [6]:
combinations = deans_factors.get_combinations(active_roster)

In [7]:
active_roster

['Lonzo Ball',
 'Kentavious Caldwell-Pope',
 'Tyson Chandler',
 'Josh Hart',
 'Brandon Ingram',
 'LeBron James',
 'Kyle Kuzma',
 'JaVale McGee',
 'Rajon Rondo',
 'Lance Stephenson',
 'Moritz Wagner']

In [8]:
deans_factors_season = deans_factors.deans_factors_season(season,combinations)

In [9]:
deans_factors_season = pd.DataFrame(deans_factors_season)

In [10]:
deans_factors_season.head()

Unnamed: 0,player1,player2,player3,player4,player5,off_fg_perc,def_fg_perc,off_to_perc,def_to_perc,off_rb_perc,def_rb_perc,off_ft_factor,def_ft_factor
0,Lonzo Ball,Kentavious Caldwell-Pope,Tyson Chandler,Josh Hart,Brandon Ingram,0.546599,0.538868,0.147611,0.061883,0.052128,0.389499,0.0,0.196958
1,Lonzo Ball,Kentavious Caldwell-Pope,Tyson Chandler,Josh Hart,LeBron James,0.563372,0.541096,0.149827,0.063978,0.054815,0.43275,0.0,0.194908
2,Lonzo Ball,Kentavious Caldwell-Pope,Tyson Chandler,Josh Hart,Kyle Kuzma,0.546172,0.541761,0.13606,0.062767,0.052341,0.394443,0.0,0.202433
3,Lonzo Ball,Kentavious Caldwell-Pope,Tyson Chandler,Josh Hart,JaVale McGee,0.567592,0.544645,0.145643,0.063525,0.077479,0.423406,0.0,0.198579
4,Lonzo Ball,Kentavious Caldwell-Pope,Tyson Chandler,Josh Hart,Rajon Rondo,0.530504,0.54099,0.170722,0.062853,0.052019,0.388554,0.0,0.199698


In [11]:
import warnings
# Ignore every warning raised from here on in this kernel session.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# model-input warnings from the cells below - consider narrowing it to the
# specific categories that are known to be noise.
warnings.filterwarnings('ignore')

In [12]:
def calculate_probas(deans_factors,tmp_active,model):
    """Return column 1 of ``model.predict_proba`` for every lineup row.

    Parameters
    ----------
    deans_factors : pandas.DataFrame
        One row per lineup. The first five columns are skipped and every
        column from the sixth onwards is handed to the model as a feature.
        (Inside this function the parameter name hides the imported
        ``deans_factors`` module.)
    tmp_active : any
        Not used; kept so existing callers keep working.
    model : object
        Fitted classifier exposing ``predict_proba``.

    Returns
    -------
    list
        One probability per row, in row order: ``predict_proba(...)[row][1]``.
    """
    # Nothing to score; also avoids handing a 0-row batch to the model,
    # which some estimators may reject.
    if len(deans_factors) == 0:
        return []

    # Score all rows with a single predict_proba call instead of one call per
    # row. A plain array is passed so the model receives no column names,
    # exactly as with the previous per-row list input.
    features = deans_factors.iloc[:, 5:].to_numpy()
    return [class_probas[1] for class_probas in model.predict_proba(features)]

In [13]:
def calculate_shapley(deans_factors,active_players):
    """Compute one score per active player from the lineup probabilities.

    For each player the rows of ``deans_factors`` in which the player fills
    any of the ``player1`` .. ``player5`` slots are selected, their ``probas``
    are summed, and the sum is divided by
    (number of selected rows * number of active players).

    NOTE(review): this equals the mean probability of the lineups containing
    the player, divided by the roster size - it is not a marginal-contribution
    Shapley value. Confirm that this is the intended definition.

    Parameters
    ----------
    deans_factors : pandas.DataFrame
        Needs the columns ``player1`` .. ``player5`` and ``probas``.
    active_players : sequence
        Players to score; also determines the key order of the result.

    Returns
    -------
    dict
        ``{player: score}``.

    Raises
    ------
    ZeroDivisionError
        If a player appears in none of the rows.
    """
    slot_columns = ['player1', 'player2', 'player3', 'player4', 'player5']

    def _score(player):
        # True for every lineup row that has the player in any slot.
        in_lineup = deans_factors[slot_columns].eq(player).any(axis=1)
        lineups_with_player = deans_factors[in_lineup]

        lineup_count = len(lineups_with_player.index)
        roster_size = len(active_players)

        weight = 1 / (lineup_count * roster_size)
        return weight * lineups_with_player['probas'].sum()

    return {player: _score(player) for player in active_players}

In [14]:
def to_shap(season,model):
    """Score every active player of ``season`` with ``model``.

    Steps: determine the active roster, enumerate its lineups, compute the
    per-lineup factors, replace missing values with 0, predict a probability
    per lineup with ``calculate_probas`` and aggregate those per player with
    ``calculate_shapley``.

    Reads the notebook-level ``roster`` variable, so that cell has to be
    executed before this function is called.

    Parameters
    ----------
    season : object
        Season data handed to the ``deans_factors`` helpers.
    model : object
        Fitted classifier exposing ``predict_proba``.

    Returns
    -------
    dict
        ``{player: score}`` as returned by ``calculate_shapley``.
    """
    active_players = deans_factors.get_active_roster(season, roster)
    lineups = deans_factors.get_combinations(active_players)

    lineup_factors = pd.DataFrame(
        deans_factors.deans_factors_season(season, lineups)
    ).fillna(0)

    lineup_factors['probas'] = calculate_probas(lineup_factors, active_players, model)

    return calculate_shapley(lineup_factors, active_players)

In [43]:
def calculate_probas_xgb(deans_factors,tmp_active,model):
    """Return ``predict_proba(...)[0][1]`` for each lineup row, one call per row.

    Parameters
    ----------
    deans_factors : pandas.DataFrame
        One row per lineup; the first five entries of each row are skipped
        and the rest is passed to the model as a single-sample batch.
    tmp_active : any
        Not used.
    model : object
        Fitted classifier exposing ``predict_proba``.

    Returns
    -------
    list
        One probability per row, in row order.
    """
    return [
        model.predict_proba([lineup[5:]])[0][1]
        for _, lineup in deans_factors.iterrows()
    ]

In [111]:
def to_shap_xgb(season,model):
    """Score every active player of ``season`` with a model that needs renamed columns.

    Same pipeline as ``to_shap``, except that the factor columns are renamed
    through the notebook-level ``renaming`` mapping and the probabilities are
    predicted for all lineups in one ``predict_proba`` call on the frame
    without the five player columns.

    Reads the notebook-level ``roster`` and ``renaming`` variables, so those
    cells have to be executed before this function is called.

    Parameters
    ----------
    season : object
        Season data handed to the ``deans_factors`` helpers.
    model : object
        Fitted classifier exposing ``predict_proba``.

    Returns
    -------
    dict
        ``{player: score}`` as returned by ``calculate_shapley``.
    """
    player_columns = ['player1', 'player2', 'player3', 'player4', 'player5']

    active_players = deans_factors.get_active_roster(season, roster)
    lineups = deans_factors.get_combinations(active_players)

    lineup_factors = (
        pd.DataFrame(deans_factors.deans_factors_season(season, lineups))
        .fillna(0)
        .rename(renaming, axis='columns')
    )

    # The model only sees the factor columns, not the player names.
    model_input = lineup_factors.drop(columns=player_columns)
    lineup_factors['probas'] = model.predict_proba(model_input)[:, 1]

    return calculate_shapley(lineup_factors, active_players)

In [15]:
# importing different models

In [16]:
import pickle

# NOTE: pickle.load can execute arbitrary code while deserializing - only
# load model files that come from a trusted source.
# The context managers make sure each file handle is closed after loading.
with open('../models/logreg_game_outcome_v2.pkl', 'rb') as model_file:
    model_log_reg = pickle.load(model_file)

with open('../models/dtree_game_outcome_v1.pkl', 'rb') as model_file:
    model_dec_tree = pickle.load(model_file)

In [17]:
# NOTE: pickle.load can execute arbitrary code while deserializing - only
# load model files that come from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('../models/xgbcl_game_outcome_v1.pkl', 'rb') as model_file:
    model_xgbcl = pickle.load(model_file)

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.



In [18]:
shap_values = to_shap(season,model_log_reg)

In [19]:
shap_values

{'Lonzo Ball': 0.00010438606810115442,
 'Kentavious Caldwell-Pope': 0.00012342898301390812,
 'Tyson Chandler': 0.00015066345011733155,
 'Josh Hart': 0.00013032241796579199,
 'Brandon Ingram': 0.0001214937416728379,
 'LeBron James': 0.00017347071903457827,
 'Kyle Kuzma': 0.00012767277847711218,
 'JaVale McGee': 0.00018049136415674845,
 'Rajon Rondo': 7.551936261942508e-05,
 'Lance Stephenson': 0.00010748485897159257,
 'Moritz Wagner': 0.00011552646059233958}

In [20]:
shap_values_dec_tree = to_shap(season,model_dec_tree)

In [21]:
shap_values_dec_tree

{'Lonzo Ball': 0.01086001689594504,
 'Kentavious Caldwell-Pope': 0.014715395050724393,
 'Tyson Chandler': 0.01775306928600342,
 'Josh Hart': 0.015008344948464711,
 'Brandon Ingram': 0.014496803263270332,
 'LeBron James': 0.022142835208703474,
 'Kyle Kuzma': 0.014234760582065973,
 'JaVale McGee': 0.02295762919715016,
 'Rajon Rondo': 0.010525857376156779,
 'Lance Stephenson': 0.014188399757262033,
 'Moritz Wagner': 0.014810357972034623}

In [23]:
import xgboost as xgb

In [117]:
# Maps the factor column names used in this notebook onto the column names
# the xgboost model needs (applied in to_shap_xgb before predicting).
renaming = {
    'off_fg_perc': 'off_eff_fg_perc',
    'def_fg_perc': 'def_eff_fg_perc',
    'off_to_perc': 'off_tov_perc',
    'def_to_perc': 'def_tov_perc',
    'off_rb_perc': 'off_reb_perc',
    'def_rb_perc': 'def_reb_perc',
    'off_ft_factor': 'off_free_throw_factor',
    'def_ft_factor': 'def_free_throw_factor',
}

In [113]:
xgb_shap_values = to_shap_xgb(season,model_xgbcl)

In [115]:
xgb_shap_values = pd.DataFrame(xgb_shap_values,index=[0])

In [116]:
xgb_shap_values.to_csv('../data/xgbcl_shap_lakers_18_19.csv')

In [31]:
log_reg_shap_values = pd.DataFrame(shap_values,index=[0])

In [32]:
log_reg_shap_values.to_csv('../data/log_reg_shap_lakers_18_19.csv')

In [33]:
dec_tree_shap_values = pd.DataFrame(shap_values_dec_tree,index=[0])

In [34]:
dec_tree_shap_values.to_csv('../data/dec_tree_shap_lakers_18_19.csv')

### Result: SHAP Values for the whole season!