# Notebook Imports

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from tqdm import tqdm
from sklearn.model_selection import RandomizedSearchCV

import statistics
from statistics import mode

import joblib

import time

<a id='sp3p_link'></a>
### Second Pick - Phase 3

In [None]:
# Load the reshaped match table from disk.
df_reshaped = pd.read_pickle('./saved_files/data_final.pickle')

# ---------------------------------------------------------------------------
# Features and target for the 1st iteration (categoricals get ordinal-encoded
# in a later cell).
# ---------------------------------------------------------------------------

# Numeric columns to be standardised: for each side, attack damage, received
# damage and kill count for slots 1-5 (30 columns).
sp_3p_numeric = [
    f'{side}_{stat}_{slot}'
    for side in ('left', 'right')
    for stat in ('attack_damage', 'receive_damage', 'kill_count')
    for slot in range(1, 6)
]

# Column order matters: later cells relabel a copy of this frame by position.
# left_postban / right_postban are left out of this iteration's feature set.
# NOTE(review): the feature set contains slot-5 columns for the left side
# (damage, kills, attribute_cd, job_cd) and is_win while the target is
# left_hero_code_5 -- confirm these are known at prediction time.
sp_3p_features = df_reshaped[
    # pick order of every slot, left side first
    [f'{side}_pick_order_{slot}' for side in ('left', 'right') for slot in range(1, 6)]
    # per-slot numeric statistics
    + sp_3p_numeric
    # the four left heroes already picked (the fifth is the target)
    + [f'left_hero_code_{slot}' for slot in range(1, 5)]
    + ['left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2']
    + [f'left_{kind}_{slot}' for kind in ('attribute_cd', 'job_cd') for slot in range(1, 6)]
    + [f'right_{kind}_{slot}' for kind in ('hero_code', 'attribute_cd', 'job_cd') for slot in range(1, 6)]
    + ['first_pick', 'is_win']
]

# Single-column target, kept as a DataFrame (2-D).
sp_3p_target = df_reshaped[['left_hero_code_5']]

# 80/20 shuffled split with a fixed seed.
X_train_sp_3p, X_test_sp_3p, y_train_sp_3p, y_test_sp_3p = train_test_split(
    sp_3p_features,
    sp_3p_target,
    test_size=0.2,
    shuffle=True,
    random_state=890,
)


In [None]:
# Learn the per-column mean/variance from the training rows only.
sp_3p_scaler = StandardScaler().fit(X_train_sp_3p[sp_3p_numeric])
sp_3p_scaler

In [None]:
# Start the transformed frames from the standardised numeric columns.
# The scaler returns plain arrays, so both frames get a fresh 0..n-1 index.
X_train_sp_3p_trans = pd.DataFrame(
    sp_3p_scaler.transform(X_train_sp_3p[sp_3p_numeric]),
    columns=sp_3p_numeric,
)
X_test_sp_3p_trans = pd.DataFrame(
    sp_3p_scaler.transform(X_test_sp_3p[sp_3p_numeric]),
    columns=sp_3p_numeric,
)

In [None]:
# Ordinal-encode the categorical columns; values not seen while fitting are
# mapped to -1 instead of raising.
sp_3p_enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)

# Categorical columns for the 1st iteration (left_postban / right_postban are
# not part of it). Same order as in the feature frame.
sp_3p_categorical = (
    [f'left_hero_code_{slot}' for slot in range(1, 5)]
    + ['left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2']
    + [f'left_{kind}_{slot}' for kind in ('attribute_cd', 'job_cd') for slot in range(1, 6)]
    + [f'right_{kind}_{slot}' for kind in ('hero_code', 'attribute_cd', 'job_cd') for slot in range(1, 6)]
)

# Fit on the training rows, then apply the same mapping to both splits.
sp_3p_enc.fit(X_train_sp_3p[sp_3p_categorical].to_numpy())
X_train_sp_3p_trans[sp_3p_categorical] = sp_3p_enc.transform(X_train_sp_3p[sp_3p_categorical].to_numpy())
X_test_sp_3p_trans[sp_3p_categorical] = sp_3p_enc.transform(X_test_sp_3p[sp_3p_categorical].to_numpy())

In [None]:
# Show the index of BOTH transformed frames. Only a cell's last expression is
# displayed, so the two are returned together as a (train, test) tuple.
X_train_sp_3p_trans.index, X_test_sp_3p_trans.index

RangeIndex(start=0, stop=1979, step=1)

In [None]:
# Give the raw splits a fresh 0..n-1 index (old labels dropped) so their rows
# line up by label with the transformed frames, which already use a RangeIndex.
X_train_sp_3p, X_test_sp_3p = (
    split.reset_index(drop=True) for split in (X_train_sp_3p, X_test_sp_3p)
)

In [None]:
# Show the index of BOTH raw splits. Only a cell's last expression is
# displayed, so the two are returned together as a (train, test) tuple.
X_train_sp_3p.index, X_test_sp_3p.index

RangeIndex(start=0, stop=1979, step=1)

In [None]:
# Per-column NaN counts for the raw and the transformed test frame, side by
# side. Written as a single expression so both are displayed (a bare first
# expression would be evaluated and thrown away).
# Columns that exist in only one of the two frames show NaN in the other's column.
pd.DataFrame({
    'raw': X_test_sp_3p.isna().sum(),
    'transformed': X_test_sp_3p_trans.isna().sum(),
})

left_attack_damage_1    0
left_attack_damage_2    0
left_attack_damage_3    0
left_attack_damage_4    0
left_attack_damage_5    0
                       ..
right_job_cd_1          0
right_job_cd_2          0
right_job_cd_3          0
right_job_cd_4          0
right_job_cd_5          0
Length: 63, dtype: int64

In [None]:
# Columns that are carried over as-is (neither scaled nor encoded).
l0f_sp_3p = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
             'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5',
             'first_pick', 'is_win']

# Copy the columns by ROW POSITION rather than by index label.
# Assigning a DataFrame aligns on the index; if the raw split still carried its
# original row labels, every non-matching row would silently become NaN.
# Relabelling the source with the target's index makes the copy positional, keeps
# each column's dtype, and raises if the row counts ever differ.
X_train_sp_3p_trans[l0f_sp_3p] = X_train_sp_3p[l0f_sp_3p].set_axis(X_train_sp_3p_trans.index, axis=0)
X_test_sp_3p_trans[l0f_sp_3p] = X_test_sp_3p[l0f_sp_3p].set_axis(X_test_sp_3p_trans.index, axis=0)

In [None]:
# Inspect the pass-through columns on the transformed test frame.
X_test_sp_3p_trans.loc[:, l0f_sp_3p]

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,right_pick_order_1,right_pick_order_2,right_pick_order_3,right_pick_order_4,right_pick_order_5,first_pick,is_win
0,1,2,3,4,5,1,2,3,4,5,0,2
1,1,2,3,4,5,1,2,3,4,5,0,2
2,1,2,3,4,5,1,2,3,4,5,1,2
3,1,2,3,4,5,1,2,3,4,5,0,1
4,1,2,3,4,5,1,2,3,4,5,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...
1974,1,2,3,4,5,1,2,3,4,5,0,2
1975,1,2,3,4,5,1,2,3,4,5,0,1
1976,1,2,3,4,5,1,2,3,4,5,0,2
1977,1,2,3,4,5,1,2,3,4,5,1,1


In [None]:
# NaN count per column of the transformed test frame.
X_test_sp_3p_trans.isna().sum(axis=0)

left_attack_damage_1    0
left_attack_damage_2    0
left_attack_damage_3    0
left_attack_damage_4    0
left_attack_damage_5    0
                       ..
right_pick_order_3      0
right_pick_order_4      0
right_pick_order_5      0
first_pick              0
is_win                  0
Length: 75, dtype: int64

In [None]:
# Independent copy of the raw (un-scaled, un-encoded) training features.
new_X_train_sp_3p = X_train_sp_3p.copy(deep=True)

In [None]:
# Show the column names, and their order, of the raw training frame.
X_train_sp_3p.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'right_pick_order_1',
       'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4',
       'right_pick_order_5', 'left_attack_damage_1', 'left_attack_damage_2',
       'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5',
       'left_receive_damage_1', 'left_receive_damage_2',
       'left_receive_damage_3', 'left_receive_damage_4',
       'left_receive_damage_5', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
       'right_attack_damage_1', 'right_attack_damage_2',
       'right_attack_damage_3', 'right_attack_damage_4',
       'right_attack_damage_5', 'right_receive_damage_1',
       'right_receive_damage_2', 'right_receive_damage_3',
       'right_receive_damage_4', 'right_receive_damage_5',
       'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3',
       'r

In [None]:
# Tag every column of the raw copy with an `_orig` suffix so it can sit next to
# the transformed column of the same name after concatenation.
# The labels are derived from the raw training frame instead of a hand-typed
# positional list: they cannot drift out of sync with the feature list, and
# re-running the cell gives the same result (no `_orig_orig`).
new_X_train_sp_3p.columns = [f'{column}_orig' for column in X_train_sp_3p.columns]

In [None]:
# Number of columns in the relabelled copy.
new_X_train_sp_3p.shape[1]

75

In [None]:
# Place the transformed columns and the `_orig` columns side by side, row for
# row, for later use in the function (acts as an encoded <-> original lookup).
dict_sp_3p = pd.concat(
    [X_train_sp_3p_trans, new_X_train_sp_3p.reset_index(drop=True)],
    axis='columns',
).copy()
dict_sp_3p.shape[1]

150

In [None]:
# Spot-check: encoded hero code next to its original value.
dict_sp_3p.loc[:, ['left_hero_code_1', 'left_hero_code_1_orig']]

Unnamed: 0,left_hero_code_1,left_hero_code_1_orig
0,33.0,Sea Phantom Politis
1,25.0,New Moon Luna
2,19.0,Laia
3,13.0,Death Dealer Ray
4,19.0,Laia
...,...,...
7908,18.0,Jenua
7909,39.0,Zio
7910,15.0,Dragon Bride Senya
7911,13.0,Death Dealer Ray


<a id='sp3p_tuning_link'></a>
### Tuning Hyperparameters for the Second Pick - Phase 3

In [None]:
# Search space for RandomizedSearchCV.
# Number of trees in the random forest: 10 evenly spaced values in [200, 500]
n_estimators = [int(x) for x in np.linspace(start=200, stop=500, num=10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in a tree: 11 evenly spaced values in [10, 50],
# plus None (no depth limit)
max_depth = [int(x) for x in np.linspace(10, 50, num=11)] + [None]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Whether each tree is trained on a bootstrap sample
bootstrap = [True, False]

# The random grid. Every key carries the `estimator__` prefix because the
# forest is wrapped in a MultiOutputClassifier; un-prefixed keys would not
# address the wrapped forest.
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
# Display the grid that is actually searched.
random_grid

{'estimator__n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500], 'estimator__max_features': ['sqrt', 'log2'], 'estimator__max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None], 'estimator__min_samples_split': [2, 5, 10], 'estimator__min_samples_leaf': [1, 2, 4], 'estimator__bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

In [None]:
# Use the random grid to search for the best hyperparameters.
# Base model to tune: a random forest wrapped in MultiOutputClassifier (hence the
# `estimator__` prefix on the grid keys). The forest is seeded so that repeated
# searches are reproducible.
rf = MultiOutputClassifier(RandomForestClassifier(random_state=890))
# Randomised search: 100 sampled parameter combinations, each scored with
# 3-fold cross-validation. n_jobs=-1 runs the fits on all available cores;
# random_state fixes which combinations are sampled.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=890,
    n_jobs=-1,
)
# Fit the random search model
rf_random.fit(X_train_sp_3p_trans, y_train_sp_3p)

Fitting 3 folds for each of 100 candidates, totalling 300 fits




[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   5.1s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   5.1s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   6.0s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=300; total time=   5.4s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=300; total tim

In [None]:
# View the best parameters from fitting the random search
# (the sampled combination with the best cross-validated score).
rf_random.best_params_

{'estimator__n_estimators': 333,
 'estimator__min_samples_split': 2,
 'estimator__min_samples_leaf': 1,
 'estimator__max_features': 'sqrt',
 'estimator__max_depth': 42,
 'estimator__bootstrap': False}

In [None]:
# Final model on the ordinal-encoded features: a seeded random forest wrapped
# in MultiOutputClassifier, with the hyperparameters fixed explicitly so this
# cell can run without repeating the search.
classifier_sp_3p = MultiOutputClassifier(
    RandomForestClassifier(
        n_estimators=333,
        max_depth=42,
        max_features='sqrt',
        min_samples_split=2,
        min_samples_leaf=1,
        bootstrap=False,
        random_state=890,
    )
)
classifier_sp_3p.fit(X_train_sp_3p_trans, y_train_sp_3p)

# Predictions for the held-out rows, followed by the score on the same rows.
predictions_sp_3p = classifier_sp_3p.predict(X_test_sp_3p_trans)
classifier_sp_3p.score(X_test_sp_3p_trans, y_test_sp_3p)

0.5785750378979283

In [None]:
# NaN count per column of the transformed training frame.
X_train_sp_3p_trans.isna().sum(axis=0)

left_attack_damage_1    0
left_attack_damage_2    0
left_attack_damage_3    0
left_attack_damage_4    0
left_attack_damage_5    0
                       ..
right_pick_order_3      0
right_pick_order_4      0
right_pick_order_5      0
first_pick              0
is_win                  0
Length: 75, dtype: int64

In [None]:
# Persist the fitted classifier to disk.
# NOTE(review): the fitted scaler (sp_3p_scaler) and encoder (sp_3p_enc) are not
# saved in this cell -- confirm they are persisted elsewhere, since new inputs
# need the same preprocessing before predict().
joblib.dump(
    value=classifier_sp_3p,
    filename='./saved_files/classifier_sp_3p.joblib',
)

['./saved_files/classifier_sp_3p.joblib']