In [11]:
import numpy as np
import pandas as pd
import random
import warnings
import time

from sklearn.model_selection import cross_val_score, RandomizedSearchCV, KFold
from xgboost import XGBClassifier
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier 

In [4]:
# Load the feature-selected dataset produced earlier in the project.
data = pd.read_csv('data_featureselection.csv')

# Seed every RNG the notebook could draw from. Seeding only the stdlib
# `random` module does not affect NumPy, so NumPy is seeded as well.
SEED = 10
random.seed(SEED)
np.random.seed(SEED)

# The first CSV column is used as the target and all remaining columns as
# features (the target is presumably `team_one_win`, judging by the preview
# -- confirm against the CSV header).
y = data.iloc[:, 0]
X = data.iloc[:, 1:]

data.head()

Unnamed: 0,team_one_win,team_one_obj_damage,team_two_dragons,team_two_double_kills,team_one_dragons,team_one_first_inhibitor,team_one_assists,team_two_enemy_monsters_killed,team_two_cs,team_two_kills,...,team_two_barons,team_two_damage,team_one_jungle_avg_xp_diff,team_one_kills,team_one_bot_avg_xp_diff,team_two_obj_damage,team_one_enemy_monsters_killed,team_one_healing,team_one_first_tower,team_one_barons
0,0,0.198563,4,4,2,0,0.588652,0.297521,0.460517,0.740741,...,2,0.459944,3.441667,0.597826,-103.466667,0.582378,0.126126,0.384546,1,0
1,0,0.399344,1,2,4,1,0.099291,0.380165,0.518819,0.37037,...,1,0.358009,-77.2,0.184783,-113.4,0.35175,0.027027,0.198796,1,0
2,1,0.459898,0,1,3,1,0.312057,0.0,0.342435,0.185185,...,0,0.191652,3.0,0.369565,104.4,0.063946,0.081081,0.169561,1,0
3,1,0.129875,0,1,2,1,0.248227,0.033058,0.219188,0.197531,...,0,0.116541,0.0,0.369565,163.2,0.079341,0.036036,0.134021,0,0
4,0,0.214706,3,2,1,0,0.198582,0.198347,0.464945,0.259259,...,1,0.343545,-63.366667,0.152174,-124.266667,0.609278,0.081081,0.26182,0,0


In [12]:
# Suppress every Python warning from here on so the report below stays readable.
# NOTE(review): this also hides any convergence warnings the MLP or logistic
# regression might emit -- consider narrowing the filter.
warnings.simplefilter('ignore')

# --- Candidate base models -------------------------------------------------
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(n_estimators=100, random_state=1, n_jobs=-1)
clf3 = GaussianNB()
clf4 = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.5, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.3, max_delta_step=0, max_depth=3,
              min_child_weight=2,monotone_constraints='()',
              n_estimators=1600, n_jobs=-1, nthread=-1, num_parallel_tree=1,
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=0.9,  tree_method='exact', validate_parameters=1,
              verbosity=None)
clf5 = SVC(kernel='linear', random_state=1)
# `(4)` is just the int 4, not a tuple; spell the single 4-unit hidden layer
# as a real one-element tuple.
clf6 = MLPClassifier(hidden_layer_sizes=(4,), activation="relu", random_state=1)

lr = LogisticRegression()

# --- 5-fold cross-validated accuracy for each model ------------------------
print('5-fold cross validation:\n')
# Each estimator is paired with its label explicitly so the two can never get
# out of step (zip over two parallel lists truncates silently on a mismatch).
for clf, label in [(clf1, 'KNN'),
                   (clf2, 'Random Forest'),
                   (clf3, 'Naive Bayes'),
                   (clf4, 'XGBoost'),
                   (clf5, 'SVM'),
                   (clf6, 'MLP'),
                   (lr, 'Logistic')]:
    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments during the long-running fits.
    start_time = time.perf_counter()
    scores = model_selection.cross_val_score(clf, X, y,
                                              cv=5, scoring='accuracy')
    print("Accuracy: %0.4f (+/- %0.4f) [%s] [time execution: %0.4f]"
          % (scores.mean(), scores.std(), label,
             time.perf_counter() - start_time))

5-fold cross validation:

Accuracy: 0.7953 (+/- 0.0104) [KNN] [time execution: 2.9451]
Accuracy: 0.9680 (+/- 0.0021) [Random Forest] [time execution: 15.6440]
Accuracy: 0.9484 (+/- 0.0025) [Naive Bayes] [time execution: 0.1287]
Accuracy: 0.9721 (+/- 0.0024) [XGBoost] [time execution: 112.6887]
Accuracy: 0.9731 (+/- 0.0035) [SVM] [time execution: 1063.4048]
Accuracy: 0.9511 (+/- 0.0059) [MLP] [time execution: 700.6825]
Accuracy: 0.9438 (+/- 0.0056) [Logistic] [time execution: 1.0919]


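We found that SVM (0.9731), XGBoost (0.9721) and Random Forest (0.9680) achieve the best cross-validated accuracy, while KNN (0.7953) performs clearly worse than every other model.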

In [9]:
# Stacking: the predictions of the base classifiers are used as the input
# features of a meta-classifier, which makes the final prediction.
# XGBoost (clf4) is the meta-classifier in all three ensembles below.
# NOTE(review): per the mlxtend docs, the plain StackingClassifier fits the
# meta-classifier on predictions the base models make for their own training
# data, which may overfit; StackingCVClassifier may be preferable -- confirm.

# Random Forest + XGBoost + SVM + MLP
sclf = StackingClassifier(
    classifiers=[clf2, clf4, clf5, clf6],
    meta_classifier=clf4,
)

# every individual model
sclf_2 = StackingClassifier(
    classifiers=[clf1, clf2, clf3, clf4, clf5, clf6, lr],
    meta_classifier=clf4,
)

# the three best individual models: Random Forest + XGBoost + SVM
sclf_3 = StackingClassifier(
    classifiers=[clf2, clf4, clf5],
    meta_classifier=clf4,
)

print('5-fold cross validation:\n')
for clf, label in (
    (sclf, 'StackingClassifier'),
    (sclf_2, 'StackingClassifier_2'),
    (sclf_3, 'StackingClassifier_3'),
):
    scores = model_selection.cross_val_score(
        clf, X, y, scoring='accuracy', cv=5
    )
    mean_acc, std_acc = scores.mean(), scores.std()
    print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (mean_acc, std_acc, label))

5-fold cross validation:

Accuracy: 0.9698 (+/- 0.0032) [StackingClassifier]
Accuracy: 0.9706 (+/- 0.0023) [StackingClassifier_2]
Accuracy: 0.9739 (+/- 0.0026) [StackingClassifier_3]


In [None]:
# try others
