In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
import pandas as pd
import numpy as np

# Train Test Split and Data Normalization

The code below is ran once each iteration of testing to ensure randomness but not among tests for each hyperparameter only once all 3 tests are finished

In [5]:
data = pd.read_csv('leaguedata.txt')
print(data)
for column in data.columns:
    data[column] = data[column] / data[column].abs().max()

#print(data)
data = data.drop("gameId", axis=1)
X = data.drop("blueWins", axis=1)
y = data.pop("blueWins")

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

          gameId  blueWins  blueWardsPlaced  blueWardsDestroyed  \
0     4519157822         0               28                   2   
1     4523371949         0               12                   1   
2     4521474530         0               15                   0   
3     4524384067         0               43                   1   
4     4436033771         0               75                   4   
...          ...       ...              ...                 ...   
9874  4527873286         1               17                   2   
9875  4527797466         1               54                   0   
9876  4527713716         0               23                   1   
9877  4527628313         0               14                   4   
9878  4523772935         1               18                   0   

      blueFirstBlood  blueKills  blueDeaths  blueAssists  blueEliteMonsters  \
0                  1          9           6           11                  0   
1                  0          5      

# Experimentation with different Learning Rates

I run through iterations of learning rate ranging from 0.2 to 2 with increments of 0.2 to the learning rate

In [22]:
j = 0.2
while(j <= 2):
    clf = AdaBoostClassifier(n_estimators=50, random_state=0,learning_rate=j)
    clf.fit(X_train,y_train)
    print("accuracy with learning rate of ",j,": ",clf.score(X_test, y_test))
    j+=0.2

accuracy with learning rate of  0.2 :  0.7307692307692307
accuracy with learning rate of  0.4 :  0.728744939271255
accuracy with learning rate of  0.6000000000000001 :  0.7257085020242915
accuracy with learning rate of  0.8 :  0.7252024291497976
accuracy with learning rate of  1.0 :  0.7262145748987854
accuracy with learning rate of  1.2 :  0.7246963562753036
accuracy with learning rate of  1.4 :  0.7186234817813765
accuracy with learning rate of  1.5999999999999999 :  0.7090080971659919
accuracy with learning rate of  1.7999999999999998 :  0.7246963562753036
accuracy with learning rate of  1.9999999999999998 :  0.26720647773279355


# Experimentation with different Numbers of Estimators

Same process as learning rate above with a range of 50-500 estimators

In [23]:
j = 50
while(j <= 500):
    clf = AdaBoostClassifier(n_estimators=j, random_state=0,learning_rate=1)
    clf.fit(X_train,y_train)
    print("accuracy with ",j," estimators: ",clf.score(X_test, y_test))
    j+=50

accuracy with  50  estimators:  0.7262145748987854
accuracy with  100  estimators:  0.7196356275303644
accuracy with  150  estimators:  0.7171052631578947
accuracy with  200  estimators:  0.7165991902834008
accuracy with  250  estimators:  0.72165991902834
accuracy with  300  estimators:  0.7115384615384616
accuracy with  350  estimators:  0.7100202429149798
accuracy with  400  estimators:  0.7140688259109311
accuracy with  450  estimators:  0.7130566801619433
accuracy with  500  estimators:  0.7140688259109311


# Determing in the interaction between Estimators and Learning Rate

Combine the processes above to see how the two hyperparameters interact with eachother as they decrease or increase just with a decreased scale of estimators 50-250 and learning rate 0.5-2 for decreased testing time

In [24]:
i = 50
while(i <= 250):
    j = 0.5
    while(j <= 2):
        clf = AdaBoostClassifier(n_estimators=i, random_state=0,learning_rate=j)
        clf.fit(X_train,y_train)
        print("accuracy with ",i," estimators and a learning rate of ",j,": ",clf.score(X_test, y_test))
        j+=0.5
    print("\n")
    i+=50

accuracy with  50  estimators and a learning rate of  0.5 :  0.7282388663967612
accuracy with  50  estimators and a learning rate of  1.0 :  0.7262145748987854
accuracy with  50  estimators and a learning rate of  1.5 :  0.715080971659919
accuracy with  50  estimators and a learning rate of  2.0 :  0.26720647773279355


accuracy with  100  estimators and a learning rate of  0.5 :  0.7302631578947368
accuracy with  100  estimators and a learning rate of  1.0 :  0.7196356275303644
accuracy with  100  estimators and a learning rate of  1.5 :  0.7105263157894737
accuracy with  100  estimators and a learning rate of  2.0 :  0.26720647773279355


accuracy with  150  estimators and a learning rate of  0.5 :  0.7267206477732794
accuracy with  150  estimators and a learning rate of  1.0 :  0.7171052631578947
accuracy with  150  estimators and a learning rate of  1.5 :  0.7110323886639676
accuracy with  150  estimators and a learning rate of  2.0 :  0.26720647773279355


accuracy with  200  esti

# Determining Interaction between Algorithm and Number of Estimators

In [11]:
algorithms = ["SAMME","SAMME.R"]
i = 50
while(i <= 500):
    j = 0
    while(j <= 1):
        clf = AdaBoostClassifier(n_estimators=i, random_state=0,learning_rate=1,algorithm=algorithms[j])
        clf.fit(X_train,y_train)
        print(clf.score(X_test, y_test))
        j+=1
    print("\n")
    i+=50

0.736336032388664
0.7358299595141701


0.7419028340080972
0.7383603238866396


0.7419028340080972
0.728744939271255


0.7444331983805668
0.7332995951417004


0.7444331983805668
0.7312753036437247


0.7439271255060729
0.7262145748987854


0.742914979757085
0.728744939271255


0.742914979757085
0.728744939271255


0.7419028340080972
0.7307692307692307


0.7424089068825911
0.7307692307692307




# Determining the Interaction between Algorithm and Learning Rate

In [12]:
algorithms = ["SAMME","SAMME.R"]
i = 0.2
while(i <= 2):
    j = 0
    while(j <= 1):
        clf = AdaBoostClassifier(n_estimators=50, random_state=0,learning_rate=i,algorithm=algorithms[j])
        clf.fit(X_train,y_train)
        print(clf.score(X_test, y_test))
        j+=1
    print("\n")
    i+=0.2

0.7408906882591093
0.7489878542510121


0.736336032388664
0.743421052631579


0.7353238866396761
0.7388663967611336


0.7398785425101214
0.7383603238866396


0.736336032388664
0.7358299595141701


0.7439271255060729
0.7373481781376519


0.7403846153846154
0.7383603238866396


0.7444331983805668
0.736336032388664


0.7378542510121457
0.7408906882591093


0.2667004048582996
0.26568825910931176


