In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, RidgeClassifierCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import plotly.express as px
import pickle


In [23]:
#loading the dataset in
df = pd.read_csv("data/league_match_stats_cleaned.csv")
print("Rows and Columns:", df.shape)
df.head()

Rows and Columns: (75897, 56)


Unnamed: 0,MatchStatsId,SummonerMatchFk,MinionsKilled,DmgDealt,DmgTaken,TurretDmgDealt,TotalGold,Lane,Win,item1,...,VisionPerMin,DmgPerGold,DmgEfficiency,GamePhase,ItemCount,ObjectiveParticipation,ChampionId,ChampionName,RankId,RankName
0,1,1,30,4765,12541,0,7058,BOTTOM,0,3870,...,2.295831,0.67512,0.379954,Mid,6,0,902,Milio,7,Diamond
1,2,2,29,8821,14534,1,9618,BOTTOM,0,3870,...,2.523901,0.917135,0.606922,Mid,6,0,902,Milio,7,Diamond
2,3,3,34,6410,19011,3,9877,BOTTOM,1,3870,...,2.495712,0.648982,0.337173,Late,6,0,16,Soraka,7,Diamond
3,7,7,28,3775,12061,0,6344,BOTTOM,0,3870,...,2.147971,0.59505,0.312992,Mid,6,0,902,Milio,7,Diamond
4,8,8,36,4217,13464,0,7403,BOTTOM,0,3870,...,1.886792,0.569634,0.313206,Mid,4,0,267,Nami,7,Diamond


In [24]:
df.columns

Index(['MatchStatsId', 'SummonerMatchFk', 'MinionsKilled', 'DmgDealt',
       'DmgTaken', 'TurretDmgDealt', 'TotalGold', 'Lane', 'Win', 'item1',
       'item2', 'item3', 'item4', 'item5', 'item6', 'kills', 'deaths',
       'assists', 'PrimaryKeyStone', 'PrimarySlot1', 'PrimarySlot2',
       'PrimarySlot3', 'SecondarySlot1', 'SecondarySlot2', 'SummonerSpell1',
       'SummonerSpell2', 'CurrentMasteryPoints', 'EnemyChampionFk',
       'DragonKills', 'BaronKills', 'visionScore', 'Role', 'SummonerMatchId',
       'ChampionFk', 'SummonerFk', 'MatchFk', 'MatchId', 'GameDuration',
       'QueueType', 'RankFk', 'Patch', 'KDA', 'GameDurationMin', 'GoldPerMin',
       'CSPerMin', 'DmgPerMin', 'VisionPerMin', 'DmgPerGold', 'DmgEfficiency',
       'GamePhase', 'ItemCount', 'ObjectiveParticipation', 'ChampionId',
       'ChampionName', 'RankId', 'RankName'],
      dtype='object')

In [25]:
#Convert the data into something that can be predicted on 
y = df["RankId"]

X_full = df.drop(columns=["MatchStatsId", "SummonerMatchFk", "EnemyChampionFk", "MatchFk", "MatchId", "QueueType", "RankFk", "Patch", "ChampionName", "RankId", "RankName", "Lane", "Role", "GamePhase"], axis=0)
#need to turn lane into smth numeric
#need to turn role into smth numeric
#need to turn game phase into smth numeric?

In [26]:
# Creating the Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_full, y, test_size=0.2, random_state=67)
print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

Train shape: (60717, 42) Test shape: (15180, 42)


In [None]:
#Regression Suite

#Vanilla Logistic Regression 
vanilla_logistic_regression = LogisticRegression(random_state=0, max_iter=100).fit(X_train, y_train)
pickle.dump(vanilla_logistic_regression, open("models/vanilla_log_reg.sav", "wb"))

#Ridge Classifier CV
ridge_classification_CV = RidgeClassifierCV(alphas=np.logspace(-2, 10, num=100)).fit(X_train, y_train)
pickle.dump(ridge_classification_CV, open("models/ridge_class_CV.sav", "wb"))



lbfgs failed to converge after 100 iteration(s) (status=1):
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



In [38]:
print("Vanilla logistic regression accuracy:")
print(f'Train: {vanilla_logistic_regression.score(X_train, y_train)}')
print(f'Test: {vanilla_logistic_regression.score(X_test, y_test)}')

print("Ridge Classification CV accuracy: ")
print(f'Train: {ridge_classification_CV.score(X_train, y_train)}')
print(f'Test: {ridge_classification_CV.score(X_test, y_test)}')


Vanilla logistic regression accuracy:
Train: 0.45259153120213447
Test: 0.4486824769433465
Ridge Classification CV accuracy: 
Train: 0.48953340909465226
Test: 0.4886034255599473


In [44]:
#Tree Suite
#regular decision trees
vanilla_decision_tree = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
pickle.dump(vanilla_decision_tree, open("models/vanilla_tree.sav", "wb"))

#random forest trees
random_forest_tree = RandomForestClassifier(random_state=0).fit(X_train, y_train)
pickle.dump(random_forest_tree, open("models/randomforest_tree.sav", "wb"))

#adaboost tree
adaboost_tree = AdaBoostClassifier(random_state=0).fit(X_train, y_train)
pickle.dump(adaboost_tree, open("models/adaboost_tree.sav", "wb"))

#gradientboost tree
gradient_boost_tree = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)
pickle.dump(gradient_boost_tree, open("models/gradientboost_tree.sav", "wb"))

In [45]:
print("Vanilla decision tree accuracy:")
print(f'Train: {vanilla_decision_tree.score(X_train, y_train)}')
print(f'Test: {vanilla_decision_tree.score(X_test, y_test)}')

print("Random Forest decision tree accuracy:")
print(f'Train: {random_forest_tree.score(X_train, y_train)}')
print(f'Test: {random_forest_tree.score(X_test, y_test)}')

print("Adaboost decision tree accuracy:")
print(f'Train: {adaboost_tree.score(X_train, y_train)}')
print(f'Test: {adaboost_tree.score(X_test, y_test)}')

print("Gradient Boosting decision tree accuracy:")
print(f'Train: {gradient_boost_tree.score(X_train, y_train)}')
print(f'Test: {gradient_boost_tree.score(X_test, y_test)}')


Vanilla decision tree accuracy:
Train: 1.0
Test: 0.939064558629776
Random Forest decision tree accuracy:
Train: 1.0
Test: 0.9075098814229249
Adaboost decision tree accuracy:
Train: 0.5024293031605646
Test: 0.5036231884057971
Gradient Boosting decision tree accuracy:
Train: 0.7293179834313289
Test: 0.7119894598155467
