In [18]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
        
# Folder, relative to this notebook, that the input files are read from.
DATA_DIR = '../../../data/'
# Print the folder's contents so the available files show up in the cell output.
print(os.listdir(path=DATA_DIR))

['CleanLoLMatchHistory2.csv', 'openaq.csv', '.DS_Store', 'LoLMatchHistory.txt', 'CleanLoLMatchHistory.csv', 'LoL Match Making Statistics.ipynb', 'LolUpdatedStats.txt', 'CleanLoLMatchHistoryUpdate.csv']


In [5]:
# Full path of the cleaned match-history CSV inside DATA_DIR.
filename = ''.join([DATA_DIR, 'CleanLoLMatchHistory2.csv'])
# Read the CSV into the DataFrame that the rest of the notebook works on.
df = pd.read_csv(filepath_or_buffer=filename)
# Show the first five rows as the cell's output.
df.head(n=5)

Unnamed: 0,Level,Champion,Score,Game Mode,Creep Score,Gold,Minutes,Date,Rank,Experience Gained,Kills,Deaths,Assists,Game Status
0,15,Vayne,C / +188 XP,Summoner's Rift,169,9.8k,33:44:00,10/14/2018,C,188,3,6,6,-1
1,16,Shen,B- / +223 XP,Summoner's Rift,144,11.8k,40:20:00,10/13/2018,B-,223,5,7,15,-1
2,12,Shen,A-,Summoner's Rift,39,6.2k,25:50:00,10/13/2018,A-,0,0,0,13,1
3,12,Vayne,A+ / +146 XP,Summoner's Rift,137,9.9k,21:05,10/13/2018,A+,146,9,2,8,1
4,12,Vayne,B+ / +144 XP,Summoner's Rift,86,8.4k,20:53,10/13/2018,B+,144,6,2,6,1


In [6]:
def parse_gold(x):
    """Convert a gold amount to a float.

    Parameters
    ----------
    x : str or number
        Either text such as ``'9.8k'`` (a ``k`` means "thousands") or
        ``'850'``, or a value that is already numeric.

    Returns
    -------
    float
        The amount as a plain number, e.g. ``'2k'`` -> ``2000.0``.

    Raises
    ------
    ValueError
        If the text (after removing ``k``) is not a valid number.
    """
    # Values that are not text (already-parsed floats, ints, NaN) are passed
    # straight through. This keeps the function idempotent, so the cell that
    # applies it to the column can be re-run without a TypeError.
    if not hasattr(x, 'lower'):
        return float(x)

    # Lower-case so that 'K' is treated like 'k'; float() ignores case for
    # everything else it accepts.
    text = x.strip().lower()
    if 'k' in text:
        return float(text.replace('k', '')) * 1000
    return float(text)

# Normalise the column labels: lower-case them and turn spaces into underscores.
df.columns = df.columns.map(lambda label: label.lower().replace(' ', '_'))
# Replace the textual gold values with their numeric equivalents.
df['gold'] = df['gold'].map(parse_gold)


In [8]:
def change_to_zero(x):
    """Recode a status value as a 0/1 label: -1 maps to 0, anything else to 1."""
    return 0 if x == -1 else 1

### Hyperparameter tuning for Logistic Regression

In [11]:
# Features are the level, deaths and kills columns; the target is game_status
# recoded to 0/1 by change_to_zero.
X = np.array(df[['level', 'deaths', 'kills']])
y = df['game_status'].apply(change_to_zero)

# Candidate values for C: 15 log-spaced points from 1e-5 to 1e8
c_space = np.logspace(start=-5, stop=8, num=15)
param_grid = dict(C=c_space)

# Estimator whose C parameter is being tuned
logreg = LogisticRegression()

# Grid search over every candidate C, evaluated with 2-fold cross-validation
logreg_cv = GridSearchCV(estimator=logreg, param_grid=param_grid, cv=2)

# Run the search on the full feature matrix and target
logreg_cv.fit(X, y)

# Report the winning C and its cross-validated score
print("Tuned Logistic Regression Parameters: {}".format(logreg_cv.best_params_))
print("Best score is {}".format(logreg_cv.best_score_))

Tuned Logistic Regression Parameters: {'C': 0.05179474679231213}
Best score is 0.8035714285714286


### Hyperparameter tuning for Decision Tree Classifier

In [34]:
# Fixed seed shared by the search and the tree. Without it the sampled
# candidates (and the trees' random feature selection) differ on every run,
# so the reported "best" parameters and score are not reproducible.
RANDOM_STATE = 42

# Features are the level, deaths and kills columns; the target is game_status
# recoded to 0/1 by change_to_zero.
X = np.array(df.loc[:,['level','deaths','kills']])
y = np.array(df.loc[:,'game_status'].apply(change_to_zero))

# Set up the parameters and distributions to sample from: param_dist.
# scipy's randint(low, high) excludes `high`, so randint(1, 4) draws from
# {1, 2, 3}; for max_features that is at most the three feature columns in X.
param_dist = {"max_depth": [1, None],
              "max_features": randint(1, 4),
              "min_samples_leaf": randint(1, 4),
              "criterion": ["gini", "entropy"]}

# Instantiate a Decision Tree classifier: tree (seeded, see RANDOM_STATE)
tree = DecisionTreeClassifier(random_state=RANDOM_STATE)

# Instantiate the RandomizedSearchCV object: tree_cv
# 5-fold cross-validation; the seed fixes which candidates get sampled.
tree_cv = RandomizedSearchCV(tree, param_dist, cv=5, random_state=RANDOM_STATE)

# Fit it to the data
tree_cv.fit(X,y)

# Print the tuned parameters and score
print("Tuned Decision Tree Parameters: {}".format(tree_cv.best_params_))
print("Best score is {}".format(tree_cv.best_score_))

Tuned Decision Tree Parameters: {'max_depth': 1, 'criterion': 'gini', 'max_features': 3, 'min_samples_leaf': 2}
Best score is 0.8035714285714286
