In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [4]:
def confusion_plots(y_fit, y_predict, y_train=y_train, y_test=y_test):
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    train_cm = confusion_matrix(y_train, y_fit)
    sns.heatmap(train_cm, annot=True, fmt="d", cbar=False, ax=axs[0])
    axs[0].set(title="Training")
    val_cm = confusion_matrix(y_test, y_predict)
    sns.heatmap(val_cm, annot=True, fmt="d", cbar=False, ax=axs[1])
    axs[1].set(title="Testing")
    fig.text(0.5, 0.04, 'Predict', ha='center')
    fig.text(0.07, 0.5, 'True', va='center', rotation='vertical')
    plt.show()

In [3]:
train = pd.read_csv('data/train_8_12.csv', index_col=[0])
X_train = train.drop(["win", "matchId"], axis=1)
y_train = train['win'].astype(int)

test = pd.read_csv('data/test_8_12.csv', index_col=[0])
X_test = test.drop(["win", "matchId"], axis=1)
y_test = test['win'].astype(int)

## Logistic Regression

In [74]:
lr = LogisticRegression()
lr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## GridSearch CV for RandomForest

In [41]:
rf = RandomForestClassifier(random_state=42)
param_grid = {"n_estimators": np.linspace(150, 190, 40).astype(int),
              "max_depth": [8, 9]}
grid = GridSearchCV(rf, param_grid, scoring="accuracy", n_jobs=-1, verbose=3, cv=10)
grid.fit(X_train, y_train)

Fitting 10 folds for each of 80 candidates, totalling 800 fits
[CV 1/10] END ....max_depth=8, n_estimators=150;, score=0.698 total time=   1.5s
[CV 2/10] END ....max_depth=8, n_estimators=150;, score=0.755 total time=   1.5s
[CV 3/10] END ....max_depth=8, n_estimators=150;, score=0.604 total time=   1.5s
[CV 4/10] END ....max_depth=8, n_estimators=150;, score=0.679 total time=   1.5s
[CV 5/10] END ....max_depth=8, n_estimators=150;, score=0.736 total time=   1.4s
[CV 7/10] END ....max_depth=8, n_estimators=150;, score=0.755 total time=   1.4s
[CV 6/10] END ....max_depth=8, n_estimators=150;, score=0.698 total time=   1.5s
[CV 8/10] END ....max_depth=8, n_estimators=150;, score=0.615 total time=   1.4s
[CV 1/10] END ....max_depth=8, n_estimators=151;, score=0.698 total time=   0.9s
[CV 9/10] END ....max_depth=8, n_estimators=150;, score=0.673 total time=   0.9s
[CV 2/10] END ....max_depth=8, n_estimators=151;, score=0.736 total time=   0.9s
[CV 10/10] END ...max_depth=8, n_estimators=15

In [42]:
pd.DataFrame(grid.cv_results_).sort_values("mean_test_score", ascending=False)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
57,1.200674,0.037865,0.055233,0.009654,9,167,"{'max_depth': 9, 'n_estimators': 167}",0.754717,0.754717,0.584906,0.716981,0.716981,0.698113,0.735849,0.634615,0.692308,0.769231,0.705842,0.054668,1
58,1.367939,0.166780,0.063499,0.020416,9,168,"{'max_depth': 9, 'n_estimators': 168}",0.754717,0.735849,0.584906,0.716981,0.754717,0.698113,0.735849,0.634615,0.673077,0.769231,0.705806,0.056172,2
53,1.166232,0.046970,0.048469,0.004233,9,163,"{'max_depth': 9, 'n_estimators': 163}",0.754717,0.754717,0.566038,0.716981,0.716981,0.698113,0.735849,0.634615,0.692308,0.769231,0.703955,0.058966,3
60,1.221055,0.048836,0.055309,0.008556,9,170,"{'max_depth': 9, 'n_estimators': 170}",0.754717,0.735849,0.566038,0.716981,0.754717,0.698113,0.735849,0.634615,0.673077,0.769231,0.703919,0.060362,4
63,1.223986,0.056994,0.056237,0.010360,9,173,"{'max_depth': 9, 'n_estimators': 173}",0.754717,0.735849,0.584906,0.698113,0.754717,0.698113,0.735849,0.634615,0.673077,0.769231,0.703919,0.056082,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36,1.291215,0.016264,0.062901,0.006929,8,186,"{'max_depth': 8, 'n_estimators': 186}",0.716981,0.735849,0.584906,0.660377,0.716981,0.698113,0.716981,0.615385,0.653846,0.769231,0.686865,0.054155,75
34,1.399010,0.080612,0.066748,0.021894,8,184,"{'max_depth': 8, 'n_estimators': 184}",0.716981,0.735849,0.584906,0.660377,0.716981,0.698113,0.716981,0.615385,0.653846,0.769231,0.686865,0.054155,75
38,1.291094,0.014080,0.062277,0.006184,8,188,"{'max_depth': 8, 'n_estimators': 188}",0.716981,0.754717,0.584906,0.660377,0.716981,0.698113,0.716981,0.596154,0.653846,0.769231,0.686829,0.058866,78
37,1.491757,0.140456,0.069230,0.015573,8,187,"{'max_depth': 8, 'n_estimators': 187}",0.716981,0.754717,0.584906,0.660377,0.716981,0.698113,0.716981,0.596154,0.653846,0.769231,0.686829,0.058866,78
