### Imports

In [2]:
from fastai.imports import *
from fastai.structured import *
from sklearn.gaussian_process import GaussianProcessClassifier
import numpy as np
import pandas as pd
# Load the dataset from CSV into a DataFrame.
# NOTE(review): `df` is not referenced by the visible cells that follow, which
# read `dummies_data` instead — confirm which frame is intended.
df = pd.read_csv('data/transform_hot_full.csv')

### Helper functions

In [13]:
def split_random_masks(a, n1, n2):
    """Randomly partition the positions of ``a`` into three disjoint masks.

    Every position is first placed in a "selected" pool with probability
    ``n1 + n2``.  Pool members are then assigned to the first subset with
    probability ``n1 / (n1 + n2)`` and to the second subset otherwise.  The
    split is random per position, so subset sizes only approximate the
    requested fractions.

    Args:
        a: Any sized object; only ``len(a)`` is used.
        n1: Target fraction of positions for the first subset.
        n2: Target fraction of positions for the second subset.

    Returns:
        A tuple ``(rest, first, second)`` of boolean NumPy arrays of length
        ``len(a)``.  Exactly one of the three is True at each position.
    """
    total = n1 + n2
    in_pool = np.random.rand(len(a)) < total
    # With n1 == n2 == 0 the pool is empty and the share is irrelevant; guard
    # the division so that case returns "everything in rest" instead of
    # raising ZeroDivisionError.
    first_share = n1 / total if total else 0.0
    first = (np.random.rand(len(a)) < first_share) & in_pool
    second = ~first & in_pool
    return ~in_pool, first, second

def rmse(x,y):
    """Return the root-mean-square error between ``x`` and ``y``.

    ``x - y`` must support ``** 2`` and expose a ``.mean()`` method
    (e.g. NumPy arrays or pandas Series).
    """
    squared_error = (x - y) ** 2
    mean_squared_error = squared_error.mean()
    return math.sqrt(mean_squared_error)

def print_score(m):
    """Print RMSE and ``score`` of model ``m`` on the train and validation sets.

    Reads the module-level ``X_train``, ``y_train``, ``X_valid`` and
    ``y_valid``.  ``m`` must provide ``predict`` and ``score``; if it also has
    an ``oob_score_`` attribute, that value is appended to the printed list.
    """
    trn_rmse = rmse(m.predict(X_train), y_train)
    val_rmse = rmse(m.predict(X_valid), y_valid)
    trn_score = m.score(X_train, y_train)
    val_score = m.score(X_valid, y_valid)

    res = [
        "rmse(trn):", trn_rmse,
        " rmse(val):", val_rmse,
        " scr(trn):", trn_score,
        " scr(val):", val_score,
    ]
    if hasattr(m, 'oob_score_'):
        res += [m.oob_score_]
    print(res)

In [18]:
def hero_one_hot(game_map = '', winners = None, losers = None):
    """Build a one-hot feature row for a single match.

    The row is indexed by the module-level ``col_names`` and starts out all
    False.  For each name in the module-level ``hero_names`` that appears in
    ``winners`` the ``'yours_<hero>'`` entry is set, and for each that appears
    in ``losers`` the ``'theirs_<hero>'`` entry is set.  Names that are not in
    ``hero_names`` are ignored.

    Args:
        game_map: Label of the map entry to set.  An empty value sets no map
            entry.
        winners: Hero names on "your" side; ``None`` means none.
        losers: Hero names on "their" side; ``None`` means none.

    Returns:
        A pandas Series with nullable-boolean dtype.
    """
    # None instead of mutable list defaults, which are shared across calls.
    winners = [] if winners is None else winners
    losers = [] if losers is None else losers

    new_row = pd.Series(False, index = col_names, dtype = 'boolean')
    for hero in hero_names:
        if hero in winners:
            new_row['yours_' + hero] = True
        if hero in losers:
            new_row['theirs_' + hero] = True
    # Assigning to the default '' label would append a spurious '' entry and
    # make the row longer than col_names, so only set a map when one is given.
    if game_map:
        new_row[game_map] = True
    return new_row

### Training

#### Prepare X and Y for the train, validation, and test sets.

In [None]:
# Turn the raw frame into features/target and split it into disjoint
# train / validation / test subsets.
# NOTE(review): `dummies_data` is not defined in the visible cells; presumably
# it is the one-hot frame loaded earlier — confirm.
df_trn, y_trn, nas = proc_df(dummies_data, 'outcome')
val_ratio = 0.2
tst_ratio = 0.1
#y_trn=y_trn.astype('bool')
#n_trn = int(len(df_trn) * train_required_ratio)
tr_mask, val_mask, test_mask = split_random_masks(df_trn, val_ratio, tst_ratio)
X_train = df_trn[tr_mask].copy()
y_train = y_trn[tr_mask].copy()
# Each subset must be selected with its own mask: indexing all three with
# tr_mask made validation and test exact copies of the training rows.
X_valid = df_trn[val_mask].copy()
y_valid = y_trn[val_mask].copy()
X_test = df_trn[test_mask].copy()
y_test = y_trn[test_mask].copy()

print("train: ",X_train.shape, "  val:",X_valid.shape, " tst: ", X_test.shape)
apply_cats(X_valid, X_train)

#### Training binary logistic regression model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Fit the baseline classifier on the training split, so that scoring it on
# X_valid measures held-out performance rather than fit to the data it was
# trained on.
model = LogisticRegression(random_state=0).fit(X_train, y_train)

In [None]:
# Predict the outcome of one hand-built match with the baseline model.
game_map = 'Garden of Terror'
game_type = 'UnrankedDraft'
winners=['Auriel', 'Muradin']
# hero_one_hot only sets entries for names found in hero_names, so a
# misspelled hero is silently dropped from the row.  'Illidian' corrected to
# 'Illidan' — NOTE(review): verify the spelling against hero_names.
losers=['Illidan', 'Abathur']
row = [hero_one_hot(game_map = game_map, winners = winners, losers=losers)]
model.predict(row)

In [None]:
# Display the baseline model's score on the validation features and labels
# (for scikit-learn classifiers, `score` is the mean accuracy).
model.score(X_valid, y_valid)

In [None]:
# Sweep the regularisation parameter C of an L2-penalised logistic regression
# and record the accuracy of each setting in `acc_table`.
# NOTE(review): settings are compared on X_test; if these results drive model
# selection, X_valid may be the intended set — confirm.
C_param_range = [0.75,0.9,1,1.1,1.5,2]
acc_table = pd.DataFrame(columns = ['C_parameter','Accuracy'])
acc_table['C_parameter'] = C_param_range
accuracy_col = acc_table.columns.get_loc('Accuracy')
for row_idx, c_value in enumerate(C_param_range):
    # Fit on the training data with the current C
    lr = LogisticRegression(C = c_value, penalty = 'l2', solver = 'lbfgs', random_state = 0)
    lr.fit(X_train, y_train)

    # Score the predictions and store the accuracy in this setting's row
    y_pred = lr.predict(X_test)
    acc_table.iloc[row_idx, accuracy_col] = accuracy_score(y_test, y_pred)
acc_table

In [None]:
# Compare logistic-regression solvers: record accuracy and fit time for each.
import time  # imported here so this cell does not depend on other cells

S_param_range = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
# The first column holds solver names, so it is labelled 'S_parameter'.
acc_table = pd.DataFrame(columns = ['S_parameter','Accuracy', 'Time'])
acc_table['S_parameter'] = S_param_range
# Fit time in seconds per solver.  A bare `%timeit -n1 -r1 -o` has no
# statement to time, so each fit is timed explicitly instead.
result = []
for j, solver in enumerate(S_param_range):
    # Apply logistic regression model to training data, timing only the fit
    lr = LogisticRegression(solver = solver, penalty = 'l2', C = 1, random_state = 0)
    fit_start = time.perf_counter()
    lr.fit(X_train, y_train)
    fit_seconds = time.perf_counter() - fit_start

    # Predict using model
    y_pred = lr.predict(X_test)

    # Saving accuracy score and fit time in table
    acc_table.iloc[j,1] = accuracy_score(y_test,y_pred)
    acc_table.iloc[j,2] = fit_seconds
    result.append(fit_seconds)
print(result)
acc_table

In [None]:
# Compare penalty types for a 'saga' logistic regression and record the
# accuracy of each in `acc_table`.
# NOTE(review): l1_ratio is passed for every penalty although it presumably
# only matters for 'elasticnet', and the string 'none' may be rejected by
# newer scikit-learn releases — confirm against the installed version.
P_param_range = ['l1', 'l2', 'elasticnet', 'none']
acc_table = pd.DataFrame(columns = ['P_parameter','Accuracy'])
acc_table['P_parameter'] = P_param_range
accuracy_col = acc_table.columns.get_loc('Accuracy')
for row_idx, penalty_name in enumerate(P_param_range):
    # Fit on the training data with the current penalty
    lr = LogisticRegression(penalty = penalty_name, solver = 'saga', C = 1, l1_ratio=0.5, random_state = 0)
    lr.fit(X_train, y_train)

    # Score the predictions and store the accuracy in this penalty's row
    y_pred = lr.predict(X_test)
    acc_table.iloc[row_idx, accuracy_col] = accuracy_score(y_test, y_pred)
acc_table