In [1]:
import numpy as np
import pandas as pd

import joblib

data = pd.read_csv('test_set.csv')

model = joblib.load('lr_pipeline.pkl')

In [2]:
# Cleaning
data = data.loc[data['Purp_Gold']!=0]
data = data.loc[data['Blue_Gold']!=0]
data = data.reset_index(drop=True)

# Too early to call
data = data.loc[(data['Blue_Gold']>=5000)&(data['Purp_Gold']>=5000)]

# Some games had bookies' odds switched
def swap_odds(data):
    data = pd.concat([data.iloc[:, :-5], data['Purp_BookOdds'], 
                      data['Blue_BookOdds'], data.iloc[:, -3:]], axis=1)
    data = data.rename(columns={'Purp_BookOdds':'Blue_BookOdds',
                                'Blue_BookOdds':'Purp_BookOdds'})
    return data

for game in [9, 17, 22]:
    curr_data = data.loc[data['GameID']==game]
    curr_data = swap_odds(curr_data)
    
    data = data.loc[data['GameID']!=game]
    data = pd.concat([data, curr_data], axis=0)
    data = data.sort_index()

# Reset the index
data = data.reset_index(drop=True)

In [3]:
data_prob = pd.concat([data.iloc[:, :-5], 1 / (data.iloc[:, -5:-1] / 0.93), data['Blue_Won']], axis=1)

ys = data.iloc[:, -5:]
ys_prob = data_prob.iloc[:, -5:]

# Indices
blue_won = ys.loc[ys['Blue_Won']==1].index
blue_lost = ys.loc[ys['Blue_Won']==0].index

Brier's score for metric<br>
Account for independance<br>
Use different metric

In [4]:
def compute_brier(y_true, y_pred, y_pred_books):
    score = sum((y_pred - y_true) ** 2) / len(y_pred)
    book_score = sum((y_pred_books - y_true) ** 2) / len(y_pred_books)
    
    return [score, book_score]
    
    print('---------------------')
    print('Your score:', round(score, 4),
          '\nBookies\' score:', round(book_score, 4))
    print('---------------------')

In [5]:
y1_brier = compute_brier(1, ys_prob.iloc[blue_won]['Blue_ModelOdds'], ys_prob.iloc[blue_won]['Blue_BookOdds'])
y0_brier = compute_brier(0, ys_prob.iloc[blue_lost]['Purp_ModelOdds'], ys_prob.iloc[blue_lost]['Purp_BookOdds'])

print('Brier scores when y = 1:\n---------------------\nModel score:', round(y1_brier[0], 4), '\nBookier\' score:',
      round(y1_brier[1], 4), '\n---------------------\n')

print('Brier scores when y = 0:\n---------------------\nModel score:', round(y0_brier[0], 4), '\nBookier\' score:',
      round(y0_brier[1], 4), '\n---------------------')

Brier scores when y = 1:
---------------------
Model score: 0.1011 
Bookier' score: 0.1535 
---------------------

Brier scores when y = 0:
---------------------
Model score: 0.4437 
Bookier' score: 0.4278 
---------------------


In [6]:
from sklearn.metrics import confusion_matrix

print('Model predictions confusion matrix:')
display(confusion_matrix(ys_prob['Blue_ModelOdds'].round(), ys_prob['Blue_Won']))

print('\nBookies\' predictions confusion matrix:')
display(confusion_matrix(ys_prob['Blue_BookOdds'].round(), ys_prob['Blue_Won']))

Model predictions confusion matrix:


array([[365,  71],
       [170, 556]], dtype=int64)


Bookies' predictions confusion matrix:


array([[417, 114],
       [118, 513]], dtype=int64)

*Layout: 

TN | FP<br>
FN | TP

In [7]:
from sklearn.metrics import accuracy_score

print('Model accuracy:')
display(round(accuracy_score(ys_prob['Blue_ModelOdds'].round(), ys_prob['Blue_Won']), 4))

print('\nBookies accuracy:')
display(round(accuracy_score(ys_prob['Blue_BookOdds'].round(), ys_prob['Blue_Won']), 4))

Model accuracy:


0.7926


Bookies accuracy:


0.8003

Say that these "base" metrics don't account for independance

Fix: Random sampling distribution from each mach (1 game from each match, aggregation, repeat)

In [8]:
# Weighted sample
accuracy_scores = []
brier_scores = []

for _ in range(5):
    samples = []
    
    for game in data_prob['GameID'].unique():
        curr_data = data_prob.loc[data_prob['GameID']==game]
        samples.append(curr_data.sample(1).index[0])
        
    data_sample = data_prob.iloc[samples][['Blue_BookOdds', 'Blue_ModelOdds', 'Blue_Won']]
    
    accuracy_scores.append([accuracy_score(data_sample['Blue_ModelOdds'].round(), data_sample['Blue_Won']),
                            accuracy_score(data_sample['Blue_BookOdds'].round(), data_sample['Blue_Won'])])
    brier_scores.append(compute_brier(data_sample['Blue_Won'], data_sample['Blue_ModelOdds'],
                                      data_sample['Blue_BookOdds']))
    
    samples = []

In [9]:
model_acc_mean = np.mean([i[0] for i in accuracy_scores])
book_acc_mean = np.mean([i[1] for i in accuracy_scores])

model_brier_mean = np.mean([i[0] for i in brier_scores])
book_brier_mean = np.mean([i[1] for i in brier_scores])

In [10]:
print(21*'-' + '\nAccuracy:\n\nModel:', round(model_acc_mean, 4), '\nBookies:', round(book_acc_mean, 4), '\n' + 21*'-',
      '\n\n' + 21*'-' + '\nBrier:\n\nModel:', round(model_brier_mean, 4), '\nBookeis:', round(book_brier_mean, 4),
      '\n' + 21*'-')

---------------------
Accuracy:

Model: 0.8545 
Bookies: 0.8091 
--------------------- 

---------------------
Brier:

Model: 0.1303 
Bookeis: 0.1671 
---------------------


In [34]:
data_prob.loc[(data_prob['Blue_Won']==1)&(data_prob['Blue_ModelOdds']>0.5)&(data_prob['Blue_BookOdds']<0.5)]

Unnamed: 0,GameID,Blue_Team,Purp_Team,Blue_Tower,Blue_Inhib,Blue_Baron,Blue_Dragon,Purp_Tower,Purp_Inhib,Purp_Baron,Purp_Dragon,Blue_Kills,Blue_Assists,Blue_Gold,Purp_Kills,Purp_Assists,Purp_Gold,Blue_BookOdds,Purp_BookOdds,Blue_ModelOdds,Purp_ModelOdds,Blue_Won
268,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,13072,3,2,12965,0.430556,0.574074,0.550296,0.449275,1
269,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,13206,3,2,13110,0.430556,0.574074,0.547059,0.451456,1
270,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,13587,3,2,13513,0.467337,0.537572,0.54386,0.453659,1
271,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,14020,3,2,13646,0.467337,0.537572,0.588608,0.413333,1
272,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,14173,3,2,13889,0.467337,0.537572,0.574074,0.424658,1
273,3.0,G2 Esports,Fnatic,0,0,0,1,0,0,0,0,2,1,14669,3,2,14108,0.467337,0.537572,0.611842,0.3875,1
752,14.0,Legacy Esports,INTZ,5,1,1,1,1,0,0,4,8,12,55981,9,26,49617,0.349624,0.673913,0.93,0.014294,1
753,14.0,Legacy Esports,INTZ,5,1,1,1,1,0,0,4,8,12,56105,9,26,50096,0.349624,0.673913,0.93,0.017429,1
754,14.0,Legacy Esports,INTZ,5,1,1,1,1,0,0,4,8,12,56105,9,26,50096,0.372,0.65035,0.93,0.017429,1
755,14.0,Legacy Esports,INTZ,5,1,1,1,1,0,0,4,8,12,56235,9,26,50354,0.372,0.65035,0.93,0.018716,1


In [24]:
data_prob.shape

(1299, 22)

In [25]:
pd.set_option('max_columns', data_prob.shape[1])
pd.set_option('max_rows', data_prob.shape[0])

y = 0<br>
I was right, books weren't:80<br>
Books were right, I wasn't:106<br><br>
y = 1<br>
I was right, books weren't:112<br>
Books were right, I wasn't:66<br>