In [95]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

seed = 1234
np.random.seed(seed)  

In [96]:
M = np.load('data/winrate_mat.npy')
X, y = M[:, :-1], M[:, -1]

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=seed)

### Baseline #1: Sum of winrates

In [97]:
y_pred = np.sum(X[:, :5], axis=1) > np.sum(X[:, 5:10], axis=1)

In [98]:
accuracy_score(y, y_pred)

0.5507513797459938

### Baseline #2: Majority of winrates by position

In [99]:
count = np.sum(X[:, :5] > X[:, 5:10], axis=1)

In [100]:
y_pred = count > 5/2
accuracy_score(y, y_pred)

0.542871866480484

### Baseline #3: Majority of winrates by sorted order

In [101]:
count = np.sum(np.sort(X[:, :5], axis=1) > np.sort(X[:, 5:10], axis=1), axis=1)

In [102]:
y_pred = count > 5/2
accuracy_score(y, y_pred)

0.5499368309063103

### Baseline #4: Median winrates

In [103]:
y_pred = np.median(X[:, :5], axis=1) > np.mean(X[:, 5:10], axis=1)
accuracy_score(y, y_pred)

0.5451825254338719