# Given info about fighter 1 and fighter 2 -> predict the winner

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch
import os
from tqdm.auto import tqdm as tqdmauto

  from .autonotebook import tqdm as notebook_tqdm


In [71]:
# Load the cleaned fight dataset that the train/test split is taken from.
df = pd.read_csv("data/cleaned_data.csv")

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [4]:
def _frame_to_tensors(frame):
    """Convert a fight DataFrame into float32 tensors.

    Returns a tuple ``(features, labels, evs)`` where ``features`` holds every
    column except the fighter names, the label and the two ``*_ev`` columns,
    ``labels`` is the ``Winner`` column reshaped to ``(-1, 1)`` and ``evs``
    holds the ``R_ev`` / ``B_ev`` columns in that order.
    """
    non_feature_columns = ["R_fighter", "B_fighter", "Winner", "R_ev", "B_ev"]

    ev_values = frame[["R_ev", "B_ev"]].to_numpy()
    feature_values = frame.drop(non_feature_columns, axis=1).to_numpy()
    label_values = frame[["Winner"]].to_numpy()

    features = torch.tensor(feature_values, dtype=torch.float32)
    labels = torch.tensor(label_values, dtype=torch.float32).reshape(-1, 1)
    evs = torch.tensor(ev_values, dtype=torch.float32)
    return features, labels, evs


# Same conversion for both splits, so the two cannot drift apart.
X_train, y_train, train_evs = _frame_to_tensors(train_df)
X_test, y_test, test_evs = _frame_to_tensors(test_df)

Much of the code below is adapted from https://machinelearningmastery.com/building-a-binary-classification-model-in-pytorch/

In [5]:
# model architectures
import torch.nn as nn
class ShallowBinaryClassifier(nn.Module):
    """Binary classifier with one wide hidden layer.

    Architecture: Linear(input_size, 180) -> ReLU -> Linear(180, 1) -> Sigmoid.
    """

    def __init__(self, input_size):
        """Create the layers; ``input_size`` is the number of input features."""
        super().__init__()
        # Attribute names are also the state_dict keys, so they stay as they are.
        self.hidden = nn.Linear(input_size, 180)
        self.relu = nn.ReLU()
        self.output = nn.Linear(180, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output, one value per input row."""
        hidden_activation = self.relu(self.hidden(x))
        raw_score = self.output(hidden_activation)
        return self.sigmoid(raw_score)

class DeepBinaryClassifier(nn.Module):
    """Binary classifier with three narrow hidden layers.

    Architecture: three Linear(…, 60) -> ReLU stages, followed by
    Linear(60, 1) -> Sigmoid.
    """

    def __init__(self, input_size):
        """Create the layers; ``input_size`` is the number of input features."""
        super().__init__()
        # Attribute names are also the state_dict keys, so they stay as they are.
        self.layer1 = nn.Linear(input_size, 60)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(60, 60)
        self.act2 = nn.ReLU()
        self.layer3 = nn.Linear(60, 60)
        self.act3 = nn.ReLU()
        self.output = nn.Linear(60, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output, one value per input row."""
        hidden_stages = (
            (self.layer1, self.act1),
            (self.layer2, self.act2),
            (self.layer3, self.act3),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.sigmoid(self.output(x))

# Train for winner

In [19]:
# train 
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
 
def model_train_winner(model, X_train, y_train, X_val, y_val, n_epochs=250, batch_size=10, lr=0.0001, eval_every=10):
    """Train a binary classifier and keep the weights with the best validation accuracy.

    The model is optimised with Adam on binary cross-entropy (``nn.BCELoss``),
    so it must output probabilities in [0, 1]. Batches are taken in order from
    ``X_train`` / ``y_train`` (no shuffling). Validation accuracy is measured
    on epochs 0, ``eval_every``, 2 * ``eval_every``, ... and printed; the final
    epoch is only evaluated if it falls on that schedule.

    Args:
        model: ``nn.Module`` to train in place.
        X_train, y_train: training inputs and targets (indexable by slice).
        X_val, y_val: inputs and targets used to pick the best weights.
        n_epochs: number of passes over the training data.
        batch_size: number of samples per optimisation step.
        lr: Adam learning rate.
        eval_every: evaluate on the validation data every this many epochs.

    Returns:
        The best validation accuracy seen, or ``-inf`` if no evaluation ran
        (e.g. ``n_epochs == 0``), in which case the model is left unchanged.

    Raises:
        ValueError: if ``eval_every`` is smaller than 1.
    """
    if eval_every < 1:
        raise ValueError("eval_every must be a positive integer")

    # loss function and optimizer
    loss_fn = nn.BCELoss()  # binary cross entropy
    optimizer = optim.Adam(model.parameters(), lr=lr)
    batch_starts = range(0, len(X_train), batch_size)

    # Hold the best model
    best_acc = -np.inf
    best_weights = None

    for epoch in tqdmauto(range(n_epochs)):
        model.train()
        # The former per-batch progress bar was permanently disabled, so the
        # per-batch accuracy/loss bookkeeping that only fed it is omitted.
        for start in batch_starts:
            X_batch = X_train[start:start + batch_size]
            y_batch = y_train[start:start + batch_size]
            # forward pass
            loss = loss_fn(model(X_batch), y_batch)
            # backward pass and weight update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # evaluate validation accuracy every `eval_every` epochs
        if epoch % eval_every == 0:
            model.eval()
            with torch.no_grad():  # no autograd graph needed for evaluation
                y_pred = model(X_val)
            acc = float((y_pred.round() == y_val).float().mean())
            print("Acc:", acc)
            if acc > best_acc:
                best_acc = acc
                best_weights = copy.deepcopy(model.state_dict())

    # restore the best weights (if any evaluation happened) and return best accuracy
    if best_weights is not None:
        model.load_state_dict(best_weights)
    return best_acc

In [20]:
def train(train_func, model, X_train, y_train, X_test, y_test, n_epochs=250, batch_size=10, lr=0.0001):
    """Run ``train_func`` on the given data and print the accuracy it reports.

    Args:
        train_func: callable invoked as
            ``train_func(model, X_train, y_train, X_test, y_test, n_epochs, batch_size, lr)``
            and expected to return an accuracy as a fraction.
        model: model passed through to ``train_func``.
        X_train, y_train: training inputs and targets.
        X_test, y_test: evaluation inputs and targets passed to ``train_func``.
        n_epochs, batch_size, lr: hyperparameters forwarded unchanged.

    Returns:
        The accuracy returned by ``train_func``.
    """
    # Forward the n_epochs argument itself rather than a notebook-level global.
    acc = train_func(model, X_train, y_train, X_test, y_test, n_epochs, batch_size, lr)
    print(f"Performance: {acc*100:.2f}%")
    return acc



In [23]:
# Hyperparameters for this run
epochs = 50
batch_size = 10
lr = 0.0001

# Training routine and architecture to use
train_func = model_train_winner
model = ShallowBinaryClassifier(X_train.shape[1])
# model = DeepBinaryClassifier(X_train.shape[1])

# NOTE(review): the test split is passed as the validation data here, so it
# also drives which weights are kept as "best".
train(
    train_func,
    model,
    X_train,
    y_train,
    X_test,
    y_test,
    n_epochs=epochs,
    batch_size=batch_size,
    lr=lr,
)

  2%|▏         | 1/50 [00:00<00:24,  1.99it/s]

Acc: 0.584269642829895


 22%|██▏       | 11/50 [00:04<00:15,  2.53it/s]

Acc: 0.6026557683944702


 42%|████▏     | 21/50 [00:08<00:11,  2.59it/s]

Acc: 0.5955055952072144


 62%|██████▏   | 31/50 [00:12<00:07,  2.50it/s]

Acc: 0.6077630519866943


 82%|████████▏ | 41/50 [00:16<00:03,  2.49it/s]

Acc: 0.6077630519866943


100%|██████████| 50/50 [00:19<00:00,  2.51it/s]

Performance: 60.78%





In [None]:
#TODO test sometimes during the run, every 50 epochs perhaps?

We care not only about predicting the winner but also about the odds, since some fights are easier to predict because one fighter is heavily favoured. We therefore calculate how much the model's predictions would earn at the given odds.

In [95]:
# make a validation func to see how much you would earn based on odds

def test_odds_performance(X_test, y_test, test_evs, description="Training sequence i \n", result_txt_file_path="results.txt"):
	# load the best model
	model.eval()
	with torch.no_grad():
		y_pred = model(X_test).round()

		# set y pred to random numbers
		# y_pred = torch.randint(0, 2, (len(y_test), 1)).float() #TODO remove after
		# print(y_pred)
		correct = (y_pred == y_test).float()

		predicted_earnings, potential_earnings = 0, 0

		for i in range(len(correct)):
			potential_earnings += test_evs[i][y_test[i].long()].item()
			is_correct = correct[i].item()
			if (is_correct == 1):
				predicted_earnings += test_evs[i][y_test[i].long()].item() #what you win if you bet 100
			else:
				predicted_earnings -= 100 # bet 100 each time
		print(description)
		print(f"For {len(y_test)} test samples: ")
		print("Accuracy: ", round(torch.mean(correct).item(),4))
		print("When betting 100: ")
		print("Predicted earnings: ", round(predicted_earnings,2))
		print("Potential earnings: ", round(potential_earnings, 2))


In [94]:
# Label the report with this run's hyperparameters, then print accuracy and
# betting returns for the test split.
description = f"Model with params: epochs: {epochs}, bs: {batch_size}, lr: {lr}"
test_odds_performance(
    X_test,
    y_test,
    test_evs,
    description=description,
)

Model with params: epochs: 50, bs: 10, lr: 0.0001
For 979 test samples: 
Accuracy:  0.61
When betting 100: 
Predicted earnings:  85254.83
Potential earnings:  169201.78


In [96]:
# Sanity check: no row index may appear in both the train and the test split.
print("Train and test indices do not overlap: ", set(train_df.index).isdisjoint(set(test_df.index)))


Train and test indices do not overlap:  True


If the predicted earnings are positive, you are making money.