In [283]:
import sqlite3
import numpy as np
import pandas as pd

con = sqlite3.connect("database.sqlite")

In [284]:
# country_id="21518" and league_id="21518" define La Liga
# id="43040" or team_api_id="8633" define Real Madrid
# id="43039" or team_api_id="9783" define RC Deportivo de La Coruña (opponent of Real Madrid in stage 38)
df = pd.read_sql_query('SELECT id, home_team_goal, away_team_goal, '
                       'home_team_api_id, away_team_api_id, '
                       'B365H, B365D, B365A '
                       'FROM Match '
                       'WHERE country_id!="21518" '
                       'ORDER BY season, stage ASC', con)
df_test = pd.read_sql_query('SELECT id, home_team_goal, away_team_goal, '
                       'home_team_api_id, away_team_api_id, '
                       'B365H, B365D, B365A '
                       'FROM Match '
                       'WHERE country_id="21518" '
                       'ORDER BY season, stage ASC', con)
df.dropna(inplace=True)
df_test.dropna(inplace=True)

In [285]:
def prepareDF(dataframe):
    teams = dataframe['home_team_api_id'].unique()
    points = {}
    points_against_teams = {}
    for team_id in teams:
        points[team_id] = {}
        points_against_teams[team_id] = {}
        for team_id_inner in teams:
                if team_id != team_id_inner:
                    points_against_teams[team_id][team_id_inner] = {}
    for i, value in enumerate(dataframe.values):
        home_team_api_id = dataframe['home_team_api_id'].values[i]
        away_team_api_id = dataframe['away_team_api_id'].values[i]
        home_team_goal = dataframe['home_team_goal'].values[i]
        away_team_goal = dataframe['away_team_goal'].values[i]
        id_of_match = dataframe['id'].values[i]
        if home_team_goal > away_team_goal:
            points_against_teams[home_team_api_id][away_team_api_id][id_of_match] = 3
            points_against_teams[away_team_api_id][home_team_api_id][id_of_match] = 0
            points[home_team_api_id][id_of_match] = 3
            points[away_team_api_id][id_of_match] = 0
        elif home_team_goal < away_team_goal:
            points_against_teams[home_team_api_id][away_team_api_id][id_of_match] = 0
            points_against_teams[away_team_api_id][home_team_api_id][id_of_match] = 3
            points[home_team_api_id][id_of_match] = 0
            points[away_team_api_id][id_of_match] = 3
        else:
            points_against_teams[home_team_api_id][away_team_api_id][id_of_match] = 1
            points_against_teams[away_team_api_id][home_team_api_id][id_of_match] = 1
            points[home_team_api_id][id_of_match] = 1
            points[away_team_api_id][id_of_match] = 1
    bet_ratio = []
    home_pitlf = []
    away_pitlf = []
    home_vs_away_pitlf = []
    away_vs_home_pitlf = []
    for i, value in enumerate(dataframe.values):
        home_team_api_id = dataframe['home_team_api_id'].values[i]
        away_team_api_id = dataframe['away_team_api_id'].values[i]
        id_of_match = dataframe['id'].values[i]
        bet_ratio = dataframe['B365H'].values[i] / dataframe['B365A'].values[i]
        pitlf = 0
        for j, key in enumerate(points[home_team_api_id].keys()):
            if key == id_of_match:
                if j-5 >= 0:
                    for k, point in enumerate(points[home_team_api_id].values()):
                        if j - 5 <= k < j:
                            pitlf += point
                    home_pitlf.append(pitlf)
                else:
                    home_pitlf.append(np.nan)
        pitlf = 0
        for j, key in enumerate(points[away_team_api_id].keys()):
            if key == id_of_match:
                if j-5 >= 0:
                    for k, point in enumerate(points[away_team_api_id].values()):
                        if j - 5 <= k < j:
                            pitlf += point
                    away_pitlf.append(pitlf)
                else:
                    away_pitlf.append(np.nan)
        pitlf = 0
        for j, key in enumerate(points_against_teams[home_team_api_id][away_team_api_id].keys()):
            if key == id_of_match:
                if j-5 >= 0:
                    for k, point in enumerate(points_against_teams[home_team_api_id][away_team_api_id].values()):
                        if j - 5 <= k < j:
                            pitlf += point
                    home_vs_away_pitlf.append(pitlf)
                else:
                    home_vs_away_pitlf.append(np.nan)
        pitlf = 0
        for j, key in enumerate(points_against_teams[away_team_api_id][home_team_api_id].keys()):
            if key == id_of_match:
                if j-5 >= 0:
                    for k, point in enumerate(points_against_teams[away_team_api_id][home_team_api_id].values()):
                        if j - 5 <= k < j:
                            pitlf += point
                    away_vs_home_pitlf.append(pitlf)
                else:
                    away_vs_home_pitlf.append(np.nan)
    dataframe['bet_ratio'] = bet_ratio
    dataframe['home_pitlf'] = home_pitlf
    dataframe['away_pitlf'] = away_pitlf
    dataframe['home_vs_away_pitlf'] = home_vs_away_pitlf
    dataframe['away_vs_home_pitlf'] = away_vs_home_pitlf
    dataframe.dropna(inplace=True)

In [286]:
prepareDF(df)
prepareDF(df_test)

In [287]:
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
import torch
import torch.nn as nn
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.infer import SVI, Trace_ELBO, Predictive

In [288]:
class BayesianRegression(PyroModule):
    def __init__(self, input_features=4, h1=20, h2=20):
        super().__init__()
        self.input_features=input_features
        self.fc1 = PyroModule[nn.Linear](input_features, h1)
        self.fc1.weight = PyroSample(dist.Normal(0., 1.).expand([h1, input_features]).to_event(2))
        self.fc1.bias = PyroSample(dist.Normal(0., 1.).expand([h1]).to_event(1))
        self.fc2 = PyroModule[nn.Linear](h1, h2)
        self.fc2.weight = PyroSample(dist.Normal(0., 1.).expand([h2, h1]).to_event(2))
        self.fc2.bias = PyroSample(dist.Normal(0., 1.).expand([h2]).to_event(1))
        self.fc3 = PyroModule[nn.Linear](h2, 1)
        self.fc3.weight = PyroSample(dist.Normal(0., 1.).expand([1, h2]).to_event(2))
        self.fc3.bias = PyroSample(dist.Normal(0., 1.).expand([1]).to_event(1))
        self.relu = nn.ReLU()

    def forward(self, x, y=None):
        x = x.reshape(-1, self.input_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        mu = self.fc3(x).squeeze()
        sigma = pyro.sample("sigma", dist.Uniform(0., 1.))
        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Normal(mu, sigma), obs=y)
        return mu

In [289]:
model = BayesianRegression()
guide = AutoDiagonalNormal(model)
adam = pyro.optim.Adam({"lr": 1e-3})
svi = SVI(model, guide, adam, loss=Trace_ELBO())

In [290]:
x_data = torch.tensor([df['home_pitlf'].values, df['away_pitlf'].values, df['home_vs_away_pitlf'].values, df['away_vs_home_pitlf'].values], dtype=torch.float)
x_data_test = torch.tensor([df_test['home_pitlf'].values, df_test['away_pitlf'].values, df_test['home_vs_away_pitlf'].values, df_test['away_vs_home_pitlf'].values], dtype=torch.float)
y_data = torch.tensor(df['bet_ratio'].values, dtype=torch.float)


In [291]:
num_iterations = 2000
pyro.clear_param_store()
for j in range(num_iterations):
    # calculate the loss and take a gradient step
    loss = svi.step(x_data, y_data)
    if j % 100 == 0:
        print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(x_data)))

[iteration 0001] loss: 141726.2841
[iteration 0101] loss: 19337.1310
[iteration 0201] loss: 11034.8644
[iteration 0301] loss: 6518.6449
[iteration 0401] loss: 5814.6074
[iteration 0501] loss: 2663.4163
[iteration 0601] loss: 8237.4140
[iteration 0701] loss: 2752.7416
[iteration 0801] loss: 1705.2802
[iteration 0901] loss: 851.0010
[iteration 1001] loss: 1522.2543
[iteration 1101] loss: 8223.1542
[iteration 1201] loss: 999.7855
[iteration 1301] loss: 1240.1978
[iteration 1401] loss: 563.5156
[iteration 1501] loss: 887.9284
[iteration 1601] loss: 769.7806
[iteration 1701] loss: 824.3283
[iteration 1801] loss: 786.0371
[iteration 1901] loss: 1015.9442


In [292]:
predictive = Predictive(model, guide=guide, num_samples=500)
preds = predictive(x_data_test)

y_pred = preds['obs'].T.detach().numpy().mean(axis=1)
y_std = preds['obs'].T.detach().numpy().std(axis=1)

In [293]:
error = 0
within = 0
for i, bet in enumerate(y_pred):
    error += abs(bet - df_test['bet_ratio'].values[i])
    if abs(bet - df_test['bet_ratio'].values[i]) < 0.5:
        within += 1
avg_error = error / len(y_pred)
within_percent = within / len(y_pred)
print(avg_error)
print(within_percent)


0.242338105587302
0.9992289899768697
