In [1]:
# import statements
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup, Comment
import time
from datetime import datetime
from collections import defaultdict
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
# Import necessary modules
from sklearn.model_selection import train_test_split,GridSearchCV,cross_validate,cross_val_predict
from sklearn.metrics import mean_squared_error
from math import sqrt
import sklearn.metrics
from sklearn.metrics import r2_score
from sklearn.metrics import classification_report,confusion_matrix,make_scorer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
#from basketball_reference_web_scraper import 
import pickle
import random
from client import Nba_Season
# from skopt import BayesSearchCV
# from skopt.space import Real, Integer, Categorical
from collections import defaultdict
import NeuralNet as nn
# import autobnn as ab
import tensorflow as tf
import numpy as np
# import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.nn import PyroModule, PyroSample
import torch
import torch.nn as nn
from torch.nn import Softmax

  from .autonotebook import tqdm as notebook_tqdm


## Define simple BNN using Pyro containing 1 hidden layer

In [2]:
# load old samples and features
features = np.genfromtxt('old_samps_feats/2015-2023_nba_features_norm_inj.csv',delimiter=',')
samples = np.genfromtxt('old_samps_feats/2015-2023_nba_samples_inj.csv',delimiter=',')
samples_1d = [0 if j[0] == 0 else 1 for j in samples]
feat_train, feat_test, samp_train, samp_test = train_test_split(features, samples, test_size=0.25, random_state=1)
x_train = torch.FloatTensor(feat_train)
x_test = torch.FloatTensor(feat_test)
y_train = torch.FloatTensor(samp_train)
y_test = torch.FloatTensor(samp_test)

In [64]:
class BNN(PyroModule):
    def __init__(self, in_dim=1, out_dim=1, hid_dim=5, prior_scale=1.):
        super().__init__()

        self.activation = nn.ReLU()
        self.layer1 = PyroModule[nn.Linear](in_dim, hid_dim)
        self.layer2 = PyroModule[nn.Linear](hid_dim, out_dim)

        # Set layer parameters as random variables
        self.layer1.weight = PyroSample(dist.Normal(0., prior_scale).expand([in_dim, hid_dim]).to_event(2))
        self.layer1.bias = PyroSample(dist.Normal(0., prior_scale).expand([hid_dim,]).to_event(1))
        self.layer2.weight = PyroSample(dist.Normal(0., prior_scale).expand([hid_dim, out_dim]).to_event(2))
        self.layer2.bias = PyroSample(dist.Normal(0., prior_scale).expand([out_dim,]).to_event(1)) # output bias term

    def forward(self, x, y=None):
        z1 = self.activation(x @ self.layer1.weight + self.layer1.bias) # pass thru first layer
        z2 = self.activation(z1 @ self.layer2.weight + self.layer2.bias) # pass thru output layer

        with pyro.plate("data"):
            y_hat = Softmax(dim=1)(z2) # scale output via softmax for setting categorical priors
            # y_hat = Softmax(dim=0)(x)
            obs = pyro.sample("obs", dist.Categorical(y_hat), obs=y)

        return z2

In [66]:
from pyro.infer import MCMC, NUTS

model = BNN(in_dim=16,hid_dim=16,out_dim=2)

pyro.set_rng_seed(1)

# Define Hamiltonian Monte Carlo (HMC) kernel
# NUTS = "No-U-Turn Sampler" (https://arxiv.org/abs/1111.4246), gives HMC an adaptive step size
nuts_kernel = NUTS(model, jit_compile=False)  # jit_compile=True is faster but requires PyTorch 1.6+

# Define MCMC sampler, get 50 posterior samples
mcmc = MCMC(nuts_kernel, num_samples=50)

mcmc.run(x_train, y_train.T)

Warmup:   0%|          | 0/100 [00:00, ?it/s]

Sample: 100%|██████████| 100/100 [07:31,  4.51s/it, step size=1.17e-02, acc. prob=0.450]


In [81]:
from pyro.infer import Predictive

# get predictions based on posteriors
predictive = Predictive(model=model, posterior_samples=mcmc.get_samples(), return_sites=['obs','_RETURN'])

train_preds = predictive(x_train)['obs'].T.float().mean(axis=1) # each x in training produces 50 predictions (0 or 1), take average
test_preds = predictive(x_test)['obs'].T.float().mean(axis=1)

adj_train_preds = [0 if p < 0.5 else 1 for p in train_preds]
adj_test_preds = [0 if p < 0.5 else 1 for p in test_preds]
y_train_1d = [0 if j[0] == 0 else 1 for j in y_train] # [0,1] -> [away,home] -> 0 indicates home win, 1 indicates away
y_test_1d = [0 if j[0] == 0 else 1 for j in y_test]

print('---TRAINING SET---')
print('TN, FP, FN, TP')
print(confusion_matrix(y_train_1d,adj_train_preds).ravel())
print(classification_report(y_train_1d,adj_train_preds))
print('---TEST SET---')
print('TN, FP, FN, TP')
print(confusion_matrix(y_test_1d,adj_test_preds).ravel())
print(classification_report(y_test_1d,adj_test_preds))

---TRAINING SET---
TN, FP, FN, TP
[1969 2431 1894 2343]
              precision    recall  f1-score   support

           0       0.51      0.45      0.48      4400
           1       0.49      0.55      0.52      4237

    accuracy                           0.50      8637
   macro avg       0.50      0.50      0.50      8637
weighted avg       0.50      0.50      0.50      8637

---TEST SET---
TN, FP, FN, TP
[678 813 596 793]
              precision    recall  f1-score   support

           0       0.53      0.45      0.49      1491
           1       0.49      0.57      0.53      1389

    accuracy                           0.51      2880
   macro avg       0.51      0.51      0.51      2880
weighted avg       0.51      0.51      0.51      2880



## Define kelly critereon to take in average prediction score and make bets

In [78]:
def kelly(home_pred,away_pred,home_line,away_line,max_bet=100):
    '''
    Applies kelly critereon based on features and moneyline data
    home_pred: Prediction from MLP for home team
    away_pred: Prediction from MLP for away team
    home_line: Moneyline for home team
    away_line: Moneyline for away team
    '''
    bet_amount = 0
    to_win = 0

    log_home = home_pred - home_pred * away_pred / (home_pred + away_pred - (2*home_pred*away_pred))
    log_away = away_pred - home_pred * away_pred / (home_pred + away_pred - (2*home_pred*away_pred))

    # calculate ratio and implied for home
    home_line_adj = home_line
    away_line_adj = away_line
    if home_line < 0:
        home_line_adj *= -1
        home_line_adj /= 100
        home_ratio = 1/(home_line_adj)
        implied_home = home_line_adj/(1+home_line_adj)
    else:
        home_line_adj /= 100
        home_ratio = home_line_adj
        implied_home = 1/(home_line+1)

    # calculate ratio and implied for away
    if away_line < 0:
        away_line_adj *= -1
        away_line_adj /= 100
        away_ratio = 1/(away_line_adj)
        implied_away = away_line_adj/(1+away_line_adj)
    else:
        away_line_adj /= 100
        away_ratio = away_line_adj
        implied_away = 1/(away_line_adj+1)
    
    diff_home = log_home - implied_home
    diff_away = log_away - implied_away

    kelly_home = log_home - (log_away/home_ratio)
    kelly_away = log_away - (log_home/away_ratio)

    prob = 0

    # make bets, negative if away team bet
    if diff_home > diff_away and diff_home > 0.05:
        bet_amount = (max_bet*kelly_home)
        if home_line < 0:
            to_win = bet_amount/((home_line*-1)/100)
        else:
            to_win = bet_amount/((home_line)/100)
        prob = home_pred

    
    elif diff_away > diff_home and diff_away > 0.05:
        bet_amount = (max_bet*kelly_away)
        if away_line < 0:
            to_win = -1*bet_amount/((away_line*-1)/100)
        else:
            to_win = -1*bet_amount/((away_line)/100)
        prob = away_pred

    return bet_amount,to_win,prob

def BNN_kelly(preds,actual,money_lines):
    money_made = 0
    money_risked = 0
    correct = 0
    guessed = 0
    team_bet = []
    amount = []
    gained = []
    probs = []     

    for i in range(len(preds)):
        home_pred = preds[i]
        away_pred = 1 - home_pred
        home_ml = money_lines[i][7]
        away_ml = money_lines[i][10]

        to_bet,to_win,prob = kelly(home_pred,away_pred,home_ml,away_ml)
        probs.append(prob)
        money_risked += to_bet

        curr_gained = 0

        if to_win < 0:
            team_bet.append('Away')
            amount.append(to_bet)
            guessed += 1
            curr_gained = -1*to_bet
            if actual[i] == 1:
                correct += 1
                curr_gained = (-1*to_win)
                #money_made += curr_gained
        elif to_win > 0:
            team_bet.append('Home')
            amount.append(to_bet)
            guessed += 1
            curr_gained = -1*to_bet
            if actual[i] == 0:
                correct += 1
                curr_gained = to_win
                #money_made += curr_gained
        else:
            team_bet.append(0)
            amount.append(0)

        gained.append(curr_gained)

        if curr_gained > 0:
            money_made += curr_gained

    return correct,guessed,team_bet,probs,amount,gained
        

In [79]:
# Get 2022-2023 game data and betting information
features_new = np.genfromtxt('old_samps_feats/2022-2023_nba_features_inj.csv',delimiter=',')
samples_new = np.genfromtxt('old_samps_feats/2022-2023_nba_samples_inj.csv',delimiter=',')
bet_data = np.genfromtxt('with_bets/2022-2023_season.csv',delimiter=',')

samples_new_1d = [0 if j[0] == 0 else 1 for j in samples_new]

new_x_tens = torch.FloatTensor(features_new)
new_y_tens = torch.FloatTensor(samples_new)

new_predictive = Predictive(model=model, posterior_samples=mcmc.get_samples(),return_sites=("obs","_RETURN"))
new_y_pred = new_predictive(new_x_tens)['obs'].T.float().mean(axis=1)

correct,guessed,team_bet,probs,amount,gained = BNN_kelly(new_y_pred,samples_new_1d,bet_data[1:])
#print(money_risked-money_made)
print(correct)
print(guessed)
print(sum(gained))

48
104
tensor(-465.6457)
