In [1]:
!pip install pyro-ppl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyro-ppl
  Downloading pyro_ppl-1.8.4-py3-none-any.whl (730 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m730.7/730.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Collecting pyro-api>=0.1.1 (from pyro-ppl)
  Downloading pyro_api-0.1.2-py3-none-any.whl (11 kB)
Installing collected packages: pyro-api, pyro-ppl
Successfully installed pyro-api-0.1.2 pyro-ppl-1.8.4


In [34]:
import numpy as np
import pandas as pd  
from matplotlib import pyplot as plt
from sklearn import linear_model
import torch

import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal, AutoLowRankMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO, Predictive
from pyro.optim import Adam, ClippedAdam



# matplotlib options
plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = (8, 6)

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
dfgames = pd.read_csv('/content/drive/MyDrive/data/games/games.csv')
dfteams = pd.read_csv('/content/drive/MyDrive/data/games/teams.csv')
dfplayers = pd.read_csv('/content/drive/MyDrive/data/games/players.csv')
dfranking = pd.read_csv('/content/drive/MyDrive/data/games/ranking.csv')
dfgamebox = pd.read_csv('/content/drive/MyDrive/data/games/games_details.csv',low_memory=False)
dfplayerpergame = pd.read_csv('/content/drive/MyDrive/data/NBAstats/Player-Per-Game.csv')
dfplayerdata = pd.read_csv('/content/drive/MyDrive/data/player_seasonal/all_seasons.csv')

dfgames['GAME_DATE_EST'] = pd.to_datetime(dfgames['GAME_DATE_EST'])

In [4]:

dfgames.columns

Index(['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'HOME_TEAM_ID',
       'VISITOR_TEAM_ID', 'SEASON', 'TEAM_ID_home', 'PTS_home', 'FG_PCT_home',
       'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'TEAM_ID_away',
       'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away',
       'REB_away', 'HOME_TEAM_WINS'],
      dtype='object')

In [5]:
training_games=dfgames[(dfgames["SEASON"] == 2015) |(dfgames["SEASON"] == 2016) |(dfgames["SEASON"] == 2017) |(dfgames["SEASON"] == 2018) | (dfgames["SEASON"] == 2019) | (dfgames["SEASON"] == 2020) | (dfgames["SEASON"] == 2021)]
testing_games=dfgames[(dfgames["SEASON"] == 2022)]

In [None]:
home_stats=[]
away_stats=[]
for i in range(len(training_games)):
         home_id,away_id=training_games["HOME_TEAM_ID"].iloc[i],training_games["VISITOR_TEAM_ID"].iloc[i]
         game_date=training_games["GAME_DATE_EST"].iloc[i]
         home_matches=dfgames[ ((dfgames["HOME_TEAM_ID"] == home_id)	| (dfgames["VISITOR_TEAM_ID"] == home_id)) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         away_matches=dfgames[ ((dfgames["HOME_TEAM_ID"] == away_id)	| (dfgames["VISITOR_TEAM_ID"] == away_id)) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         
         idx_home,idx_away=((home_matches["HOME_TEAM_ID"] == home_id).values).astype(int),((home_matches["HOME_TEAM_ID"] == away_id).values).astype(int)
         idx_home[idx_home == 0] = -1
         idx_away[idx_away == 0] = -1

         home_stats.append(( (home_matches["PTS_home"]-home_matches["PTS_away"]) *idx_home).values) 
         away_stats.append(( (away_matches["PTS_home"]-away_matches["PTS_away"]) *idx_away).values) 

home_stats_test=[]
away_stats_test=[]
for i in range(len(testing_games)):
         home_id,away_id=testing_games["HOME_TEAM_ID"].iloc[i],testing_games["VISITOR_TEAM_ID"].iloc[i]
         game_date=testing_games["GAME_DATE_EST"].iloc[i]
         home_matches=dfgames[ ((dfgames["HOME_TEAM_ID"] == home_id)	| (dfgames["VISITOR_TEAM_ID"] == home_id)) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         away_matches=dfgames[ ((dfgames["HOME_TEAM_ID"] == away_id)	| (dfgames["VISITOR_TEAM_ID"] == away_id)) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         
         idx_home,idx_away=((home_matches["HOME_TEAM_ID"] == home_id).values).astype(int),((home_matches["HOME_TEAM_ID"] == away_id).values).astype(int)
         idx_home[idx_home == 0] = -1
         idx_away[idx_away == 0] = -1

         home_stats_test.append(( (home_matches["PTS_home"]-home_matches["PTS_away"]) *idx_home).values) 
         away_stats_test.append(( (away_matches["PTS_home"]-away_matches["PTS_away"]) *idx_away).values) 

In [None]:
home_stats=torch.tensor(home_stats).float()
away_stats=torch.tensor(away_stats).float()

home_stats_test=torch.tensor(home_stats_test).float()
away_stats_test=torch.tensor(away_stats_test).float()

In [None]:
y=torch.tensor(training_games["HOME_TEAM_WINS"].values).float()
y_test=torch.tensor(testing_games["HOME_TEAM_WINS"].values).float()

In [None]:
from pyro.nn import PyroModule, PyroSample
import torch.nn as nn
class FFNN(PyroModule):
    def __init__(self, n_in, n_hidden, n_out):
        super(FFNN, self).__init__()
        
        # Architecture
        self.in_layer = PyroModule[nn.Linear](n_in, n_hidden)
        self.in_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_in]).to_event(2))

        self.h_layer = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        #self.h_layer12 = PyroModule[nn.Linear](n_hidden, n_hidden)
        #self.h_layer12.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_out, n_hidden]).to_event(2))

        self.in_layer2 = PyroModule[nn.Linear](n_in, n_hidden)
        self.in_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_in]).to_event(2))

        self.h_layer2 = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        #self.h_layer22 = PyroModule[nn.Linear](n_hidden, n_hidden)
        #self.h_layer22.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer2 = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_out, n_hidden]).to_event(2))

        # Activation functions
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, X1,X2, y=None):
        X1= self.tanh(self.in_layer(X1))
        X1 = self.tanh(self.h_layer(X1))
        #X1 = self.tanh(self.h_layer12(X1))
        X1 = self.sigmoid(self.out_layer(X1))
        prob1 = X1.squeeze(-1)

        X2= self.tanh(self.in_layer2(X2))
        X2 = self.tanh(self.h_layer2(X2))
        #X2 = self.tanh(self.h_layer22(X2))
        X2 = self.sigmoid(self.out_layer2(X2))
        prob2 = X2.squeeze(-1)

        with pyro.plate("observations"):
            y = pyro.sample("obs", dist.Bernoulli(prob1/(prob1+prob2)), obs=y)
            
        return prob1/(prob1+prob2)

In [None]:
model = FFNN(n_in=home_stats.shape[1], n_hidden=5, n_out=1)
guide = AutoLowRankMultivariateNormal(model)
pyro.clear_param_store()

In [None]:
# Define the number of optimization steps
n_steps = 10000

# Setup the optimizer
initial_lr = 0.01
gamma = 0.01
lrd = gamma ** (1 / n_steps)
optimizer = pyro.optim.ClippedAdam({'lr': initial_lr, 'lrd': lrd})


# Setup the inference algorithm
elbo = Trace_ELBO(num_particles=1)
svi = SVI(model, guide, optimizer, loss=elbo)

# Do gradient steps
for step in range(n_steps):
    elbo = svi.step(home_stats,away_stats, y)
    if step % 500 == 0:
        print("[%d] ELBO: %.1f" % (step, elbo))

[0] ELBO: 7839.9
[500] ELBO: 6547.5
[1000] ELBO: 6500.0
[1500] ELBO: 6487.8
[2000] ELBO: 6475.8
[2500] ELBO: 6452.3
[3000] ELBO: 6450.3
[3500] ELBO: 6477.8
[4000] ELBO: 6462.9
[4500] ELBO: 6454.8
[5000] ELBO: 6457.7
[5500] ELBO: 6450.6
[6000] ELBO: 6427.5
[6500] ELBO: 6434.5
[7000] ELBO: 6451.9
[7500] ELBO: 6486.6
[8000] ELBO: 6461.0
[8500] ELBO: 6460.7
[9000] ELBO: 6457.8
[9500] ELBO: 6433.2


In [None]:
torch.set_printoptions(threshold=10_000)

In [None]:


# Make predictions for test set
predictive = Predictive(model, guide=guide, num_samples=1000,
                        return_sites=("obs", "_RETURN"))
samples = predictive(home_stats_test,away_stats_test)

In [None]:
y_pred=samples["_RETURN"].mean(axis=0)
y_pred

In [None]:
y_pred=samples["_RETURN"].mean(axis=0)

y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] =0 

torch.mean((y_pred == y_test).float())

tensor(0.5978)

In [None]:
# Run inference in Pyro
#nuts_kernel = NUTS(model)
#mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=500, num_chains=1)
#mcmc.run(home_stats,away_stats, y)

# Show summary of inference results
#mcmc.summary()

In [None]:
#predictive = Predictive(model, posterior_samples=mcmc.get_samples(), num_samples=1000,
#                        return_sites=("obs", "_RETURN"))
#samples2 = predictive(home_stats_test,away_stats_test)

In [None]:
#y_pred=samples2["_RETURN"].mean(axis=0)

#y_pred[y_pred > 0.5] = 1
#y_pred[y_pred <= 0.5] =0 

#torch.mean((y_pred == y_test).float())

tensor(0.5996)

# Poisson

In [6]:
training_games.columns

Index(['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'HOME_TEAM_ID',
       'VISITOR_TEAM_ID', 'SEASON', 'TEAM_ID_home', 'PTS_home', 'FG_PCT_home',
       'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'TEAM_ID_away',
       'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away',
       'REB_away', 'HOME_TEAM_WINS'],
      dtype='object')

In [16]:
training_games.iloc[:,19]

542      44.0
543      47.0
544      55.0
545      31.0
546      43.0
         ... 
25228    33.0
25229    60.0
25230    41.0
25231    55.0
25232    57.0
Name: REB_away, Length: 9460, dtype: float64

In [197]:
home_stats=[]
away_stats=[]
for i in range(len(training_games)):
         home_id,away_id=training_games["HOME_TEAM_ID"].iloc[i],training_games["VISITOR_TEAM_ID"].iloc[i]
         game_date=training_games["GAME_DATE_EST"].iloc[i]
         hh_matches=dfgames[(dfgames["HOME_TEAM_ID"] == home_id) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         ha_matches=dfgames[(dfgames["VISITOR_TEAM_ID"] == home_id) & (dfgames["GAME_DATE_EST"]  < game_date)].head()

         ah_matches=dfgames[(dfgames["HOME_TEAM_ID"] == away_id) & (dfgames["GAME_DATE_EST"]  < game_date)].head()
         aa_matches=dfgames[(dfgames["VISITOR_TEAM_ID"] == away_id) & (dfgames["GAME_DATE_EST"]  < game_date)].head()

         home_offense=0.5*np.mean((hh_matches.iloc[:, 7:12]).values+(ha_matches.iloc[:, 14:19]).values,axis=0) #7:12 14:19
         away_offense=0.5*np.mean((ah_matches.iloc[:, 7:12]).values+(aa_matches.iloc[:, 14:19]).values,axis=0)

         home_defense=0.5*np.mean((hh_matches.iloc[:, 12]).values+(ha_matches.iloc[:, 19]).values,axis=0)
         away_defense=0.5*np.mean((ah_matches.iloc[:, 12]).values+(aa_matches.iloc[:, 19]).values,axis=0)

         home_stats.append(np.append(home_offense,away_defense) ) 
         away_stats.append(np.append(away_offense,home_defense) ) 


In [198]:
home_stats=torch.tensor(home_stats).float()
away_stats=torch.tensor(away_stats).float()

In [199]:
y1=torch.tensor(training_games["PTS_home"].values).float()
y2=torch.tensor(training_games["PTS_away"].values).float()

In [73]:
def poisson_model(X1,X2, obs1=None,obs2=None):
    alpha1 = pyro.sample("alpha1", pyro.distributions.Normal(0, .1))
    alpha2 = pyro.sample("alpha2", pyro.distributions.Normal(0, .1))
    with pyro.plate("param", X1.shape[1]):
         beta1 = pyro.sample("beta1", pyro.distributions.Normal(0, .1))
         beta2 = pyro.sample("beta2", pyro.distributions.Normal(0, .1))
    with pyro.plate("data", len(X1)):
        pyro.sample("y1", pyro.distributions.Poisson(torch.exp(torch.matmul(X1,beta1)+alpha1)), obs=obs1)
        pyro.sample("y2", pyro.distributions.Poisson(torch.exp(torch.matmul(X2,beta2)+alpha2)), obs=obs2)

In [200]:
from pyro.nn import PyroModule, PyroSample
import torch.nn as nn
class poisson_NN(PyroModule):
    def __init__(self, n_in, n_hidden, n_out):
        super(poisson_NN, self).__init__()
        
        # Architecture
        self.in_layer = PyroModule[nn.Linear](n_in, n_hidden)
        self.in_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_in]).to_event(2))

        self.h_layer = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        #self.h_layer12 = PyroModule[nn.Linear](n_hidden, n_hidden)
        #self.h_layer12.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer.weight = PyroSample(dist.Normal(0., 2.).expand([n_out, n_hidden]).to_event(2))

        self.in_layer2 = PyroModule[nn.Linear](n_in, n_hidden)
        self.in_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_in]).to_event(2))

        self.h_layer2 = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        #self.h_layer22 = PyroModule[nn.Linear](n_hidden, n_hidden)
        #self.h_layer22.weight = PyroSample(dist.Normal(0., 2.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer2 = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer2.weight = PyroSample(dist.Normal(0., 2.).expand([n_out, n_hidden]).to_event(2))

        # Activation functions
        self.tanh = nn.Tanh()
        #self.relu = nn.ReLU()
        
    def forward(self, X1,X2, obs1=None,obs2=None):
        X1= self.tanh(self.in_layer(X1))
        X1 = self.tanh(self.h_layer(X1))
        #X1 = self.tanh(self.h_layer12(X1))
        X1 = (self.out_layer(X1))
        prob1 = X1.squeeze(-1)

        X2= self.tanh(self.in_layer2(X2))
        X2 = self.tanh(self.h_layer2(X2))
        #X2 = self.tanh(self.h_layer22(X2))
        X2 = self.out_layer2(X2)
        prob2 = X2.squeeze(-1)

        with pyro.plate("observations"):
            pyro.sample("y1", pyro.distributions.Poisson(torch.exp(prob1)), obs=obs1)
            pyro.sample("y2", pyro.distributions.Poisson(torch.exp(prob2)), obs=obs2)

In [201]:
# Define guide function
model_NN=poisson_NN(n_in=home_stats.shape[1], n_hidden=5, n_out=1)
guide =AutoDiagonalNormal(model_NN)

# Reset parameter values
pyro.clear_param_store()

In [202]:
# Define the number of optimization steps
n_steps = 10000

# Setup the optimizer
initial_lr = 0.1
gamma = 0.01
lrd = gamma ** (1 / n_steps)
optimizer = pyro.optim.ClippedAdam({'lr': initial_lr, 'lrd': lrd})

# Setup the inference algorithm
elbo = Trace_ELBO(num_particles=1)
svi = SVI(model_NN, guide, optimizer, loss=elbo)

# Do gradient steps
for step in range(n_steps):
    elbo = svi.step(home_stats, away_stats, y1, y2)
    if step % 500 == 0:
        print("[%d] ELBO: %.1f" % (step, elbo))

[0] ELBO: 6790583.5
[500] ELBO: 94799.2
[1000] ELBO: 79315.9
[1500] ELBO: 128207.0
[2000] ELBO: 93910.5
[2500] ELBO: 79462.3
[3000] ELBO: 87284.4
[3500] ELBO: 81253.7
[4000] ELBO: 76827.0
[4500] ELBO: 76691.4
[5000] ELBO: 76284.8
[5500] ELBO: 76557.4
[6000] ELBO: 76188.6
[6500] ELBO: 76377.8
[7000] ELBO: 76170.5
[7500] ELBO: 76172.4
[8000] ELBO: 76179.3
[8500] ELBO: 76165.0
[9000] ELBO: 76160.2
[9500] ELBO: 76239.4


In [213]:

predictive = Predictive(model_NN, guide=guide, num_samples=5000,
                        return_sites=("y1", "y2"))
samples = predictive(home_stats, away_stats)

In [214]:
y_pred=samples['y1'].mean(axis=0) > samples['y2'].mean(axis=0)


np.mean(np.array(y_pred) == training_games["HOME_TEAM_WINS"].values)

0.5694503171247357

In [215]:
np.sum(np.array(y_pred))

9460

In [212]:
samples['y1']

tensor([[112., 102., 111.,  ..., 115., 102., 101.],
        [120., 106., 107.,  ..., 113., 122., 114.],
        [103., 102., 108.,  ...,  98.,  97., 102.],
        ...,
        [109., 106., 105.,  ...,  98., 108., 116.],
        [100.,  97., 105.,  ...,  97., 132., 104.],
        [107., 131., 110.,  ..., 126., 104.,  88.]])

In [209]:
samples['y2']

tensor([[108., 106., 110.,  ..., 109., 109., 113.],
        [106., 125., 114.,  ..., 134.,  95., 121.],
        [105.,  98., 109.,  ..., 108., 112.,  96.],
        ...,
        [ 86.,  90., 129.,  ..., 113., 112., 102.],
        [102., 110., 108.,  ..., 113., 115., 111.],
        [116.,  98.,  82.,  ..., 122., 110., 126.]])