In [17]:
import math
import torch
import pyro
import pyro.distributions as dist
import pyro.poutine as poutine
from pyro.contrib.examples.bart import load_bart_od
from pyro.contrib.forecast import ForecastingModel, Forecaster, eval_crps
from pyro.infer.reparam import LinearHMMReparam, StableReparam, SymmetricStableReparam
from pyro.ops.tensor_utils import periodic_repeat
from pyro.ops.stats import quantile
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline
assert pyro.__version__.startswith('1.9.1')
pyro.set_rng_seed(20200305)


In [18]:
games_df = pd.read_csv('../processed_data/games.csv')

In [19]:
home_df = games_df.loc[(games_df['home_team'] == 'Arizona Cardinals')]

In [20]:
home_df

Unnamed: 0,year,week,home_team,away_team,winner,tie,day,date,time,pts_win,...,weekly_attendance,weekly_attendance_ratio,home_team_superbowl_winner_last_season,away_team_superbowl_winner_last_season,home_team_playoffs_last_season,away_team_playoffs_last_season,home_win,away_win,home_team_wins_last_3,away_team_wins_last_3
43,2010,3,Arizona Cardinals,Oakland Raiders,Arizona Cardinals,,Sun,September 26,4:15PM,24,...,62439.0,0.728678,False,False,True,False,1,0,1.0,1.0
71,2010,5,Arizona Cardinals,New Orleans Saints,Arizona Cardinals,,Sun,October 10,4:05PM,30,...,62621.0,0.730802,False,True,True,True,1,0,1.0,2.0
113,2010,8,Arizona Cardinals,Tampa Bay Buccaneers,Tampa Bay Buccaneers,,Sun,October 31,4:15PM,38,...,61857.0,0.721886,False,False,True,False,0,1,1.0,2.0
139,2010,10,Arizona Cardinals,Seattle Seahawks,Seattle Seahawks,,Sun,November 14,4:15PM,36,...,61904.0,0.722435,False,False,True,False,0,1,0.0,1.0
175,2010,12,Arizona Cardinals,San Francisco 49ers,San Francisco 49ers,,Mon,November 29,8:40PM,27,...,62308.0,0.727150,False,False,True,False,0,1,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2389,2019,6,Arizona Cardinals,Atlanta Falcons,Arizona Cardinals,,Sun,October 13,4:05PM,34,...,60140.0,0.701849,False,False,False,False,1,0,1.0,0.0
2436,2019,9,Arizona Cardinals,San Francisco 49ers,San Francisco 49ers,,Thu,October 31,8:20PM,28,...,60986.0,0.711722,False,False,False,False,0,1,2.0,3.0
2489,2019,13,Arizona Cardinals,Los Angeles Rams,Los Angeles Rams,,Sun,December 1,4:05PM,34,...,60944.0,0.711231,False,False,False,True,0,1,0.0,1.0
2506,2019,14,Arizona Cardinals,Pittsburgh Steelers,Pittsburgh Steelers,,Sun,December 8,4:25PM,23,...,63880.0,0.745495,False,False,False,False,0,1,0.0,2.0


In [23]:
yearly_attendance_average_df = home_df.groupby(["home_team","year"])["weekly_attendance_ratio"].mean().reset_index().rename(columns={"weekly_attendance_ratio": "yearly_attendance_ratio_average"})
yearly_attendance_average_df["lagged_yearly_attendance_ratio_average"] = yearly_attendance_average_df.groupby("home_team")["yearly_attendance_ratio_average"].shift(1).fillna(0) # This fillna we can have 100% efficiency

home_df = home_df.merge(
    yearly_attendance_average_df[["home_team", "year", "lagged_yearly_attendance_ratio_average"]],
    on=["home_team", "year"],
    how="left"
).rename(columns={"lagged_yearly_attendance_ratio_average": "last_year_avg_attendance_ratio"})

In [24]:
home_df

Unnamed: 0,year,week,home_team,away_team,winner,tie,day,date,time,pts_win,...,weekly_attendance_ratio,home_team_superbowl_winner_last_season,away_team_superbowl_winner_last_season,home_team_playoffs_last_season,away_team_playoffs_last_season,home_win,away_win,home_team_wins_last_3,away_team_wins_last_3,last_year_avg_attendance_ratio
0,2010,3,Arizona Cardinals,Oakland Raiders,Arizona Cardinals,,Sun,September 26,4:15PM,24,...,0.728678,False,False,True,False,1,0,1.0,1.0,0.000000
1,2010,5,Arizona Cardinals,New Orleans Saints,Arizona Cardinals,,Sun,October 10,4:05PM,30,...,0.730802,False,True,True,True,1,0,1.0,2.0,0.000000
2,2010,8,Arizona Cardinals,Tampa Bay Buccaneers,Tampa Bay Buccaneers,,Sun,October 31,4:15PM,38,...,0.721886,False,False,True,False,0,1,1.0,2.0,0.000000
3,2010,10,Arizona Cardinals,Seattle Seahawks,Seattle Seahawks,,Sun,November 14,4:15PM,36,...,0.722435,False,False,True,False,0,1,0.0,1.0,0.000000
4,2010,12,Arizona Cardinals,San Francisco 49ers,San Francisco 49ers,,Mon,November 29,8:40PM,27,...,0.727150,False,False,True,False,0,1,0.0,2.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,2019,6,Arizona Cardinals,Atlanta Falcons,Arizona Cardinals,,Sun,October 13,4:05PM,34,...,0.701849,False,False,False,False,1,0,1.0,0.0,0.723717
76,2019,9,Arizona Cardinals,San Francisco 49ers,San Francisco 49ers,,Thu,October 31,8:20PM,28,...,0.711722,False,False,False,False,0,1,2.0,3.0,0.723717
77,2019,13,Arizona Cardinals,Los Angeles Rams,Los Angeles Rams,,Sun,December 1,4:05PM,34,...,0.711231,False,False,False,True,0,1,0.0,1.0,0.723717
78,2019,14,Arizona Cardinals,Pittsburgh Steelers,Pittsburgh Steelers,,Sun,December 8,4:25PM,23,...,0.745495,False,False,False,False,0,1,0.0,2.0,0.723717


In [25]:
train_df = home_df.loc[(home_df['year'] > 2001) & (home_df['year'] < 2017)]

In [None]:
class Model1(ForecastingModel):
    def __init__(self, cov_dim, hidden_dim=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.cov_dim = cov_dim

        # Define a neural net to generate transition matrices from covariates
        self.trans_net = torch.nn.Sequential(
            torch.nn.Linear(cov_dim, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, hidden_dim * hidden_dim)
        )

    def model(self, zero_data, covariates):
        duration = zero_data.size(-2)
        hidden_dim = self.hidden_dim
        last_year_avg_attendance = covariates[..., -1]
        other_covariates = covariates[..., :-1]

        # Mean function (could also depend on covariates)
        # prediction = torch.zeros(zero_data.shape).unsqueeze(-1)  # or a periodic_repeat()
        prediction = last_year_avg_attendance.unsqueeze(-1).expand_as(zero_data)

        # Priors
        init_dist = dist.Normal(0, 10).expand([hidden_dim]).to_event(1)
        trans_scale = pyro.sample("trans_scale", dist.LogNormal(-0.5 * math.log(24), 1))
        trans_dist = dist.Normal(0, trans_scale).expand([hidden_dim]).to_event(1)
        obs_scale = pyro.sample("obs_scale", dist.LogNormal(-2, 1))
        obs_dist = dist.Normal(0, obs_scale).expand([1]).to_event(1)

        # Use the NN to generate transition matrices per time step
        trans_logits = self.trans_net(other_covariates)  # [duration, hidden_dim * hidden_dim]
        trans_matrix_raw = trans_logits.view(duration, hidden_dim, hidden_dim)

        # Apply a nonlinearity to ensure stability (e.g. tanh scaled to keep values bounded)
        trans_matrix = 0.99 * torch.tanh(trans_matrix_raw)

        # Build GaussianHMM
        noise_dist = dist.GaussianHMM(
            initial_distribution=init_dist,
            transition_matrix=trans_matrix,
            transition_distribution=trans_dist,
            observation_matrix=torch.eye(hidden_dim, 1),  # obs_dim = 1
            observation_distribution=obs_dist,
            duration=duration
        )

        self.predict(noise_dist, prediction)


In [None]:
%%time
pyro.set_rng_seed(1)
pyro.clear_param_store()
covariates = train_df[['Sb_winners', 'Sb_winners_prev_years', 'Playoff_prev_years','Bad_streak', 'Medium_streak', 'last_year_avg_attendance']].astype(int)
covariates_tensor = torch.tensor(covariates.values, dtype=torch.float)

y = home_df.loc[train_df.index, 'weekly_attendance']
y_tensor = torch.tensor(y.values, dtype=torch.float).unsqueeze(-1)

forecaster = Forecaster(Model1(cov_dim=5, hidden_dim=3), y_tensor, covariates_tensor, learning_rate=0.1, num_steps=500)
for name, value in forecaster.guide.median().items():
    if value.numel() == 1:
        print("{} = {:0.4g}".format(name, value.item()))

torch.Size([128, 6])
128
128


TypeError: GaussianHMM.__init__() got an unexpected keyword argument 'initial_distribution'