In [17]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import arviz as az
import numpy as np
import pymc as pm


def sampler_kwargs():
    return dict(
        nuts_sampler="nutpie",
        chains=2,
        draws=300,
        nuts_sampler_kwargs={"backend": "jax", "gradient_backend": "jax"},
    )

project_dir = Path.cwd().parent
train_df = pd.read_csv(project_dir / "data" / "model" / "training_data.csv",
    parse_dates=True,
    date_format="%Y %b",
    index_col=0,
    dtype={"value": float},
)

M = 30
D = 20

train_df['y'] = train_df.sum(axis=1)
train_df.index = pd.to_datetime(train_df.index)
train_df.index.freq = train_df.index.inferred_freq


In [25]:
N = len(train_df) - D + 1
reporting_matrices = np.empty((N, D, D))

def mask_matrix(matrix):
        masked_matrix = matrix.copy()
        for i in range(D):
            for j in range(D):
                if i + j > D - 1:
                     masked_matrix[i, j] = 0
        return  masked_matrix

for i in range(N):
    t = i + D
    delay_matrix = train_df.copy().drop('y', axis=1)
    delay_matrix = np.array(delay_matrix.iloc[t - D : t].values)
    masked_delay_matrix = mask_matrix(delay_matrix)

    reporting_matrices[i] = masked_delay_matrix

y_all = train_df['y'][D-1:]
print(y_all.shape)
print(reporting_matrices.shape)

(1998,)
(1998, 20, 20)


In [6]:
%config InlineBackend.figure_format = 'retina'

RANDOM_SEED = 8998
rng = np.random.default_rng(RANDOM_SEED)

az.style.use("arviz-darkgrid")

In [None]:
# GP input standardised so same across all batches
x = np.array(list(range(0,M))).reshape(-1,1)
x_min = np.min(x)
x_max = np.max(x)
x_scaled = (x - x_min) / (x_max - x_min)

x_new = np.array(list(range(M+1,M+D+1))).reshape(-1,1)
x_new_scaled = (x_new - x_min) / (x_max - x_min)


Minibatch.0

In [33]:
def delay_adjustment_net(x):
    # Assume x is shape (M, D, D)
    x_flat = x.reshape((x.shape[0], -1)) # (M, D*D)
    h1 = pm.math.tanh(pm.math.dot(x_flat, W1) + b1)
    out = pm.math.dot(h1, W2) + b2  # shape (M,)

    return out
    
# Pull in reporting matrix and correct num of cases label for training


z_mb = pm.Minibatch(reporting_matrices, batch_size=M)
y_mb = pm.Minibatch(y_all, batch_size=M)

with pm.Model() as model:

    # NN priors
    # Layer 1
    W1 = pm.Normal("W1", mu=0, sigma=1, shape=(M, M))
    b1 = pm.Normal("b1", mu=0, sigma=1, shape=(M,))
    

    # Output layer
    W2 = pm.Normal("W2", mu=0, sigma=1, shape=(M,))
    b2 = pm.Normal("b2", mu=0, sigma=1)
    

    # Linear trend parameters
    intercept = pm.Normal("intercept", mu=0, sigma=10)
    slope = pm.Normal("slope", mu=0, sigma=5)

    ell_trend = pm.Gamma("ell_trend", alpha=5, beta=1)   # length scale
    eta_trend = pm.HalfNormal("eta_trend", sigma=5)      # amplitude
    cov_trend = eta_trend**2 * pm.gp.cov.ExpQuad(1, ls=ell_trend)

    mean_func = pm.gp.mean.Linear(coeffs=slope, intercept=intercept)

    # Periodic kernel parameters
    period = pm.Gamma("period", alpha=2, beta=4)
    ell_per = pm.Gamma("ell_per", alpha=5, beta=5)   # length scale
    eta_per = pm.HalfNormal("eta_per", sigma=5)      # amplitude
    cov_per = eta_per**2 * pm.gp.cov.Periodic(1, period=period, ls=ell_per)

    sigma_noise = pm.HalfNormal("sigma_noise", sigma=2)
    cov_noise = pm.gp.cov.WhiteNoise(sigma_noise**2)

    cov = cov_trend + cov_per + cov_noise

    gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov)

    f = gp.prior("f", X=x_scaled) # shape (M,)
    mu = delay_adjustment_net(z_mb) # shape (M,)

    log_lam = f + mu
    lam = pm.Deterministic("lam", pm.math.exp(log_lam))

    y_ = pm.Poisson("y", mu=lam, observed=y_mb)

    idata = pm.sample(**sampler_kwargs())
 
    



TypeError: Variables that depend on other nodes cannot be used for observed data.The data variable was: Cast{int32}.0