In [1]:
import pandas as pd
import pymc as pm


In [2]:
# Only these columns are read from the CSV; any other column in the file is skipped.
cols = [
    "date",
    "ticker",
    "close",
    "rsi",
    "bb_low",
    "bb_mid",
    "bb_high",
    "sharpe_ratio",
]
df = pd.read_csv("data/price_data.csv", usecols=cols)
df.head()

Unnamed: 0,date,ticker,close,rsi,bb_low,bb_mid,bb_high,sharpe_ratio
0,2017-01-03,A,43.559391,55.906011,3.757332,3.781197,3.805061,0.548844
1,2017-01-03,AAPL,26.82724,58.487161,3.274114,3.315296,3.356479,0.836831
2,2017-01-03,ABBV,43.078159,53.851836,3.75631,3.777267,3.798224,0.661857
3,2017-01-03,ABT,33.30719,50.624336,3.504776,3.52377,3.542764,0.568612
4,2017-01-03,ACGL,27.224224,59.555791,3.323679,3.345575,3.367472,0.645114


In [3]:
# Quick-run switch: when enabled, only the AAPL rows are kept in `df`.
smoke_test = True
if smoke_test:
    is_aapl = df["ticker"] == "AAPL"
    df = df.loc[is_aapl]

df.head()

Unnamed: 0,date,ticker,close,rsi,bb_low,bb_mid,bb_high,sharpe_ratio
1,2017-01-03,AAPL,26.82724,58.487161,3.274114,3.315296,3.356479,0.836831
472,2017-01-04,AAPL,26.797222,57.928821,3.28376,3.318253,3.352746,0.836831
943,2017-01-05,AAPL,26.933493,59.764295,3.293188,3.321085,3.348982,0.836831
1414,2017-01-06,AAPL,27.233753,63.461604,3.301094,3.323981,3.346868,0.836831
1885,2017-01-09,AAPL,27.483202,66.179389,3.306484,3.326847,3.347211,0.836831


In [None]:
## One seed shared by every sampling stage (prior predictive, NUTS, posterior
## predictive) so that a fresh "Restart & Run All" reproduces all draws.
RANDOM_SEED = 100

## Set up a dictionary for the specification of our priors.
## "size" records the number of AR coefficients (constant term included) and the
## number of initial values, so the number of lags can be varied in one place.
## NOTE(review): the prior locations (constant ~10, initial value ~9) sit well below
## the AAPL closes displayed in the output above (~27) -- confirm they are intended
## for this price series.
priors = {
    "coefs": {"mu": [10, 0.2], "sigma": [0.1, 0.1], "size": 2},
    "sigma": 8,
    "init": {"mu": 9, "sigma": 0.1, "size": 1},
}

## With constant=True the first coefficient is the intercept, so the number of
## lags is one less than the number of coefficients.
n_lags = priors["coefs"]["size"] - 1

## Keep the hand-maintained "size" entries consistent with the actual priors.
assert len(priors["coefs"]["mu"]) == priors["coefs"]["size"], "coefs prior length must match its size"
assert len(priors["coefs"]["sigma"]) == priors["coefs"]["size"], "coefs prior length must match its size"
assert priors["init"]["size"] == n_lags, "one initial value is required per lag"

## Initialise the model
AR = pm.Model()

## Define the time interval for fitting the data
t_data = list(range(len(df["close"])))
## Add the time interval as a mutable coordinate to the model to allow for future predictions
AR.add_coord("obs_id", t_data, mutable=True)

with AR:
    ## Data containers to enable prediction
    t = pm.MutableData("t", t_data, dims="obs_id")
    y = pm.MutableData("y", df["close"], dims="obs_id")

    # The first coefficient is the constant term; every coefficient of the AR
    # process gets its own prior location and scale.
    coefs = pm.Normal("coefs", priors["coefs"]["mu"], priors["coefs"]["sigma"])
    # The same sigma is used for the AR innovations and for the likelihood below.
    sigma = pm.HalfNormal("sigma", priors["sigma"])
    # We need one init variable for each lag, hence size is variable too
    init = pm.Normal.dist(
        priors["init"]["mu"], priors["init"]["sigma"], size=priors["init"]["size"]
    )
    # Steps of the AR model minus the lags required, so that the initial values
    # plus the steps span the whole "obs_id" dimension.
    ar1 = pm.AR(
        "ar",
        coefs,
        sigma=sigma,
        init_dist=init,
        constant=True,
        steps=t.shape[0] - n_lags,
        dims="obs_id",
    )

    # The Likelihood
    outcome = pm.Normal("likelihood", mu=ar1, sigma=sigma, observed=y, dims="obs_id")

    ## Sampling: every stage is seeded, not only the NUTS run.
    idata_ar = pm.sample_prior_predictive(random_seed=RANDOM_SEED)
    idata_ar.extend(pm.sample(2000, random_seed=RANDOM_SEED, target_accept=0.95))
    idata_ar.extend(pm.sample_posterior_predictive(idata_ar, random_seed=RANDOM_SEED))

Sampling: [ar, coefs, likelihood, sigma]
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [coefs, sigma, ar]
