In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.ar_model import AutoReg
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from scipy.stats import norm
import statsmodels.api as sm

In [79]:
DEFAULT_FIGSIZE = (48,18)
plt.rc("figure", figsize=DEFAULT_FIGSIZE)
plt.rc("font", size=48)

def plot(s, y=DEFAULT_FIGSIZE):
    #figsize(y=y)
    fig, ax = plt.subplots(1, 1)
    if isinstance(s, pd.Series):
        s.plot(ax=ax, legend=False, color=["orange"])
    else:
        s.plot(ax=ax, legend=False)
        fig.legend(frameon=False)
    
    ax.set_xlabel(None)
    ax.set_xlim(s.index[0], s.index[-1]) #s.index[0]
    sns.despine()
    fig.tight_layout(pad=1.0)
    #figsize()

In [40]:
def diebold_mariano(loss_a, loss_b, nw_bandwidth,cv):
    delta = loss_a - loss_b
    mod = sm.OLS(delta, np.ones_like(delta))
    dm_res = mod.fit(cov_type="HAC", cov_kwds={"maxlags":int(nw_bandwidth)})
    av_diff = delta.mean()
    a  = dm_res.bse[0]
    dm_stat = float(av_diff / a)  

    #cv = norm.ppf(0.975)

    if dm_stat < (cv * -1):
        concl = 1
    else:
        concl = 0


    return concl

In [42]:
# Simulate some data
# run time ~ 1 minute

# True simulated data ## Sample size 100
rg = np.random.RandomState(100)

r = [0.90, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]
runs = 1000
true_100 = {}
size = 100
nw = size ** (1/3)
cv = norm.ppf(0.975)
theta_100 = []
dm_100 = []

for rho in r:
    print(rho)
    for run in range(runs):
        start = rg.normal(0,1/(1-(rho**2)))
        run_holder = []
        run_holder.append(start)

        for i in range(1,size):
            shock = rg.standard_normal()
            last_val = run_holder[-1]
            new_val = last_val * rho + shock
            run_holder.append(new_val)
            
        y = np.asarray(run_holder)

        # Fit using first half, tau//2
        mod = AutoReg(y[:size//2], lags=1, trend="c", old_names=False)
        res = mod.fit()

        # Full-sample model
        oos_mod = AutoReg(y, lags=1, trend="c", old_names=False)

        # One-step predictions
        oos_1step = oos_mod.predict(res.params)

        # OOS Random Walk predictions
        oos_rw = oos_mod.predict([0, 1])

        # Get second half of both
        # Use -tau//2: to get second half
        oos_1step = oos_1step[-size//2:]
        oos_rw = oos_rw[-size//2:]

        # Append the estimated thetas and the DM results into lists
        theta_100.append(res.params[1])

        dm_100.append(diebold_mariano( oos_rw, oos_1step, nw, cv))


0.9
0.91
0.92
0.93
0.94
0.95
0.96
0.97
0.98
0.99


In [66]:
np.quantile(theta_100,0.01)

0.6056777336885242

In [88]:
dm_100

array([1, 0, 0, ..., 0, 1, 0])

In [93]:
dm_100 = np.asarray(dm_100)
theta_100 = np.asarray(theta_100)
# x = np.asarray(pd.DataFrame([theta_100,(theta_100)**2]).T) ### can be used to get a non-linear relationship?
res = sm.OLS(dm_100, sm.add_constant(theta_100)).fit()
res.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,37.04
Date:,"Fri, 12 Mar 2021",Prob (F-statistic):,1.2e-09
Time:,06:40:40,Log-Likelihood:,-6817.7
No. Observations:,10000,AIC:,13640.0
Df Residuals:,9998,BIC:,13650.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0575,0.050,1.160,0.246,-0.040,0.155
x1,0.3393,0.056,6.086,0.000,0.230,0.449

0,1,2,3
Omnibus:,47565.263,Durbin-Watson:,1.995
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1695.598
Skew:,0.59,Prob(JB):,0.0
Kurtosis:,1.364,Cond. No.,20.8


In [84]:
x

array([[0.8435884 , 0.7116414 , 0.60033243, 0.50643348],
       [0.92474213, 0.85514801, 0.7907914 , 0.73127812],
       [0.82114738, 0.67428302, 0.55368573, 0.45465759],
       ...,
       [0.99886207, 0.99772544, 0.9965901 , 0.99545605],
       [0.99315808, 0.98636298, 0.97961436, 0.97291192],
       [0.82562923, 0.68166363, 0.56280142, 0.4646653 ]])

In [34]:
np.mean(y[-size//2:] - oos_1step)

0.1550788316275451

In [37]:
np.mean(y[-size//2:] - oos_rw)

0.03075100645963808