In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Simulate a quarterly panel of independent AR(1) series, one per unit:
#     y[t] = rho_i * y[t-1] + beta * delta[t] + epsilon,   epsilon ~ Normal(0, sigma)
# In this cell delta is identically zero, so each unit is a pure AR(1) process
# started at y[0] = 0. Rows are recorded from t = 1 onward (the first period has no lag).
seed = 42  # For reproducibility
np.random.seed(seed)
units = [chr(65 + i) for i in range(10)]  # Units A–J
periods = pd.date_range(start="1900-03-01", end="2024-12-01", freq="3MS")  # every 3 months, month start
T = len(periods)

# Parameters
# NOTE(review): p, q and alpha are not read anywhere in this cell; they are kept
# because other cells may rely on them — confirm before removing.
p, q = 0.98, 0.6
alpha = 0.04
beta = 1.5   # coefficient on delta (delta is all zeros here, so the term vanishes)
sigma = 0.5  # standard deviation of the innovation epsilon

data = []

for unit in units:
    # Stage 2: Fix A's params
    if unit == "A":
        rho_i = 0.8
        gamma_i = 1.4
    else:
        # Draw order matters for reproducibility: rho_i first, then gamma_i.
        rho_i = np.random.uniform(0.4, 0.9)
        gamma_i = np.random.uniform(0.5, 1.5)

    y = np.zeros(T)
    delta = np.zeros(T)  # stays all-zero in this cell
    # x is drawn and stored in the output but does not enter the equation for y.
    x = np.random.normal(0, 1, T)

    for t in range(1, T):
        y_lag = y[t-1]

        # One scalar draw per step, after x, to keep the random stream unchanged.
        epsilon = np.random.normal(0, sigma)
        y[t] = rho_i * y_lag + beta * delta[t] + epsilon

        data.append({
            'unit': unit,
            'period': periods[t],
            'y': y[t],
            'y_lag': y_lag,
            'delta': delta[t],
            'x': x[t],
            'x_lag': x[t-1],
            'rho_i': rho_i,
            'gamma_i': gamma_i,  # recorded only; not used to generate y here
        })

# 'period' values are pandas Timestamps, so the column is already datetime-typed.
df = pd.DataFrame(data)
df

Unnamed: 0,unit,period,y,y_lag,delta,x,x_lag,rho_i,gamma_i
0,A,1900-06-01,0.463089,0.000000,0.0,-0.138264,0.496714,0.800000,1.400000
1,A,1900-09-01,1.325179,0.463089,0.0,0.647689,-0.138264,0.800000,1.400000
2,A,1900-12-01,0.360860,1.325179,0.0,1.523030,0.647689,0.800000,1.400000
3,A,1901-03-01,0.570172,0.360860,0.0,-0.234153,1.523030,0.800000,1.400000
4,A,1901-06-01,0.130817,0.570172,0.0,-0.234137,-0.234153,0.800000,1.400000
...,...,...,...,...,...,...,...,...,...
4985,J,2023-12-01,0.672279,0.027791,0.0,1.242827,-1.209435,0.781816,1.187984
4986,J,2024-03-01,-0.473574,0.672279,0.0,0.727168,1.242827,0.781816,1.187984
4987,J,2024-06-01,-0.722906,-0.473574,0.0,0.065892,0.727168,0.781816,1.187984
4988,J,2024-09-01,-0.317297,-0.722906,0.0,1.665142,0.065892,0.781816,1.187984


## AR(1)

In [6]:
from statsmodels.tsa.ar_model import AutoReg

# Expanding-window, one-step-ahead AR(1) forecasts for a single unit.
# For each forecast date t, an AR(1) without intercept is fitted on all of the
# target unit's observations strictly before t, the next value is predicted,
# and the squared forecast error against the realised value at t is stored.
unit_col = 'unit'
time_col = 'period'
target_col = 'y'
target_unit = 'A'
forecast_times = list(pd.date_range(start="2012-03-01", end="2018-12-01", freq="3MS"))

# Select the target unit once (loop-invariant) and make the chronological
# ordering explicit, since AutoReg treats the array as an ordered series.
df_target = df[df[unit_col] == target_unit].sort_values(time_col)

ar1_errors = []  # squared one-step-ahead errors, one per entry of forecast_times

for t in forecast_times:

    # --- AR(1) ---
    df_train = df_target[df_target[time_col] < t]
    y_ar = df_train[target_col].values
    ar_model = AutoReg(y_ar, lags=1, trend='n', old_names=False).fit()

    # Extract rho (coefficient on lagged y)
    # With trend='n' there is no intercept, so the lag-1 coefficient is params[0].
    rho_est = ar_model.params[0]
    print(f"{t.date()}: Estimated ρ = {rho_est:.3f}")

    # Index len(y_ar) is the first out-of-sample step, i.e. the forecast for t.
    y_pred = ar_model.predict(start=len(y_ar), end=len(y_ar))[0]
    y_true = df_target.loc[df_target[time_col] == t, target_col].values[0]
    ar1_errors.append((y_pred - y_true) ** 2)

2012-03-01: Estimated ρ = 0.802
2012-06-01: Estimated ρ = 0.803
2012-09-01: Estimated ρ = 0.803
2012-12-01: Estimated ρ = 0.803
2013-03-01: Estimated ρ = 0.803
2013-06-01: Estimated ρ = 0.802
2013-09-01: Estimated ρ = 0.803
2013-12-01: Estimated ρ = 0.801
2014-03-01: Estimated ρ = 0.801
2014-06-01: Estimated ρ = 0.801
2014-09-01: Estimated ρ = 0.801
2014-12-01: Estimated ρ = 0.802
2015-03-01: Estimated ρ = 0.803
2015-06-01: Estimated ρ = 0.803
2015-09-01: Estimated ρ = 0.804
2015-12-01: Estimated ρ = 0.806
2016-03-01: Estimated ρ = 0.807
2016-06-01: Estimated ρ = 0.808
2016-09-01: Estimated ρ = 0.808
2016-12-01: Estimated ρ = 0.811
2017-03-01: Estimated ρ = 0.809
2017-06-01: Estimated ρ = 0.811
2017-09-01: Estimated ρ = 0.811
2017-12-01: Estimated ρ = 0.808
2018-03-01: Estimated ρ = 0.808
2018-06-01: Estimated ρ = 0.808
2018-09-01: Estimated ρ = 0.808
2018-12-01: Estimated ρ = 0.807
