In [9]:
import pandas as pd
import numpy as np
from statsmodels.tsa.ar_model import AutoReg

# ---------------------------------------------------------
# 1. Read the GDP table and order it by date
# ---------------------------------------------------------
# NOTE(review): input_path and output_path are the same file, so the
# df.to_csv(output_path) call later in this cell overwrites the file that
# is read here. Point output_path elsewhere to keep the input intact.
input_path = r"../Data/GDP_forecasts.csv"        # <-- change this
output_path = r"../Data/GDP_forecasts.csv"   # <-- and this if you want

# One pipeline: read the CSV, convert the 'Date' column to datetimes
# (dayfirst=False), sort chronologically, and renumber the rows from 0.
df = (
    pd.read_csv(input_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], dayfirst=False))
    .sort_values('Date')
    .reset_index(drop=True)
)

# ---------------------------------------------------------
# 2. Rolling AR(1) on non-missing block
# ---------------------------------------------------------
def rolling_ar_forecast_nonmissing(series, lags=1, min_obs=36):
    """
    Build an expanding-window AR(lags) one-step-ahead forecast series.

    Only the non-missing observations are used. They are treated as one
    contiguous sample, so gaps in the input are skipped, not filled.

    Parameters
    ----------
    series : pandas.Series or array-like
        Input values. Coerced with ``pd.to_numeric(errors='coerce')``, so
        entries that cannot be parsed as numbers become NaN.
    lags : int, default 1
        Number of autoregressive lags passed to ``AutoReg``.
    min_obs : int, default 36
        Number of leading valid observations that are kept as realized
        values; also the size of the first training window. Raised to
        ``lags + 1`` if a smaller value is given.

    Returns
    -------
    pandas.Series
        Float series on the same index as the input:

        - first ``min_obs`` valid observations → realized values
        - each later valid observation → forecast from an AR(lags) model
          fitted on all valid observations that precede it
        - positions that were missing (or unparseable) → NaN

        When the number of valid observations is ``<= min_obs + lags``
        (including zero), no model is fitted and the cleaned float series
        is returned as-is.
    """
    # Accept lists/arrays as well as Series, then coerce junk to NaN.
    s = pd.to_numeric(pd.Series(series), errors='coerce')
    values = s.astype(float).to_numpy()

    # Row *positions* of real observations. Working positionally (instead
    # of with index labels) keeps the writes below correct even when the
    # index contains duplicate labels.
    valid_pos = np.flatnonzero(~np.isnan(values))
    n_valid = valid_pos.size

    # Ensure we have at least something to estimate
    min_obs = max(min_obs, lags + 1)
    if n_valid <= min_obs + lags:
        # Too little data for rolling forecasts (this also covers the
        # all-missing case) -> return the cleaned series, always as float.
        return pd.Series(values.copy(), index=s.index, dtype=float)

    valid_vals = values[valid_pos]

    # Output buffer: all NaN, so originally-missing rows stay NaN.
    out = np.full(values.shape, np.nan)

    # The first min_obs valid points keep their realized values.
    out[valid_pos[:min_obs]] = valid_vals[:min_obs]

    # Expanding window: refit on everything observed before step t and
    # forecast the t-th valid observation.
    for t in range(min_obs, n_valid):
        train_vals = valid_vals[:t]

        # NOTE(review): `old_names` is kept exactly as in the original call;
        # confirm it is still accepted by the installed statsmodels version.
        model = AutoReg(train_vals, lags=lags, old_names=False).fit()

        # start == end == len(train_vals) is the first out-of-sample step.
        pred = model.predict(start=t, end=t)

        # Take the single forecast by position, not by label.
        out[valid_pos[t]] = float(np.asarray(pred)[0])

    return pd.Series(out, index=s.index, dtype=float)

# ---------------------------------------------------------
# 3. Replace the CH, JP and EU columns with their AR(1) forecast series
# ---------------------------------------------------------
countries_to_forecast = ['CH', 'JP', 'EU']

for c in countries_to_forecast:
    print(f"Building AR(1) forecast series for {c}...")
    observed = df[c]
    # Overwrites the column in place: the first 36 valid rows stay realized,
    # later valid rows become one-step-ahead forecasts.
    df[c] = rolling_ar_forecast_nonmissing(observed, lags=1, min_obs=36)

# ---------------------------------------------------------
# 4. Write the table to disk and report
# ---------------------------------------------------------
df.to_csv(output_path, index=False)

# A single print call; sep="\n" puts each item on its own line.
print(
    "Done.",
    "AR-based GDP forecasts for CH, JP, and EU saved to:",
    output_path,
    df.tail(),
    sep="\n",
)


Building AR(1) forecast series for CH...
Building AR(1) forecast series for JP...
Building AR(1) forecast series for EU...
Done.
AR-based GDP forecasts for CH, JP, and EU saved to:
../Data/GDP_forecasts.csv
          Date     AU     GB        CH        JP        EU        US    EM
659 2025-06-01  0.021  0.025  0.023906  0.018488  0.015211  0.013454  0.05
660 2025-07-01  0.032  0.025  0.023908  0.018493  0.015211  0.014502  0.05
661 2025-08-01  0.032  0.025  0.013519  0.010418  0.013702  0.014502  0.05
662 2025-09-01  0.032  0.025  0.013517  0.010419  0.013702  0.014502  0.05
663 2025-10-01  0.038  0.025  0.013515  0.010420  0.013701  0.015632  0.05
