In [1]:
import pandas as pd
import numpy as np
import os, sys
sys.path.insert(0, os.path.abspath('../..'))
import source.data_preparation as dp
import source.modelling as modl
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Example


In [2]:
data = dp.prepare_data()

  warn("""Cannot parse header or footer so it will be ignored""")


In [3]:
# Pick the monthly variables you actually have in `data`:
monthly_vars = [
    "vp","impvar","vrp","lzrt","ogap","wtexas","sntm","ndrbL","skvw","tail",
    "fbm","dtoy","dtoat","ygap","rdsp","rsvix","tchi","avgcor","shtint",
    "disag","ntis","tby","lty","ltr","tms","dfy","dfr","infl"
]
# keep only those present in your DataFrame
monthly_vars = [v for v in monthly_vars if v in data.columns]

ranking = modl.rank_monthly_predictors(
    data,
    monthly_vars=monthly_vars,
    start_date="1927-01-01",
    start_oos="1965-01-01",
    lag=1,
    quiet=True,     # set False if you want per-variable metric prints
)


Monthly predictors ranked (worst → best) by OOS R²:
 1.      rsvix   R²_OOS = -0.0837
 2.        vrp   R²_OOS = -0.0766
 3.     impvar   R²_OOS = -0.0738
 4.      disag   R²_OOS = -0.0310
 5.     shtint   R²_OOS = -0.0298
 6.       ygap   R²_OOS = -0.0192
 7.       ntis   R²_OOS = -0.0153
 8.       dtoy   R²_OOS = -0.0149
 9.         vp   R²_OOS = -0.0145
10.        lty   R²_OOS = -0.0131
11.       skvw   R²_OOS = -0.0122
12.        fbm   R²_OOS = -0.0112
13.       sntm   R²_OOS = -0.0109
14.       rdsp   R²_OOS = -0.0107
15.        dfr   R²_OOS = -0.0107
16.       tail   R²_OOS = -0.0106
17.        ltr   R²_OOS = -0.0105
18.       tchi   R²_OOS = -0.0090
19.        tms   R²_OOS = -0.0088
20.      dtoat   R²_OOS = -0.0088
21.     avgcor   R²_OOS = -0.0087
22.        dfy   R²_OOS = -0.0086
23.       lzrt   R²_OOS = -0.0086
24.     wtexas   R²_OOS = -0.0083
25.       infl   R²_OOS = -0.0055
26.       ogap   R²_OOS = -0.0041


In [4]:
modl.chronos_oos( data,
    start_oos='1965-01-01',
    quiet=False,
    lag=1,
    start_date='1927-01-01')

Starting Chronos OOS evaluation...
Data from 1926-01-01 00:00:00 to 2024-12-01 00:00:00, 1188 months total.
Loading Chronos model...


`torch_dtype` is deprecated! Use `dtype` instead!
`torch_dtype` is deprecated! Use `dtype` instead!


Chronos model loaded.
[Chronos-Bolt] OOS months: 720  MSE=0.001928  RMSE=0.043905  R²_OS=-0.0115


In [5]:
r2 = modl.timesfm_oos(data, start_oos="1965-01-01", min_context=120, max_context=1024, ct_cutoff=True, quiet=False)
print("TimesFM OOS R²:", r2)


[TimesFM] Using device: mps
[TimesFM] Device=mps | Valid months=195 | MSE=0.002089 | RMSE=0.045702 | R²_OS=-0.0583
TimesFM OOS R²: -0.058341522364834475


In [6]:
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import timesfm

# =====================================================
# 1️⃣  Device detection (Apple GPU / CUDA / CPU)
# =====================================================
torch.set_float32_matmul_precision("high")
if torch.backends.mps.is_available():
    device = "mps"     # Apple Silicon GPU
elif torch.cuda.is_available():
    device = "cuda"    # NVIDIA GPU
else:
    device = "cpu"
print(f"Using device: {device}")

# =====================================================
# 2️⃣  Data preparation (equity premium)
# =====================================================
df = data.copy()
df = df.sort_index()[['equity_premium']].dropna()
y = df['equity_premium'].astype("float32")

# =====================================================
# 3️⃣  Load TimesFM model
# =====================================================
model = timesfm.TimesFM_2p5_200M_torch.from_pretrained("google/timesfm-2.5-200m-pytorch")
cfg = timesfm.ForecastConfig(
    max_context=1024,         # use last 1024 months for context
    max_horizon=1,            # one-step-ahead forecasts
    normalize_inputs=True,
    use_continuous_quantile_head=True,
    force_flip_invariance=True,
    infer_is_positive=False,
    fix_quantile_crossing=True,
)
model.compile(cfg)

# =====================================================
# 4️⃣  Forecast setup (Goyal–Welch expanding window)
# =====================================================
start_oos = pd.Timestamp("1965-01-01")
test_idx = y.index[y.index >= start_oos]
preds, trues = [], []

MIN_CONTEXT = 120  # require at least 10 years of history

for date_t in test_idx:
    pos = y.index.get_loc(date_t)
    if pos < MIN_CONTEXT:
        continue

    # expanding window = all data up to t (but not including t)
    context = y.iloc[:pos].to_numpy(dtype="float32")

    # truncate to last max_context
    if len(context) > cfg.max_context:
        context = context[-cfg.max_context:]

    if np.isnan(context).any() or np.std(context) < 1e-6:
        # skip near-constant or invalid contexts
        continue

    # forecast 1 month ahead
    with torch.inference_mode():
        point_fcst, _ = model.forecast(horizon=1, inputs=[context])

    y_hat = float(point_fcst[0, 0])
    y_true = float(y.iloc[pos])

    if np.isnan(y_hat) or np.isnan(y_true):
        continue

    preds.append(y_hat)
    trues.append(y_true)

# =====================================================
# 5️⃣  Evaluation
# =====================================================
preds, trues = np.array(preds), np.array(trues)
valid_mask = ~np.isnan(preds) & ~np.isnan(trues)
preds, trues = preds[valid_mask], trues[valid_mask]

if len(preds) == 0:
    raise RuntimeError("No valid TimesFM predictions were generated. Check MIN_CONTEXT or model stability.")

mse = mean_squared_error(trues, preds)
rmse = np.sqrt(mse)
mean_forecast = np.array([trues[:i].mean() for i in range(1, len(trues)+1)])
r2_oos = 1 - np.sum((trues - preds)**2) / np.sum((trues - mean_forecast)**2)

print(f"[TimesFM] Device={device} | Valid months={len(preds)} | MSE={mse:.6f} | RMSE={rmse:.6f} | R²_OS={r2_oos:.4f}")


Using device: mps
[TimesFM] Device=mps | Valid months=195 | MSE=0.002093 | RMSE=0.045752 | R²_OS=-0.0606


In [7]:
# pip install -U uni2ts gluonts torch
import numpy as np, pandas as pd, torch
from sklearn.metrics import mean_squared_error
from gluonts.dataset.common import ListDataset
from uni2ts.model.moirai2 import Moirai2Forecast, Moirai2Module

# =====================================================
# 1️⃣  Data prep
# =====================================================
df = data.copy()
df["equity_premium"] = df["ret"] - df["Rfree"]
df["date"] = pd.to_datetime(df["date"])
df = df.set_index("date").sort_index()
df = df[["equity_premium", "d/p", "tms", "dfy"]].dropna()   # use any 2–3 predictors you like
freq = "M"

# Target & covariates
y  = df["equity_premium"]
dp = df["d/p"]
tms = df["tms"]
dfy = df["dfy"]

start_oos = pd.Timestamp("1965-01-01")
test_idx = y.index[y.index >= start_oos]

# =====================================================
# 2️⃣  Helper: build GluonTS ListDataset for current history
# =====================================================
def make_entry(y_series, end_ts, ctx, freq, covars):
    pos = y_series.index.get_loc(end_ts)
    if isinstance(pos, slice):
        pos = pos.start
    if pos == 0:
        return None
    y_hist = y_series.values[:pos].astype("float32")
    idx_hist = y_series.index[:pos]
    if len(y_hist) > ctx:
        y_hist, idx_hist = y_hist[-ctx:], idx_hist[-ctx:]
    entry = {"start": idx_hist[0], "target": y_hist}
    if covars:
        cov_mats = [c.values[:pos].astype("float32") for c in covars]
        cov_mats = [m[-ctx:] if len(m) > ctx else m for m in cov_mats]
        entry["past_feat_dynamic_real"] = cov_mats
    return ListDataset([entry], freq=freq)

# =====================================================
# 3️⃣  Forecast loop (expanding window, retrain each step)
# =====================================================
CTX = 240   # 20 years of history
preds, trues = [], []

for date_t in test_idx:
    pos = y.index.get_loc(date_t)
    if pos < 60:
        continue
    print(date_t)
    # prepare dataset up to t−1
    ds_one = make_entry(y, end_ts=date_t, ctx=CTX, freq=freq, covars=[dp, tms, dfy])
    if ds_one is None:
        continue

    # instantiate + retrain (small model for stability)
    # instantiate + retrain (small model for stability)
    model = Moirai2Forecast(
        module=Moirai2Module.from_pretrained("Salesforce/moirai-2.0-R-small"),
        prediction_length=1,
        context_length=CTX,
        target_dim=1,
        feat_dynamic_real_dim=0,         # ✅ must be provided even if unused
        past_feat_dynamic_real_dim=3,    # ✅ your 3 predictors (d/p, tms, dfy)
    )

    predictor = model.create_predictor(batch_size=2)
    try:
        predictor = predictor.to("cpu")  # safer for Mac/MPS
    except:
        pass

    # predict one step
    f = next(predictor.predict(ds_one))
    y_hat = float(f.quantile(0.5)[0])   # median forecast
    y_true = float(y.iloc[pos])
    if np.isnan(y_hat) or np.isnan(y_true):
        continue

    preds.append(y_hat)
    trues.append(y_true)

# =====================================================
# 4️⃣  Evaluate OOS R²
# =====================================================
preds, trues = np.array(preds), np.array(trues)
valid = ~np.isnan(preds) & ~np.isnan(trues)
preds, trues = preds[valid], trues[valid]

mse = mean_squared_error(trues, preds)
rmse = np.sqrt(mse)
mean_forecast = np.array([trues[:i].mean() for i in range(1, len(trues)+1)])
r2_oos = 1 - np.sum((trues - preds)**2) / np.sum((trues - mean_forecast)**2)

print(f"[Moirai 2 (retrained each step)] Months={len(preds)} | MSE={mse:.6f} | RMSE={rmse:.6f} | R²_OS={r2_oos:.4f}")


ModuleNotFoundError: No module named 'gluonts'

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# --- 1️⃣  Prepare Data ---
df = data.copy()
df['equity_premium'] = df['ret'] - df['Rfree']
df['dp_lag'] = df['d/p'].shift(1)
df = df.dropna(subset=['equity_premium', 'dp_lag'])
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date').sort_index()

# --- 2️⃣  Define OOS start (consistent across models) ---
start_oos = pd.Timestamp("1965-01-01")

# --- 3️⃣  Storage for predictions & actuals ---
predictions, actuals = [], []

# --- 4️⃣  Recursive expanding-window OOS forecast ---
for date_t in df.index:
    if date_t < start_oos:
        continue

    # training window: all data before t
    est = df.loc[:date_t].iloc[:-1]
    if len(est) < 30:
        continue

    X_train = est[['dp_lag']].values
    y_train = est['equity_premium'].values

    # fit OLS
    model = LinearRegression().fit(X_train, y_train)

    # one-step-ahead prediction
    X_pred = np.array([[df.loc[date_t, 'dp_lag']]])
    pred = model.predict(X_pred)[0]

    predictions.append(pred)
    actuals.append(df.loc[date_t, 'equity_premium'])

# --- 5️⃣  Convert to arrays ---
predictions = np.array(predictions)
actuals = np.array(actuals)

# --- 6️⃣  Compute evaluation metrics ---
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
mean_forecast = np.array([actuals[:i].mean() for i in range(1, len(actuals)+1)])
r2_oos = 1 - np.sum((actuals - predictions)**2) / np.sum((actuals - mean_forecast)**2)

# --- 7️⃣  Print clean summary ---
print(f"[OLS Benchmark] Valid months={len(actuals)} | "
      f"MSE={mse:.6f} | RMSE={rmse:.6f} | R²_OS={r2_oos:.4f}")
