# Conformalized Quantile Regression with Rolling Windows
This notebook implements conformal prediction around the existing RandomForestQuantileRegressor (QRF) model. We evaluate rolling-window calibration and sharpness of 72h return forecasts, conditioning on market regime.

In [None]:
import pandas as pd
import numpy as np
from quantile_forest import RandomForestQuantileRegressor
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acf


## Load model dataset
The dataframe `df_model` contains 12‑hour features, a `regime` flag and our target `return_72h`.

In [None]:
# Predictor columns fed to the quantile forest.
feature_cols = [
    'token_volume_usd',
    'holder_count',
    'sol_volume_usd',
    'realized_vol_12h',
    'network_tx_count',
    'tvl_usd',
    'tvl_change_12h',
    'sol_return',
    'extreme_move',
    'extreme_freq',
    'rolling_skew_50',
    'tail_asymmetry',
]

df_model = pd.read_parquet('data/06data.parquet')

# Keep only rows where every feature, the target and the regime flag are
# present, then order them by timestamp and give them a clean 0..n-1 index so
# that positional slicing below walks forward through the sorted rows.
model_df = (
    df_model
    .dropna(subset=[*feature_cols, 'return_72h', 'regime'])
    .sort_values('timestamp')
    .reset_index(drop=True)
)

# Design matrix, target and regime label, all row-aligned with `model_df`.
X, y, regime = model_df[feature_cols], model_df['return_72h'], model_df['regime']


## Rolling conformal interval estimation
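For each rolling window we fit the QRF on the training slice, compute conformity scores on the calibration slice, and shift the test-slice quantile bounds outward (or inward) by the resulting correction `delta`. Throughout, `alpha` denotes the nominal coverage of the central interval (e.g. `alpha = 0.8` is the 10%–90% band), not the miscoverage rate.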

In [None]:
# Rolling-window conformalized quantile regression (CQR).
#
# Every window is split by row position into consecutive train / calibration /
# test slices.  Windows advance by `test_size` rows, so test slices of
# successive windows do not overlap.
train_size = 120
cal_size = 30
test_size = 30
# NOTE: `alpha` is the nominal *coverage* of the central interval
# (lower quantile = (1 - alpha) / 2), not the miscoverage rate.
alphas = np.linspace(0.5, 0.95, 10)

records = []  # one summary row per (window, alpha)
preds = []    # one per-observation frame per (window, alpha)

window_size = train_size + cal_size + test_size

for start in range(0, len(model_df) - window_size + 1, test_size):
    cal_start = start + train_size
    test_start = cal_start + cal_size

    idx_train = slice(start, cal_start)
    idx_cal = slice(cal_start, test_start)
    idx_test = slice(test_start, test_start + test_size)

    X_train, y_train = X.iloc[idx_train], y.iloc[idx_train]
    X_cal, y_cal = X.iloc[idx_cal], y.iloc[idx_cal]
    X_test, y_test = X.iloc[idx_test], y.iloc[idx_test]
    regime_test = regime.iloc[idx_test]

    # Positional (`iloc`) selection: `.loc` with a slice is label-based and
    # includes the end label, which would return test_size + 1 timestamps.
    ts_test = model_df['timestamp'].iloc[idx_test].values
    y_cal_arr = y_cal.values
    y_test_arr = y_test.values

    qrf = RandomForestQuantileRegressor(n_estimators=100, random_state=0)
    qrf.fit(X_train, y_train)

    # Predictions are flattened so arithmetic against the 1-D targets stays
    # elementwise (presumably already 1-D for a single quantile -- verify
    # against the quantile_forest version in use).
    median_pred = np.ravel(qrf.predict(X_test, quantiles=[0.5]))
    resid = y_test_arr - median_pred

    for alpha in alphas:
        lower_q = (1 - alpha) / 2
        upper_q = 1 - lower_q

        # CQR conformity score: signed distance by which the calibration
        # target falls outside the naive interval (negative when inside).
        lower_cal = np.ravel(qrf.predict(X_cal, quantiles=[lower_q]))
        upper_cal = np.ravel(qrf.predict(X_cal, quantiles=[upper_q]))
        scores = np.maximum(lower_cal - y_cal_arr, y_cal_arr - upper_cal)

        # Finite-sample conformal level for target coverage `alpha`:
        # ceil((n + 1) * alpha) / n, capped at 1.  (Using 1 - alpha here would
        # pick a low quantile of the scores and under-cover.)
        n_cal = len(scores)
        q = min(1.0, np.ceil(alpha * (n_cal + 1)) / n_cal)
        delta = np.quantile(scores, q, method='higher')

        naive_lower = np.ravel(qrf.predict(X_test, quantiles=[lower_q]))
        naive_upper = np.ravel(qrf.predict(X_test, quantiles=[upper_q]))
        conf_lower = naive_lower - delta
        conf_upper = naive_upper + delta

        coverage_naive = ((y_test_arr >= naive_lower) & (y_test_arr <= naive_upper)).mean()
        coverage_conf = ((y_test_arr >= conf_lower) & (y_test_arr <= conf_upper)).mean()
        width_naive = (naive_upper - naive_lower).mean()
        width_conf = (conf_upper - conf_lower).mean()

        records.append({
            'start': start,
            'alpha': alpha,
            'delta': delta,
            'coverage': coverage_conf,
            'naive_coverage': coverage_naive,
            'width': width_conf,
            'naive_width': width_naive,
        })

        preds.append(pd.DataFrame({
            'timestamp': ts_test,
            'y': y_test_arr,
            'lower': conf_lower,
            'upper': conf_upper,
            'naive_lower': naive_lower,
            'naive_upper': naive_upper,
            'residual': resid,
            'regime': regime_test.values,
            'alpha': alpha,
        }))

if not preds:
    # Without this guard pd.concat fails with an opaque "No objects to
    # concatenate" message when the dataset is shorter than one window.
    raise ValueError(
        f'Need at least {window_size} rows for one rolling window, '
        f'got {len(model_df)}.'
    )

result_df = pd.DataFrame(records)
preds_df = pd.concat(preds).reset_index(drop=True)


## Aggregate calibration metrics

In [None]:
# Per-row indicators computed once, so the grouped statistics below are plain
# column means instead of `groupby.apply` lambdas.
scored = preds_df.assign(
    hit=(preds_df['y'] >= preds_df['lower']) & (preds_df['y'] <= preds_df['upper']),
    naive_hit=(preds_df['y'] >= preds_df['naive_lower']) & (preds_df['y'] <= preds_df['naive_upper']),
    width=preds_df['upper'] - preds_df['lower'],
)

# Empirical coverage of the conformal and naive intervals at each nominal level.
by_alpha = scored.groupby('alpha')
calibrated = by_alpha['lower'].count().to_frame('n')
calibrated['empirical'] = by_alpha['hit'].mean()
calibrated['naive'] = by_alpha['naive_hit'].mean()
calibrated = calibrated.reset_index()

# Conformal coverage and mean interval width per (regime, nominal level).
regime_stats = (
    scored.groupby(['regime', 'alpha'])
    .agg(coverage=('hit', 'mean'), width=('width', 'mean'))
    .reset_index()
)

# `alpha` values are computed floats, so select the 0.8 level with a tolerance;
# an exact `== 0.8` can match nothing and leave these series empty.
delta_ts = result_df.loc[np.isclose(result_df['alpha'], 0.8), ['start', 'delta']]
residuals_all = preds_df.loc[np.isclose(preds_df['alpha'], 0.8), 'residual']
acf_vals = acf(residuals_all, nlags=20)


## Plots

In [None]:
# Calibration curves
# Calibration curves: empirical vs nominal coverage for the conformal and
# naive intervals; the dashed diagonal marks perfect calibration.
plt.figure(figsize=(6, 5))
plt.plot(calibrated['alpha'], calibrated['empirical'], marker='o', label='Conformal')
plt.plot(calibrated['alpha'], calibrated['naive'], marker='o', label='Naive')
plt.plot([0.5, 0.95], [0.5, 0.95], '--', color='gray')
plt.xlabel('Nominal coverage')
plt.ylabel('Empirical coverage')
plt.legend()
plt.grid(True)
plt.show()

# Delta over time: conformal correction per rolling window.
plt.figure(figsize=(6, 3))
plt.plot(delta_ts['start'], delta_ts['delta'])
plt.title('Rolling conformal delta (alpha=0.8)')
plt.xlabel('Window start index')
plt.ylabel('Delta')
plt.grid(True)
plt.show()

# Residual ACF.
# `use_line_collection` is not passed: Matplotlib removed the parameter
# (passing it raises TypeError on current releases) and the line-collection
# rendering it used to request is the default.
plt.figure(figsize=(6, 3))
plt.stem(acf_vals)
plt.title('Residual ACF (alpha=0.8)')
plt.xlabel('Lag')
plt.ylabel('ACF')
plt.show()


### Coverage vs feature quintiles

In [None]:
# Conformal coverage at the 0.8 nominal level within each quintile of a few
# features.
feature_bins = {}

# The plots are labelled alpha=0.8, so restrict coverage to that level
# (tolerance-based match because the alpha grid holds computed floats).
at_alpha_80 = np.isclose(preds_df['alpha'], 0.8)

for feat in ['realized_vol_12h', 'network_tx_count', 'sol_volume_usd']:
    bin_col = f'{feat}_bin'
    bins = pd.qcut(model_df[feat], 5, duplicates='drop')

    # Attach each prediction's bin by timestamp.  The row index of `preds_df`
    # is just a position in the stacked predictions table, not a `model_df`
    # row label, so it cannot be used to look bins up.
    # NOTE(review): assumes one `model_df` row per timestamp; if timestamps
    # repeat, the first row's bin is used -- confirm.
    lookup = pd.DataFrame({
        'timestamp': model_df['timestamp'].values,
        bin_col: bins.values,
    }).drop_duplicates('timestamp')
    preds_df[bin_col] = (
        preds_df[['timestamp']]
        .merge(lookup, on='timestamp', how='left')[bin_col]
        .values
    )

    subset = preds_df[at_alpha_80]
    covered = (subset['y'] >= subset['lower']) & (subset['y'] <= subset['upper'])
    # observed=False keeps every quintile on the axis even if it has no rows.
    cover = covered.groupby(subset[bin_col], observed=False).mean()
    feature_bins[feat] = cover

for feat, cover in feature_bins.items():
    plt.figure(figsize=(6, 4))
    cover.plot(kind='bar')
    plt.title(f'Coverage by {feat} quintile (alpha=0.8)')
    plt.ylabel('Coverage')
    plt.ylim(0, 1)
    plt.grid(True)
    plt.show()


## Summary tables

In [None]:
# Mean conformal correction and mean interval widths per nominal level,
# averaged over rolling windows.
summary_delta = result_df.groupby('alpha')['delta'].mean()
sharpness = result_df.groupby('alpha')[['width', 'naive_width']].mean()

print('Delta by alpha:')
print(summary_delta)
# The leading newline is written as an escape; a raw line break inside a
# single-quoted literal is a syntax error.
print('\nSharpness comparison:')
print(sharpness)
print('\nCoverage by regime (alpha=0.8):')
# Tolerance-based match: the alpha grid holds computed floats.
print(regime_stats[np.isclose(regime_stats['alpha'], 0.8)])
