In [1]:
# %% Imports & Config
import os
import logging
# Suppress TensorFlow and absl logs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import absl.logging as _absl
_absl.set_verbosity('error')
import numpy as np
import pandas as pd
import tensorflow as tf
# Set TensorFlow logger level
tf.get_logger().setLevel(logging.ERROR)
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm
from pathlib import Path
import statsmodels.api as sm
import train_ensemble
import gc

E0000 00:00:1746743334.522950  687968 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746743334.529407  687968 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746743334.681445  687968 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746743334.681473  687968 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746743334.681475  687968 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746743334.681477  687968 computation_placer.cc:177] computation placer already registered. Please check linka

In [2]:
# Allow TF to pick GPU if available, otherwise CPU
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

CONFIG = train_ensemble.CONFIG
make_dataset = train_ensemble.make_dataset

# %% Phase 0: Prepare output directory
pred_dir = Path("predictions")
pred_dir.mkdir(exist_ok=True)

# %% Phase 1: Preload feature pipelines & build truth DataFrames
variants = ['raw', 'pct', 'z', 'invn']
ds_feat = {}
truth_dfs = {}

for var in variants:
    # update CONFIG for this variant
    CONFIG['variant'] = var
    CONFIG['label'] = 'ret_exc_lead1m'
    CONFIG['weight'] = 'w_vw'
    CONFIG['tfrecord_dir'] = os.path.join(CONFIG['data_dir'], 'tfrecords', var)

    # dataset for predictions (features only)
    ds = make_dataset(CONFIG['predict_years'], mode='predict')
    ds_feat[var] = ds.map(lambda feats, *_: feats).prefetch(tf.data.AUTOTUNE)

    # extract arrays and meta
    records = []
    for feats, y, w, meta in ds:
        y_arr = y.numpy().ravel()
        w_arr = w.numpy().ravel()
        # meta is a dict of arrays; convert each to ravel
        flat_meta = {k: v.numpy().ravel() for k, v in meta.items()}
        # build DataFrame for this batch
        batch_df = pd.DataFrame(flat_meta)
        batch_df[CONFIG['label']] = y_arr
        batch_df['w'] = w_arr
        records.append(batch_df)
    # concatenate all batches into a single truth DataFrame
    truth_dfs[var] = pd.concat(records, ignore_index=True)

I0000 00:00:1746743338.058769  687968 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 44326 MB memory:  -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:25:00.0, compute capability: 7.5
I0000 00:00:1746743338.062571  687968 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 44326 MB memory:  -> device: 1, name: Quadro RTX 8000, pci bus id: 0000:81:00.0, compute capability: 7.5
I0000 00:00:1746743338.063248  687968 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 44326 MB memory:  -> device: 2, name: Quadro RTX 8000, pci bus id: 0000:e2:00.0, compute capability: 7.5


In [3]:
# %% Phase 2: Build combo list with GPU assignment
gpus = tf.config.list_physical_devices('GPU')
num_gpus = len(gpus)
combos = []
base = Path(CONFIG['model_dir'])
for idx, var in enumerate(variants):
    for lbl in ['ret_exc_lead1m', 'ret_pct', 'ret_z', 'ret_invn']:
        for wt in ['w_ew', 'w_vw']:
            for loss in ['mse', 'mae']:
                combo = f"{var}_{lbl}_{wt}_{loss}"
                path = base / combo
                if path.is_dir():
                    gpu_id = idx % num_gpus if num_gpus > 0 else None
                    combos.append((combo, var, lbl, wt, loss, path, gpu_id))

In [5]:
# %% Phase 3: Define worker_predict with correct signature
def worker_predict(combo, var, lbl, wt, loss, path, gpu_id):
    out_path = pred_dir / f"{combo}.parquet"
    if os.path.exists(out_path):
        print(f"[SKIP] dataset for {combo} already exists")
        return combo

    device = f'/GPU:{gpu_id}' if gpu_id is not None else '/CPU:0'
    with tf.device(device):
        files = sorted(path.glob('run_*.keras'))
        y_sum = None
        for mf in files:
            m = tf.keras.models.load_model(str(mf))
            pred = m.predict(ds_feat[var], verbose=0).ravel()
            y_sum = pred if y_sum is None else y_sum + pred
            del m
        y_hat = y_sum / len(files)

    panel = truth_dfs[var].copy()
    panel['signal'] = y_hat

    panel.to_parquet(out_path, index=False)
    tqdm.write(f"predictions for {combo} saved.")
    del panel, y_sum, y_hat
    gc.collect()
    return combo


# %% Phase 4: Run parallel predictions with tqdm using as_completed
n_workers = 64
with ThreadPoolExecutor(max_workers=n_workers) as exe:
    futures = {exe.submit(worker_predict, *c): c[0] for c in combos}
    for fut in tqdm(as_completed(futures),
                    total=len(futures),
                    desc="Predicting",
                    unit="combo"):
        fut.result()

[SKIP] dataset for raw_ret_exc_lead1m_w_ew_mae already exists
[SKIP] dataset for raw_ret_pct_w_ew_mse already exists
[SKIP] dataset for raw_ret_exc_lead1m_w_vw_mae already exists
[SKIP] dataset for raw_ret_pct_w_vw_mse already exists
[SKIP] dataset for raw_ret_pct_w_ew_mae already exists
[SKIP] dataset for raw_ret_z_w_ew_mae already exists
[SKIP] dataset for raw_ret_z_w_vw_mae already exists
[SKIP] dataset for raw_ret_z_w_vw_mse already exists
[SKIP] dataset for raw_ret_invn_w_ew_mae already exists
[SKIP] dataset for raw_ret_invn_w_ew_mse already exists
[SKIP] dataset for raw_ret_invn_w_vw_mae already exists
[SKIP] dataset for raw_ret_invn_w_vw_mse already exists
[SKIP] dataset for pct_ret_exc_lead1m_w_vw_mse already exists
[SKIP] dataset for pct_ret_exc_lead1m_w_vw_mae already exists
[SKIP] dataset for pct_ret_pct_w_ew_mse already exists
[SKIP] dataset for pct_ret_exc_lead1m_w_ew_mae already exists
[SKIP] dataset for pct_ret_pct_w_ew_mae already exists
[SKIP] dataset for pct_ret_pct_w

Predicting:   0%|          | 0/64 [03:22<?, ?combo/s]

predictions for raw_ret_exc_lead1m_w_vw_mse saved.


Predicting:  83%|████████▎ | 53/64 [03:29<00:42,  3.87s/combo]

predictions for z_ret_z_w_vw_mae saved.


Predicting:  84%|████████▍ | 54/64 [03:31<00:39,  3.91s/combo]

predictions for pct_ret_pct_w_vw_mae saved.


Predicting:  86%|████████▌ | 55/64 [03:31<00:34,  3.81s/combo]

predictions for pct_ret_invn_w_ew_mse saved.


Predicting:  89%|████████▉ | 57/64 [03:32<00:23,  3.42s/combo]

predictions for raw_ret_exc_lead1m_w_ew_mse saved.


Predicting:  89%|████████▉ | 57/64 [03:32<00:23,  3.42s/combo]

predictions for raw_ret_z_w_ew_mse saved.


Predicting:  91%|█████████ | 58/64 [03:33<00:19,  3.20s/combo]

predictions for z_ret_exc_lead1m_w_ew_mse saved.


Predicting:  92%|█████████▏| 59/64 [03:34<00:14,  2.91s/combo]

predictions for raw_ret_pct_w_vw_mae saved.


Predicting:  94%|█████████▍| 60/64 [03:35<00:10,  2.70s/combo]

predictions for pct_ret_exc_lead1m_w_ew_mse saved.


Predicting:  95%|█████████▌| 61/64 [03:35<00:07,  2.40s/combo]

predictions for invn_ret_invn_w_ew_mae saved.


Predicting:  95%|█████████▌| 61/64 [03:39<00:07,  2.40s/combo]

predictions for z_ret_z_w_ew_mse saved.


Predicting:  98%|█████████▊| 63/64 [03:42<00:02,  2.20s/combo]

predictions for z_ret_invn_w_vw_mse saved.


Predicting: 100%|██████████| 64/64 [03:43<00:00,  3.49s/combo]


In [16]:
# %% Phase 5: Define analysis helpers
def construct_quantile_portfolios(df,
                                  signal_col='signal',
                                  date_col='eom',
                                  weight_col='w',
                                  return_col='y',
                                  n=10):
    df0 = df.dropna(
        subset=[date_col, weight_col, return_col, signal_col]).copy()
    df0['rank_pct'] = df0.groupby(date_col)[signal_col].rank(method='first',
                                                             pct=True)
    df0['quantile'] = np.ceil(df0['rank_pct'] * n).astype(int).clip(1, n)
    df0['wR'] = df0[weight_col] * df0[return_col]
    grp = df0.groupby([date_col, 'quantile'])
    port = (grp['wR'].sum() / grp[weight_col].sum()).unstack(fill_value=np.nan)
    port.columns = [f"q{q}" for q in port.columns]
    port['long_short'] = port[f"q{n}"] - port["q1"]
    return port.sort_index()


def evaluate_portfolios(returns, freq=12):
    rets = returns.dropna(how='all')
    mean_ret = rets.mean() * freq
    vol = rets.std(ddof=1) * np.sqrt(freq)
    sharpe = mean_ret / vol
    wealth = (1 + rets).cumprod()
    running_max = wealth.cummax()
    drawdown = 1 - wealth / running_max
    perf = pd.DataFrame({
        'mean_return': mean_ret,
        'vol': vol,
        'sharpe': sharpe,
        'max_drawdown': drawdown.max()
    })
    # transpose so index = portfolios, columns = metrics
    return perf


# %% Phase 6: Parallel analysis and Fama-MacBeth, using existing combos metadata
y = 'y'
def analyze_combo(args):
    combo, var, lbl, wt, loss, path, gpu_id = args
    file_path = pred_dir / f"{combo}.parquet"
    # skip if file does not exist
    if not file_path.exists():
        tqdm.write(f"[!] File not found, skipping combo: {combo}")
        return None
    df = pd.read_parquet(file_path)
    port = construct_quantile_portfolios(df)
    perf = evaluate_portfolios(port)
    result = {
        'combo': combo,
        'variant': var,
        'label': lbl,
        'weight': wt,
        'loss': loss
    }
    # add sharpe metrics for each portfolio
    for portfolio in perf.index:
        result[f"sharpe_{portfolio}"] = perf.at[portfolio, 'sharpe']

    # Fama-MacBeth regression
    X = sm.add_constant(df['signal'])
    regr = sm.OLS(df[y], X).fit()
    result['fm_beta'] = regr.params.get('signal', np.nan)
    result['fm_tstat'] = regr.tvalues.get('signal', np.nan)
    ics = df.groupby('eom').apply(
        lambda d: d['signal'].corr(d[y], method='spearman'))
    result['ic_mean'] = ics.mean()
    result['ic_tstat'] = ics.mean() / (
        ics.std(ddof=1) / np.sqrt(ics.count())) if ics.count() > 1 else np.nan
    return pd.DataFrame([result])


# execute analysis in parallel with real-time updates
stats = []
n_workers = min(os.cpu_count() or 1, len(combos))
with ThreadPoolExecutor(max_workers=n_workers) as exe:
    futures = {exe.submit(analyze_combo, c): c[0] for c in combos}
    for fut in tqdm(as_completed(futures),
                    total=len(futures),
                    desc="Analyzing",
                    unit="combo"):
        stats.append(fut.result())

stats_df = pd.concat(stats)

  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(


  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(


  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(


  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
  ics = df.groupby('eom').apply(
Analyzing: 100%|██████████| 64/64 [00:27<00:00,  2.37combo/s]


In [17]:
def analyze_combo_performance_stats(
        df: pd.DataFrame,
        target: str = 'sharpe_long_short',
        factors: list = ['variant', 'label', 'weight', 'loss'],
        drop_first: bool = False):
    """
    Fit an OLS regression via statsmodels to explain `target` by categorical `factors`,
    providing intercept, coefficients, and significance levels.

    Parameters
    ----------
    df : pd.DataFrame
        Panel data containing target and factor columns.
    target : str
        Name of the performance metric column.
    factors : list of str
        Categorical setup columns to dummy-encode.
    drop_first : bool
        If True, drop the first level of each factor to serve as baseline.

    Returns
    -------
    results : RegressionResultsWrapper
        The fitted OLS model results; summary is printed.
    """
    # Build design matrix
    X = pd.get_dummies(df[factors], drop_first=drop_first)
    #     X = sm.add_constant(X)  # always include intercept
    y = df[target]

    # Fit OLS
    model = sm.OLS(y, X).fit()

    return model


def analyze_multiple_targets(df: pd.DataFrame,
                             targets: list,
                             factors: list = [
                                 'variant', 'label', 'weight', 'loss'
                             ],
                             drop_first: bool = False) -> pd.DataFrame:
    """
    Reuse analyze_combo_performance_stats to run regressions for multiple targets.
    Returns a combined DataFrame of coefficients.
    """
    all_coefs = []
    for tgt in targets:
        model = analyze_combo_performance_stats(df,
                                                target=tgt,
                                                factors=factors,
                                                drop_first=drop_first)
        params = model.params
        all_coefs.append(pd.DataFrame(params, columns=[tgt]))

    combined_df = pd.concat(all_coefs, axis=1)
    return combined_df

In [13]:
stats_df.sort_values(['sharpe_long_short'])

Unnamed: 0,combo,variant,label,weight,loss,sharpe_q1,sharpe_q2,sharpe_q3,sharpe_q4,sharpe_q5,sharpe_q6,sharpe_q7,sharpe_q8,sharpe_q9,sharpe_q10,sharpe_long_short,fm_beta,fm_tstat,ic_mean,ic_tstat
0,raw_ret_invn_w_ew_mse,raw,ret_invn,w_ew,mse,0.686743,0.648738,0.590922,0.656697,0.675330,0.588211,0.523123,0.437883,0.526845,0.485773,-0.106805,-0.010400,-1.461543,-0.037184,-8.061258
0,raw_ret_exc_lead1m_w_vw_mse,raw,ret_exc_lead1m,w_vw,mse,0.665572,0.694707,0.757096,0.647091,0.581812,0.647773,0.631843,0.466140,0.423915,0.484591,-0.078327,-0.003613,-0.844821,-0.037501,-8.167034
0,raw_ret_exc_lead1m_w_vw_mae,raw,ret_exc_lead1m,w_vw,mae,0.684165,0.656908,0.661541,0.704069,0.623348,0.674423,0.629423,0.659743,0.563962,0.639803,-0.073935,0.002158,0.201893,0.008812,3.425730
0,raw_ret_z_w_vw_mae,raw,ret_z,w_vw,mae,0.600054,0.595727,0.556420,0.592107,0.578844,0.662676,0.695824,0.698431,0.678080,0.654568,-0.064103,-0.001011,-0.521399,-0.006987,-1.408658
0,raw_ret_z_w_ew_mae,raw,ret_z,w_ew,mae,0.593985,0.549303,0.486063,0.416587,0.453581,0.600676,0.658907,0.565615,0.716592,0.677718,-0.053891,0.001762,0.157398,0.026513,6.602181
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,invn_ret_exc_lead1m_w_vw_mse,invn,ret_exc_lead1m,w_vw,mse,-0.141662,0.392322,0.443622,0.535707,0.716958,0.669732,0.650764,0.713966,0.806157,0.725975,0.808348,0.376752,32.339828,0.075423,16.245264
0,invn_ret_pct_w_ew_mae,invn,ret_pct,w_ew,mae,-0.188175,0.242581,0.364825,0.530886,0.420746,0.585459,0.767559,0.687346,0.813079,0.654799,0.814882,0.089387,43.487326,0.083696,17.299266
0,invn_ret_pct_w_ew_mse,invn,ret_pct,w_ew,mse,0.091494,0.368122,0.386784,0.528287,0.551235,0.614574,0.656754,0.816010,0.708010,0.884238,0.825277,0.206725,47.910817,0.077659,19.052041
0,invn_ret_invn_w_ew_mae,invn,ret_invn,w_ew,mae,-0.191661,0.263063,0.318381,0.423225,0.655178,0.524396,0.700297,0.738989,0.765690,0.692117,0.893507,0.033318,45.192934,0.079802,17.827567


In [20]:
stats_df.sort_values(['ic_mean'])

Unnamed: 0,combo,variant,label,weight,loss,sharpe_q1,sharpe_q2,sharpe_q3,sharpe_q4,sharpe_q5,sharpe_q6,sharpe_q7,sharpe_q8,sharpe_q9,sharpe_q10,sharpe_long_short,fm_beta,fm_tstat,ic_mean,ic_tstat
0,raw_ret_exc_lead1m_w_vw_mse,raw,ret_exc_lead1m,w_vw,mse,0.657299,0.570377,0.575231,0.699272,0.536405,0.572740,0.706519,0.669869,0.640574,0.645546,-0.010784,-0.010637,-0.432395,-0.028671,-5.481941
0,raw_ret_exc_lead1m_w_ew_mse,raw,ret_exc_lead1m,w_ew,mse,0.576724,0.599126,0.606013,0.635964,0.497792,0.465906,0.720788,0.380435,0.644442,0.661653,0.082425,0.011522,0.541704,-0.015769,-3.152833
0,raw_ret_z_w_vw_mse,raw,ret_z,w_vw,mse,0.628786,0.555368,0.656153,0.548955,0.676968,0.567339,0.685074,0.653190,0.738782,0.650284,0.036685,0.001373,0.517661,-0.013291,-4.102083
0,raw_ret_z_w_ew_mse,raw,ret_z,w_ew,mse,0.677612,0.609888,0.646657,0.543687,0.657027,0.638752,0.658967,0.519706,0.463544,0.428343,-0.109347,-0.034720,-0.753256,-0.006907,-1.761939
0,raw_ret_pct_w_vw_mae,raw,ret_pct,w_vw,mae,0.699676,0.515996,0.571950,0.464046,0.446460,0.531143,0.677634,0.722518,0.602211,0.750887,0.256096,0.000316,0.524100,-0.006453,-4.037856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,pct_ret_invn_w_vw_mse,pct,ret_invn,w_vw,mse,0.034941,0.252694,0.342422,0.424254,0.557752,0.679340,0.565011,0.614576,0.750385,0.772897,0.529557,0.043685,28.791242,0.094355,13.671888
0,invn_ret_exc_lead1m_w_vw_mae,invn,ret_exc_lead1m,w_vw,mae,-0.068922,0.148611,0.365403,0.375545,0.631527,0.550076,0.533744,0.710235,0.658853,0.848560,0.630544,0.334580,29.638486,0.094492,13.709032
0,invn_ret_pct_w_ew_mse,invn,ret_pct,w_ew,mse,-0.242620,0.173203,0.295554,0.387761,0.470061,0.535879,0.565711,0.703529,0.868473,0.816100,0.800725,0.175873,44.222812,0.094542,15.988462
0,invn_ret_invn_w_vw_mae,invn,ret_invn,w_vw,mae,0.008168,0.269911,0.329039,0.333533,0.574759,0.552210,0.582295,0.635120,0.630831,0.847835,0.501736,0.029979,25.862589,0.094563,12.863858


In [14]:
analyze_multiple_targets(
    stats_df,
    ['sharpe_long_short', 'fm_beta', 'fm_tstat', 'ic_mean', 'ic_tstat']).sort_index()

Unnamed: 0,sharpe_long_short,fm_beta,fm_tstat,ic_mean,ic_tstat
label_ret_exc_lead1m,0.074676,0.144481,4.168051,-0.001108,-0.229203
label_ret_invn,0.104666,-0.049404,5.02342,0.016288,2.718069
label_ret_pct,0.070676,0.002409,2.483542,0.016498,2.997093
label_ret_z,0.052253,-0.033559,5.609942,0.005746,1.749497
loss_mae,0.180818,0.024803,7.881086,0.02612,4.230748
loss_mse,0.121454,0.039124,9.403869,0.011305,3.004708
variant_invn,0.268075,0.042105,14.2165,0.027809,5.104869
variant_pct,0.253982,0.04132,11.080081,0.026012,4.437554
variant_raw,-0.361932,-0.081489,-21.849422,-0.044883,-7.86894
variant_z,0.142147,0.06199,13.837797,0.028487,5.561974
