# IV Skew Delta FF5 — Stock-Level Regressions

Estimate a Fama–French 5-factor time-series regression for every stock with sufficient IV skew delta history to see how the signal behaves name by name.


In [7]:
import pandas as pd
import polars as pl
import numpy as np
import statsmodels.api as sm
from pathlib import Path

# Show every column when a DataFrame is rendered (no truncation of wide frames).
pd.options.display.max_columns = None
print('✓ Libraries ready')


✓ Libraries ready


In [8]:
# Location of the pre-built daily IV skew delta panel (path is relative to the
# notebook's working directory).
PROCESSED_DATA_DIR = Path('processed_data')
IV_DELTA_PATH = PROCESSED_DATA_DIR.joinpath('daily_iv_skew_delta.parquet')

# Read the full panel with polars and report its shape and columns so the
# reader can confirm the expected schema before any filtering happens.
print(f'Loading IV skew delta panel from {IV_DELTA_PATH} ...')
iv_delta_pl = pl.read_parquet(IV_DELTA_PATH)
loaded_shape = iv_delta_pl.shape
loaded_columns = iv_delta_pl.columns
print(f"✓ Loaded shape: {loaded_shape}")
print(f"  Columns available: {loaded_columns}")


Loading IV skew delta panel from processed_data/daily_iv_skew_delta.parquet ...
✓ Loaded shape: (624626, 18)
  Columns available: ['secid', 'iv_date', 'IV_skew', 'IV_skew_lag25', 'IV_skew_delta_25', 'PERMNO', 'TICKER', 'COMNAM', 'PRC', 'next_return', 'next_date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF', 'excess_return']


In [9]:
# Fama–French 5 factor columns used as regressors in the per-stock regressions.
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
# Columns carried into the regression panel; identifiers such as PERMNO/TICKER
# are optional and only kept when present in the loaded dataset.
keep_cols = ['secid', 'PERMNO', 'TICKER', 'iv_date', 'next_date', 'IV_skew_delta_25', 'excess_return'] + factor_cols + ['RF']

# Fail fast with a clear message if any column the rest of the notebook relies
# on is absent. 'IV_skew_delta_25' is included because it is used
# unconditionally by the null filter below and by the per-stock summaries.
required_cols = ['secid', 'next_date', 'IV_skew_delta_25', 'excess_return'] + factor_cols
missing_required = [c for c in required_cols if c not in iv_delta_pl.columns]
if missing_required:
    raise ValueError(f"Dataset missing required columns: {missing_required}")

available_cols = [c for c in keep_cols if c in iv_delta_pl.columns]

# Keep only rows where the signal, the dependent variable and every factor
# are non-null.
stock_panel_pl = (
    iv_delta_pl
    .select(available_cols)
    .drop_nulls(['IV_skew_delta_25', 'excess_return'] + factor_cols)
)
print(f"Rows after dropping nulls for regression inputs: {stock_panel_pl.shape[0]:,}")

# Switch to pandas for the regression step and normalise the date columns.
stock_panel = stock_panel_pl.to_pandas()
stock_panel['next_date'] = pd.to_datetime(stock_panel['next_date'])
if 'iv_date' in stock_panel.columns:
    stock_panel['iv_date'] = pd.to_datetime(stock_panel['iv_date'])
# Sort by stock and then by return date so each stock's rows are in time order.
stock_panel = stock_panel.sort_values(['secid', 'next_date']).reset_index(drop=True)
stock_panel.head()


Rows after dropping nulls for regression inputs: 624,626


Unnamed: 0,secid,PERMNO,TICKER,iv_date,next_date,IV_skew_delta_25,excess_return,Mkt-RF,SMB,HML,RMW,CMA,RF
0,6646,75672,WWR,2023-03-06,2023-03-07,-0.193222,0.128164,-0.0145,0.0063,-0.0067,-0.0013,-0.0012,0.0002
1,6646,75672,WWR,2023-04-04,2023-04-05,-0.243835,-0.038758,-0.0039,-0.01,0.014,0.0079,0.0129,0.0002
2,8170,80341,MPAA,2019-10-22,2019-10-23,-0.06043,0.026124,0.0025,-0.001,0.0027,-0.0001,0.0009,0.0001
3,8170,80341,MPAA,2019-10-23,2019-10-24,0.128819,0.011257,0.0025,-0.005,-0.0089,-0.0005,-0.0061,0.0001
4,8170,80341,MPAA,2019-10-24,2019-10-25,0.088066,0.024044,0.005,0.004,0.0007,0.0034,0.0,0.0001


In [10]:
MIN_OBS = 200   # minimum daily observations per stock (~0.8 of a 252-day trading year)
HAC_LAGS = 21   # ≈ one trading month

print(f"Running FF5 regressions for stocks with ≥{MIN_OBS} observations...")
results = []        # one summary record (dict) per fitted stock
stock_models = {}   # secid -> fitted statsmodels results, kept for later inspection

# One time-series regression per secid: excess_return ~ const + FF5 factors.
# NOTE: HAC standard errors depend on row order; this relies on stock_panel
# having been sorted by (secid, next_date) when it was built.
for secid, sub in stock_panel.groupby('secid'):
    n_obs = len(sub)
    if n_obs < MIN_OBS:
        continue

    # Cap the HAC lag below the sample size; only binds if MIN_OBS is lowered
    # to HAC_LAGS + 1 or fewer observations.
    lag = min(HAC_LAGS, max(1, n_obs - 1))

    # has_constant='add' forces an intercept column named 'const' even if one
    # regressor happens to be constant within this stock's sample. With the
    # default ('skip') no intercept would be added in that case and the
    # params['const'] lookup below would raise KeyError.
    X = sm.add_constant(sub[factor_cols], has_constant='add')
    y = sub['excess_return']
    model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': lag})
    stock_models[secid] = model

    params = model.params
    tvals = model.tvalues
    results.append({
        'secid': secid,
        'ticker': sub['TICKER'].iloc[0] if 'TICKER' in sub.columns else None,
        'n_obs': n_obs,
        'alpha': params['const'],
        'alpha_t': tvals['const'],
        'beta_mkt': params['Mkt-RF'],
        'beta_smb': params['SMB'],
        'beta_hml': params['HML'],
        'beta_rmw': params['RMW'],
        'beta_cma': params['CMA'],
        'avg_iv_skew_delta': sub['IV_skew_delta_25'].mean(),
        'std_iv_skew_delta': sub['IV_skew_delta_25'].std(),
        'r_squared': model.rsquared
    })

# Every eligible stock contributes exactly one record, so the count is derived
# from the results list rather than tracked by hand.
eligible_names = len(results)
print(f"✓ Estimated regressions for {eligible_names:,} stocks")
stock_results_df = pd.DataFrame(results)
stock_results_df.head()


Running FF5 regressions for stocks with ≥200 observations...
✓ Estimated regressions for 1,266 stocks


Unnamed: 0,secid,ticker,n_obs,alpha,alpha_t,beta_mkt,beta_smb,beta_hml,beta_rmw,beta_cma,avg_iv_skew_delta,std_iv_skew_delta,r_squared
0,100862,AIR,200,0.002636,1.915639,1.211235,0.700057,1.126032,0.217041,-0.096726,0.002934,0.08675,0.444498
1,100885,ABM,242,0.000245,0.22729,0.864871,0.718832,0.369132,0.354203,0.023636,-0.004164,0.068663,0.444472
2,100892,AFL,517,0.000896,1.84272,0.934954,-0.10164,0.962996,-0.337153,-0.188139,0.000758,0.026379,0.502223
3,100896,AGCO,338,0.000717,1.030093,1.058428,0.320646,0.617576,-0.277929,0.13253,0.001129,0.040754,0.469752
4,100900,AES,409,-0.000256,-0.342806,0.915351,0.141977,0.580277,-0.330238,-0.442957,0.001563,0.028076,0.444067


In [11]:
if not stock_results_df.empty:
    # Cross-sectional distribution of the estimated intercepts.
    alpha_summary = stock_results_df['alpha'].describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9])
    print('Alpha distribution (daily excess-return units):')
    display(alpha_summary)

    # Columns shown in the ranking tables below.
    cols_to_show = ['secid', 'ticker', 'n_obs', 'alpha', 'alpha_t', 'r_squared', 'beta_mkt', 'beta_smb', 'beta_hml', 'beta_rmw', 'beta_cma']
    ranking_view = stock_results_df[cols_to_show]

    # Ten largest and ten smallest alpha t-statistics.
    top_positive = ranking_view.sort_values('alpha_t', ascending=False).head(10)
    top_negative = ranking_view.sort_values('alpha_t').head(10)

    print('\nTop 10 stocks by positive alpha t-stat:')
    display(top_positive.round(4))
    print('\nTop 10 stocks by negative alpha t-stat:')
    display(top_negative.round(4))
else:
    print('No stocks met the observation threshold. Consider lowering MIN_OBS.')


Alpha distribution (daily excess-return units):


count    1266.000000
mean        0.000217
std         0.001396
min        -0.007544
10%        -0.001365
25%        -0.000443
50%         0.000246
75%         0.000957
90%         0.001652
max         0.011093
Name: alpha, dtype: float64


Top 10 stocks by positive alpha t-stat:


Unnamed: 0,secid,ticker,n_obs,alpha,alpha_t,r_squared,beta_mkt,beta_smb,beta_hml,beta_rmw,beta_cma
260,105109,AJG,377,0.0017,3.8337,0.494,0.8491,-0.3815,0.1475,-0.0746,0.0962
19,101164,APD,444,0.0015,3.5613,0.5087,0.9133,-0.153,0.0522,0.1341,0.2398
989,189470,GLPI,370,0.0016,3.5611,0.4239,0.8684,0.1249,0.1486,-0.0101,0.1338
1044,205771,FRPT,330,0.0039,3.5587,0.336,1.0854,0.7385,-0.226,-0.4471,-0.4372
1113,210217,TTD,602,0.0037,3.4857,0.5058,1.5577,0.7788,-0.8136,-1.117,-1.0459
391,107253,MLM,437,0.0025,3.4384,0.4682,0.9951,0.5343,0.5625,0.2898,-0.3859
1150,212141,MDB,566,0.0038,3.3626,0.4752,1.1904,0.4841,-0.8768,-1.2409,-1.3072
216,104124,ETN,503,0.0014,3.2356,0.595,1.008,0.3251,0.3759,0.4884,-0.2511
1105,209603,ATKR,308,0.0033,3.2131,0.4626,1.385,1.4191,0.1779,0.8784,-0.0679
671,111962,WCC,381,0.0028,3.1753,0.5515,1.3533,1.0061,0.7751,0.2317,-0.5345



Top 10 stocks by negative alpha t-stat:


Unnamed: 0,secid,ticker,n_obs,alpha,alpha_t,r_squared,beta_mkt,beta_smb,beta_hml,beta_rmw,beta_cma
421,107749,MOV,226,-0.0029,-3.5553,0.4054,1.2503,1.3966,0.0885,0.4562,0.6936
90,102113,BBY,619,-0.0021,-3.2819,0.4375,1.2278,0.4691,-0.0958,0.6063,0.0635
980,188534,AGIO,210,-0.0035,-3.1959,0.3815,0.9778,0.5171,-0.2,-0.7039,-0.2313
149,103042,CSCO,638,-0.0011,-3.1429,0.5479,1.0017,-0.1087,-0.1699,0.3609,0.586
664,111861,WBA,610,-0.0022,-3.0933,0.3122,0.9604,0.0619,0.1842,0.1729,0.6135
1063,207394,ENR,250,-0.0026,-2.7698,0.2475,0.7983,0.4588,0.2773,0.1934,-0.282
916,148166,AMCX,329,-0.0036,-2.6958,0.1974,0.9535,0.8923,0.259,-0.5871,0.0051
556,110050,SMTC,315,-0.0039,-2.6408,0.3744,1.3871,1.0425,-0.3031,0.2937,-0.3186
318,106049,INCY,435,-0.002,-2.4505,0.1985,0.6629,0.0146,-0.2671,-0.4311,0.4407
1228,214062,GRWG,221,-0.0053,-2.3562,0.516,1.9141,1.7086,-0.4743,-1.3333,-0.8876


In [12]:
if not stock_results_df.empty:
    # Locate the row with the largest alpha t-statistic once, then read the
    # identifiers for that row.
    best_idx = stock_results_df['alpha_t'].idxmax()
    best_secid = stock_results_df.loc[best_idx, 'secid']
    best_ticker = stock_results_df.loc[best_idx, 'ticker']
    print(f"Detailed HAC regression for secid {best_secid} ({best_ticker})")
    # Full statsmodels summary for the stored fit of that stock.
    display(stock_models[best_secid].summary())
else:
    print('No model summaries available.')


Detailed HAC regression for secid 105109 (AJG)


0,1,2,3
Dep. Variable:,excess_return,R-squared:,0.494
Model:,OLS,Adj. R-squared:,0.487
Method:,Least Squares,F-statistic:,65.23
Date:,"Sat, 15 Nov 2025",Prob (F-statistic):,9.44e-49
Time:,18:30:00,Log-Likelihood:,1210.9
No. Observations:,377,AIC:,-2410.0
Df Residuals:,371,BIC:,-2386.0
Df Model:,5,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0017,0.000,3.834,0.000,0.001,0.003
Mkt-RF,0.8491,0.055,15.396,0.000,0.741,0.957
SMB,-0.3815,0.107,-3.562,0.000,-0.591,-0.172
HML,0.1475,0.106,1.389,0.165,-0.061,0.356
RMW,-0.0746,0.118,-0.632,0.527,-0.306,0.157
CMA,0.0962,0.204,0.471,0.638,-0.304,0.497

0,1,2,3
Omnibus:,24.965,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,79.054
Skew:,0.153,Prob(JB):,6.82e-18
Kurtosis:,5.222,Cond. No.,265.0


### Notes on methodology & standard errors
- **Setup:** For each `secid`, regress daily excess returns on the FF5 factors plus an intercept. Requiring `MIN_OBS = 200` ensures the HAC estimator has enough data; tweak the threshold for thinner histories.
- **HAC choice:** Each stock's daily observations inherit overlap from the 25-day IV skew delta lag, so a Newey–West/HAC correction with `maxlags = 21` (≈ one trading month) keeps alpha t-stats conservative. You can scale the lag to your holding period (e.g., 63 for quarterly horizons) or fall back to White HC1 if you are confident serial correlation is negligible.
- **Per-stock nuance:** The HAC lag is capped at `len(stock) - 1`, so it shrinks automatically when a name has 21 or fewer usable days. With `MIN_OBS = 200` this cap never binds; it only matters if you lower `MIN_OBS` to 21 or below. To flag cross-sectional patterns (e.g., sectors with positive alphas), aggregate `stock_results_df` after running the notebook.
