# Firm-Level FF3 Regressions by IV Skew Quintiles

This notebook builds on the processed dataset from `data_loader.ipynb` and explores Fama-French 3-factor regressions conditional on implied-volatility skew quintiles. We start with a single firm (AAPL) before generalizing to other tickers.


In [21]:
import pandas as pd
import polars as pl
import numpy as np
import statsmodels.api as sm
from pathlib import Path

# Show every column when a wide DataFrame is rendered (no truncation).
pd.options.display.max_columns = None
print("✓ Libraries ready")


✓ Libraries ready


In [22]:
# Location of the merged parquet file (see the notebook intro: it is the
# processed dataset produced by data_loader.ipynb).
PROCESSED_DATA_DIR = Path("processed_data")
MERGED_PATH = PROCESSED_DATA_DIR.joinpath("merged_data_with_ff3.parquet")

print(f"Loading merged dataset from {MERGED_PATH} ...")
# Read with polars; the frame is converted to pandas only after filtering.
merged_df = pl.read_parquet(MERGED_PATH)
print(f"✓ Loaded dataset with shape: {merged_df.shape}")
print(f"  Columns: {merged_df.columns}")


Loading merged dataset from processed_data/merged_data_with_ff3.parquet ...
✓ Loaded dataset with shape: (514768, 17)
  Columns: ['secid', 'week_start', 'week_end', 'IV_skew', 'PERMNO', 'week_start_right', 'weekly_return', 'week_end_right', 'TICKER', 'COMNAM', 'trading_days', 'sdate', 'edate', 'Mkt-RF', 'SMB', 'HML', 'RF']


In [23]:
FIRM_TICKER = "AAPL"
REQUIRED_COLS = [
    "secid", "TICKER", "week_start", "week_end", "IV_skew",
    "weekly_return", "Mkt-RF", "SMB", "HML", "RF"
]
# Columns that must be non-null for a week to be usable in the regressions.
non_null_cols = ["IV_skew", "weekly_return", "Mkt-RF", "SMB", "HML", "RF"]

# Slice the merged panel down to one firm, in chronological order.
firm_pl = (
    merged_df
    .filter(pl.col("TICKER") == FIRM_TICKER)
    .select(REQUIRED_COLS)
    .drop_nulls(non_null_cols)
    .sort("week_start")
)

# Switch to pandas for the statsmodels work that follows.
firm_pd = firm_pl.to_pandas()
for date_col in ("week_start", "week_end"):
    firm_pd[date_col] = pd.to_datetime(firm_pd[date_col])
firm_pd["excess_return"] = firm_pd["weekly_return"] - firm_pd["RF"]

# qcut with labels=False yields 0-based bin codes; shift them to 1..5 and keep
# a float dtype so that any unassigned rows can stay NaN.
quintile_codes = pd.qcut(
    firm_pd["IV_skew"],
    q=5,
    labels=False,
    duplicates="drop"
)
firm_pd["iv_quintile"] = quintile_codes.astype("float") + 1

print(f"Observations for {FIRM_TICKER}: {len(firm_pd):,}")
print("IV skew quintile distribution:")
print(firm_pd["iv_quintile"].value_counts(dropna=False).sort_index())
firm_pd.head()


Observations for AAPL: 244
IV skew quintile distribution:
iv_quintile
1.0    49
2.0    49
3.0    48
4.0    49
5.0    49
Name: count, dtype: int64


Unnamed: 0,secid,TICKER,week_start,week_end,IV_skew,weekly_return,Mkt-RF,SMB,HML,RF,excess_return,iv_quintile
0,101594,AAPL,2018-12-31,2019-01-04,-0.023604,0.027182,0.0289,0.0219,-0.0155,0.0005,0.026682,4.0
1,101594,AAPL,2019-01-07,2019-01-11,-0.026902,0.029747,0.0284,-0.0085,0.0089,0.0005,0.029247,3.0
2,101594,AAPL,2019-01-14,2019-01-18,-0.032017,0.005994,-0.0027,0.001,-0.0017,0.0005,0.005494,3.0
3,101594,AAPL,2019-01-21,2019-01-25,-0.031406,0.055527,0.0156,-0.0033,-0.008,0.0005,0.055027,3.0
4,101594,AAPL,2019-01-28,2019-02-01,-0.033564,0.027745,0.0011,0.0031,-0.0131,0.0005,0.027245,2.0


In [24]:
def run_ff3(subset: pd.DataFrame):
    """Fit a three-factor regression on ``subset`` with HC1 standard errors.

    Regresses ``excess_return`` on an intercept plus the ``Mkt-RF``, ``SMB``
    and ``HML`` columns of ``subset`` using OLS, then re-derives the
    covariance with ``cov_type="HC1"``.

    Parameters
    ----------
    subset : pd.DataFrame
        Rows to fit; must contain ``excess_return``, ``Mkt-RF``, ``SMB``
        and ``HML``.

    Returns
    -------
    The statsmodels results object returned by ``get_robustcov_results``,
    with the R-squared of the plain OLS fit attached as ``_rsquared``.
    """
    design = sm.add_constant(subset[["Mkt-RF", "SMB", "HML"]])
    plain_fit = sm.OLS(subset["excess_return"], design).fit()
    robust_result = plain_fit.get_robustcov_results(cov_type="HC1")
    # Callers read R-squared back through this attribute.
    robust_result._rsquared = plain_fit.rsquared
    return robust_result


def star_from_p(pval: float) -> str:
    """Translate a p-value into a significance marker.

    Returns ``"***"`` for p < 0.01, ``"**"`` for p < 0.05, ``"*"`` for
    p < 0.10, and an empty string otherwise (including for NaN).
    """
    if np.isnan(pval):
        return ""
    # Cutoffs are checked from strictest to loosest; the first hit wins.
    for cutoff, marker in ((0.01, "***"), (0.05, "**"), (0.10, "*")):
        if pval < cutoff:
            return marker
    return ""


# Minimum number of weekly observations required to fit a quintile regression.
MIN_OBS_PER_QUINTILE = 20

# Regressor name in the fitted model -> column prefix used in the results table.
COEF_LABELS = {
    "const": "alpha",
    "Mkt-RF": "beta_mkt",
    "SMB": "beta_smb",
    "HML": "beta_hml",
}
# Fixed column layout, so the table is well-formed even if every quintile is
# skipped (an empty list of records would otherwise yield a frame without a
# "quintile" column and sort_values would raise KeyError).
RESULT_COLUMNS = (
    ["quintile", "n_obs"]
    + [
        f"{label}{suffix}"
        for label in COEF_LABELS.values()
        for suffix in ("", "_t", "_p", "_sig")
    ]
    + ["r_squared"]
)

results = []
for quintile in sorted(firm_pd["iv_quintile"].dropna().unique()):
    sub = firm_pd[firm_pd["iv_quintile"] == quintile]
    if len(sub) < MIN_OBS_PER_QUINTILE:
        print(f"Skipping quintile {quintile}: insufficient observations ({len(sub)})")
        continue
    model = run_ff3(sub)
    # Label the estimates by regressor name so they can be looked up by key
    # whether the results object exposes arrays or Series.
    exog_names = model.model.exog_names
    params = pd.Series(model.params, index=exog_names)
    tvals = pd.Series(model.tvalues, index=exog_names)
    pvals = pd.Series(model.pvalues, index=exog_names)

    row = {"quintile": int(quintile), "n_obs": len(sub)}
    for coef_name, label in COEF_LABELS.items():
        row[label] = params[coef_name]
        row[f"{label}_t"] = tvals[coef_name]
        row[f"{label}_p"] = pvals[coef_name]
        row[f"{label}_sig"] = star_from_p(pvals[coef_name])
    # R-squared does not depend on the covariance estimator, so read it from
    # the public property rather than a patched private attribute that would
    # silently degrade to NaN if it were ever missing.
    row["r_squared"] = model.rsquared
    results.append(row)

results_df = pd.DataFrame(results, columns=RESULT_COLUMNS).sort_values("quintile")
# Use the configured ticker so the title stays correct for other firms.
print(f"FF3 regression results by IV skew quintile ({FIRM_TICKER}):")
results_df


FF3 regression results by IV skew quintile (AAPL):


Unnamed: 0,quintile,n_obs,alpha,alpha_t,alpha_p,alpha_sig,beta_mkt,beta_mkt_t,beta_mkt_p,beta_mkt_sig,beta_smb,beta_smb_t,beta_smb_p,beta_smb_sig,beta_hml,beta_hml_t,beta_hml_p,beta_hml_sig,r_squared
0,1,49,0.002805,0.864592,0.391849,,0.958155,6.130065,2.001224e-07,***,-0.265683,-1.002372,0.321524,,-0.219368,-1.58736,0.119434,,0.593493
1,2,49,0.005813,2.032665,0.048012,**,1.148682,11.24883,1.148262e-14,***,-0.533078,-2.228053,0.030919,**,-0.272342,-2.300317,0.026121,**,0.689475
2,3,48,0.006083,1.831669,0.073776,*,1.182837,10.319276,2.511762e-13,***,-0.504936,-1.958401,0.056541,*,-0.319798,-2.687405,0.010126,**,0.704875
3,4,49,0.003893,1.129965,0.264478,,1.340339,16.939305,3.771872e-21,***,-0.632213,-2.74498,0.008664,***,-0.410009,-3.231272,0.002307,***,0.761539
4,5,49,0.002238,0.492524,0.624744,,1.026517,7.690792,9.752336e-10,***,-0.520613,-2.484018,0.01678,**,-0.485831,-3.059438,0.003729,***,0.605435


In [25]:
display_cols = [
    "quintile", "n_obs",
    "alpha", "alpha_t", "alpha_p", "alpha_sig",
    "beta_mkt", "beta_mkt_t", "beta_mkt_p", "beta_mkt_sig",
    "beta_smb", "beta_smb_t", "beta_smb_p", "beta_smb_sig",
    "beta_hml", "beta_hml_t", "beta_hml_p", "beta_hml_sig",
    "r_squared"
]

# Columns to round: point estimates, R-squared, and every t-stat / p-value.
estimate_cols = {"alpha", "beta_mkt", "beta_smb", "beta_hml", "r_squared"}
numeric_cols = [
    col for col in display_cols
    if col in estimate_cols or col.endswith(("_p", "_t"))
]

# round() with a per-column mapping returns a new frame and leaves the
# counts and significance markers untouched.
formatted_results = results_df[display_cols].round(dict.fromkeys(numeric_cols, 4))
formatted_results


Unnamed: 0,quintile,n_obs,alpha,alpha_t,alpha_p,alpha_sig,beta_mkt,beta_mkt_t,beta_mkt_p,beta_mkt_sig,beta_smb,beta_smb_t,beta_smb_p,beta_smb_sig,beta_hml,beta_hml_t,beta_hml_p,beta_hml_sig,r_squared
0,1,49,0.0028,0.8646,0.3918,,0.9582,6.1301,0.0,***,-0.2657,-1.0024,0.3215,,-0.2194,-1.5874,0.1194,,0.5935
1,2,49,0.0058,2.0327,0.048,**,1.1487,11.2488,0.0,***,-0.5331,-2.2281,0.0309,**,-0.2723,-2.3003,0.0261,**,0.6895
2,3,48,0.0061,1.8317,0.0738,*,1.1828,10.3193,0.0,***,-0.5049,-1.9584,0.0565,*,-0.3198,-2.6874,0.0101,**,0.7049
3,4,49,0.0039,1.13,0.2645,,1.3403,16.9393,0.0,***,-0.6322,-2.745,0.0087,***,-0.41,-3.2313,0.0023,***,0.7615
4,5,49,0.0022,0.4925,0.6247,,1.0265,7.6908,0.0,***,-0.5206,-2.484,0.0168,**,-0.4858,-3.0594,0.0037,***,0.6054


In [26]:
# Show the full statsmodels summary for one quintile as a sanity check.
sample_quintile = 5
sample_df = firm_pd.loc[firm_pd["iv_quintile"] == sample_quintile]

if len(sample_df) < 20:
    print(f"Not enough observations to display summary for quintile {sample_quintile}")
else:
    sample_model = run_ff3(sample_df)
    print(f"Detailed FF3 summary for {FIRM_TICKER} quintile {sample_quintile} (HC1 SEs)")
    display(sample_model.summary())


Detailed FF3 summary for AAPL quintile 5 (HC1 SEs)


0,1,2,3
Dep. Variable:,excess_return,R-squared:,0.605
Model:,OLS,Adj. R-squared:,0.579
Method:,Least Squares,F-statistic:,31.13
Date:,"Thu, 13 Nov 2025",Prob (F-statistic):,4.76e-11
Time:,19:24:06,Log-Likelihood:,105.85
No. Observations:,49,AIC:,-203.7
Df Residuals:,45,BIC:,-196.1
Df Model:,3,,
Covariance Type:,HC1,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0022,0.005,0.493,0.625,-0.007,0.011
Mkt-RF,1.0265,0.133,7.691,0.000,0.758,1.295
SMB,-0.5206,0.210,-2.484,0.017,-0.943,-0.098
HML,-0.4858,0.159,-3.059,0.004,-0.806,-0.166

0,1,2,3
Omnibus:,25.025,Durbin-Watson:,2.321
Prob(Omnibus):,0.0,Jarque-Bera (JB):,48.177
Skew:,1.491,Prob(JB):,3.46e-11
Kurtosis:,6.834,Cond. No.,64.5
