# Volatility Smile and Smirk Analysis

This notebook analyzes the volatility smile and smirk patterns in equity options markets. We examine the relationship between implied volatility skew and future returns across multiple securities.


## 1. Setup and Imports

Loading required libraries for data manipulation, statistical analysis, and visualization.


In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
import polars as pl 

# Statistical modeling and diagnostics
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan, het_white
from statsmodels.stats.stattools import jarque_bera

# Machine learning and preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Data source
from ucimlrepo import fetch_ucirepo

# Configure plotting
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 11

plt.rcParams['figure.constrained_layout.use'] = True
print("All libraries imported successfully")

All libraries imported successfully


## 2. Data Loading

### 2.1 Options Data

Loading the options dataset containing implied volatilities, strike prices, and moneyness metrics.

In [2]:
option_df = pl.read_csv('raw_data/options_data.csv')

In [3]:
option_df

secid,date,exdate,cp_flag,strike_price,volume,open_interest,impl_volatility,opprc,moneyness,tte,close,spread,mod_open_interest,noi
i64,str,str,str,i64,i64,i64,f64,f64,f64,i64,f64,f64,i64,i64
5594,"""2021-10-07""","""2021-11-19""","""C""",10000,0,2,0.422947,1.25,0.907441,43,11.02,0.4,2,2
5594,"""2021-10-08""","""2021-11-19""","""C""",10000,0,2,0.456293,1.375,0.897666,42,11.14,0.15,2,0
5594,"""2021-10-11""","""2021-11-19""","""C""",10000,0,2,0.438753,1.275,0.904159,39,11.06,0.35,2,0
5594,"""2021-10-12""","""2021-11-19""","""C""",10000,0,2,0.453386,1.3,0.902527,38,11.08,0.4,2,0
5594,"""2021-10-13""","""2021-11-19""","""C""",10000,0,2,0.424783,1.225,0.906618,37,11.03,0.35,2,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
218609,"""2023-08-31""","""2023-09-15""","""P""",90000,0,1,0.477438,11.55,1.14431,15,78.65,2.5,,
218609,"""2023-08-31""","""2023-10-20""","""P""",60000,0,2,0.506986,0.425,0.762873,50,78.65,0.65,,
218609,"""2023-08-31""","""2023-10-20""","""P""",70000,0,19,0.33145,0.825,0.890019,50,78.65,0.15,,
218609,"""2023-08-31""","""2023-10-20""","""P""",75000,0,16,0.24364,1.3,0.953592,50,78.65,1.0,,


In [4]:
start_date = option_df['date'].min()
end_date = option_df['date'].max()
print("Start date =", start_date)
print("End date =", end_date)

Start date = 2019-01-02
End date = 2023-08-31


### 2.2 Equity Returns Data

Loading historical equity returns from CRSP for all securities in our sample.

In [5]:
ret_df = (
    pl.read_csv(
        "./raw_data/all_equities.csv",
        schema_overrides={"RET": pl.Utf8}
    )
    .with_columns(pl.col("RET").cast(pl.Float64, strict=False))
)

In [6]:
ret_df

PERMNO,date,TICKER,COMNAM,PRC,RET
i64,str,str,str,f64,f64
10026,"""2019-01-02""","""JJSF""","""J & J SNACK FOODS CORP""",141.0,-0.024829
10026,"""2019-01-03""","""JJSF""","""J & J SNACK FOODS CORP""",143.02,0.014326
10026,"""2019-01-04""","""JJSF""","""J & J SNACK FOODS CORP""",144.84,0.012725
10026,"""2019-01-07""","""JJSF""","""J & J SNACK FOODS CORP""",145.41,0.003935
10026,"""2019-01-08""","""JJSF""","""J & J SNACK FOODS CORP""",148.7,0.022626
…,…,…,…,…,…
93436,"""2023-12-22""","""TSLA""","""TESLA INC""",252.53999,-0.007701
93436,"""2023-12-26""","""TSLA""","""TESLA INC""",256.60999,0.016116
93436,"""2023-12-27""","""TSLA""","""TESLA INC""",261.44,0.018822
93436,"""2023-12-28""","""TSLA""","""TESLA INC""",253.17999,-0.031594


### 2.2.1 Weekly Aggregation of Returns

Computing weekly returns by grouping daily returns into calendar weeks. Weekly returns are calculated as cumulative returns: $(1+r_1) \times (1+r_2) \times ... \times (1+r_n) - 1$.


In [7]:
# Create weekly aggregation of returns
weekly_ret_df = (
    ret_df
    .with_columns([
        # Parse date column as Date type if not already
        pl.col("date").str.to_date().alias("date"),
    ])
    .with_columns([
        # Create week_start column (Monday-anchored)
        pl.col("date").dt.truncate("1w").alias("week_start"),
    ])
    .filter(pl.col("RET").is_not_null() & pl.col("RET").is_finite())
    .group_by(["PERMNO", "week_start"])
    .agg([
        # Cumulative weekly return: product of (1 + daily returns) - 1
        ((pl.col("RET") + 1).product() - 1).alias("weekly_return"),
        # Last date in the week
        pl.max("date").alias("week_end"),
        # Additional info
        pl.first("TICKER").alias("TICKER"),
        pl.first("COMNAM").alias("COMNAM"),
        # Count of trading days in the week
        pl.count().alias("trading_days"),
    ])
    .sort(["PERMNO", "week_start"])
)


  pl.count().alias("trading_days"),


In [8]:
weekly_ret_df


PERMNO,week_start,weekly_return,week_end,TICKER,COMNAM,trading_days
i64,date,f64,date,str,str,u32
10026,2018-12-31,0.001728,2019-01-04,"""JJSF""","""J & J SNACK FOODS CORP""",3
10026,2019-01-07,0.027962,2019-01-11,"""JJSF""","""J & J SNACK FOODS CORP""",5
10026,2019-01-14,-0.011082,2019-01-18,"""JJSF""","""J & J SNACK FOODS CORP""",5
10026,2019-01-21,-0.018268,2019-01-25,"""JJSF""","""J & J SNACK FOODS CORP""",4
10026,2019-01-28,0.070909,2019-02-01,"""JJSF""","""J & J SNACK FOODS CORP""",5
…,…,…,…,…,…,…
93436,2023-11-27,0.014355,2023-12-01,"""TSLA""","""TESLA INC""",5
93436,2023-12-04,0.020978,2023-12-08,"""TSLA""","""TESLA INC""",5
93436,2023-12-11,0.039617,2023-12-15,"""TSLA""","""TESLA INC""",5
93436,2023-12-18,-0.003787,2023-12-22,"""TSLA""","""TESLA INC""",5


In [9]:
# Summary statistics of weekly returns
print("Weekly Returns Summary:")
print(f"Total observations: {len(weekly_ret_df):,}")
print(f"Unique securities: {weekly_ret_df['PERMNO'].n_unique()}")
print(f"Date range: {weekly_ret_df['week_start'].min()} to {weekly_ret_df['week_end'].max()}")
print(f"\nWeekly Return Statistics:")
print(weekly_ret_df['weekly_return'].describe())


Weekly Returns Summary:
Total observations: 2,233,778
Unique securities: 12281
Date range: 2018-12-31 to 2023-12-29

Weekly Return Statistics:
shape: (9, 2)
┌────────────┬────────────┐
│ statistic  ┆ value      │
│ ---        ┆ ---        │
│ str        ┆ f64        │
╞════════════╪════════════╡
│ count      ┆ 2.233778e6 │
│ null_count ┆ 0.0        │
│ mean       ┆ 0.001779   │
│ std        ┆ 0.092335   │
│ min        ┆ -0.985393  │
│ 25%        ┆ -0.023492  │
│ 50%        ┆ 0.000753   │
│ 75%        ┆ 0.023382   │
│ max        ┆ 18.728247  │
└────────────┴────────────┘


### 2.3 Security Identifier Mapping

Loading the mapping between OptionMetrics security IDs (secid) and CRSP PERMNOs to enable data linkage.

In [10]:
map_df = pl.read_csv("./raw_data/permno_secid_mapping.csv")

In [11]:
filtered_map = map_df.filter(pl.col('edate')>start_date)
filtered_map

secid,sdate,edate,PERMNO
i64,str,str,i64
5111,"""2021-03-18""","""2023-02-02""",20768
5121,"""2018-02-28""","""2019-08-13""",17295
5131,"""2007-04-02""","""2024-05-02""",88960
5139,"""2002-07-29""","""2024-12-31""",89462
5166,"""2014-01-15""","""2022-05-06""",14380
…,…,…,…
219171,"""2024-01-04""","""2024-12-31""",24747
219172,"""2024-01-04""","""2024-12-31""",24746
219173,"""2024-01-04""","""2024-12-31""",24718
219174,"""2024-01-25""","""2024-12-31""",24685


## 3. Data Preparation

### 3.1 Merging Returns with Security Identifiers

Joining the equity returns data with the security identifier mapping to enable analysis across datasets.

In [12]:
ret_df = ret_df.join(filtered_map,on="PERMNO")
ret_df

PERMNO,date,TICKER,COMNAM,PRC,RET,secid,sdate,edate
i64,str,str,str,f64,f64,i64,str,str
10026,"""2019-01-02""","""JJSF""","""J & J SNACK FOODS CORP""",141.0,-0.024829,106500,"""1996-01-02""","""2024-12-31"""
10026,"""2019-01-03""","""JJSF""","""J & J SNACK FOODS CORP""",143.02,0.014326,106500,"""1996-01-02""","""2024-12-31"""
10026,"""2019-01-04""","""JJSF""","""J & J SNACK FOODS CORP""",144.84,0.012725,106500,"""1996-01-02""","""2024-12-31"""
10026,"""2019-01-07""","""JJSF""","""J & J SNACK FOODS CORP""",145.41,0.003935,106500,"""1996-01-02""","""2024-12-31"""
10026,"""2019-01-08""","""JJSF""","""J & J SNACK FOODS CORP""",148.7,0.022626,106500,"""1996-01-02""","""2024-12-31"""
…,…,…,…,…,…,…,…,…
93436,"""2023-12-22""","""TSLA""","""TESLA INC""",252.53999,-0.007701,143439,"""2010-06-29""","""2024-12-31"""
93436,"""2023-12-26""","""TSLA""","""TESLA INC""",256.60999,0.016116,143439,"""2010-06-29""","""2024-12-31"""
93436,"""2023-12-27""","""TSLA""","""TESLA INC""",261.44,0.018822,143439,"""2010-06-29""","""2024-12-31"""
93436,"""2023-12-28""","""TSLA""","""TESLA INC""",253.17999,-0.031594,143439,"""2010-06-29""","""2024-12-31"""


### 3.2 Adding Security IDs to Weekly Returns

Adding `secid` to the weekly returns dataframe to enable joining with options data.


In [13]:
# Add secid to weekly returns for joining with options data
weekly_ret_df = weekly_ret_df.join(filtered_map, on="PERMNO")
weekly_ret_df


PERMNO,week_start,weekly_return,week_end,TICKER,COMNAM,trading_days,secid,sdate,edate
i64,date,f64,date,str,str,u32,i64,str,str
10026,2018-12-31,0.001728,2019-01-04,"""JJSF""","""J & J SNACK FOODS CORP""",3,106500,"""1996-01-02""","""2024-12-31"""
10026,2019-01-07,0.027962,2019-01-11,"""JJSF""","""J & J SNACK FOODS CORP""",5,106500,"""1996-01-02""","""2024-12-31"""
10026,2019-01-14,-0.011082,2019-01-18,"""JJSF""","""J & J SNACK FOODS CORP""",5,106500,"""1996-01-02""","""2024-12-31"""
10026,2019-01-21,-0.018268,2019-01-25,"""JJSF""","""J & J SNACK FOODS CORP""",4,106500,"""1996-01-02""","""2024-12-31"""
10026,2019-01-28,0.070909,2019-02-01,"""JJSF""","""J & J SNACK FOODS CORP""",5,106500,"""1996-01-02""","""2024-12-31"""
…,…,…,…,…,…,…,…,…,…
93436,2023-11-27,0.014355,2023-12-01,"""TSLA""","""TESLA INC""",5,143439,"""2010-06-29""","""2024-12-31"""
93436,2023-12-04,0.020978,2023-12-08,"""TSLA""","""TESLA INC""",5,143439,"""2010-06-29""","""2024-12-31"""
93436,2023-12-11,0.039617,2023-12-15,"""TSLA""","""TESLA INC""",5,143439,"""2010-06-29""","""2024-12-31"""
93436,2023-12-18,-0.003787,2023-12-22,"""TSLA""","""TESLA INC""",5,143439,"""2010-06-29""","""2024-12-31"""


## 4. Volatility Skew Calculation

### 4.1 Methodology

Computing weekly implied volatility skew by comparing average call and put IVs within specified moneyness ranges. The skew metric captures the asymmetry in the volatility smile, which can indicate market sentiment and forward-looking risk perceptions.

### 4.2 Weekly IV Skew Function

This function computes weighted average implied volatility skew on a weekly basis, filtering for options within a specified DTE range and moneyness bands. Multiple weighting schemes are supported (equal, weekday-based, linear, exponential).

In [14]:
def compute_weekly_iv_skew_streaming(
    csv_path: str,
    dte_limit: int = 60,
    call_range: tuple[float, float] = (0.95, 1.05),
    put_range: tuple[float, float] = (0.85, 1.05),
    weight_scheme: str = "weekday",   # "equal" | "weekday" | "linear" | "exp"
    alpha: float = 0.3,               # for "exp": weight = exp(alpha * (pos-1))
    streaming: bool = True,           # set False if using "linear"/"exp" (windowed)
) -> pl.DataFrame:
    c = pl.col

    # Use scan_csv to stay lazy/streaming; parse date at read-time
    lf = pl.scan_csv(
    csv_path,
    schema_overrides={
        "secid": pl.Int64,
        "date": pl.Date,
        "tte": pl.Int32,
        "impl_volatility": pl.Float32,
        "cp_flag": pl.Categorical,
        "moneyness": pl.Float32,
    },
).select(["secid", "date", "tte", "impl_volatility", "cp_flag", "moneyness"])

    opt = (
        lf
        .filter(
            (c("tte") < dte_limit)
            & c("moneyness").is_not_null()
            & c("impl_volatility").is_finite()
        )
        .with_columns([
            # Monday-anchored calendar week; only trading days exist
            c("date").dt.truncate("1w").alias("week_start"),
            # mark eligible call/put IVs
            pl.when(
                (c("cp_flag") == "C")
                & (c("moneyness") >= call_range[0])
                & (c("moneyness") <= call_range[1])
            ).then(c("impl_volatility")).otherwise(None).alias("call_iv"),
            pl.when(
                (c("cp_flag") == "P")
                & (c("moneyness") >= put_range[0])
                & (c("moneyness") <= put_range[1])
            ).then(c("impl_volatility")).otherwise(None).alias("put_iv"),
        ])
    )

    # Daily skew per secid/date
    daily = (
        opt.group_by(["secid", "date", "week_start"])
           .agg([
               pl.mean("call_iv").alias("call_iv_d"),
               pl.mean("put_iv").alias("put_iv_d"),
           ])
           .filter(c("call_iv_d").is_not_null() & c("put_iv_d").is_not_null())
           .with_columns((c("call_iv_d") - c("put_iv_d")).alias("skew_d"))
    )

    # Weights
    if weight_scheme == "equal":
        daily_w = daily.with_columns(pl.lit(1.0).alias("w"))
        allow_stream = True
    elif weight_scheme == "weekday":
        # Mon..Fri -> 1..5 (holidays/short weeks handled naturally)
        daily_w = daily.with_columns((c("date").dt.weekday() + 1).cast(pl.Float32).alias("w"))
        allow_stream = True
    elif weight_scheme in ("linear", "exp"):
        # Position within week (1..N) requires a window; may disable streaming
        daily_w = (
            daily.sort(["secid", "week_start", "date"])
                 .with_columns((pl.cum_count().over(["secid", "week_start"]) + 1).alias("pos"))
                 .with_columns(
                     pl.when(weight_scheme == "linear")
                       .then(c("pos").cast(pl.Float32))
                       .otherwise((pl.lit(alpha) * (c("pos") - 1)).exp().cast(pl.Float32))
                       .alias("w")
                 )
        )
        allow_stream = False
    else:
        raise ValueError("weight_scheme must be one of {'equal','weekday','linear','exp'}")

    weekly = (
        daily_w.group_by(["secid", "week_start"])
               .agg([
                   ((c("skew_d") * c("w")).sum() / c("w").sum()).alias("IV_skew"),
                   pl.max("date").alias("week_end"),  # last trading day in week
               ])
               .sort(["secid", "week_start"])
               .select(["secid", "week_start", "week_end", "IV_skew"])
    )

    return weekly.collect(streaming=(streaming and allow_stream))

### 4.3 Computing Weekly Skew Metrics

Applying the IV skew computation with weekday-weighted averaging to capture intra-week dynamics in option pricing.


In [15]:
weekly_option_df = compute_weekly_iv_skew_streaming(
    "./raw_data/options_data.csv",
    dte_limit=60,
    weight_scheme="weekday",   # or "equal" / "linear" / "exp"
    alpha=0.4,                 # only used for "exp"
)



### 4.4 Results Preview

Displaying the computed weekly IV skew metrics for all securities in the sample period.


In [16]:
weekly_option_df


secid,week_start,week_end,IV_skew
i64,date,date,f32
5594,2021-11-08,2021-11-11,0.076602
5594,2021-11-15,2021-11-19,0.028996
5594,2021-11-22,2021-11-24,0.044062
6646,2020-10-26,2020-10-26,-0.037153
6646,2020-11-16,2020-11-18,0.015387
…,…,…,…
218532,2023-08-21,2023-08-25,0.010728
218532,2023-08-28,2023-08-31,0.503919
218609,2023-08-14,2023-08-18,0.011428
218609,2023-08-21,2023-08-25,-0.004274


## Joining options and returns data into one dataframe

In [21]:
merged_df = (
    weekly_option_df
    .join(
        weekly_ret_df.with_columns([
            (pl.col("week_start") - pl.duration(days=7)).alias("prev_week_start")
        ]),
        left_on=["secid", "week_start"],
        right_on=["secid", "prev_week_start"],
        how="left"
    )
)

In [22]:
merged_df

secid,week_start,week_end,IV_skew,PERMNO,week_start_right,weekly_return,week_end_right,TICKER,COMNAM,trading_days,sdate,edate
i64,date,date,f32,i64,date,f64,date,str,str,u32,str,str
5594,2021-11-08,2021-11-11,0.076602,52250,2021-11-15,-0.0016,2021-11-19,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31"""
5594,2021-11-15,2021-11-19,0.028996,52250,2021-11-22,-0.060095,2021-11-26,"""GENC""","""GENCOR INDUSTRIES INC""",4,"""1996-01-01""","""2024-12-31"""
5594,2021-11-22,2021-11-24,0.044062,52250,2021-11-29,-0.019608,2021-12-03,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31"""
6646,2020-10-26,2020-10-26,-0.037153,75672,2020-11-02,-0.068293,2020-11-06,"""WWR""","""WESTWATER RESOURCES INC""",5,"""1996-01-01""","""2024-12-31"""
6646,2020-11-16,2020-11-18,0.015387,75672,2020-11-23,-0.119448,2020-11-27,"""WWR""","""WESTWATER RESOURCES INC""",4,"""1996-01-01""","""2024-12-31"""
…,…,…,…,…,…,…,…,…,…,…,…,…
218532,2023-08-21,2023-08-25,0.010728,16592,2023-08-28,0.059203,2023-09-01,"""METC""","""RAMACO RESOURCES INC""",5,"""2023-06-22""","""2024-12-31"""
218532,2023-08-28,2023-08-31,0.503919,16592,2023-09-04,-0.022277,2023-09-08,"""METC""","""RAMACO RESOURCES INC""",4,"""2023-06-22""","""2024-12-31"""
218609,2023-08-14,2023-08-18,0.011428,12373,2023-08-21,0.008978,2023-08-25,"""HHH""","""HOWARD HUGHES HOLDINGS INC""",5,"""2023-08-14""","""2024-12-31"""
218609,2023-08-21,2023-08-25,-0.004274,12373,2023-08-28,0.048416,2023-09-01,"""HHH""","""HOWARD HUGHES HOLDINGS INC""",5,"""2023-08-14""","""2024-12-31"""


In [32]:
# Select only the columns you want to plot BEFORE converting
quick_reg_df = merged_df.select(['IV_skew', 'weekly_return']).to_pandas()
quick_reg_df = quick_reg_df.dropna()

In [37]:
quick_reg_df

Unnamed: 0,IV_skew,weekly_return
0,0.076602,-0.001600
1,0.028996,-0.060095
2,0.044062,-0.019608
3,-0.037153,-0.068293
4,0.015387,-0.119448
...,...,...
514763,0.010728,0.059203
514764,0.503919,-0.022277
514765,0.011428,0.008978
514766,-0.004274,0.048416


In [None]:

sns.jointplot(data=quick_reg_df, x='IV_skew', y='weekly_return', kind='reg')
plt.show()

In [33]:
X = sm.add_constant(quick_reg_df['IV_skew'])
Y = quick_reg_df['weekly_return']


In [34]:
Y

0        -0.001600
1        -0.060095
2        -0.019608
3        -0.068293
4        -0.119448
            ...   
514763    0.059203
514764   -0.022277
514765    0.008978
514766    0.048416
514767   -0.037069
Name: weekly_return, Length: 514600, dtype: float64

In [35]:
model = sm.OLS(endog=Y, exog=X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:          weekly_return   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     24.02
Date:                Fri, 24 Oct 2025   Prob (F-statistic):           9.56e-07
Time:                        14:49:27   Log-Likelihood:             5.6080e+05
No. Observations:              514600   AIC:                        -1.122e+06
Df Residuals:                  514598   BIC:                        -1.122e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0020      0.000     17.914      0.0

In [36]:
r_squared = model.rsquared

print(f"R_squared = {r_squared}")

R_squared = 4.666719953627663e-05


# Now building up other factors for our regression

In [56]:
ff3_factors = pd.read_csv('raw_data/F-F_Research_Data_Factors_weekly.csv')
ff3_factors 

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF
0,19260702,1.58,-0.62,-0.86,0.06
1,19260710,0.37,-0.90,0.31,0.06
2,19260717,0.98,0.59,-1.44,0.06
3,19260724,-2.03,0.02,-0.17,0.06
4,19260731,3.06,-1.89,-0.85,0.06
...,...,...,...,...,...
5169,20250801,-2.52,-2.15,-2.02,0.09
5170,20250808,2.38,0.44,-0.80,0.09
5171,20250815,1.02,2.12,1.08,0.09
5172,20250822,0.34,1.84,2.88,0.09


In [57]:
# Divide only the factor columns by 100, not the Date column
ff3_factors[['Mkt-RF', 'SMB', 'HML', 'RF']] = ff3_factors[['Mkt-RF', 'SMB', 'HML', 'RF']] / 100
ff3_factors

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF
0,19260702,0.0158,-0.0062,-0.0086,0.0006
1,19260710,0.0037,-0.0090,0.0031,0.0006
2,19260717,0.0098,0.0059,-0.0144,0.0006
3,19260724,-0.0203,0.0002,-0.0017,0.0006
4,19260731,0.0306,-0.0189,-0.0085,0.0006
...,...,...,...,...,...
5169,20250801,-0.0252,-0.0215,-0.0202,0.0009
5170,20250808,0.0238,0.0044,-0.0080,0.0009
5171,20250815,0.0102,0.0212,0.0108,0.0009
5172,20250822,0.0034,0.0184,0.0288,0.0009


In [58]:
# Convert to Polars
ff3_factors_pl = pl.from_pandas(ff3_factors)

# Parse the Date column properly
ff3_factors_pl = (
    ff3_factors_pl
    .with_columns([
        # Convert Date from YYYYMMDD integer to proper date, then truncate to Monday
        pl.col("Date").cast(pl.Int64).cast(pl.Utf8).str.to_date("%Y%m%d")
          .dt.truncate("1w").alias("week_start")
    ])
    .drop("Date")  # drop original Date column after creating week_start
)

print("FF3 factors columns:", ff3_factors_pl.columns)
ff3_factors_pl.head()

FF3 factors columns: ['Mkt-RF', 'SMB', 'HML', 'RF', 'week_start']


Mkt-RF,SMB,HML,RF,week_start
f64,f64,f64,f64,date
0.0158,-0.0062,-0.0086,0.0006,1926-06-28
0.0037,-0.009,0.0031,0.0006,1926-07-05
0.0098,0.0059,-0.0144,0.0006,1926-07-12
-0.0203,0.0002,-0.0017,0.0006,1926-07-19
0.0306,-0.0189,-0.0085,0.0006,1926-07-26


In [59]:
# Check what columns we have in ff3_factors
print("Columns in ff3_factors:")
print(ff3_factors.columns)
print("\nFirst few rows:")
ff3_factors.head()


Columns in ff3_factors:
Index(['Date', 'Mkt-RF', 'SMB', 'HML', 'RF'], dtype='object')

First few rows:


Unnamed: 0,Date,Mkt-RF,SMB,HML,RF
0,19260702,0.0158,-0.0062,-0.0086,0.0006
1,19260710,0.0037,-0.009,0.0031,0.0006
2,19260717,0.0098,0.0059,-0.0144,0.0006
3,19260724,-0.0203,0.0002,-0.0017,0.0006
4,19260731,0.0306,-0.0189,-0.0085,0.0006


In [60]:
# Check what columns we have in merged_df to find the returns week column
print("Columns in merged_df:")
print(merged_df.columns)
merged_df.head()


Columns in merged_df:
['secid', 'week_start', 'week_end', 'IV_skew', 'PERMNO', 'week_start_right', 'weekly_return', 'week_end_right', 'TICKER', 'COMNAM', 'trading_days', 'sdate', 'edate']


secid,week_start,week_end,IV_skew,PERMNO,week_start_right,weekly_return,week_end_right,TICKER,COMNAM,trading_days,sdate,edate
i64,date,date,f32,i64,date,f64,date,str,str,u32,str,str
5594,2021-11-08,2021-11-11,0.076602,52250,2021-11-15,-0.0016,2021-11-19,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31"""
5594,2021-11-15,2021-11-19,0.028996,52250,2021-11-22,-0.060095,2021-11-26,"""GENC""","""GENCOR INDUSTRIES INC""",4,"""1996-01-01""","""2024-12-31"""
5594,2021-11-22,2021-11-24,0.044062,52250,2021-11-29,-0.019608,2021-12-03,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31"""
6646,2020-10-26,2020-10-26,-0.037153,75672,2020-11-02,-0.068293,2020-11-06,"""WWR""","""WESTWATER RESOURCES INC""",5,"""1996-01-01""","""2024-12-31"""
6646,2020-11-16,2020-11-18,0.015387,75672,2020-11-23,-0.119448,2020-11-27,"""WWR""","""WESTWATER RESOURCES INC""",4,"""1996-01-01""","""2024-12-31"""


In [61]:
# FF3 factors are now ready to join with merged_df

In [62]:
# Now join the FF3 factors (using the corrected ff3_factors_pl)
merged_with_ff = (
    merged_df
    .join(
        ff3_factors_pl,
        left_on="week_start_right",  # This is the returns week from weekly_ret_df  
        right_on="week_start",
        how="left",
        suffix="_ff"  # Add suffix to avoid column conflicts
    )
)

print("Successfully joined! Columns in merged_with_ff:")
print(merged_with_ff.columns)
print(f"\nShape: {merged_with_ff.shape}")
merged_with_ff.head()


Successfully joined! Columns in merged_with_ff:
['secid', 'week_start', 'week_end', 'IV_skew', 'PERMNO', 'week_start_right', 'weekly_return', 'week_end_right', 'TICKER', 'COMNAM', 'trading_days', 'sdate', 'edate', 'Mkt-RF', 'SMB', 'HML', 'RF']

Shape: (514768, 17)


secid,week_start,week_end,IV_skew,PERMNO,week_start_right,weekly_return,week_end_right,TICKER,COMNAM,trading_days,sdate,edate,Mkt-RF,SMB,HML,RF
i64,date,date,f32,i64,date,f64,date,str,str,u32,str,str,f64,f64,f64,f64
5594,2021-11-08,2021-11-11,0.076602,52250,2021-11-15,-0.0016,2021-11-19,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31""",-0.0015,-0.0181,-0.016,0.0
5594,2021-11-15,2021-11-19,0.028996,52250,2021-11-22,-0.060095,2021-11-26,"""GENC""","""GENCOR INDUSTRIES INC""",4,"""1996-01-01""","""2024-12-31""",-0.0251,-0.0224,0.0266,0.0
5594,2021-11-22,2021-11-24,0.044062,52250,2021-11-29,-0.019608,2021-12-03,"""GENC""","""GENCOR INDUSTRIES INC""",5,"""1996-01-01""","""2024-12-31""",-0.0213,-0.0162,0.013,0.0
6646,2020-10-26,2020-10-26,-0.037153,75672,2020-11-02,-0.068293,2020-11-06,"""WWR""","""WESTWATER RESOURCES INC""",5,"""1996-01-01""","""2024-12-31""",0.0769,0.0008,-0.0494,0.0
6646,2020-11-16,2020-11-18,0.015387,75672,2020-11-23,-0.119448,2020-11-27,"""WWR""","""WESTWATER RESOURCES INC""",4,"""1996-01-01""","""2024-12-31""",0.0285,0.0128,0.0131,0.0


In [63]:
# Verify the data looks correct - show key columns
merged_with_ff.select([
    'secid', 'week_start', 'week_start_right', 'IV_skew', 'weekly_return',  
    'Mkt-RF', 'SMB', 'HML', 'RF'
]).head(20)


secid,week_start,week_start_right,IV_skew,weekly_return,Mkt-RF,SMB,HML,RF
i64,date,date,f32,f64,f64,f64,f64,f64
5594,2021-11-08,2021-11-15,0.076602,-0.0016,-0.0015,-0.0181,-0.016,0.0
5594,2021-11-15,2021-11-22,0.028996,-0.060095,-0.0251,-0.0224,0.0266,0.0
5594,2021-11-22,2021-11-29,0.044062,-0.019608,-0.0213,-0.0162,0.013,0.0
6646,2020-10-26,2020-11-02,-0.037153,-0.068293,0.0769,0.0008,-0.0494,0.0
6646,2020-11-16,2020-11-23,0.015387,-0.119448,0.0285,0.0128,0.0131,0.0
…,…,…,…,…,…,…,…,…
6646,2021-03-01,2021-03-08,-0.001298,0.309623,0.0332,0.0462,0.0163,0.0
6646,2021-03-08,2021-03-15,0.003886,-0.086261,-0.0103,-0.0136,-0.0038,0.0
6646,2021-03-22,2021-03-29,-0.01896,0.055663,0.0138,0.0048,-0.0162,0.0
6646,2021-03-29,2021-04-05,0.050432,-0.049092,0.0246,-0.0257,-0.0131,0.0


## 5. Adding Fama-French 3 Factors

Joining the Fama-French 3 factors (Mkt-RF, SMB, HML) and the risk-free rate (RF) to our merged dataframe. The FF3 factors are matched to the **returns week** (one week after the options observation), which is appropriate for risk-adjusting the forward returns.


In [64]:
ffm_iv_reg = merged_with_ff.to_pandas()

## 6. Portfolio Sorting Analysis: Testing for Significant Alpha

We now perform a quintile portfolio sorting analysis to test whether IV_skew has predictive power for future returns. Each week, we:
1. Sort stocks into 5 quintiles based on IV_skew
2. Calculate equal-weighted returns for each quintile
3. Form a long-short portfolio (Q5 - Q1)
4. Regress portfolio returns on FF3 factors to test for significant alpha


In [None]:
# Prepare data for portfolio sorting
# Remove rows with missing IV_skew, returns, or FF3 factors
portfolio_data = (
    merged_with_ff
    .filter(
        pl.col("IV_skew").is_not_null() &
        pl.col("weekly_return").is_not_null() &
        pl.col("Mkt-RF").is_not_null() &
        pl.col("SMB").is_not_null() &
        pl.col("HML").is_not_null() &
        pl.col("RF").is_not_null()
    )
)

print(f"Portfolio data shape: {portfolio_data.shape}")
print(f"Date range: {portfolio_data['week_start'].min()} to {portfolio_data['week_start'].max()}")
print(f"Number of unique stocks: {portfolio_data['secid'].n_unique()}")
print(f"Number of unique weeks: {portfolio_data['week_start'].n_unique()}")


In [None]:
# Sort stocks into quintiles based on IV_skew each week
# Quintile 1 = lowest IV_skew, Quintile 5 = highest IV_skew
portfolio_data = (
    portfolio_data
    .with_columns([
        # Create quintile ranks within each week
        pl.col("IV_skew").qcut(5, labels=["Q1", "Q2", "Q3", "Q4", "Q5"])
        .over("week_start")
        .alias("IV_skew_quintile"),
        # Also create excess returns
        (pl.col("weekly_return") - pl.col("RF")).alias("excess_return")
    ])
)

# Check the distribution of stocks across quintiles
print("Stocks per quintile (sample):")
print(portfolio_data.group_by("IV_skew_quintile").agg(pl.len().alias("count")).sort("IV_skew_quintile"))
portfolio_data.head(10)


In [None]:
# Calculate equal-weighted portfolio returns for each quintile in each week
quintile_returns = (
    portfolio_data
    .group_by(["week_start", "IV_skew_quintile"])
    .agg([
        pl.mean("excess_return").alias("portfolio_excess_return"),
        pl.mean("weekly_return").alias("portfolio_return"),
        pl.len().alias("n_stocks"),
        # Also get the FF3 factors (should be same for all stocks in a week)
        pl.first("Mkt-RF").alias("Mkt-RF"),
        pl.first("SMB").alias("SMB"),
        pl.first("HML").alias("HML"),
        pl.first("RF").alias("RF")
    ])
    .sort(["week_start", "IV_skew_quintile"])
)

print(f"Quintile returns shape: {quintile_returns.shape}")
print(f"\nSample of quintile returns:")
quintile_returns.head(15)


In [None]:
# Create long-short portfolio: Q5 (high IV_skew) - Q1 (low IV_skew)
q5_returns = quintile_returns.filter(pl.col("IV_skew_quintile") == "Q5").select(["week_start", "portfolio_excess_return"]).rename({"portfolio_excess_return": "Q5_return"})
q1_returns = quintile_returns.filter(pl.col("IV_skew_quintile") == "Q1").select(["week_start", "portfolio_excess_return"]).rename({"portfolio_excess_return": "Q1_return"})

long_short = (
    q5_returns
    .join(q1_returns, on="week_start", how="inner")
    .with_columns([
        (pl.col("Q5_return") - pl.col("Q1_return")).alias("long_short_return")
    ])
)

# Join with FF3 factors
long_short = (
    long_short
    .join(
        quintile_returns.filter(pl.col("IV_skew_quintile") == "Q5").select(["week_start", "Mkt-RF", "SMB", "HML"]),
        on="week_start",
        how="left"
    )
)

print("Long-Short Portfolio Summary Statistics:")
print(f"Mean weekly return: {long_short['long_short_return'].mean():.4f}")
print(f"Std dev: {long_short['long_short_return'].std():.4f}")
print(f"Sharpe ratio (annualized): {(long_short['long_short_return'].mean() / long_short['long_short_return'].std()) * np.sqrt(52):.4f}")
print(f"Number of weeks: {len(long_short)}")

long_short.head(10)


In [None]:
# Test for significant alpha: Regress long-short returns on FF3 factors
# Alpha = intercept from: R_long_short = alpha + beta_mkt * Mkt-RF + beta_smb * SMB + beta_hml * HML + epsilon

# Convert to pandas for statsmodels
long_short_pd = long_short.to_pandas().dropna()

# Prepare regression variables
Y_ls = long_short_pd['long_short_return']
X_ls = long_short_pd[['Mkt-RF', 'SMB', 'HML']]
X_ls = sm.add_constant(X_ls)

# Run regression
model_ls = sm.OLS(Y_ls, X_ls).fit()

print("=" * 80)
print("LONG-SHORT PORTFOLIO (Q5 - Q1): ALPHA TEST")
print("=" * 80)
print(model_ls.summary())
print("\n" + "=" * 80)
print("KEY RESULTS:")
print("=" * 80)
print(f"Alpha (weekly): {model_ls.params['const']:.6f}")
print(f"Alpha (annualized): {model_ls.params['const'] * 52:.4f} ({model_ls.params['const'] * 52 * 100:.2f}%)")
print(f"t-statistic: {model_ls.tvalues['const']:.4f}")
print(f"p-value: {model_ls.pvalues['const']:.6f}")
print(f"\n{'*** SIGNIFICANT ALPHA ***' if model_ls.pvalues['const'] < 0.05 else 'Alpha not significant at 5% level'}")


In [None]:
# Analyze each quintile separately to see the pattern
# Calculate average returns and alphas for each quintile

quintile_results = []

for q in ["Q1", "Q2", "Q3", "Q4", "Q5"]:
    q_data = quintile_returns.filter(pl.col("IV_skew_quintile") == q).to_pandas().dropna()
    
    # Calculate mean return
    mean_return = q_data['portfolio_excess_return'].mean()
    
    # Run FF3 regression to get alpha
    Y_q = q_data['portfolio_excess_return']
    X_q = q_data[['Mkt-RF', 'SMB', 'HML']]
    X_q = sm.add_constant(X_q)
    
    model_q = sm.OLS(Y_q, X_q).fit()
    alpha = model_q.params['const']
    alpha_tstat = model_q.tvalues['const']
    alpha_pval = model_q.pvalues['const']
    
    quintile_results.append({
        'Quintile': q,
        'Mean_Return (weekly)': mean_return,
        'Mean_Return (annualized %)': mean_return * 52 * 100,
        'Alpha (weekly)': alpha,
        'Alpha (annualized %)': alpha * 52 * 100,
        't-stat': alpha_tstat,
        'p-value': alpha_pval,
        'Significant': '***' if alpha_pval < 0.01 else '**' if alpha_pval < 0.05 else '*' if alpha_pval < 0.10 else ''
    })

quintile_summary = pd.DataFrame(quintile_results)
print("\n" + "=" * 100)
print("QUINTILE PORTFOLIO ANALYSIS")
print("=" * 100)
print(quintile_summary.to_string(index=False))
print("\nSignificance levels: *** p<0.01, ** p<0.05, * p<0.10")


In [None]:
# Visualize the alpha pattern across quintiles
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: Average returns by quintile
colors = ['red' if x < 0 else 'green' for x in quintile_summary['Alpha (annualized %)']]
ax1.bar(quintile_summary['Quintile'], quintile_summary['Mean_Return (annualized %)'], color=colors, alpha=0.6, edgecolor='black')
ax1.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax1.set_xlabel('IV Skew Quintile', fontsize=12)
ax1.set_ylabel('Mean Excess Return (% per year)', fontsize=12)
ax1.set_title('Average Returns by IV Skew Quintile', fontsize=14, fontweight='bold')
ax1.grid(True, alpha=0.3)

# Plot 2: Alphas by quintile
colors_alpha = ['red' if x < 0 else 'green' for x in quintile_summary['Alpha (annualized %)']]
bars = ax2.bar(quintile_summary['Quintile'], quintile_summary['Alpha (annualized %)'], color=colors_alpha, alpha=0.6, edgecolor='black')

# Add significance stars on bars
for i, (idx, row) in enumerate(quintile_summary.iterrows()):
    if row['Significant']:
        ax2.text(i, row['Alpha (annualized %)'] + 0.5, row['Significant'], 
                ha='center', va='bottom', fontsize=14, fontweight='bold')

ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax2.set_xlabel('IV Skew Quintile', fontsize=12)
ax2.set_ylabel('Alpha (% per year)', fontsize=12)
ax2.set_title('FF3 Alpha by IV Skew Quintile', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("- Q1 = Lowest IV Skew (puts cheaper relative to calls)")
print("- Q5 = Highest IV Skew (puts more expensive relative to calls)")


In [None]:
# Plot cumulative returns of long-short portfolio over time
long_short_pd = long_short_pd.sort_values('week_start')
long_short_pd['cumulative_return'] = (1 + long_short_pd['long_short_return']).cumprod() - 1

plt.figure(figsize=(14, 6))
plt.plot(long_short_pd['week_start'], long_short_pd['cumulative_return'] * 100, linewidth=2, color='darkblue')
plt.axhline(y=0, color='red', linestyle='--', linewidth=1, alpha=0.7)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Cumulative Return (%)', fontsize=12)
plt.title('Cumulative Returns: Long-Short Portfolio (Q5 - Q1) Based on IV Skew', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print(f"\nTotal cumulative return: {long_short_pd['cumulative_return'].iloc[-1] * 100:.2f}%")
print(f"Number of positive weeks: {(long_short_pd['long_short_return'] > 0).sum()}")
print(f"Number of negative weeks: {(long_short_pd['long_short_return'] < 0).sum()}")
print(f"Win rate: {(long_short_pd['long_short_return'] > 0).sum() / len(long_short_pd) * 100:.2f}%")


### Interpretation Guide

**What we're testing:**
- Does IV skew (call IV - put IV) predict future stock returns?
- If high IV skew predicts higher/lower returns, is this effect significant after controlling for FF3 factors?

**How to interpret the results:**

1. **Long-Short Alpha Test:**
   - **Alpha**: The risk-adjusted return of the long-short portfolio (Q5 - Q1)
   - **p-value < 0.05**: Statistically significant at 5% level (strong evidence)
   - **p-value < 0.01**: Highly significant (very strong evidence)
   - **Positive alpha**: High IV skew stocks outperform low IV skew stocks
   - **Negative alpha**: Low IV skew stocks outperform high IV skew stocks

2. **Quintile Analysis:**
   - Look for **monotonic pattern**: Returns should increase (or decrease) consistently from Q1 to Q5
   - **Significant alphas**: Individual quintiles with significant risk-adjusted returns
   - **Economic magnitude**: Annualized alpha > 5% is economically meaningful for trading

3. **Economic Intuition:**
   - **Negative relationship** (Q1 > Q5): High skew (expensive puts) signals overpricing or investor fear → lower future returns
   - **Positive relationship** (Q5 > Q1): High skew signals informed trading or mispricing → higher future returns
   
**What makes a strong result:**
- ✅ Significant long-short alpha (p < 0.05)
- ✅ Monotonic pattern across quintiles
- ✅ Economically large effect (annualized alpha > 3-5%)
- ✅ Stable over time (cumulative returns chart trends consistently)


In [68]:
ffm_iv_reg['excess_weekly_return'] = ffm_iv_reg['weekly_return'] - ffm_iv_reg['RF']
ffm_iv_reg

Unnamed: 0,secid,week_start,week_end,IV_skew,PERMNO,week_start_right,weekly_return,week_end_right,TICKER,COMNAM,trading_days,sdate,edate,Mkt-RF,SMB,HML,RF,excess_weekly_return
0,5594,2021-11-08,2021-11-11,0.076602,52250.0,2021-11-15,-0.001600,2021-11-19,GENC,GENCOR INDUSTRIES INC,5.0,1996-01-01,2024-12-31,-0.0015,-0.0181,-0.0160,0.0000,-0.001600
1,5594,2021-11-15,2021-11-19,0.028996,52250.0,2021-11-22,-0.060095,2021-11-26,GENC,GENCOR INDUSTRIES INC,4.0,1996-01-01,2024-12-31,-0.0251,-0.0224,0.0266,0.0000,-0.060095
2,5594,2021-11-22,2021-11-24,0.044062,52250.0,2021-11-29,-0.019608,2021-12-03,GENC,GENCOR INDUSTRIES INC,5.0,1996-01-01,2024-12-31,-0.0213,-0.0162,0.0130,0.0000,-0.019608
3,6646,2020-10-26,2020-10-26,-0.037153,75672.0,2020-11-02,-0.068293,2020-11-06,WWR,WESTWATER RESOURCES INC,5.0,1996-01-01,2024-12-31,0.0769,0.0008,-0.0494,0.0000,-0.068293
4,6646,2020-11-16,2020-11-18,0.015387,75672.0,2020-11-23,-0.119448,2020-11-27,WWR,WESTWATER RESOURCES INC,4.0,1996-01-01,2024-12-31,0.0285,0.0128,0.0131,0.0000,-0.119448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514763,218532,2023-08-21,2023-08-25,0.010728,16592.0,2023-08-28,0.059203,2023-09-01,METC,RAMACO RESOURCES INC,5.0,2023-06-22,2024-12-31,0.0273,0.0069,0.0064,0.0011,0.058103
514764,218532,2023-08-28,2023-08-31,0.503919,16592.0,2023-09-04,-0.022277,2023-09-08,METC,RAMACO RESOURCES INC,4.0,2023-06-22,2024-12-31,-0.0155,-0.0256,-0.0062,0.0011,-0.023377
514765,218609,2023-08-14,2023-08-18,0.011428,12373.0,2023-08-21,0.008978,2023-08-25,HHH,HOWARD HUGHES HOLDINGS INC,5.0,2023-08-14,2024-12-31,0.0059,-0.0081,-0.0170,0.0011,0.007878
514766,218609,2023-08-21,2023-08-25,-0.004274,12373.0,2023-08-28,0.048416,2023-09-01,HHH,HOWARD HUGHES HOLDINGS INC,5.0,2023-08-14,2024-12-31,0.0273,0.0069,0.0064,0.0011,0.047316


In [79]:
AAPL = ffm_iv_reg[ffm_iv_reg['TICKER']=='AAPL']
AAPL = AAPL[['Mkt-RF','SMB','HML','IV_skew','excess_weekly_return']]

AAPL 

Unnamed: 0,Mkt-RF,SMB,HML,IV_skew,excess_weekly_return
19046,0.0289,0.0219,-0.0155,-0.023604,0.026682
19047,0.0284,-0.0085,0.0089,-0.026902,0.029247
19048,-0.0027,0.0010,-0.0017,-0.032017,0.005494
19049,0.0156,-0.0033,-0.0080,-0.031406,0.055027
19050,0.0011,0.0031,-0.0131,-0.033564,0.027245
...,...,...,...,...,...
19285,-0.0060,-0.0137,0.0046,-0.016695,-0.022860
19286,-0.0230,-0.0072,-0.0164,-0.028722,-0.019661
19287,0.0059,-0.0081,-0.0170,-0.035735,0.022512
19288,0.0273,0.0069,0.0064,-0.034800,0.059645


In [80]:
X = AAPL[['Mkt-RF','SMB','HML','IV_skew']]
Y = AAPL[['excess_weekly_return']]

X = sm.add_constant(X)

In [81]:
X

Unnamed: 0,const,Mkt-RF,SMB,HML,IV_skew
19046,1.0,0.0289,0.0219,-0.0155,-0.023604
19047,1.0,0.0284,-0.0085,0.0089,-0.026902
19048,1.0,-0.0027,0.0010,-0.0017,-0.032017
19049,1.0,0.0156,-0.0033,-0.0080,-0.031406
19050,1.0,0.0011,0.0031,-0.0131,-0.033564
...,...,...,...,...,...
19285,1.0,-0.0060,-0.0137,0.0046,-0.016695
19286,1.0,-0.0230,-0.0072,-0.0164,-0.028722
19287,1.0,0.0059,-0.0081,-0.0170,-0.035735
19288,1.0,0.0273,0.0069,0.0064,-0.034800


In [83]:
Y

Unnamed: 0,excess_weekly_return
19046,0.026682
19047,0.029247
19048,0.005494
19049,0.055027
19050,0.027245
...,...
19285,-0.022860
19286,-0.019661
19287,0.022512
19288,0.059645


In [84]:
model = sm.OLS(endog=Y, exog=X).fit()

model.summary()

0,1,2,3
Dep. Variable:,excess_weekly_return,R-squared:,0.663
Model:,OLS,Adj. R-squared:,0.657
Method:,Least Squares,F-statistic:,117.4
Date:,"Fri, 24 Oct 2025",Prob (F-statistic):,3.1e-55
Time:,16:18:58,Log-Likelihood:,569.95
No. Observations:,244,AIC:,-1130.0
Df Residuals:,239,BIC:,-1112.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0011,0.004,0.283,0.778,-0.006,0.009
Mkt-RF,1.1426,0.054,20.983,0.000,1.035,1.250
SMB,-0.4974,0.104,-4.779,0.000,-0.702,-0.292
HML,-0.3310,0.058,-5.737,0.000,-0.445,-0.217
IV_skew,-0.0939,0.121,-0.774,0.439,-0.333,0.145

0,1,2,3
Omnibus:,21.992,Durbin-Watson:,1.949
Prob(Omnibus):,0.0,Jarque-Bera (JB):,45.009
Skew:,0.45,Prob(JB):,1.68e-10
Kurtosis:,4.901,Cond. No.,81.4


In [85]:
CWST = ffm_iv_reg[ffm_iv_reg['TICKER']=='CWST']
CWST = CWST[['Mkt-RF','SMB','HML','IV_skew','excess_weekly_return']]

CWST

Unnamed: 0,Mkt-RF,SMB,HML,IV_skew,excess_weekly_return
43649,0.0284,-0.0085,0.0089,-0.004588,0.048426
43650,-0.0027,0.0010,-0.0017,0.015550,-0.042101
43651,0.0156,-0.0033,-0.0080,0.010770,-0.011352
43652,0.0011,0.0031,-0.0131,0.004512,0.048702
43653,0.0274,0.0156,0.0002,0.009976,0.109448
...,...,...,...,...,...
43859,-0.0060,-0.0137,0.0046,-0.004546,-0.028184
43860,-0.0230,-0.0072,-0.0164,-0.021830,0.006112
43861,0.0059,-0.0081,-0.0170,-0.012386,0.006816
43862,0.0273,0.0069,0.0064,-0.030240,-0.008329


In [86]:
X = CWST[['Mkt-RF','SMB','HML','IV_skew']]
Y = CWST[['excess_weekly_return']]

X = sm.add_constant(X)

In [87]:
model = sm.OLS(endog=Y, exog=X).fit()

model.summary()

0,1,2,3
Dep. Variable:,excess_weekly_return,R-squared:,0.233
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,15.93
Date:,"Fri, 24 Oct 2025",Prob (F-statistic):,2.1e-11
Time:,16:21:41,Log-Likelihood:,410.82
No. Observations:,215,AIC:,-811.6
Df Residuals:,210,BIC:,-794.8
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0032,0.003,1.257,0.210,-0.002,0.008
Mkt-RF,0.6043,0.088,6.841,0.000,0.430,0.778
SMB,0.2617,0.169,1.549,0.123,-0.071,0.595
HML,0.0656,0.095,0.692,0.490,-0.121,0.253
IV_skew,-0.0425,0.040,-1.071,0.285,-0.121,0.036

0,1,2,3
Omnibus:,7.586,Durbin-Watson:,2.236
Prob(Omnibus):,0.023,Jarque-Bera (JB):,9.978
Skew:,0.244,Prob(JB):,0.00681
Kurtosis:,3.936,Cond. No.,69.3
