# IV Skew Trading Strategy: Short Low-Skew Stocks

## Hypothesis
IV skew is measured here as near-the-money call IV minus put IV. When it is extremely negative (put IV >> call IV), stocks tend to underperform over the following week.

## Strategy
1. Each week: rank stocks by IV_skew
2. Short bottom 20% (most negative skew)
3. Hold for the following week, then rebalance
4. Equal-weighted portfolio

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
# Suppress every warning category so the notebook output stays compact.
# NOTE(review): this also hides deprecation warnings raised by the libraries.
warnings.simplefilter('ignore')

# Plot defaults shared by all figures in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (14, 6)})
print('✓ Setup complete')

✓ Setup complete


## 1. Load and Prepare Data

We'll load directly from raw data files and construct the dataset we need.

In [2]:
import polars as pl

print("Loading options data and computing IV skew...")


def compute_weekly_iv_skew(
    csv_path: str,
    dte_limit: int = 60,
    call_range: tuple = (0.95, 1.05),
    put_range: tuple = (0.95, 1.05),
):
    """Build a weekly call-minus-put implied-volatility skew per security.

    The options CSV is scanned lazily and reduced in three steps:

    1. Keep quotes with ``tte < dte_limit``, a non-null moneyness and a
       finite implied volatility.
    2. Per (secid, date): average the IV of calls whose moneyness lies in
       ``call_range`` and of puts whose moneyness lies in ``put_range``
       (both bounds inclusive); the daily skew is call mean minus put mean.
       Days missing either side are dropped.
    3. Per (secid, week): take a weighted average of the daily skews, with
       the weight derived from the day of the week so later days count more.

    Args:
        csv_path: Path to the options CSV. It must provide the columns
            secid, date, tte, impl_volatility, cp_flag and moneyness.
        dte_limit: Exclusive upper bound on ``tte``.
        call_range: (low, high) moneyness band for calls.
        put_range: (low, high) moneyness band for puts.

    Returns:
        A collected Polars DataFrame with columns secid, week_start,
        week_end (latest date contributing to the week) and IV_skew,
        sorted by secid and week_start.
    """
    wanted_columns = ["secid", "date", "tte", "impl_volatility", "cp_flag", "moneyness"]
    column_types = {
        "secid": pl.Int64,
        "date": pl.Date,
        "tte": pl.Int32,
        "impl_volatility": pl.Float32,
        "cp_flag": pl.Categorical,
        "moneyness": pl.Float32,
    }

    iv = pl.col("impl_volatility")
    moneyness = pl.col("moneyness")
    option_type = pl.col("cp_flag")

    # Row-level predicates, named so the pipeline below reads top to bottom.
    usable_quote = (
        (pl.col("tte") < dte_limit)
        & moneyness.is_not_null()
        & iv.is_finite()
    )
    call_in_band = (
        (option_type == "C")
        & (moneyness >= call_range[0])
        & (moneyness <= call_range[1])
    )
    put_in_band = (
        (option_type == "P")
        & (moneyness >= put_range[0])
        & (moneyness <= put_range[1])
    )

    quotes = (
        pl.scan_csv(csv_path, schema_overrides=column_types)
        .select(wanted_columns)
        .filter(usable_quote)
        .with_columns([
            pl.col("date").dt.truncate("1w").alias("week_start"),
            # Quotes outside the band become null so they drop out of the means.
            pl.when(call_in_band).then(iv).otherwise(None).alias("call_iv"),
            pl.when(put_in_band).then(iv).otherwise(None).alias("put_iv"),
        ])
    )

    per_day = (
        quotes
        .group_by(["secid", "date", "week_start"])
        .agg([
            pl.col("call_iv").mean().alias("call_iv_d"),
            pl.col("put_iv").mean().alias("put_iv_d"),
        ])
        # A daily skew needs both a call and a put observation.
        .filter(pl.col("call_iv_d").is_not_null() & pl.col("put_iv_d").is_not_null())
        .with_columns([
            (pl.col("call_iv_d") - pl.col("put_iv_d")).alias("skew_d"),
            # NOTE(review): Polars dt.weekday() is ISO-numbered (Mon=1..Sun=7),
            # so the weights are Mon=2..Fri=6 — confirm 1..5 was not intended.
            (pl.col("date").dt.weekday() + 1).cast(pl.Float32).alias("w"),
        ])
    )

    weighted_skew = (pl.col("skew_d") * pl.col("w")).sum() / pl.col("w").sum()
    per_week = (
        per_day
        .group_by(["secid", "week_start"])
        .agg([
            weighted_skew.alias("IV_skew"),
            pl.col("date").max().alias("week_end"),
        ])
        .sort(["secid", "week_start"])
        .select(["secid", "week_start", "week_end", "IV_skew"])
    )

    return per_week.collect(streaming=True)

# Build the weekly IV-skew panel from the raw options file
options_csv_path = "./raw_data/options_data.csv"
weekly_option_df = compute_weekly_iv_skew(options_csv_path)
print(f"✓ Computed IV skew: {weekly_option_df.height:,} observations")

Loading options data and computing IV skew...


OSError: Operation canceled (os error 89)

This error occurred with the following context stack:
	[1] 'csv scan'
	[2] 'select'
	[3] 'filter'
	[4] 'with_columns'
	[5] 'group_by'
	[6] 'filter'
	[7] 'with_columns'
	[8] 'with_columns'
	[9] 'group_by'
	[10] 'sort'
	[11] 'select'


In [None]:
print("Loading equity returns...")

# Daily returns. RET is read as text and then cast non-strictly, so any value
# that is not a number becomes null instead of aborting the load.
ret_df = pl.read_csv(
    "./raw_data/all_equities.csv",
    schema_overrides={"RET": pl.Utf8},
).with_columns(pl.col("RET").cast(pl.Float64, strict=False))

# Compound the daily returns into one return per (PERMNO, calendar week).
parsed_date = pl.col("date").str.to_date()
daily_return = pl.col("RET")

weekly_ret_df = (
    ret_df
    .with_columns([
        parsed_date.alias("date"),
        parsed_date.dt.truncate("1w").alias("week_start"),
    ])
    .filter(daily_return.is_not_null() & daily_return.is_finite())
    .group_by(["PERMNO", "week_start"])
    .agg([
        ((daily_return + 1).product() - 1).alias("weekly_return"),
        pl.col("date").max().alias("week_end"),
    ])
    .sort(["PERMNO", "week_start"])
)

print(f"✓ Computed weekly returns: {weekly_ret_df.height:,} observations")

# PERMNO -> secid link table, restricted to links whose end date is after 2019-01-02.
# NOTE(review): the comparison is against a string literal, so it presumably
# relies on 'edate' being ISO-formatted text — confirm against the CSV.
print("Loading security mapping...")
map_df = pl.read_csv("./raw_data/permno_secid_mapping.csv")
filtered_map = map_df.filter(pl.col('edate') > "2019-01-02")

# Attach secid to every weekly return row (inner join on PERMNO).
# NOTE(review): if a PERMNO has several rows in the mapping, its weekly
# returns are duplicated here — verify the mapping is one-to-one.
weekly_ret_df = weekly_ret_df.join(filtered_map, on="PERMNO", how="inner")
print(f"✓ Mapped {weekly_ret_df['secid'].n_unique()} unique securities")


In [None]:
print("Merging datasets...")

# Pair each week's signal with the NEXT week's return (no look-ahead):
# a return row for week t+1 is keyed by prev_week_start = t, which is then
# matched against the IV_skew row whose week_start is t.
next_week_returns = weekly_ret_df.with_columns([
    (pl.col("week_start") - pl.duration(days=7)).alias("prev_week_start")
])

# Keep only stock-weeks that have both a signal and a matched return.
has_signal_and_return = (
    pl.col("IV_skew").is_not_null()
    & pl.col("weekly_return").is_not_null()
)

merged_df = weekly_option_df.join(
    next_week_returns,
    left_on=["secid", "week_start"],
    right_on=["secid", "prev_week_start"],
    how="left",
).filter(has_signal_and_return)

signal_weeks = merged_df['week_start']
print(f"✓ Merged dataset: {merged_df.height:,} observations")
print(f"  Date range: {signal_weeks.min()} to {signal_weeks.max()}")
print(f"  Unique stocks: {merged_df['secid'].n_unique()}")

# Hand over to pandas for the backtest; week_start is the signal week.
df = merged_df.to_pandas()
df['week_start'] = pd.to_datetime(df['week_start'])

print(f"\n✓ Data ready for strategy backtesting")
df.head()


## 2. Understand IV Skew Distribution

In [None]:
# Descriptive statistics of the signal over all stock-weeks
rule = '=' * 80
iv_skew = df['IV_skew']

print(rule)
print('IV SKEW STATISTICS')
print(rule)
print(f'Mean:   {iv_skew.mean():.4f}')
print(f'Median: {iv_skew.median():.4f}')
print(f'Std:    {iv_skew.std():.4f}')
print(f'\nPercentiles:')
for pct in (1, 5, 10, 20, 50, 80, 90, 95, 99):
    print(f'  {pct:2d}th: {iv_skew.quantile(pct / 100):7.4f}')

# Left panel: histogram with the zero line and the pooled 20th percentile;
# right panel: box plot of the same series.
dist_fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 5))

hist_ax.hist(iv_skew, bins=100, edgecolor='black', alpha=0.7, color='steelblue')
hist_ax.axvline(x=0, color='red', linestyle='--', linewidth=2, label='Zero')
hist_ax.axvline(x=iv_skew.quantile(0.20), color='green', linestyle='--', linewidth=2, label='20th pct (short threshold)')
hist_ax.set_xlabel('IV Skew')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('IV Skew Distribution')
hist_ax.legend()
hist_ax.grid(alpha=0.3)

box_ax.boxplot(iv_skew, vert=True)
box_ax.axhline(y=0, color='red', linestyle='--', alpha=0.7)
box_ax.set_ylabel('IV Skew')
box_ax.set_title('IV Skew Box Plot')
box_ax.grid(alpha=0.3)

dist_fig.tight_layout()
plt.show()

## 3. Define Trading Strategy

In [None]:
PERCENTILE_THRESHOLD = 20  # Short bottom 20%


def _weekly_cutoff(week_skews):
    """Return the PERCENTILE_THRESHOLD-th percentile of one week's IV skews."""
    return week_skews.quantile(PERCENTILE_THRESHOLD / 100)


# Cross-sectional cutoff, computed separately for every week and broadcast
# back onto that week's rows.
df['skew_threshold'] = df.groupby('week_start')['IV_skew'].transform(_weekly_cutoff)

# Signal: 1 = short (skew at or below the week's cutoff), 0 = no position
df['signal'] = df['IV_skew'].le(df['skew_threshold']).astype(int)

shorts_per_week = df.groupby("week_start")["signal"].sum()
print(f'Strategy: SHORT stocks with IV_skew <= {PERCENTILE_THRESHOLD}th percentile')
print(f'Average stocks per week: {shorts_per_week.mean():.0f}')
print(f'Total signals: {df["signal"].sum():,}')

## 4. Calculate Strategy Returns

In [None]:
# Equal-weighted basket of the flagged stocks: weekly mean return and head count
shorted = df[df['signal'] == 1]
strategy_returns = shorted.groupby('week_start').agg(
    avg_long_return=('weekly_return', 'mean'),
    n_positions=('secid', 'count'),
)

# Being short the basket earns the negative of its long return
strategy_returns = strategy_returns.assign(
    strategy_return=lambda basket: -basket['avg_long_return']
)

# Benchmark: equal-weighted mean return of every stock in df that week
market_returns = df.groupby('week_start')['weekly_return'].mean().rename('market_return')

perf = strategy_returns.join(market_returns).reset_index()

print('='*80)
print('PERFORMANCE SUMMARY')
print('='*80)
print(f'Number of weeks: {len(perf)}')

# Same three statistics for both series; annualisation assumes 52 weeks per year.
for label, column in [
    ('Strategy (Short Low-Skew)', 'strategy_return'),
    ('Market (Long All)', 'market_return'),
]:
    weekly_mean = perf[column].mean()
    weekly_vol = perf[column].std()
    print(f'\n{label}:')
    print(f'  Mean weekly:     {weekly_mean:.4f} ({weekly_mean * 52 * 100:.2f}% ann.)')
    print(f'  Volatility:      {weekly_vol:.4f} ({weekly_vol * np.sqrt(52):.4f} ann.)')
    print(f'  Sharpe:          {(weekly_mean / weekly_vol) * np.sqrt(52):.4f}')

## 5. Performance Visualization

In [None]:
perf = perf.sort_values('week_start')

# Wealth paths of 1 unit of capital compounded weekly (computed once and
# reused for both the cumulative-return and the drawdown columns).
strategy_wealth = (1 + perf['strategy_return']).cumprod()
market_wealth = (1 + perf['market_return']).cumprod()

perf['strategy_cumret'] = strategy_wealth - 1
perf['market_cumret'] = market_wealth - 1

# Drawdowns, in percent relative to the running high-water mark.
# The high-water mark is floored at the initial capital of 1.0: without the
# floor, a loss in the very first week(s) sets the "peak" below 1.0 and is
# reported as a 0% drawdown, which understates the maximum drawdown.
perf['strategy_wealth'] = strategy_wealth
perf['strategy_peak'] = strategy_wealth.cummax().clip(lower=1.0)
perf['strategy_dd'] = (perf['strategy_wealth'] / perf['strategy_peak'] - 1) * 100

perf['market_wealth'] = market_wealth
perf['market_peak'] = market_wealth.cummax().clip(lower=1.0)
perf['market_dd'] = (perf['market_wealth'] / perf['market_peak'] - 1) * 100

fig, axes = plt.subplots(3, 1, figsize=(14, 12))

# Cumulative returns
axes[0].plot(perf['week_start'], perf['strategy_cumret'] * 100, linewidth=2.5, label='Short Low-Skew Strategy', color='darkgreen')
axes[0].plot(perf['week_start'], perf['market_cumret'] * 100, linewidth=2, label='Market (Long)', color='navy', alpha=0.7)
axes[0].axhline(y=0, color='red', linestyle='--', linewidth=1, alpha=0.5)
axes[0].set_ylabel('Cumulative Return (%)', fontsize=12)
axes[0].set_title('Strategy Performance: Short Negative IV Skew Stocks', fontweight='bold', fontsize=14)
axes[0].legend(loc='best', fontsize=11)
axes[0].grid(True, alpha=0.3)

# Weekly returns
mean_weekly_pct = perf['strategy_return'].mean() * 100
axes[1].hist(perf['strategy_return'] * 100, bins=50, alpha=0.7, color='darkgreen', edgecolor='black')
axes[1].axvline(x=mean_weekly_pct, color='red', linestyle='--', linewidth=2.5, label=f'Mean = {mean_weekly_pct:.2f}%')
axes[1].axvline(x=0, color='black', linestyle='-', linewidth=1)
axes[1].set_xlabel('Weekly Return (%)', fontsize=11)
axes[1].set_ylabel('Frequency', fontsize=11)
axes[1].set_title('Distribution of Weekly Strategy Returns', fontweight='bold', fontsize=12)
axes[1].legend()
axes[1].grid(alpha=0.3)

# Drawdowns
axes[2].fill_between(perf['week_start'], perf['strategy_dd'], 0, color='red', alpha=0.3, label='Strategy')
axes[2].fill_between(perf['week_start'], perf['market_dd'], 0, color='blue', alpha=0.2, label='Market')
axes[2].set_ylabel('Drawdown (%)', fontsize=11)
axes[2].set_xlabel('Date', fontsize=11)
axes[2].set_title('Drawdown Analysis', fontweight='bold', fontsize=12)
axes[2].legend()
axes[2].grid(alpha=0.3)

plt.tight_layout()
plt.show()

print('\n' + '='*80)
print('RISK METRICS')
print('='*80)
print(f'Strategy:')
print(f'  Total return:    {perf["strategy_cumret"].iloc[-1] * 100:.2f}%')
print(f'  Max drawdown:    {perf["strategy_dd"].min():.2f}%')
print(f'  Win rate:        {(perf["strategy_return"] > 0).sum() / len(perf) * 100:.2f}%')
print(f'  Best week:       {perf["strategy_return"].max() * 100:.2f}%')
print(f'  Worst week:      {perf["strategy_return"].min() * 100:.2f}%')
print(f'\nMarket:')
print(f'  Total return:    {perf["market_cumret"].iloc[-1] * 100:.2f}%')
print(f'  Max drawdown:    {perf["market_dd"].min():.2f}%')

## 6. Statistical Significance

In [None]:
# One-sample, one-sided t-test of H0: mean weekly strategy return <= 0
mean_ret = perf['strategy_return'].mean()
std_ret = perf['strategy_return'].std()
n = len(perf)
dof = n - 1
std_err = std_ret / np.sqrt(n)

t_stat = mean_ret / std_err
# Upper-tail probability via the survival function: same quantity as
# 1 - cdf, but without the cancellation error when t_stat is large.
p_val = stats.t.sf(t_stat, dof)

print('='*80)
print('HYPOTHESIS TEST: Is Mean Return > 0?')
print('='*80)
print(f'Sample size:     {n} weeks')
print(f'Mean return:     {mean_ret:.6f} ({mean_ret * 52 * 100:.2f}% annualized)')
print(f't-statistic:     {t_stat:.4f}')
print(f'p-value:         {p_val:.6f}')

if p_val < 0.01:
    print('\n*** HIGHLY SIGNIFICANT (p < 0.01) ***')
elif p_val < 0.05:
    print('\n** SIGNIFICANT (p < 0.05) **')
elif p_val < 0.10:
    print('\n* MARGINALLY SIGNIFICANT (p < 0.10) *')
else:
    print('\n✗ NOT SIGNIFICANT')

# Two-sided 95% interval built from the same Student-t distribution as the
# test above (instead of the normal-approximation constant 1.96).
t_crit = stats.t.ppf(0.975, dof)
ci_lower = mean_ret - t_crit * std_err
ci_upper = mean_ret + t_crit * std_err
print(f'\n95% CI (annualized): [{ci_lower * 52 * 100:.2f}%, {ci_upper * 52 * 100:.2f}%]')

## 7. Pre/Post COVID Analysis

In [None]:
# Split the backtest at the start of March 2020 (by signal week)
covid_date = pd.Timestamp('2020-03-01')
pre = perf[perf['week_start'] < covid_date]
post = perf[perf['week_start'] >= covid_date]
subperiods = [('Pre-COVID', pre), ('Post-COVID', post)]

print('='*80)
print('SUBPERIOD ANALYSIS')
print('='*80)

for name, period in subperiods:
    # A sub-period can be empty if the sample lies entirely on one side of
    # the split date; report that instead of failing on min()/max().
    if period.empty:
        print(f'\n{name}: no observations in this sub-period')
        continue

    period = period.sort_values('week_start')
    m = period['strategy_return'].mean()
    s = period['strategy_return'].std()
    sharpe = (m / s) * np.sqrt(52)

    # Wealth restarts at 1.0 inside each sub-period so that total return and
    # max drawdown describe the same path. (The full-sample drawdown column
    # would measure post-COVID losses against pre-COVID peaks.)
    wealth = (1 + period['strategy_return']).cumprod()
    total = wealth.iloc[-1] - 1
    peak = wealth.cummax().clip(lower=1.0)
    max_dd = ((wealth / peak - 1) * 100).min()

    print(f'\n{name} ({period["week_start"].min().date()} to {period["week_start"].max().date()}):')
    print(f'  Weeks:           {len(period)}')
    print(f'  Mean (ann.):     {m * 52 * 100:.2f}%')
    print(f'  Vol (ann.):      {s * np.sqrt(52):.4f}')
    print(f'  Sharpe:          {sharpe:.4f}')
    print(f'  Total return:    {total * 100:.2f}%')
    print(f'  Max DD:          {max_dd:.2f}%')

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# One cumulative-return panel per sub-period, each restarted at zero
for ax, (name, period) in zip([ax1, ax2], subperiods):
    p = period.sort_values('week_start')
    cumret = (1 + p['strategy_return']).cumprod() - 1
    ax.plot(p['week_start'], cumret * 100, linewidth=2.5, color='darkgreen')
    ax.axhline(y=0, color='red', linestyle='--', linewidth=1, alpha=0.5)
    ax.set_title(name, fontweight='bold', fontsize=12)
    ax.set_ylabel('Cumulative Return (%)')
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Save Results

In [None]:
# Persist the weekly performance table (without the index) next to the notebook
results_path = 'strategy_backtest_results.csv'
perf.to_csv(results_path, index=False)
print('✓ Results saved to strategy_backtest_results.csv')