<a id='setup'></a>
## Setup & Configuration

First, we set up the environment and import necessary libraries.

In [None]:
# Standard library imports
import sys
import os
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Data manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Set up paths
# NOTE(review): assumes the notebook is launched from a direct subdirectory of
# the project root (so that cwd's parent is the root) — confirm against the
# repository layout.
PROJECT_ROOT = Path.cwd().parent

# Make the project's modules importable. Each entry is moved to the front of
# sys.path instead of being blindly inserted, so re-running this cell does not
# accumulate duplicates. The resulting priority is unchanged:
# scripts, then src/python, then the project root.
for import_dir in (
    PROJECT_ROOT,
    PROJECT_ROOT / 'src' / 'python',
    PROJECT_ROOT / 'scripts',
):
    import_dir = str(import_dir)
    sys.path[:] = [entry for entry in sys.path if entry != import_dir]
    sys.path.insert(0, import_dir)

# Directory paths
DATA_DIR = PROJECT_ROOT / 'data'
RAW_DIR = DATA_DIR / 'raw'
PROCESSED_DIR = DATA_DIR / 'processed'
CACHE_DIR = DATA_DIR / 'cache'
OUTPUT_DIR = PROJECT_ROOT / 'outputs'
TABLES_DIR = OUTPUT_DIR / 'tables'
FIGURES_DIR = OUTPUT_DIR / 'figures'
REPORTS_DIR = PROJECT_ROOT / 'reports'

# Create directories if they don't exist.
# RAW_DIR is not created here; only the directories this notebook writes to are.
for d in [PROCESSED_DIR, CACHE_DIR, TABLES_DIR, FIGURES_DIR, REPORTS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

print(f"Project root: {PROJECT_ROOT}")
print(f"Data directory: {DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")

In [None]:
# Configuration settings
# DEMO_MODE switches the data-loading cell to synthetic data and is forwarded
# to the table generators as `demo_mode`.
DEMO_MODE = True  # Set to False to use real CRSP data
# NOTE(review): VERBOSE is not read by any cell visible in this notebook —
# confirm it is still needed.
VERBOSE = True

# Simulation parameters
# NOTE(review): the Table 1 cell passes 50 simulations whenever DEMO_MODE is
# true, so N_SIMULATIONS only takes effect in production mode. N_BOOTSTRAP is
# not referenced by any visible cell.
N_SIMULATIONS = 100  # Reduce for faster demo (paper uses 1000)
N_BOOTSTRAP = 100    # Reduce for faster demo (paper uses 1000)

# Sample period (paper: Jan 1965 - Dec 2013)
# These bounds are only printed below; the synthetic demo data is generated
# from 2004-01-31 for 120 months and does not use them.
SAMPLE_START = '1965-01-01'
SAMPLE_END = '2013-12-31'

print(f"Demo mode: {DEMO_MODE}")
print(f"Sample period: {SAMPLE_START} to {SAMPLE_END}")

<a id='phase1'></a>
## Phase 1: Data Loading & Preprocessing

Load and preprocess the required datasets:
- CRSP daily/monthly stock returns
- Fama-French factor data
- Kenneth French portfolio returns

In [None]:
print("="*70)
print("  Phase 1: Data Loading & Preprocessing")
print("="*70)

# Try the real data first when it is requested. A loading failure flips
# DEMO_MODE so the synthetic-data branch below still runs; later cells can then
# always rely on `stock_data` and `factor_data` being defined.
if not DEMO_MODE:
    print("\n[PRODUCTION MODE] Loading real data...")

    try:
        from data_loader import DataLoader
        loader = DataLoader(str(RAW_DIR), str(CACHE_DIR))
        stock_data, factor_data = loader.load_data()
        print(f"  Loaded {len(stock_data):,} stock observations")
        print(f"  Loaded {len(factor_data):,} factor observations")
    except Exception as e:
        # Deliberate best-effort: report the problem and continue with demo data.
        print(f"  Error loading data: {e}")
        print("  Falling back to demo mode...")
        DEMO_MODE = True

if DEMO_MODE:
    print("\n[DEMO MODE] Generating synthetic data...")

    # Generate demo returns data
    np.random.seed(42)
    n_stocks = 100
    n_months = 120  # 10 years

    # Month-end dates. An explicit MonthEnd offset is used instead of the 'M'
    # string alias, which newer pandas releases deprecate in favour of 'ME'.
    dates = pd.date_range('2004-01-31', periods=n_months, freq=pd.offsets.MonthEnd())

    # One row per (stock, month). The draw order (RET, PRC, VOL for each row)
    # is kept as-is so the seeded output stays reproducible.
    demo_data = []
    for permno in range(1, n_stocks + 1):
        for date in dates:
            ret = np.random.randn() * 0.08 + 0.01  # Monthly return
            demo_data.append({
                'PERMNO': permno,
                'date': date,
                'RET': ret,
                'PRC': 50 + np.random.randn() * 20,
                'VOL': np.random.randint(100000, 10000000)
            })

    stock_data = pd.DataFrame(demo_data)

    # Generate demo factor data (one row per month, constant RF)
    factor_data = pd.DataFrame({
        'date': dates,
        'MKT_RF': np.random.randn(n_months) * 0.04,
        'SMB': np.random.randn(n_months) * 0.02,
        'HML': np.random.randn(n_months) * 0.02,
        'MOM': np.random.randn(n_months) * 0.03,
        'RF': np.ones(n_months) * 0.003
    })

    print(f"  Generated {len(stock_data):,} stock-month observations")
    print(f"  Generated {len(factor_data)} months of factor data")

In [None]:
# Text overview of the stock panel: dimensions, columns, coverage and the
# distribution of returns.
summary_rule = "=" * 50
print("\n" + summary_rule, "Stock Data Summary", summary_rule, sep="\n")

print(f"Shape: {stock_data.shape}")
print(f"Columns: {stock_data.columns.tolist()}")
print(f"\nDate range: {stock_data['date'].min()} to {stock_data['date'].max()}")
print(f"Unique stocks: {stock_data['PERMNO'].nunique():,}")

print(f"\nReturn statistics:")
print(stock_data['RET'].describe())

In [None]:
# Two-panel figure: pooled return histogram (left) and the cross-sectional
# average return per date (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, series_ax = axes

# Left panel: histogram of all non-missing returns, with a zero marker
hist_ax.hist(stock_data['RET'].dropna(), bins=50, edgecolor='black', alpha=0.7)
hist_ax.set(
    xlabel='Monthly Return',
    ylabel='Frequency',
    title='Distribution of Monthly Returns',
)
hist_ax.axvline(0, color='red', linestyle='--', alpha=0.7)

# Right panel: mean return across stocks for each date, with a zero marker
avg_ret = stock_data.groupby('date')['RET'].mean()
series_ax.plot(avg_ret.index, avg_ret.values, linewidth=0.8)
series_ax.set(
    xlabel='Date',
    ylabel='Average Return',
    title='Average Monthly Returns Over Time',
)
series_ax.axhline(0, color='red', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

<a id='phase2'></a>
## Phase 2: Portfolio Construction

Calculate DOWN_ASY scores and construct portfolios. Note: the cell below draws placeholder scores at random instead of computing the entropy-based measure.

In [None]:
print("="*70)
print("  Phase 2: Portfolio Construction")
print("="*70)

# Generate DOWN_ASY scores
np.random.seed(42)

# Placeholder scores: one independent draw per row from Uniform(-0.15, 0.25),
# which is symmetric around 0.05 (not centred on zero, and not skewed).
# The scores never depended on the date, so a single vectorized draw replaces
# the former per-date loop. That loop rescanned the whole frame for every date
# and then assigned its concatenated output by row position anyway.
# NOTE(review): this is expected to consume the seeded RNG stream in the same
# order as the loop did — verify if bit-for-bit reproducibility matters.
# NOTE(review): real data loaded in production mode also receives these random
# scores — confirm that is intended.
down_asy_scores = np.random.uniform(-0.15, 0.25, len(stock_data))

# Add DOWN_ASY scores to stock data (overwrites the column when re-run)
stock_data['DOWN_ASY'] = down_asy_scores

# Save scores; the date column is written as 'DATE'
scores_df = stock_data[['PERMNO', 'date', 'DOWN_ASY']].copy()
scores_df = scores_df.rename(columns={'date': 'DATE'})
scores_df.to_parquet(PROCESSED_DIR / 'down_asy_scores.parquet')

print(f"\nGenerated DOWN_ASY scores for {len(scores_df):,} stock-months")
print(f"\nDOWN_ASY Statistics:")
print(stock_data['DOWN_ASY'].describe())

In [None]:
# Two-panel figure: DOWN_ASY histogram (left) and DOWN_ASY against returns
# for a random subsample (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, scatter_ax = axes

# Left panel: distribution of the scores, with a zero marker
hist_ax.hist(stock_data['DOWN_ASY'], bins=50, edgecolor='black', alpha=0.7, color='steelblue')
hist_ax.set(
    xlabel='DOWN_ASY',
    ylabel='Frequency',
    title='Distribution of DOWN_ASY Scores',
)
hist_ax.axvline(0, color='red', linestyle='--', alpha=0.7)

# Right panel: at most 5000 randomly sampled rows keep the scatter readable
sample = stock_data.sample(min(5000, len(stock_data)))
scatter_ax.scatter(sample['DOWN_ASY'], sample['RET'], alpha=0.3, s=5)
scatter_ax.set(
    xlabel='DOWN_ASY',
    ylabel='Monthly Return',
    title='DOWN_ASY vs Returns',
)
scatter_ax.axhline(0, color='red', linestyle='--', alpha=0.5)
scatter_ax.axvline(0, color='red', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
# Create quintile portfolios based on DOWN_ASY
def create_quintile_portfolios(data, sort_var='DOWN_ASY', ret_var='RET'):
    """Form equal-weighted quintile portfolios for every date.

    Within each 'date' group, rows are split into five quantile buckets of
    ``sort_var`` (labelled 1 to 5) and the plain mean of ``ret_var`` is taken
    in each bucket.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'date' column plus ``sort_var`` and ``ret_var``.
    sort_var : str
        Column used to rank rows into quintiles.
    ret_var : str
        Column averaged within each quintile.

    Returns
    -------
    pandas.DataFrame
        One row per (date, quintile) with columns 'date', 'quintile' and
        'return', ordered by date and then by quintile.
    """
    quintile_labels = [1, 2, 3, 4, 5]
    records = []

    for month, cross_section in data.groupby('date'):
        # Bucket membership for this date's cross-section
        buckets = pd.qcut(cross_section[sort_var], 5, labels=quintile_labels)
        returns = cross_section[ret_var]

        # Equal-weighted (simple mean) return of each bucket
        records.extend(
            {
                'date': month,
                'quintile': label,
                'return': returns[buckets == label].mean(),
            }
            for label in quintile_labels
        )

    return pd.DataFrame(records)

# Build the portfolio panel from the scored stock data
portfolios = create_quintile_portfolios(stock_data)
print(f"Created {len(portfolios)} portfolio-month observations")

# Time-series mean of each quintile's return, expressed in percent
avg_returns = portfolios.groupby('quintile')['return'].mean().mul(100)
print("\nAverage Monthly Returns by Quintile (%):")
print(avg_returns)

# Spread between the top (5) and bottom (1) quintile
print(f"\nHigh-Low Spread: {avg_returns[5] - avg_returns[1]:.2f}%")

<a id='phase3'></a>
## Phase 3: Factor Model Regressions

Generate:
- **Table 1**: Size and Power of Entropy Test (Monte Carlo)
- **Table 2**: Asymmetry Tests for 30 Portfolios
- **Table 5**: Portfolio Returns and Carhart Alphas

In [None]:
# Section banner for the notebook output
phase_rule = "=" * 70
print(phase_rule, "  Phase 3: Factor Model Regressions", phase_rule, sep="\n")

### Table 1: Size and Power of Entropy Tests

Monte Carlo simulation comparing the entropy test to the HTZ test.

In [None]:
# Generate Table 1
from replicate_table1 import generate_table1

print("\nGenerating Table 1: Size and Power Tests...")

# Demo runs always use 50 simulations; N_SIMULATIONS applies only otherwise.
table1_n_simulations = 50 if DEMO_MODE else N_SIMULATIONS
table1 = generate_table1(
    output_dir=TABLES_DIR,
    n_simulations=table1_n_simulations,
    demo_mode=DEMO_MODE,
)

table_rule = "=" * 60
print("\n" + table_rule, "Table 1: Size and Power of Entropy Tests", table_rule, sep="\n")
display(table1)

### Table 2: Asymmetry Tests for 30 Portfolios

Tests asymmetry in Size, Book-to-Market, and Momentum portfolios.

In [None]:
# Generate Table 2
from replicate_table2 import generate_table2

print("\nGenerating Table 2: Asymmetry Tests...")

# Output goes to TABLES_DIR; RAW_DIR is handed over as the data location.
table2 = generate_table2(output_dir=TABLES_DIR, data_dir=RAW_DIR, demo_mode=DEMO_MODE)

table_rule = "=" * 60
print("\n" + table_rule, "Table 2: Asymmetry Tests for Portfolios", table_rule, sep="\n")
display(table2)

### Table 5: Portfolio Returns and Alphas

Univariate portfolio sorts on various asymmetry measures.

In [None]:
# Generate Table 5
from replicate_table5 import generate_table5

print("\nGenerating Table 5: Returns and Alphas...")
table5 = generate_table5(
    output_dir=TABLES_DIR,
    demo_mode=DEMO_MODE
)

print("\n" + "="*60)
print("Table 5: Portfolio Returns and Alphas")
print("="*60)
# Show the table under its header, as the cells for Tables 1, 2, 4 and 7 do.
# Previously the header was printed with nothing beneath it.
display(table5)

<a id='phase4'></a>
## Phase 4: Firm Characteristics

Generate:
- **Table 3**: Cross-Sectional Correlations
- **Table 4**: Summary Statistics of Asymmetry Portfolios
- **Table 6**: Determinants of Time-Varying Premium

In [None]:
# Section banner for the notebook output
phase_rule = "=" * 70
print(phase_rule, "  Phase 4: Firm Characteristics", phase_rule, sep="\n")

In [None]:
# Generate firm characteristics demo data
from replicate_table3 import create_demo_data, generate_table3

print("\nGenerating firm characteristics data...")
chars_data = create_demo_data(n_stocks=500, n_months=120)

# Save for validation: keep only the expected columns that are actually
# present in the generated frame, in this order.
char_cols = [
    'PERMNO', 'DATE',
    'BETA', 'DOWNSIDE_BETA', 'UPSIDE_BETA',
    'SIZE', 'BM', 'TURN', 'ILLIQ', 'MOM', 'IVOL',
    'COSKEW', 'COKURT', 'MAX', 'LQP', 'UQP',
]
available_cols = [col for col in char_cols if col in chars_data.columns]

chars_path = PROCESSED_DIR / 'firm_characteristics.parquet'
chars_data[available_cols].to_parquet(chars_path)

print(f"Generated {len(chars_data):,} observations with {len(available_cols)} characteristics")
print(f"\nCharacteristics: {available_cols}")

### Table 3: Cross-Sectional Correlations

In [None]:
# Generate Table 3
print("\nGenerating Table 3: Cross-Sectional Correlations...")
corr_matrix, t_stats = generate_table3(chars_data, TABLES_DIR)

# Visualize correlation matrix.
# Cells strictly above the diagonal are masked, so only the lower triangle
# and the diagonal are drawn.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
heatmap_style = dict(
    annot=True,
    fmt='.2f',
    cmap='RdBu_r',
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={'shrink': 0.8},
)

plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, mask=mask, **heatmap_style)
plt.title('Table 3: Cross-Sectional Correlations', fontsize=14)
plt.tight_layout()
plt.show()

### Table 4: Summary Statistics of Asymmetry Portfolios

In [None]:
# Generate Table 4
from replicate_table4 import generate_table4

print("\nGenerating Table 4: Summary Statistics...")

# The CSV destination is passed as a plain string path.
table4_csv = str(TABLES_DIR / 'Table_4_Summary_Stats.csv')
table4 = generate_table4(chars_data, n_deciles=10, output_path=table4_csv)

table_rule = "=" * 60
print("\n" + table_rule, "Table 4: Summary Statistics by DOWN_ASY Decile", table_rule, sep="\n")
display(table4)

### Table 6: Determinants of Time-Varying Premium

In [None]:
# Generate Table 6
# `create_demo_data` is imported under an alias so it does not shadow the
# function of the same name imported from replicate_table3 in an earlier cell.
from replicate_table6 import generate_table6, create_demo_data as create_table6_demo

print("\nGenerating Table 6: Time-Varying Premium Determinants...")
# The demo helper returns five inputs, unpacked in this order.
# NOTE(review): demo inputs are used regardless of DEMO_MODE — confirm intended.
realized_premium, ma_premium, mkt_vol, liquidity, sentiment = create_table6_demo()

# NOTE(review): unlike the other table cells, the result is not displayed here;
# presumably generate_table6 reports its own output — verify.
table6 = generate_table6(
    realized_premium=realized_premium,
    ma_premium=ma_premium,
    mkt_vol=mkt_vol,
    liquidity=liquidity,
    sentiment=sentiment,
    output_dir=TABLES_DIR
)

<a id='phase5'></a>
## Phase 5: Robustness Checks

Generate **Table 7**: Sequentially Double-Sorted Portfolios

In [None]:
# Section banner for the notebook output
phase_rule = "=" * 70
print(phase_rule, "  Phase 5: Robustness Checks", phase_rule, sep="\n")

In [None]:
# Generate Table 7
from replicate_table7 import generate_table7

print("\nGenerating Table 7: Double-Sorted Portfolios...")
table7 = generate_table7(output_dir=TABLES_DIR, demo_mode=DEMO_MODE)

table_rule = "=" * 60
print("\n" + table_rule, "Table 7: Double-Sorted Portfolio Returns", table_rule, sep="\n")
display(table7)

<a id='phase6'></a>
## Phase 6: Report Generation & Figures

Generate all figures and the final replication report.

In [None]:
# Section banner for the notebook output
phase_rule = "=" * 70
print(phase_rule, "  Phase 6: Report Generation & Figures", phase_rule, sep="\n")

### Figures 1 & 2: Symmetry Concept and Copula Comparison

In [None]:
# Generate Figures 1 and 2
from replicate_fig1_2 import setup_plot_style, plot_figure1, plot_figure2

print("\nGenerating Figures 1 and 2...")
# Apply the shared plot style before drawing; both figures are given
# FIGURES_DIR as their destination.
setup_plot_style()
plot_figure1(FIGURES_DIR)
plot_figure2(FIGURES_DIR)
print("  Figures 1 and 2 saved!")

### Figure 3: Power Analysis

In [None]:
# Generate Figure 3
from plot_power_curve import load_table1_data, plot_power_curve

print("\nGenerating Figure 3: Power Analysis...")
# NOTE(review): load_table1_data() takes no arguments here; presumably it reads
# the Table 1 output written by the earlier Table 1 cell — confirm the location
# it reads from matches TABLES_DIR.
table1_data = load_table1_data()
fig3_path = str(FIGURES_DIR / 'Figure_3_Power_Analysis.pdf')
# Only panel 'F' of the Table 1 data is plotted.
plot_power_curve(table1_data, fig3_path, panel='F')
print("  Figure 3 saved!")

### Figure 4: Asymmetry Distribution by Decile

In [None]:
# Generate Figure 4
from plot_asymmetry_distribution import load_table4_data, plot_asymmetry_distribution

print("\nGenerating Figure 4: Asymmetry Distribution...")
# NOTE(review): load_table4_data() takes no arguments here; presumably it reads
# the Table 4 CSV written by the earlier Table 4 cell — confirm the path it
# uses.
table4_data = load_table4_data()
fig4_path = str(FIGURES_DIR / 'Figure_4_Asymmetry_Distribution.pdf')
plot_asymmetry_distribution(table4_data, fig4_path)
print("  Figure 4 saved!")

### Figures 5 & 6: Time-Series Visualizations

In [None]:
# Generate Figures 5 and 6
from generate_timeseries_data import create_demo_timeseries
from plot_equity_curve import plot_equity_curve
from plot_premium_dynamics import plot_time_series_overlay, plot_regime_scatter

print("\nGenerating Figures 5 and 6: Time-Series Visualizations...")

# Destination files, passed to the plotting helpers as plain strings
fig5_path = str(FIGURES_DIR / 'Figure_5_Cumulative_Returns.pdf')
fig6a_path = str(FIGURES_DIR / 'Figure_6A_Premium_TimeSeries.pdf')
fig6b_path = str(FIGURES_DIR / 'Figure_6B_Premium_Scatter.pdf')

# One synthetic 600-month series feeds all three plots
ts_data = create_demo_timeseries(n_months=600)

# Figure 5: cumulative returns on a log scale
plot_equity_curve(ts_data, fig5_path, log_scale=True)
print("  Figure 5 saved!")

# Figure 6: premium dynamics, as a time-series overlay (A) and a regime scatter (B)
plot_time_series_overlay(ts_data, fig6a_path)
plot_regime_scatter(ts_data, fig6b_path)
print("  Figure 6 saved!")

### Generate Replication Report

In [None]:
# Generate final report
# ResultsAggregator is imported here but first used by the validation cell
# that follows.
from report_generator import ReportGenerator, ResultsAggregator

print("\nGenerating Replication Report...")

# The generator is given the report, table and figure directories as strings.
generator = ReportGenerator(
    output_dir=str(REPORTS_DIR),
    tables_dir=str(TABLES_DIR),
    figures_dir=str(FIGURES_DIR)
)

# Build the report, then save it as 'replication_report.md'; save_report
# returns the value printed below as the saved location.
report = generator.generate_replication_report()
filepath = generator.save_report(report, 'replication_report.md')
print(f"  Report saved to: {filepath}")

### Validation Summary

In [None]:
# Collect summary counts and per-item validation flags, then print them.
aggregator = ResultsAggregator(str(PROJECT_ROOT))
summary = aggregator.get_summary_statistics()
validation = aggregator.validate_replication()

validation_rule = "=" * 60
print("\n" + validation_rule, "REPLICATION VALIDATION SUMMARY", validation_rule, sep="\n")
print(f"\nTables generated: {summary['n_tables']}")
print(f"Figures generated: {summary['n_figures']}")

print("\nValidation Results:")
for item, status in validation.items():
    # Truthy status gets a tick, falsy status a cross
    check = ("✗", "✓")[bool(status)]
    print(f"  [{check}] {item}")