# Seminar QF: Credit Risk Analysis Pipeline

This notebook implements an end-to-end credit risk analysis pipeline that combines the Merton model with several volatility models, Monte Carlo simulation, and CDS spread estimation:
1.  **Merton Model**: To estimate asset values and volatility from equity data.
2.  **GARCH(1,1)**: Single-regime volatility modeling.
3.  **Regime-Switching (Hamilton Filter)**: Volatility modeling with regime changes.
4.  **MS-GARCH**: Optimized Markov-Switching GARCH model.
5.  **Monte Carlo Simulation**: To forecast future asset values.
6.  **CDS Spread Calculation**: Estimating credit default swap spreads based on simulated default probabilities.

### 0. Setup
Initialize the environment, add the project root to `sys.path` so that the `src` package can be imported, and load the project configuration.

In [None]:
# Setup
import sys
import os
import shutil
from pathlib import Path
import pandas as pd
import numpy as np

# Make the repository root importable so that `import src...` resolves.
# The notebook is expected to live in notebooks/, one level below the root,
# which is why the parent of the current working directory is used.
project_root = Path.cwd().parent
_root_entry = str(project_root)
if _root_entry not in sys.path:
    # Guarded so that re-running this cell does not add duplicate entries.
    sys.path.append(_root_entry)

# Import config and modules
# Pulls in the project configuration plus every pipeline entry point used by
# the cells below. All imports share a single try block, so the first one
# that fails skips every import after it.
try:
    from src.utils import config
    from src.data.data_processing import load_and_preprocess_data, run_merton_estimation, load_interest_rates
    from src.models.garch_model import run_garch_estimation
    from src.models.regime_switching import run_regime_switching_estimation
    from src.models.ms_garch_optimized import run_ms_garch_estimation_optimized
    from src.models.probability_of_default import run_pd_pipeline, calculate_merton_pd_normal
    from src.analysis.result_summary import generate_results_summary
    from src.analysis.monte_carlo_garch import monte_carlo_garch_1year_parallel  # Use parallel version
    from src.analysis.volatility_diagnostics import run_volatility_diagnostics, filter_problematic_firms
    from src.analysis.monte_carlo_regime_switching import monte_carlo_regime_switching_1year_parallel  # Use parallel version
    from src.analysis.monte_carlo_ms_garch import monte_carlo_ms_garch_1year_parallel  # Use parallel version
    from src.analysis.cds_spread_calculator import CDSSpreadCalculator
    from src.analysis.cds_correlation import run_cds_correlation_analysis, plot_cds_correlations

    # Reached only when every import above succeeded.
    print("Imports successful.")
    print(f"Data Directory: {config.DATA_DIR}")
except ImportError as e:
    # NOTE: the error is only printed, not re-raised. Any name that was not
    # imported before the failure stays undefined, so later cells that use it
    # will fail with NameError rather than at this point.
    print(f"Import Error: {e}")
    # Debug aid: uncomment to inspect the search path when an import fails.
    # print(f"sys.path: {sys.path}")

### 1. Cache Cleanup
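Delete the cached intermediate files (`merton_results_cache.pkl` and `mc_garch_cache.csv`) from the intermediates directory, if they exist.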

In [None]:
# Cache Cleanup
# Deletes the listed cache files from config.INTERMEDIATES_DIR. Cleanup is
# best-effort: a missing file or a failed delete is reported and skipped.
# Presumably this forces later steps to recompute instead of reusing results
# from a previous run -- confirm against the modules that write these files.
print("Cleaning up cache files...")
cache_dir = config.INTERMEDIATES_DIR
cache_files = [
    'merton_results_cache.pkl',
    'mc_garch_cache.csv',
]

for cache_file in cache_files:
    cache_path = cache_dir / cache_file
    # Attempt the delete directly instead of checking exists() first: the
    # check-then-remove sequence leaves a window in which the file can vanish
    # and be misreported as a deletion failure.
    try:
        os.remove(cache_path)
    except FileNotFoundError:
        print(f"  (No cached file: {cache_path})")
    except OSError as e:
        # Covers permission errors, the path being a directory, and similar.
        print(f"⚠ Could not delete {cache_path}: {e}")
    else:
        print(f"✓ Deleted: {cache_path}")

print("Cache cleanup complete.")


### 2. Data Loading & Merton Model Estimation
*   **Load Interest Rates**: From ECB data.
*   **Load Equity Data**: From Excel inputs.
*   **Run Merton Model**: Solves for Asset Value ($V_t$) and Asset Volatility ($\sigma_A$) using the iterative approach.
*   **Output**: `merged_data_with_merton.csv` and `daily_asset_returns.csv`.

In [None]:
# --- Inputs ---------------------------------------------------------------
# Interest rates are loaded first; the printed count is the number of rows
# in the returned frame.
interest_rates_df = load_interest_rates()
print(f"Loaded {len(interest_rates_df)} months of interest rate data")

# Equity/liability data, already preprocessed by the loader.
df = load_and_preprocess_data()

# --- Merton estimation ----------------------------------------------------
# Returns two frames: the merged data and the daily returns that the later
# volatility-model cells take as input.
df_merged, daily_returns_df = run_merton_estimation(df, interest_rates_df)

# --- Persist both frames to the output directory --------------------------
for _frame, _csv_name in (
    (df_merged, "merged_data_with_merton.csv"),
    (daily_returns_df, "daily_asset_returns.csv"),
):
    _frame.to_csv(config.OUTPUT_DIR / _csv_name, index=False)

print(f"Saved to {config.OUTPUT_DIR}")

### 3. GARCH(1,1) Estimation
Estimates a standard GARCH(1,1) model on the daily asset returns derived from the Merton model.
*   **Input**: `daily_asset_returns.csv`
*   **Output**: `daily_asset_returns_with_garch.csv` (adds conditional volatility columns).

In [None]:
# Run the GARCH estimation on the daily returns produced by the Merton step.
final_daily_returns = run_garch_estimation(daily_returns_df)

# Write the result (without the index column) for the downstream cells that
# read this file by path.
_garch_csv = config.OUTPUT_DIR / "daily_asset_returns_with_garch.csv"
final_daily_returns.to_csv(_garch_csv, index=False)
print("Saved 'daily_asset_returns_with_garch.csv'")

### 4. Regime-Switching Model (Hamilton Filter)
Estimates a 2-state Markov-switching model on returns (high-volatility and low-volatility states), with no GARCH dynamics within each state.
*   **Output**: `daily_asset_returns_with_regime.csv`.

In [None]:
# Run the regime-switching estimation on the same Merton-derived daily
# returns that the GARCH cell uses.
final_daily_returns_rs = run_regime_switching_estimation(daily_returns_df)

# Write the result (without the index column) to the output directory.
_regime_csv = config.OUTPUT_DIR / "daily_asset_returns_with_regime.csv"
final_daily_returns_rs.to_csv(_regime_csv, index=False)
print("Saved 'daily_asset_returns_with_regime.csv'")

### 5. Optimized MS-GARCH Estimation
Estimates a "True" MS-GARCH model where each regime has its own GARCH(1,1) process.
*   **Optimizations**: Uses Warm Start, JIT Compilation (Numba), and Numerical Optimizations.
*   **Output**: `daily_asset_returns_with_msgarch.csv` and parameter file.

In [None]:
print("Running MS-GARCH Estimation...")

# The estimator receives a path for its parameter file in addition to the
# returns; the same path is passed to the MS-GARCH Monte Carlo cell later.
_msgarch_params_csv = str(config.OUTPUT_DIR / "ms_garch_parameters.csv")
final_daily_returns_msgarch = run_ms_garch_estimation_optimized(
    daily_returns_df,
    output_file=_msgarch_params_csv,
)

# Write the returned frame (without the index column) to the output directory.
_msgarch_returns_csv = config.OUTPUT_DIR / "daily_asset_returns_with_msgarch.csv"
final_daily_returns_msgarch.to_csv(_msgarch_returns_csv, index=False)
print("Saved 'daily_asset_returns_with_msgarch.csv'")

### 6. Probability of Default (PD) Calculation
Calculates PD using the Merton Model formula but substituting the volatility estimates from GARCH, RS, and MS-GARCH models.
*   **Benchmark**: Calculates standard Merton PD assuming Normal distribution.
*   **Output**: `daily_pd_results.csv` and `daily_pd_results_merton_normal.csv` (benchmark).

In [None]:
# Paths of the three volatility-model outputs, in the positional order in
# which run_pd_pipeline receives them: GARCH, regime switching, MS-GARCH.
_vol_model_csvs = [
    str(config.OUTPUT_DIR / _name)
    for _name in (
        'daily_asset_returns_with_garch.csv',
        'daily_asset_returns_with_regime.csv',
        'daily_asset_returns_with_msgarch.csv',
    )
]
pd_results = run_pd_pipeline(*_vol_model_csvs)

_pd_csv = config.OUTPUT_DIR / "daily_pd_results.csv"
pd_results.to_csv(_pd_csv, index=False)
print("Saved 'daily_pd_results.csv'")

# Merton PD (Benchmark): computed from the plain Merton daily returns file
# rather than from any of the volatility-model outputs above.
_merton_returns_csv = str(config.OUTPUT_DIR / 'daily_asset_returns.csv')
merton_normal_pd = calculate_merton_pd_normal(_merton_returns_csv)

_merton_pd_csv = config.OUTPUT_DIR / "daily_pd_results_merton_normal.csv"
merton_normal_pd.to_csv(_merton_pd_csv, index=False)
print("Saved 'daily_pd_results_merton_normal.csv'")

### 7. Monte Carlo Simulation (GARCH) & Diagnostics
Simulates future asset values for 1 year (252 days) using GARCH volatility dynamics.
Also runs **Volatility Diagnostics** to identify firms with explosive volatility that might distort results.
*   **Output**: `daily_monte_carlo_garch_results.csv` and diagnostic files in `data/diagnostics/`.

In [None]:
print("Run Monte Carlo GARCH (1 year) - PARALLEL with all cores...")

# Both the simulation and the diagnostics read the GARCH returns file, and
# the diagnostics also read the simulation output, so each path is built once.
_garch_returns_csv = str(config.OUTPUT_DIR / 'daily_asset_returns_with_garch.csv')
_mc_garch_csv = config.OUTPUT_DIR / "daily_monte_carlo_garch_results.csv"

mc_results = monte_carlo_garch_1year_parallel(
    _garch_returns_csv,
    gvkey_selected=None,  # presumably "no firm filter" -- confirm in the callee
    num_simulations=10000,
    num_days=252,
    n_jobs=-1,  # Use all available cores
)
mc_results.to_csv(_mc_garch_csv, index=False)

# Diagnostics
# Must run after the CSV above is written: the results are passed by path.
print("Running Volatility Diagnostics...")
diagnostics_results = run_volatility_diagnostics(
    garch_file=_garch_returns_csv,
    mc_garch_file=str(_mc_garch_csv),
    output_dir=str(config.DIAGNOSTICS_DIR),
)

# Expose the two firm groups reported by the diagnostics as notebook-level
# names.
PROBLEMATIC_FIRMS = diagnostics_results['problematic_firms']
CLEAN_FIRMS = diagnostics_results['clean_firms']
print(f"Problematic Firms: {len(PROBLEMATIC_FIRMS)}")

### 8. Monte Carlo Simulation (Regime Switching & MS-GARCH)
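Runs MC simulations for the Regime-Switching and MS-GARCH models, saving `daily_monte_carlo_regime_switching_results.csv` and `daily_monte_carlo_ms_garch_results.csv`.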
*   **Regime Switching**: Simulates regime changes and draws returns based on state-specific volatility (no GARCH).
*   **MS-GARCH**: Simulates regime changes AND GARCH dynamics within each state.

In [None]:
print("Run MC Regime Switching - PARALLEL with all cores...")

# Input files for the simulation.
# NOTE(review): no cell in this notebook writes
# 'regime_switching_parameters.csv' directly; presumably
# run_regime_switching_estimation produces it -- confirm.
_rs_mc_inputs = {
    "garch_file": str(config.OUTPUT_DIR / 'daily_asset_returns_with_garch.csv'),
    "regime_params_file": str(config.OUTPUT_DIR / 'regime_switching_parameters.csv'),
}

mc_rs_results = monte_carlo_regime_switching_1year_parallel(
    **_rs_mc_inputs,
    gvkey_selected=None,
    num_simulations=10000,
    num_days=252,
    n_jobs=-1,  # Use all available cores
)

_mc_rs_csv = config.OUTPUT_DIR / "daily_monte_carlo_regime_switching_results.csv"
mc_rs_results.to_csv(_mc_rs_csv, index=False)

In [None]:
print("Run MC MS-GARCH - PARALLEL with all cores...")

# Simulation settings, identical to those used in the other Monte Carlo cells.
_msgarch_sim_settings = dict(
    gvkey_selected=None,
    num_simulations=10000,
    num_days=252,
    n_jobs=-1,  # Use all available cores
)

# Inputs are the two files associated with the MS-GARCH estimation cell: the
# returns CSV it saves and the parameter file path it was given.
mc_msgarch_results = monte_carlo_ms_garch_1year_parallel(
    daily_returns_file=str(config.OUTPUT_DIR / 'daily_asset_returns_with_msgarch.csv'),
    ms_garch_params_file=str(config.OUTPUT_DIR / 'ms_garch_parameters.csv'),
    **_msgarch_sim_settings,
)

_mc_msgarch_csv = config.OUTPUT_DIR / "daily_monte_carlo_ms_garch_results.csv"
mc_msgarch_results.to_csv(_mc_msgarch_csv, index=False)

### 9. CDS Spread Calculation
Calculates Model-Implied CDS Spreads for 1, 3, and 5-year horizons.

**Models:**
1. **Classical Merton (Analytical)**: Uses the asset volatility from the Merton model estimation directly (no Monte Carlo).
2. **GARCH**: Uses Monte Carlo simulated volatility from the GARCH(1,1) model.
3. **Regime Switching**: Uses Monte Carlo simulated volatility from the Hamilton Filter RS model.
4. **MS-GARCH**: Uses Monte Carlo simulated volatility from the Markov-Switching GARCH model.

**Output:** Separate CSV files for CDS spreads for each model.

In [None]:
# One calculator instance serves all four models, configured with the
# maturity horizons 1, 3 and 5.
cds_calc = CDSSpreadCalculator(maturity_horizons=[1, 3, 5])

# The Merton output is read by every calculation below; the plain daily
# returns file is read by the three Monte Carlo based ones.
_merton_csv = str(config.OUTPUT_DIR / 'merged_data_with_merton.csv')
_common_mc_inputs = {
    "daily_returns_file": str(config.OUTPUT_DIR / 'daily_asset_returns.csv'),
    "merton_file": _merton_csv,
}

# Classical Merton (Analytical): takes only the Merton file as input, no
# Monte Carlo results.
print("CDS Spreads: Classical Merton (Analytical)")
df_cds_spreads_merton = cds_calc.calculate_cds_spreads_analytical_merton(
    merton_file=_merton_csv,
    output_file=str(config.OUTPUT_DIR / 'cds_spreads_merton_analytical.csv'),
)

# GARCH
print("CDS Spreads: GARCH")
df_cds_spreads_garch_all = cds_calc.calculate_cds_spreads_from_mc_garch(
    mc_garch_file=str(config.OUTPUT_DIR / 'daily_monte_carlo_garch_results.csv'),
    output_file=str(config.OUTPUT_DIR / 'cds_spreads_garch_mc_all_firms.csv'),
    **_common_mc_inputs,
)

# Regime Switching: reuses the GARCH entry point, fed the regime-switching
# Monte Carlo results.
# NOTE(review): no volatility_column is passed here, unlike the MS-GARCH call
# below. Confirm that the method's default column exists in the
# regime-switching results file.
print("CDS Spreads: RS")
df_cds_spreads_rs_all = cds_calc.calculate_cds_spreads_from_mc_garch(
    mc_garch_file=str(config.OUTPUT_DIR / 'daily_monte_carlo_regime_switching_results.csv'),
    output_file=str(config.OUTPUT_DIR / 'cds_spreads_regime_switching_mc_all_firms.csv'),
    **_common_mc_inputs,
)

# MS-GARCH: same entry point again, with the volatility column named
# explicitly.
print("CDS Spreads: MS-GARCH")
df_cds_spreads_msgarch_all = cds_calc.calculate_cds_spreads_from_mc_garch(
    mc_garch_file=str(config.OUTPUT_DIR / 'daily_monte_carlo_ms_garch_results.csv'),
    output_file=str(config.OUTPUT_DIR / 'cds_spreads_ms_garch_mc_all_firms.csv'),
    volatility_column='mc_msgarch_cumulative_volatility',
    **_common_mc_inputs,
)

### 10. Model vs Market CDS Spread Correlation
Compare model-implied CDS spreads with actual market CDS data.
- Loads real CDS data (in basis points) for 1Y, 3Y, and 5Y maturities.
- Matches companies between model and market data.
- Calculates correlations for each firm and overall.

In [None]:
# Reload the module to pick up changes
# Development convenience: re-executes src.analysis.cds_correlation so that
# edits made to it since the first import take effect without restarting the
# kernel. The two functions are imported again afterwards because names bound
# by an earlier `from ... import` still point at the pre-reload objects.
import importlib
from src.analysis import cds_correlation
importlib.reload(cds_correlation)
from src.analysis.cds_correlation import run_cds_correlation_analysis, plot_cds_correlations

# Run CDS Correlation Analysis
# Called with no arguments, so the input locations are decided inside the
# function (presumably from config -- confirm in cds_correlation).
print("Running CDS Correlation Analysis...")
correlation_results = run_cds_correlation_analysis()

# Plot scatter plots (5-year maturity)
# Use axis_limit=500 to zoom in on reasonable CDS spread ranges and exclude extreme outliers
# This makes the visualization clearer without affecting correlation calculations
plot_cds_correlations(correlation_results, maturity=5, axis_limit=500)