# 7. Calibration

Fit model parameters to historical data using the calibration tools in `privatecredit.calibration`.

## Contents
1. Historical Default Rate Calibration
2. Transition Matrix Estimation
3. LGD Distribution Fitting
4. Bayesian Estimation

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

from privatecredit.calibration import (
    HistoricalCalibrator,
    TransitionCalibrator,
    ParameterEstimator,
    BayesianEstimator
)

## 1. Historical Default Rate Calibration

In [None]:
# Generate synthetic historical data
# A fixed seed makes every run of the notebook draw the same history.
np.random.seed(42)

# Number of observation periods, and the default probability used to simulate
# the defaults (later plotted as the "True PD" reference).
n_periods, true_pd = 60, 0.025

# Draw order matters for reproducibility: exposures first, then the number of
# defaults in each period given that period's exposure.
exposure_per_period = np.random.poisson(lam=1000, size=n_periods)
defaults = np.random.binomial(n=exposure_per_period, p=true_pd)

# Pooled summary of the simulated history.
print(f"Historical data: {n_periods} periods")
print(f"Total defaults: {defaults.sum()}")
print(f"Total exposure: {exposure_per_period.sum()}")
print(f"Observed default rate: {defaults.sum() / exposure_per_period.sum():.4f}")

In [None]:
# Fit default rates
# Calibrate against the simulated history: the per-period default counts and
# the matching per-period exposures. `calibrator` stays at notebook scope
# because the bootstrap cell calls it again.
calibrator = HistoricalCalibrator()
params = calibrator.fit_default_rates(defaults, exposure_per_period)

# `params` is read like a mapping; only the four keys below are used here.
# NOTE(review): the beta_alpha / beta_beta names suggest a Beta distribution
# fitted to the default rate — confirm against HistoricalCalibrator.
print("\nCalibrated Parameters:")
print(f"  Mean default rate: {params['mean_default_rate']:.4f}")
print(f"  Std default rate: {params['std_default_rate']:.4f}")
print(f"  Beta alpha: {params['beta_alpha']:.2f}")
print(f"  Beta beta: {params['beta_beta']:.2f}")

In [None]:
# Bootstrap confidence intervals
# Per-period observed default rate. A period with zero exposure carries no
# information about the rate, so it is dropped rather than being turned into a
# 0% observation by padding the denominator with an epsilon (which also nudged
# every other rate slightly away from defaults / exposure).
has_exposure = exposure_per_period > 0
observed_rates = defaults[has_exposure] / exposure_per_period[has_exposure]

# Resample the per-period rates to get an interval around their mean.
# NOTE(review): the "95%" label below assumes the method's default confidence
# level, since no level is passed here — confirm against HistoricalCalibrator.
ci = calibrator.bootstrap_confidence_intervals(observed_rates, statistic='mean')

print("\n95% Bootstrap CI for mean default rate:")
print(f"  Point estimate: {ci['point_estimate']:.4f}")
print(f"  Lower bound: {ci['lower_bound']:.4f}")
print(f"  Upper bound: {ci['upper_bound']:.4f}")

## 2. Transition Matrix Estimation

In [None]:
# Calibrate transition matrix to targets
# Targets handed to the calibrator, gathered in one place so they are easy to
# tweak. The maturity of 60 matches the 60-step projection used to verify the
# calibrated matrix.
calibration_targets = dict(
    target_default_rate=0.02,
    target_prepayment_rate=0.15,
    maturity=60,
)

trans_calibrator = TransitionCalibrator()
P = trans_calibrator.fit_to_target_rates(**calibration_targets)

# Row/column labels for the calibrated matrix, in matrix order.
states = ['Perf', '30D', '60D', '90D', 'Def', 'Pre', 'Mat']
transition_table = pd.DataFrame(P, index=states, columns=states)

print("\nCalibrated Transition Matrix:")
print(transition_table.round(4))

In [None]:
# Verify calibration
# Projection horizon in periods; same value as the maturity given to the
# calibrator, named once so the loop and the printed label cannot drift apart.
horizon_months = 60

# Look up state positions by name instead of hard-coding 7 / 4 / 5, so the
# check keeps working if the `states` list is reordered or extended.
default_idx = states.index('Def')
prepay_idx = states.index('Pre')

# Start with all probability mass in the 'Perf' state.
state_probs = np.zeros(len(states))
state_probs[states.index('Perf')] = 1.0

# Roll the state distribution forward one period at a time.
for _ in range(horizon_months):
    state_probs = state_probs @ P

print(f"\nVerification ({horizon_months}-month projection):")
print(f"  Cumulative default: {state_probs[default_idx]:.4f}")
print(f"  Cumulative prepayment: {state_probs[prepay_idx]:.4f}")
# Sum of the states listed before 'Def' (Perf, 30D, 60D, 90D).
print(f"  Still performing: {state_probs[:default_idx].sum():.4f}")

## 3. LGD Distribution Fitting

In [None]:
# Generate synthetic LGD data
# 500 draws from Beta(2, 5). This uses the global NumPy random state, so the
# sample depends on the seed and draws made earlier in the notebook.
lgd_data = np.random.beta(a=2, b=5, size=500)

# Fit LGD distribution
# The estimator is asked for the 'beta' model; the result is read like a
# mapping with the four keys printed below.
estimator = ParameterEstimator()
lgd_params = estimator.fit_lgd_mle(lgd_data, model='beta')

print("LGD Distribution Parameters:")
print(f"  Alpha: {lgd_params['alpha']:.2f}")
print(f"  Beta: {lgd_params['beta']:.2f}")
print(f"  Mean LGD: {lgd_params['mean']:.4f}")
print(f"  Mode LGD: {lgd_params['mode']:.4f}")

In [None]:
# Plot fitted vs observed
fig, ax = plt.subplots(figsize=(10, 5))

# Observed LGD sample as a density-normalised histogram, so it shares a
# y-scale with the fitted probability density.
ax.hist(lgd_data, bins=30, density=True, alpha=0.7, label='Observed', color='steelblue')

# Fitted Beta density evaluated on 100 evenly spaced points across [0, 1].
fitted_alpha = lgd_params['alpha']
fitted_beta = lgd_params['beta']
lgd_grid = np.linspace(0, 1, 100)
ax.plot(
    lgd_grid,
    stats.beta.pdf(lgd_grid, fitted_alpha, fitted_beta),
    'r-',
    linewidth=2,
    label='Fitted Beta',
)

# Axis labels and title set in one call.
ax.set(
    xlabel='LGD',
    ylabel='Density',
    title='LGD Distribution: Observed vs Fitted',
)
ax.legend()
fig.tight_layout()
plt.show()

## 4. Bayesian Estimation

In [None]:
# Bayesian estimation of default rate
# Reuses the simulated per-period default counts and exposures from section 1.
bayes_estimator = BayesianEstimator()
bayes_results = bayes_estimator.fit_default_rate_bayesian(
    defaults, 
    exposure_per_period,
    prior_alpha=1.0,  # Weak prior; NOTE(review): presumably Beta(1, 1), i.e. uniform — confirm
    prior_beta=1.0
)

# `bayes_results` is read like a mapping. The keys below are printed here, and
# the plotting cell additionally reads 'samples'.
print("Bayesian Estimation Results:")
print(f"  Posterior mean: {bayes_results['mean']:.4f}")
print(f"  Posterior median: {bayes_results['median']:.4f}")
print(f"  95% Credible Interval: [{bayes_results['ci_lower']:.4f}, {bayes_results['ci_upper']:.4f}]")
print(f"  95% HPD Interval: [{bayes_results['hpd_lower']:.4f}, {bayes_results['hpd_upper']:.4f}]")

In [None]:
# Plot posterior distribution
fig, ax = plt.subplots(figsize=(10, 5))

# Posterior draws as a density-normalised histogram.
ax.hist(bayes_results['samples'], bins=50, density=True, alpha=0.7, color='steelblue')

# Reference lines: posterior mean, the two HPD bounds, and the default
# probability the data were simulated with.
ax.axvline(bayes_results['mean'], color='red', linestyle='--', label=f"Mean: {bayes_results['mean']:.4f}")

# Both HPD bounds share one style; only the lower bound carries the legend
# label so the legend shows a single HPD entry.
hpd_line_style = dict(color='green', linestyle=':')
ax.axvline(bayes_results['hpd_lower'], label='95% HPD', **hpd_line_style)
ax.axvline(bayes_results['hpd_upper'], **hpd_line_style)

ax.axvline(true_pd, color='black', linestyle='-', linewidth=2, label=f'True PD: {true_pd}')

# Axis labels and title set in one call.
ax.set(
    xlabel='Default Rate',
    ylabel='Posterior Density',
    title='Posterior Distribution of Default Rate',
)
ax.legend()
fig.tight_layout()
plt.show()

## Summary

- MLE for default rate and LGD distributions
- Transition matrix calibration to target rates
- Bayesian inference with posterior uncertainty
- Bootstrap confidence intervals

**Next:** Model comparison (Notebook 08)