# Extended Distribution Support Examples

This notebook demonstrates the extended distribution support in quActuary, including:
- Compound Binomial distributions
- Mixed Poisson processes
- Zero-inflated models
- Edgeworth expansions


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Import quActuary distributions
from quactuary.distributions.frequency import Poisson, Binomial, NegativeBinomial
from quactuary.distributions.severity import Exponential, Gamma, LogNormal
from quactuary.distributions.compound_binomial import (
    BinomialExponentialCompound, BinomialGammaCompound, BinomialLognormalCompound
)
from quactuary.distributions.mixed_poisson import (
    PoissonGammaMixture, PoissonInverseGaussianMixture, HierarchicalPoissonMixture
)
from quactuary.distributions.zero_inflated import ZeroInflatedCompound, detect_zero_inflation
from quactuary.distributions.edgeworth import EdgeworthExpansion
from quactuary.distributions.compound_extensions import create_extended_compound_distribution

## 1. Compound Binomial Distributions

Useful when the number of claims has an upper bound (e.g., a fixed number of policies, each of which produces at most one claim).

In [None]:
# Example portfolio: 100 policies, each with a 30% chance of producing a
# claim, and an average claim size of $5,000.
n_policies, claim_prob, avg_claim = 100, 0.3, 5000

# Binomial frequency and Exponential severity, combined into a compound model
freq = Binomial(n=n_policies, p=claim_prob)
sev = Exponential(scale=avg_claim)
compound = BinomialExponentialCompound(freq, sev)

# First two moments of the compound distribution
print(f"Expected total claims: ${compound.mean():,.2f}")
print(f"Standard deviation: ${compound.std():,.2f}")

# Upper quantiles of the compound distribution, reported as Value-at-Risk
for var_level in (0.95, 0.99):
    print(f"{var_level:.0%} VaR: ${compound.ppf(var_level):,.2f}")

In [None]:
# Compare the compound density under three severity families.
#
# All three severities are parameterised to share the same mean claim size
# (avg_claim), so the panels differ only through the severity's shape:
#   * Exponential: mean = scale
#   * Gamma:       mean = a * scale
#   * Lognormal:   mean = scale * exp(s**2 / 2)
# NOTE(review): the lognormal formula assumes LogNormal follows the
# scipy.stats.lognorm (s, scale) parameterisation, in which `scale` is the
# median rather than the mean - confirm against the quactuary class.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
x = np.linspace(0, 300000, 1000)

# Exponential severity
compound_exp = BinomialExponentialCompound(freq, Exponential(scale=avg_claim))

# Gamma severity (shape 2, so scale is halved to keep the mean at avg_claim)
compound_gamma = BinomialGammaCompound(freq, Gamma(a=2, scale=avg_claim/2))

# Lognormal severity: scale=avg_claim would fix the *median* at avg_claim and
# inflate the mean by exp(s**2 / 2); rescale so the mean equals avg_claim.
lognorm_sigma = 1
lognorm_scale = avg_claim * np.exp(-0.5 * lognorm_sigma**2)
compound_lognorm = BinomialLognormalCompound(
    freq, LogNormal(s=lognorm_sigma, scale=lognorm_scale)
)

# One panel per model: (compound model, matplotlib line style, panel title)
panels = [
    (compound_exp, 'b-', 'Binomial-Exponential'),
    (compound_gamma, 'g-', 'Binomial-Gamma'),
    (compound_lognorm, 'r-', 'Binomial-Lognormal'),
]
for ax, (model, line_style, title) in zip(axes, panels):
    ax.plot(x, model.pdf(x), line_style, lw=2)
    ax.set_title(title)
    ax.set_xlabel('Total Loss')

# The density axis is labelled on the left-most panel only
axes[0].set_ylabel('Density')

plt.tight_layout()
plt.show()

## 2. Mixed Poisson Processes

Capture heterogeneity in risk exposure across the portfolio.

In [None]:
# Heterogeneous portfolio: the Poisson rate differs between risks,
# λ ~ Gamma(α=3, β=0.5)
mix_alpha, mix_beta = 3, 0.5
mixed_poisson = PoissonGammaMixture(alpha=mix_alpha, beta=mix_beta)

# Benchmark: an ordinary Poisson whose mean equals the mixture's mean
mixed_mean = mixed_poisson.mean()
standard_poisson = Poisson(mu=mixed_mean)

print("Comparison of Poisson vs Mixed Poisson:")
print(f"Mean (both): {mixed_mean:.2f}")
# NOTE(review): this reads the private `_dist` attribute of Poisson; switch to
# a public variance accessor if the class provides one.
print(f"Variance - Poisson: {standard_poisson._dist.var():.2f}")
mixed_var = mixed_poisson.var()
print(f"Variance - Mixed: {mixed_var:.2f}")
# Variance-to-mean ratio of the mixture
print(f"Overdispersion factor: {mixed_var / mixed_mean:.2f}")

In [None]:
# Side-by-side PMFs of the two frequency models over claim counts 0..24
k_values = np.arange(0, 25)
pmf_standard = standard_poisson.pmf(k_values)
pmf_mixed = mixed_poisson.pmf(k_values)

# Each pair of bars is centred on the integer count: one bar is shifted half a
# bar-width to the left, the other half a bar-width to the right.
bar_width = 0.4
half_width = bar_width / 2

fig_pmf, ax_pmf = plt.subplots(figsize=(10, 6))
ax_pmf.bar(k_values - half_width, pmf_standard, width=bar_width,
           label='Standard Poisson', alpha=0.7)
ax_pmf.bar(k_values + half_width, pmf_mixed, width=bar_width,
           label='Mixed Poisson (NB)', alpha=0.7)
ax_pmf.set_xlabel('Number of Claims')
ax_pmf.set_ylabel('Probability')
ax_pmf.set_title('Standard vs Mixed Poisson: Capturing Heterogeneity')
ax_pmf.legend()
ax_pmf.grid(True, alpha=0.3)
plt.show()

In [None]:
# Hierarchical model for a portfolio made up of several business lines.
# The number of lines is defined once and reused both when building the model
# and when reporting, so the per-line loop cannot drift out of sync with the
# model's n_groups.
n_business_lines = 5

hierarchical = HierarchicalPoissonMixture(
    portfolio_alpha=2.0,
    portfolio_beta=0.3,
    group_alpha=4.0,
    n_groups=n_business_lines,
)

# Simulate the portfolio; the fixed random_state keeps the run reproducible
sim_results = hierarchical.simulate_portfolio(size=1000, random_state=42)

print("Portfolio Simulation Results:")
print(f"Average total claims: {np.mean(sim_results['total']):.2f}")
print(f"Std dev of total claims: {np.std(sim_results['total']):.2f}")
print("\nAverage claims by business line:")
# sim_results['by_group'] is indexed as [simulation, group]; report the mean
# of each group's column.
for line_idx in range(n_business_lines):
    print(f"  Line {line_idx+1}: {np.mean(sim_results['by_group'][:, line_idx]):.2f}")

## 3. Zero-Inflated Models

Handle datasets with excess zeros (e.g., many policies with no claims).

In [None]:
# Generate synthetic aggregate-loss data with zero-inflation.
#
# Each observation is either a structural zero (probability true_zero_prob)
# or a compound loss: a Poisson number of Gamma-distributed claims.
# NOTE(review): presumably the quactuary rvs() calls draw from NumPy's global
# RNG seeded here - confirm; otherwise pass an explicit random_state to them.
np.random.seed(42)
n_obs = 1000
true_zero_prob = 0.3  # 30% structural zeros
base_claim_rate = 3.0  # Poisson mean of the base frequency model

# Base distributions
freq_base = Poisson(mu=base_claim_rate)
sev_base = Gamma(a=2, scale=1000)

# Decide up front which observations are structural zeros
is_structural_zero = np.random.rand(n_obs) < true_zero_prob
data = np.zeros(n_obs)

for i in range(n_obs):
    if is_structural_zero[i]:
        continue  # structural zero: the loss stays at 0
    n_claims = freq_base.rvs()
    if n_claims > 0:
        data[i] = np.sum(sev_base.rvs(size=n_claims))

print(f"Proportion of zeros in data: {np.mean(data == 0):.3f}")
# Without zero-inflation a loss is zero only when the Poisson count is zero,
# i.e. with probability exp(-mean); derive it from the same rate as freq_base
# instead of repeating the literal.
print(f"Expected without zero-inflation: {np.exp(-base_claim_rate):.3f}")

In [None]:
# Test the synthetic losses for zero-inflation relative to the base model
is_zi, diagnostics = detect_zero_inflation(data, freq_base, sev_base)

print("Zero-Inflation Detection Results:")
print(f"Zero-inflated: {is_zi}")

# (printed label, key in `diagnostics`, float format) for each reported figure
diagnostic_report = (
    ("Observed zero proportion", 'observed_zero_proportion', ".3f"),
    ("Expected zero proportion", 'expected_zero_proportion', ".3f"),
    ("Excess zeros", 'excess_zeros', ".3f"),
    ("Score statistic", 'score_statistic', ".2f"),
    ("P-value", 'p_value', ".4f"),
)
for label, key, spec in diagnostic_report:
    print(f"{label}: {diagnostics[key]:{spec}}")

In [None]:
# Fit the zero-inflated compound model to the synthetic losses with EM,
# capping the number of iterations.
max_em_iterations = 50
zi_model = ZeroInflatedCompound(freq_base, sev_base)
fit_result = zi_model.fit_em(data, max_iter=max_em_iterations)

# Report the estimate next to the value used to generate the data
print("\nEM Algorithm Results:")
print(f"Estimated zero-inflation probability: {fit_result['zero_prob']:.3f}")
print(f"True zero-inflation probability: {true_zero_prob:.3f}")
print(f"Converged: {fit_result['converged']}")
print(f"Iterations: {fit_result['iterations']}")

## 4. Edgeworth Expansion

Approximate a distribution by applying moment-based (skewness and kurtosis) corrections to the normal distribution.

In [None]:
# Target moments for the example: moderate skewness and excess kurtosis
mean, variance = 10000, 2500000  # std = 1581
skewness, excess_kurtosis = 0.8, 0.5

# Build the expansion from the four moments
edgeworth = EdgeworthExpansion(
    mean=mean, variance=variance,
    skewness=skewness, excess_kurtosis=excess_kurtosis,
)

# Run the library's validation of the order-4 expansion and list each entry
validation = edgeworth.validate_expansion(order=4)
print("Edgeworth Expansion Validation:")
for check_name, check_value in validation.items():
    print(f"  {check_name}: {check_value}")

In [None]:
# Density comparison on a grid covering mean ± 4 standard deviations
std_dev = np.sqrt(variance)
x = np.linspace(mean - 4*std_dev, mean + 4*std_dev, 1000)

# Reference: normal density with the same mean and variance
normal_pdf = stats.norm.pdf(x, loc=mean, scale=std_dev)

# Edgeworth densities at increasing order. Order 2 is evaluated for
# completeness but not plotted (it is described as the plain normal case);
# order 3 adds the skewness term and order 4 the kurtosis term.
edge_pdf_2 = edgeworth.pdf(x, order=2)
edge_pdf_3 = edgeworth.pdf(x, order=3)
edge_pdf_4 = edgeworth.pdf(x, order=4)

plt.figure(figsize=(12, 6))
# (density values, matplotlib line style, legend label) per curve
for density, line_style, curve_label in (
    (normal_pdf, 'b--', 'Normal'),
    (edge_pdf_3, 'g-', 'Edgeworth (order 3)'),
    (edge_pdf_4, 'r-', 'Edgeworth (order 4)'),
):
    plt.plot(x, density, line_style, label=curve_label, lw=2)
plt.xlabel('Value')
plt.ylabel('Density')
plt.title('Edgeworth Expansion vs Normal Approximation')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# Tabulate quantiles of the normal against the two Edgeworth ppf methods
quantiles = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]

normal_quantiles = stats.norm.ppf(quantiles, loc=mean, scale=np.sqrt(variance))
edge_quantiles_cf = edgeworth.ppf(quantiles, order=4, method='cornish-fisher')
edge_quantiles_num = edgeworth.ppf(quantiles, order=4, method='numerical')

print("Quantile Comparison:")
print("q\tNormal\tEdge(CF)\tEdge(Num)")
# Walk the four sequences in lock-step, one table row per probability level
table_rows = zip(quantiles, normal_quantiles, edge_quantiles_cf, edge_quantiles_num)
for q, q_normal, q_cf, q_num in table_rows:
    print(f"{q:.2f}\t{q_normal:.0f}\t{q_cf:.0f}\t\t{q_num:.0f}")

## 5. Integrated Example: Full Analysis Pipeline

Combine several of the features above (here, a zero-inflated compound Poisson–Gamma model) in an end-to-end actuarial analysis.

In [None]:
# Scenario: auto insurance portfolio analysis
# - Poisson claim counts with Gamma claim sizes
# - A share of customers never claims (zero-inflation)
# - The Edgeworth approximation is switched off for this run

# Step 1: Build the extended compound distribution through the factory
compound_model = create_extended_compound_distribution(
    frequency='poisson',
    severity='gamma',
    zero_inflated=True,
    zero_prob=0.15,
    use_edgeworth=False,
    mu=2.5,      # Poisson mean: 2.5 claims on average
    a=2.0,       # Gamma shape
    scale=3000,  # Gamma scale; shape * scale = $6000 average claim
)

# Step 2: Draw one simulated loss per policy (seeded for reproducibility)
np.random.seed(42)
portfolio_size = 5000
simulated_losses = compound_model.rvs(size=portfolio_size)

# Step 3: Summarise the simulated losses
no_claim_mask = simulated_losses == 0  # True where a policy had no loss

print("Portfolio Analysis Results:")
print(f"Number of policies: {portfolio_size}")
print(f"Policies with no claims: {np.sum(no_claim_mask)} ({100*np.mean(no_claim_mask):.1f}%)")
print(f"Average loss per policy: ${np.mean(simulated_losses):,.2f}")
print(f"Total portfolio loss: ${np.sum(simulated_losses):,.2f}")
print("\nRisk Metrics:")
# Empirical Value-at-Risk: percentiles of the simulated per-policy losses
for var_pct in (95, 99, 99.5):
    print(f"{var_pct}% VaR: ${np.percentile(simulated_losses, var_pct):,.2f}")

In [None]:
# Step 4: Graphical diagnostics of the simulated non-zero losses
# The zero losses are filtered out once and the result reused by both panels.
nonzero_losses = simulated_losses[simulated_losses > 0]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: histogram of the non-zero losses, truncated at 50,000 on the x-axis
ax1.hist(nonzero_losses, bins=50, density=True,
         alpha=0.7, label='Simulated Data')
ax1.set_xlabel('Loss Amount')
ax1.set_ylabel('Density')
ax1.set_title('Distribution of Non-Zero Losses')
ax1.set_xlim(0, 50000)

# Right panel: normal Q-Q plot for tail behaviour.
# Uses the `stats` module imported in the notebook's import cell rather than a
# second, mid-notebook import of probplot.
stats.probplot(nonzero_losses, dist="norm", plot=ax2)
ax2.set_title('Q-Q Plot: Checking Normality in Tails')

plt.tight_layout()
plt.show()

# Step 5: Premium calculation
# Pure premium = mean simulated loss per policy (zeros included);
# loaded premium = pure premium increased by the safety loading.
safety_loading = 0.2  # 20% safety margin
pure_premium = np.mean(simulated_losses)
loaded_premium = pure_premium * (1 + safety_loading)

print("\nPremium Calculation:")
print(f"Pure premium: ${pure_premium:,.2f}")
print(f"Safety loading: {safety_loading*100:.0f}%")
print(f"Loaded premium: ${loaded_premium:,.2f}")

## Summary

This notebook demonstrated:

1. **Compound Binomial Distributions**: For bounded claim counts
2. **Mixed Poisson Processes**: For heterogeneous portfolios
3. **Zero-Inflated Models**: For excess zeros in data
4. **Edgeworth Expansion**: For accurate tail approximations
5. **Integrated Analysis**: Combining features for real-world applications

These tools enable more accurate modeling of complex actuarial scenarios.