# Loss Distribution Analysis

Interactive exploration of simulated manufacturing loss distributions, covering attritional, large, and catastrophic losses.

In [1]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path so
# the `src.*` imports below resolve. Path() is relative to the kernel's working
# directory, so this relies on the kernel being started in the notebook's folder.
notebook_dir = Path().absolute()
parent_dir = notebook_dir.parent
sys.path.insert(0, str(parent_dir))

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, HTML

from src.loss_distributions import (
    ManufacturingLossGenerator,
    AttritionalLossGenerator,
    LargeLossGenerator,
    CatastrophicLossGenerator,
    LossEvent
)
from src.visualization import (
    WSJ_COLORS,
    format_currency
)

# Make "plotly_white" the default template for every figure created after this
# point; the figures below also pass template='plotly_white' explicitly.
import plotly.io as pio
pio.templates.default = "plotly_white"

# Banner identifying the notebook in the cell output.
print("Loss Distribution Analysis Notebook")
print("="*50)

Loss Distribution Analysis Notebook


## 1. Interactive Loss Parameter Exploration

In [2]:
# Slider controls for the loss-model parameters. All of them share
# continuous_update=False, so the factory below sets it in one place.
def _param_slider(slider_cls, label, **bounds):
    """Build a slider of type `slider_cls` with the given label and value/min/max/step."""
    return slider_cls(description=label, continuous_update=False, **bounds)


# Attritional losses
attritional_freq = _param_slider(
    widgets.FloatSlider, 'Frequency:', value=5.0, min=1.0, max=20.0, step=0.5
)
attritional_severity = _param_slider(
    widgets.IntSlider, 'Severity:', value=50000, min=10000, max=200000, step=10000
)
attritional_cv = _param_slider(
    widgets.FloatSlider, 'CV:', value=0.8, min=0.2, max=2.0, step=0.1
)

# Large losses
large_freq = _param_slider(
    widgets.FloatSlider, 'Frequency:', value=0.5, min=0.1, max=2.0, step=0.1
)
large_severity = _param_slider(
    widgets.IntSlider, 'Severity:', value=2000000, min=500000, max=10000000, step=500000
)
large_cv = _param_slider(
    widgets.FloatSlider, 'CV:', value=1.2, min=0.5, max=3.0, step=0.1
)

# Catastrophic losses
cat_freq = _param_slider(
    widgets.FloatSlider, 'Frequency:', value=0.02, min=0.001, max=0.1, step=0.001
)
cat_xm = _param_slider(
    widgets.IntSlider, 'Min Loss:', value=10000000, min=5000000, max=50000000, step=5000000
)
cat_alpha = _param_slider(
    widgets.FloatSlider, 'Alpha:', value=2.5, min=1.5, max=4.0, step=0.1
)

# Number of simulated years per refresh
n_simulations = _param_slider(
    widgets.IntSlider, 'Simulations:', value=10000, min=1000, max=100000, step=1000
)

def update_loss_distribution(att_freq, att_sev, att_cv,
                           large_freq, large_sev, large_cv,
                           cat_freq, cat_xm, cat_alpha, n_sims):
    """Simulate losses for the given parameters and render a 2x2 dashboard.

    Builds a ManufacturingLossGenerator (fixed seed 42), draws `n_sims`
    one-year samples at a revenue of 10,000,000, then shows a histogram per
    loss type, the empirical CDF of individual losses, event counts per type
    and a summary-statistics table. A short text summary is printed below
    the figure.

    Args:
        att_freq, att_sev, att_cv: base frequency, severity mean and severity
            CV passed to the generator as `attritional_params`.
        large_freq, large_sev, large_cv: the same three values for
            `large_params`.
        cat_freq, cat_xm, cat_alpha: base frequency, `severity_xm` and
            `severity_alpha` passed as `catastrophic_params`.
        n_sims: number of one-year simulations to draw.

    Returns:
        None. If no loss event is generated, a hint is printed and nothing
        is plotted.

    Note:
        Several parameter names shadow the module-level slider widgets of the
        same name; inside this function they are plain numbers.
    """
    # One colour per loss type, looked up by name so every panel agrees.
    # (Assigning colours by position is wrong here: groupby sorts the types
    # alphabetically, which puts 'catastrophic' before 'large'.)
    type_colors = {
        'attritional': WSJ_COLORS['blue'],
        'large': WSJ_COLORS['orange'],
        'catastrophic': WSJ_COLORS['red'],
    }
    fallback_color = WSJ_COLORS['light_gray']

    def money(value):
        """Format a dollar amount; NaN (e.g. std of a single event) becomes 'n/a'."""
        return 'n/a' if pd.isna(value) else '${:,.0f}'.format(value)

    # Create generator with parameters
    generator = ManufacturingLossGenerator(
        attritional_params={
            'base_frequency': att_freq,
            'severity_mean': att_sev,
            'severity_cv': att_cv
        },
        large_params={
            'base_frequency': large_freq,
            'severity_mean': large_sev,
            'severity_cv': large_cv
        },
        catastrophic_params={
            'base_frequency': cat_freq,
            'severity_xm': cat_xm,
            'severity_alpha': cat_alpha
        },
        seed=42
    )

    # Generate losses: keep every event for the severity views and the
    # per-year sum for the annual-loss summary.
    all_losses = []
    loss_types = []
    annual_totals = []

    for _ in range(n_sims):
        events, _stats = generator.generate_losses(duration=1.0, revenue=10_000_000)
        annual_totals.append(sum(event.amount for event in events))
        for event in events:
            all_losses.append(event.amount)
            loss_types.append(event.loss_type)

    if not all_losses:
        print("No losses generated. Try adjusting parameters.")
        return

    # Create DataFrame
    df = pd.DataFrame({
        'amount': all_losses,
        'type': loss_types
    })

    # Create visualizations
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Loss Distribution by Type',
            'Empirical CDF',
            'Loss Frequency',
            'Summary Statistics'
        ),
        specs=[
            [{'type': 'histogram'}, {'type': 'scatter'}],
            [{'type': 'bar'}, {'type': 'table'}]
        ]
    )

    # Histogram by type
    for loss_type in df['type'].unique():
        type_data = df[df['type'] == loss_type]['amount']
        fig.add_trace(
            go.Histogram(
                x=type_data,
                name=loss_type,
                opacity=0.7,
                nbinsx=30,
                marker_color=type_colors.get(loss_type, fallback_color)
            ),
            row=1, col=1
        )

    # Empirical CDF of individual loss amounts
    sorted_losses = np.sort(all_losses)
    cdf = np.arange(1, len(sorted_losses) + 1) / len(sorted_losses)
    fig.add_trace(
        go.Scatter(
            x=sorted_losses,
            y=cdf,
            mode='lines',
            name='ECDF',
            line=dict(color=WSJ_COLORS['blue'])
        ),
        row=1, col=2
    )

    # Loss frequency by type; colours follow the type in each row, not its position
    freq_data = df.groupby('type').size().reset_index(name='count')
    fig.add_trace(
        go.Bar(
            x=freq_data['type'],
            y=freq_data['count'],
            marker_color=[type_colors.get(t, fallback_color) for t in freq_data['type']],
            showlegend=False  # the x-axis labels already identify each bar
        ),
        row=2, col=1
    )

    # Summary statistics table
    stats = df.groupby('type')['amount'].agg([
        'count', 'mean', 'std', 'min', 'max'
    ])

    fig.add_trace(
        go.Table(
            header=dict(
                values=['Type', 'Count', 'Mean', 'Std', 'Min', 'Max'],
                fill_color=WSJ_COLORS['light_gray'],
                align='left'
            ),
            cells=dict(
                values=[
                    stats.index,
                    stats['count'],
                    [money(x) for x in stats['mean']],
                    [money(x) for x in stats['std']],
                    [money(x) for x in stats['min']],
                    [money(x) for x in stats['max']]
                ],
                align='left'
            )
        ),
        row=2, col=2
    )

    # Update layout
    fig.update_layout(
        height=800,
        showlegend=True,
        title_text=f"Loss Distribution Analysis ({n_sims:,} simulations)",
        template='plotly_white'
    )

    fig.update_xaxes(title_text="Loss Amount ($)", row=1, col=1, tickformat='$,.0f')
    fig.update_xaxes(title_text="Loss Amount ($)", row=1, col=2, tickformat='$,.0f', type='log')
    fig.update_xaxes(title_text="Loss Type", row=2, col=1)

    fig.update_yaxes(title_text="Frequency", row=1, col=1)
    fig.update_yaxes(title_text="Cumulative Probability", row=1, col=2)
    fig.update_yaxes(title_text="Count", row=2, col=1)

    fig.show()

    # Print summary. The annual figure averages per-year totals (including
    # years without any loss); the remaining lines describe individual events.
    print(f"\nTotal losses generated: {len(all_losses):,}")
    print(f"Average annual loss: ${np.mean(annual_totals):,.0f}")
    print(f"Average loss per event: ${np.mean(all_losses):,.0f}")
    print(f"95th percentile (per event): ${np.percentile(all_losses, 95):,.0f}")
    print(f"99th percentile (per event): ${np.percentile(all_losses, 99):,.0f}")
    print(f"Maximum loss: ${np.max(all_losses):,.0f}")

# Create interactive interface
print("Adjust parameters to explore loss distributions:")
print()


def _titled_column(title_html, *sliders):
    """Stack a bold HTML heading above the sliders belonging to one loss type."""
    return widgets.VBox([widgets.HTML(title_html), *sliders])


attritional_box = _titled_column(
    "<b>Attritional Losses</b>", attritional_freq, attritional_severity, attritional_cv
)
large_box = _titled_column(
    "<b>Large Losses</b>", large_freq, large_severity, large_cv
)
cat_box = _titled_column(
    "<b>Catastrophic Losses</b>", cat_freq, cat_xm, cat_alpha
)

# Three parameter columns side by side, simulation count underneath.
params_box = widgets.HBox([attritional_box, large_box, cat_box])
controls = widgets.VBox([params_box, n_simulations])

# Keyword names must match update_loss_distribution's parameters.
output = widgets.interactive_output(
    update_loss_distribution,
    dict(
        att_freq=attritional_freq,
        att_sev=attritional_severity,
        att_cv=attritional_cv,
        large_freq=large_freq,
        large_sev=large_severity,
        large_cv=large_cv,
        cat_freq=cat_freq,
        cat_xm=cat_xm,
        cat_alpha=cat_alpha,
        n_sims=n_simulations,
    ),
)

display(controls, output)

Adjust parameters to explore loss distributions:



VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Attritional Losses</b>'), FloatSlider(value=5.0, c…

Output()

## 2. Temporal Loss Patterns

In [3]:
def simulate_temporal_losses(years=10, seed=42):
    """Draw `years` consecutive one-year loss samples and tabulate them.

    Args:
        years: number of one-year periods to simulate.
        seed: seed handed to ManufacturingLossGenerator.

    Returns:
        pandas.DataFrame with one row per year and the columns `year`
        (1-based), `total_loss` (the generator's reported `total_amount`),
        `num_events`, and the summed event amounts for `attritional`,
        `large` and `catastrophic` losses.
    """
    loss_model = ManufacturingLossGenerator(
        attritional_params=dict(
            base_frequency=5.0, severity_mean=50_000, severity_cv=0.8
        ),
        large_params=dict(
            base_frequency=0.5, severity_mean=2_000_000, severity_cv=1.2
        ),
        catastrophic_params=dict(
            base_frequency=0.02, severity_xm=10_000_000, severity_alpha=2.5
        ),
        seed=seed,
    )

    def amount_of(kind, year_events):
        # Sum of the event amounts carrying the given loss_type label.
        return sum(ev.amount for ev in year_events if ev.loss_type == kind)

    rows = []
    for year_number in range(1, years + 1):
        year_events, year_stats = loss_model.generate_losses(duration=1.0, revenue=10_000_000)
        row = {
            'year': year_number,
            'total_loss': year_stats['total_amount'],
            'num_events': len(year_events),
        }
        for kind in ('attritional', 'large', 'catastrophic'):
            row[kind] = amount_of(kind, year_events)
        rows.append(row)

    return pd.DataFrame(rows)

# Simulate temporal losses. The horizon is defined once so the figure title
# below cannot drift away from the number of years actually simulated.
TEMPORAL_YEARS = 20
temporal_df = simulate_temporal_losses(years=TEMPORAL_YEARS)

# Create temporal visualization: three stacked panels sharing the year axis
fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=(
        'Annual Total Losses',
        'Loss Composition by Type',
        'Cumulative Losses'
    ),
    row_heights=[0.35, 0.35, 0.3]
)

# Annual total losses
fig.add_trace(
    go.Bar(
        x=temporal_df['year'],
        y=temporal_df['total_loss'],
        name='Total Loss',
        marker_color=WSJ_COLORS['blue']
    ),
    row=1, col=1
)

# Add mean line
mean_loss = temporal_df['total_loss'].mean()
fig.add_hline(
    y=mean_loss,
    line_dash="dash",
    line_color=WSJ_COLORS['red'],
    annotation_text=f"Mean: ${mean_loss:,.0f}",
    row=1, col=1
)

# Stacked bar chart by type: (column, legend label, WSJ_COLORS key)
for _column, _label, _color_key in (
    ('attritional', 'Attritional', 'light_blue'),
    ('large', 'Large', 'orange'),
    ('catastrophic', 'Catastrophic', 'red'),
):
    fig.add_trace(
        go.Bar(
            x=temporal_df['year'],
            y=temporal_df[_column],
            name=_label,
            marker_color=WSJ_COLORS[_color_key]
        ),
        row=2, col=1
    )

# Cumulative losses
temporal_df['cumulative'] = temporal_df['total_loss'].cumsum()
fig.add_trace(
    go.Scatter(
        x=temporal_df['year'],
        y=temporal_df['cumulative'],
        mode='lines+markers',
        name='Cumulative',
        line=dict(color=WSJ_COLORS['blue'], width=2)
    ),
    row=3, col=1
)

# Update layout. barmode is figure-wide; it only changes the look of row 2,
# the only panel holding more than one bar trace.
fig.update_layout(
    height=900,
    showlegend=True,
    title_text=f"Temporal Loss Pattern Analysis ({TEMPORAL_YEARS} Years)",
    template='plotly_white',
    barmode='stack'
)

fig.update_xaxes(title_text="Year")
fig.update_yaxes(title_text="Loss Amount ($)", tickformat='$,.0f')

fig.show()

# Summary statistics
print("\nTemporal Loss Statistics:")
print("="*50)
print(f"Mean annual loss: ${temporal_df['total_loss'].mean():,.0f}")
print(f"Std deviation: ${temporal_df['total_loss'].std():,.0f}")
print(f"Coefficient of variation: {temporal_df['total_loss'].std()/temporal_df['total_loss'].mean():.2f}")
print(f"Min annual loss: ${temporal_df['total_loss'].min():,.0f}")
print(f"Max annual loss: ${temporal_df['total_loss'].max():,.0f}")
print(f"Years with catastrophic losses: {(temporal_df['catastrophic'] > 0).sum()}")


Temporal Loss Statistics:
Mean annual loss: $1,373,790
Std deviation: $2,453,443
Coefficient of variation: 1.79
Min annual loss: $29,633
Max annual loss: $8,834,661
Years with catastrophic losses: 0


## 3. Extreme Value Analysis

In [4]:
def extreme_value_analysis(n_simulations=10000):
    """Simulate annual losses and chart their extreme-value behaviour.

    Draws `n_simulations` one-year samples from a ManufacturingLossGenerator
    (seed 42, revenue 10,000,000), records the largest single loss and the
    reported total for each year, and shows a 2x2 figure: histogram of annual
    maxima, exceedance curve of annual totals, return levels for fixed return
    periods, and a log-log tail plot of the non-zero annual maxima. The
    return-level table is also printed.

    Args:
        n_simulations: number of simulated years.

    Returns:
        None.
    """
    loss_model = ManufacturingLossGenerator(
        attritional_params=dict(
            base_frequency=5.0, severity_mean=50_000, severity_cv=0.8
        ),
        large_params=dict(
            base_frequency=0.5, severity_mean=2_000_000, severity_cv=1.2
        ),
        catastrophic_params=dict(
            base_frequency=0.02, severity_xm=10_000_000, severity_alpha=2.5
        ),
        seed=42,
    )

    # One entry per simulated year; a year without events has a maximum of 0.
    annual_maxima, annual_totals = [], []
    for _ in range(n_simulations):
        year_events, year_stats = loss_model.generate_losses(duration=1.0, revenue=10_000_000)
        annual_maxima.append(max((ev.amount for ev in year_events), default=0))
        annual_totals.append(year_stats['total_amount'])

    # Ascending order for the empirical exceedance curves
    sorted_maxima = np.sort(annual_maxima)
    sorted_totals = np.sort(annual_totals)

    blue = WSJ_COLORS['blue']
    red = WSJ_COLORS['red']

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Distribution of Annual Maximum Losses',
            'Exceedance Probability',
            'Return Period Analysis',
            'Tail Distribution (Log-Log)'
        )
    )

    # Panel (1, 1): histogram of the annual maxima
    maxima_hist = go.Histogram(
        x=annual_maxima, nbinsx=50, name='Annual Max', marker_color=blue
    )
    fig.add_trace(maxima_hist, row=1, col=1)

    # Panel (1, 2): share of years whose total is at least the plotted value
    n_totals = len(sorted_totals)
    exceedance_prob = 1 - np.arange(n_totals) / n_totals
    totals_curve = go.Scatter(
        x=sorted_totals, y=exceedance_prob, mode='lines',
        name='Annual Total', line=dict(color=blue)
    )
    fig.add_trace(totals_curve, row=1, col=2)

    # Panel (2, 1): the T-year return level is the (1 - 1/T) quantile
    return_periods = [2, 5, 10, 20, 50, 100, 200, 500]
    return_levels_max = [
        np.percentile(annual_maxima, 100 * (1 - 1/rp)) for rp in return_periods
    ]
    return_levels_total = [
        np.percentile(annual_totals, 100 * (1 - 1/rp)) for rp in return_periods
    ]
    for series_name, levels, color in (
        ('Max Loss', return_levels_max, red),
        ('Total Loss', return_levels_total, blue),
    ):
        fig.add_trace(
            go.Scatter(
                x=return_periods, y=levels, mode='lines+markers',
                name=series_name, line=dict(color=color)
            ),
            row=2, col=1
        )

    # Panel (2, 2): log-log tail; zeros are dropped because they cannot be
    # placed on a log axis.
    positive_maxima = sorted_maxima[sorted_maxima > 0]
    if positive_maxima.size:
        n_positive = len(positive_maxima)
        tail_prob = 1 - np.arange(n_positive) / n_positive
        fig.add_trace(
            go.Scatter(
                x=positive_maxima, y=tail_prob, mode='markers',
                name='Empirical', marker=dict(size=3, color=blue)
            ),
            row=2, col=2
        )

    fig.update_layout(
        height=800,
        showlegend=True,
        title_text=f"Extreme Value Analysis ({n_simulations:,} simulations)",
        template='plotly_white'
    )

    # Axis settings keyed by (row, col)
    x_axis_settings = {
        (1, 1): dict(title_text="Loss Amount ($)", tickformat='$,.0f'),
        (1, 2): dict(title_text="Loss Amount ($)", tickformat='$,.0f', type='log'),
        (2, 1): dict(title_text="Return Period (years)", type='log'),
        (2, 2): dict(title_text="Loss Amount ($)", type='log', tickformat='$,.0f'),
    }
    y_axis_settings = {
        (1, 1): dict(title_text="Frequency"),
        (1, 2): dict(title_text="Exceedance Probability", type='log'),
        (2, 1): dict(title_text="Return Level ($)", tickformat='$,.0f'),
        (2, 2): dict(title_text="Exceedance Probability", type='log'),
    }
    for (row, col), settings in x_axis_settings.items():
        fig.update_xaxes(row=row, col=col, **settings)
    for (row, col), settings in y_axis_settings.items():
        fig.update_yaxes(row=row, col=col, **settings)

    fig.show()

    # Text version of the return-level panel
    print("\nReturn Period Analysis:")
    print("="*70)
    print(f"{'Return Period':<15} {'Max Loss':<20} {'Total Loss':<20}")
    print("-"*70)
    for rp, level_max, level_total in zip(return_periods, return_levels_max, return_levels_total):
        max_text = '${:,.0f}'.format(level_max)
        total_text = '${:,.0f}'.format(level_total)
        print(f"{rp:>10} year {max_text:<20} {total_text:<20}")

# Run the extreme value analysis on 10,000 simulated years; this shows the
# 2x2 figure and prints the return-period table.
extreme_value_analysis(n_simulations=10000)


Return Period Analysis:
Return Period   Max Loss             Total Loss          
----------------------------------------------------------------------
         2 year $141,029             $380,042            
         5 year $1,674,809           $2,114,531          
        10 year $3,222,014           $3,931,743          
        20 year $5,525,463           $6,553,537          
        50 year $11,216,444          $12,317,395         
       100 year $14,356,726          $16,106,695         
       200 year $18,719,736          $20,641,865         
       500 year $24,278,689          $26,010,232         


## 4. Loss Correlation Analysis

In [5]:
def correlation_analysis(n_years=100):
    """Simulate annual losses by type and examine how the types co-move.

    Draws `n_years` one-year samples from a ManufacturingLossGenerator
    (seed 42, revenue 10,000,000), aggregates event counts and amounts per
    loss type for each year, and shows a 2x2 figure: correlation heatmap,
    attritional-vs-large scatter, per-type time series and a histogram of
    annual totals. The correlation matrix and each type's share of the
    overall loss are printed afterwards.

    Args:
        n_years: number of simulated years.

    Returns:
        None.

    Note:
        If a loss type has the same annual total in every simulated year
        (e.g. no catastrophic loss at all), its correlations are NaN.
    """
    generator = ManufacturingLossGenerator(
        attritional_params={
            'base_frequency': 5.0,
            'severity_mean': 50_000,
            'severity_cv': 0.8
        },
        large_params={
            'base_frequency': 0.5,
            'severity_mean': 2_000_000,
            'severity_cv': 1.2
        },
        catastrophic_params={
            'base_frequency': 0.02,
            'severity_xm': 10_000_000,
            'severity_alpha': 2.5
        },
        seed=42
    )

    # (loss_type label, legend name, line colour) for each component
    components = (
        ('attritional', 'Attritional', WSJ_COLORS['light_blue']),
        ('large', 'Large', WSJ_COLORS['orange']),
        ('catastrophic', 'Catastrophic', WSJ_COLORS['red']),
    )

    # Simulate multiple years: one record per year
    columns = ['year']
    for kind, _, _ in components:
        columns += [f'{kind}_count', f'{kind}_total']
    columns.append('total_loss')

    records = []
    for year in range(n_years):
        events, stats = generator.generate_losses(duration=1.0, revenue=10_000_000)

        record = {'year': year + 1}
        for kind, _, _ in components:
            amounts = [e.amount for e in events if e.loss_type == kind]
            record[f'{kind}_count'] = len(amounts)
            record[f'{kind}_total'] = sum(amounts)
        record['total_loss'] = stats['total_amount']
        records.append(record)

    df = pd.DataFrame(records, columns=columns)

    # Calculate correlation matrix
    corr_cols = ['attritional_total', 'large_total', 'catastrophic_total', 'total_loss']
    corr_matrix = df[corr_cols].corr()

    # Create visualizations
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Correlation Heatmap',
            'Scatter: Attritional vs Large',
            'Loss Components Over Time',
            'Distribution of Total Losses'
        ),
        specs=[
            [{'type': 'heatmap'}, {'type': 'scatter'}],
            [{'type': 'scatter'}, {'type': 'histogram'}]
        ]
    )

    # Correlation heatmap. Every cell is annotated with its value, so the
    # colorbar is hidden; otherwise it is drawn on top of the scatter's
    # "Total Loss" colorbar at the right edge of the figure.
    axis_labels = ['Attritional', 'Large', 'Catastrophic', 'Total']
    fig.add_trace(
        go.Heatmap(
            z=corr_matrix.values,
            x=axis_labels,
            y=axis_labels,
            colorscale='RdBu',
            zmid=0,
            showscale=False,
            text=corr_matrix.values.round(2),
            texttemplate='%{text}',
            textfont={"size": 10}
        ),
        row=1, col=1
    )

    # Scatter plot: one marker per year, coloured by that year's total loss
    fig.add_trace(
        go.Scatter(
            x=df['attritional_total'],
            y=df['large_total'],
            mode='markers',
            marker=dict(
                size=8,
                color=df['total_loss'],
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="Total Loss")
            ),
            name='Years'
        ),
        row=1, col=2
    )

    # Time series of all three components (the catastrophic series was
    # previously missing from this panel)
    for kind, label, color in components:
        fig.add_trace(
            go.Scatter(
                x=df['year'],
                y=df[f'{kind}_total'],
                mode='lines',
                name=label,
                line=dict(color=color)
            ),
            row=2, col=1
        )

    # Distribution of total losses
    fig.add_trace(
        go.Histogram(
            x=df['total_loss'],
            nbinsx=30,
            name='Total Loss',
            marker_color=WSJ_COLORS['blue']
        ),
        row=2, col=2
    )

    # Update layout
    fig.update_layout(
        height=800,
        showlegend=True,
        title_text=f"Loss Correlation Analysis ({n_years} years)",
        template='plotly_white'
    )

    fig.update_xaxes(title_text="Attritional Loss ($)", row=1, col=2, tickformat='$,.0f')
    fig.update_xaxes(title_text="Year", row=2, col=1)
    fig.update_xaxes(title_text="Total Loss ($)", row=2, col=2, tickformat='$,.0f')

    fig.update_yaxes(title_text="Large Loss ($)", row=1, col=2, tickformat='$,.0f')
    fig.update_yaxes(title_text="Loss Amount ($)", row=2, col=1, tickformat='$,.0f')
    fig.update_yaxes(title_text="Frequency", row=2, col=2)

    fig.show()

    # Print correlation summary
    cat_years = (df['catastrophic_total'] > 0).sum()
    grand_total = df['total_loss'].sum()

    print("\nCorrelation Analysis Summary:")
    print("="*50)
    print("\nCorrelation Matrix:")
    print(corr_matrix.round(3))
    print("\nKey Insights:")
    print(f"- Attritional-Large correlation: {corr_matrix.loc['attritional_total', 'large_total']:.3f}")
    print(f"- Years with catastrophic losses: {cat_years} ({100*cat_years/n_years:.1f}%)")
    print("- Average contribution to total loss:")
    for kind, label, _ in components:
        print(f"  - {label}: {100*df[f'{kind}_total'].sum()/grand_total:.1f}%")

# Run the correlation analysis on 100 simulated years; this shows the 2x2
# figure and prints the correlation matrix and key insights.
correlation_analysis(n_years=100)


Correlation Analysis Summary:

Correlation Matrix:
                    attritional_total  large_total  catastrophic_total  \
attritional_total               1.000        0.033               0.079   
large_total                     0.033        1.000              -0.029   
catastrophic_total              0.079       -0.029               1.000   
total_loss                      0.125        0.662               0.729   

                    total_loss  
attritional_total        0.125  
large_total              0.662  
catastrophic_total       0.729  
total_loss               1.000  

Key Insights:
- Attritional-Large correlation: 0.033
- Years with catastrophic losses: 2 (2.0%)
- Average contribution to total loss:
  - Attritional: 13.2%
  - Large: 68.6%
  - Catastrophic: 18.2%


## Summary

This notebook provides comprehensive tools for analyzing manufacturing loss distributions:

1. **Interactive Parameter Exploration**: Slider-driven visualization of how parameters affect loss distributions (the figure refreshes each time a slider is released)
2. **Temporal Analysis**: Understanding loss patterns over time
3. **Extreme Value Analysis**: Return period calculations and tail behavior
4. **Correlation Analysis**: Relationships between different loss types

Key findings:
- Loss distributions are heavily right-skewed with catastrophic events driving tail risk
- Different loss types show low correlation (e.g., 0.033 between attritional and large annual totals in the 100-year run above), suggesting diversification benefits
- Return period analysis is crucial for insurance limit selection
- Temporal patterns help identify clustering and volatility