# Process Tail Simulation Results

In [1]:
import os, pickle

results_dir = r"D:\Ergodicity Simulations\2025-10-09 Run - Loss Tail and Limits - $25M and $50M"

# sample_file = "Cap (25M) - Ded (100K) - LR (0.6) - Pol_Lim (25M) - X_Th_%le (0.0005) - X_Shape (1.0) - X_Scale (1.0) - 250K Sims - 25 Yrs.pkl"

# file_path = os.path.join(results_dir, sample_file)
# with open(file_path, "rb") as f:
#     loaded_data = pickle.load(f)

In [2]:
# loaded_data.ruin_probability

In [3]:
# loaded_data.growth_rates.mean()

In [4]:
import pickle
import re

from pathlib import Path
from PIL import Image

from ergodic_insurance.monte_carlo import SimulationResults
import numpy as np
from time import perf_counter
from tqdm.auto import tqdm

def _parse_number(text):
    text = text.strip().replace(",", "")
    m = re.fullmatch(r'([+-]?\d+(?:\.\d+)?)([KMB])?$', text, re.I)
    if not m:
        # fallback: plain int/float or leave as-is
        try:
            return int(text)
        except ValueError:
            try:
                return float(text)
            except ValueError:
                return text
    num = float(m.group(1))
    mult = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}.get((m.group(2) or "").upper(), 1)
    val = num * mult
    return int(val) if val.is_integer() else val


def parse_config_key(key: str) -> dict:
    parts = re.split(r"\s*-\s*", key.strip())
    out = {}
    for part in parts:
        if not part:
            continue

        # e.g. "Cap (100M)"
        m = re.match(r"^([A-Za-z_%]+)\s*\(\s*([^)]+)\s*\)$", part)
        if m:
            out[m.group(1)] = _parse_number(m.group(2))
            continue

        # e.g. "0K Sims" or "50 Yrs"
        m = re.match(r"^([+-]?\d+(?:\.\d+)?)\s*([KMB])?\s*([A-Za-z_]+)$", part)
        if m:
            value = _parse_number((m.group(1) or "") + (m.group(2) or ""))
            out[m.group(3)] = value
            continue

        # flags like "NOINS"
        if part.upper() == "NOINS":
            out["NOINS"] = True

    return out

results_dir = Path(results_dir)

sample_files = "*.pkl"

pkl_paths = sorted(results_dir.glob(sample_files))

qs = np.arange(0.01, 1.00, 0.01) # Growth Rate Quantiles

all_configurations = {}
if not pkl_paths:
    print(f"No pickle files found in {results_dir}.")
else:
    try:
        iterator = tqdm(pkl_paths, desc="Processing pickle files", unit="file")
    except Exception:
        iterator = pkl_paths  # fallback without progress bar

    start_time = perf_counter()
    for idx, path in enumerate(iterator, 1):
        if idx > 1:
            elapsed = perf_counter() - start_time
            avg = elapsed / (idx - 1)
            remaining = avg * (len(pkl_paths) - (idx - 1))
            if hasattr(iterator, "set_postfix"):
                iterator.set_postfix(avg_s=f"{avg:.2f}", eta_s=f"{remaining:.1f}")
        try:
            with open(path, "rb") as f:
                one_config = pickle.load(f)
                growth_rate = one_config.growth_rates.mean()
                growth_rate_ci = {str(q): val for q, val in zip(qs, np.quantile(one_config.growth_rates, qs))}
                ror = one_config.ruin_probability
                all_configurations[path.stem] = {
                    "growth_rate": growth_rate,
                    "growth_rate_ci": growth_rate_ci,
                    "risk_of_ruin": ror,
                    "annual_losses": one_config.annual_losses
                }
        except Exception as e:
            print(f"Skipping {path.name}: {e}")
    print(f"Loaded {len(all_configurations)} pickle files into all_configurations.")

parsed_params_by_key = {k: parse_config_key(k) | all_configurations[k] for k in all_configurations.keys()}
# parsed_params_by_key

Processing pickle files:   0%|          | 0/234 [00:00<?, ?file/s]

Skipping Cap (50M) - Ded (100K) - LR (0.6) - Pol_Lim (25M) - X_Th_%le (0.0001) - X_Shape (1.5) - X_Scale (1.0) - 250K Sims - 25 Yrs.pkl: invalid load key, '\xc3'.
Loaded 233 pickle files into all_configurations.


In [5]:
outfile = r"cache\parsed_params_by_key.pkl"
out_path = Path(outfile)
if out_path.parent and not out_path.parent.exists():
    out_path.parent.mkdir(parents=True, exist_ok=True)

with open(out_path, "wb") as f:
    pickle.dump(parsed_params_by_key, f, protocol=pickle.HIGHEST_PROTOCOL)
print(f"Wrote {len(parsed_params_by_key)} configurations to {outfile}")

Wrote 233 configurations to cache\parsed_params_by_key.pkl


In [6]:
# Replace the first failing comprehension with this safe version:
all_lims = sorted({sc['Pol_Lim'] for sc in parsed_params_by_key.values() if 'Pol_Lim' in sc})
all_lims

[25000000,
 50000000,
 75000000,
 100000000,
 150000000,
 250000000,
 350000000,
 500000000]

In [7]:
# Replace the first failing comprehension with this safe version:
all_keys = set().union(*(sc.keys() for sc in parsed_params_by_key.values()))
all_keys

{'Cap',
 'Ded',
 'LR',
 'NOINS',
 'Pol_Lim',
 'Sims',
 'X_Scale',
 'X_Shape',
 'X_Th_%le',
 'Yrs',
 'annual_losses',
 'growth_rate',
 'growth_rate_ci',
 'risk_of_ruin'}

In [8]:
for key in all_keys:
    all_key_vals = sorted({str(sc[key]) for sc in parsed_params_by_key.values() if key in sc and type(sc[key]) not in (list, dict, set)})
    if all_key_vals != []:
        print(f"{key}: {all_key_vals}")
    all_key_vals = {type(sc[key]) for sc in parsed_params_by_key.values() if key in sc and type(sc[key]) in (list, dict, set)}
    if all_key_vals != set():
        print(f"{key}: {all_key_vals}")

Cap: ['25000000', '50000000']
X_Scale: ['1']
risk_of_ruin: {<class 'dict'>}
growth_rate_ci: {<class 'dict'>}
Yrs: ['25']
LR: ['0.6']
Pol_Lim: ['100000000', '150000000', '25000000', '250000000', '350000000', '50000000', '500000000', '75000000']
NOINS: ['True']
X_Shape: ['0', '1', '1.5', '2', '2.5']
X_Th_%le: ['0.0001', '0.0005', '0.001', 'None']
annual_losses: ['[[1.0031791e+06 8.8366062e+05 6.7400362e+05 ... 5.8808838e+05\n  1.0263980e+07 1.2623881e+06]\n [7.0162038e+05 3.1872597e+05 8.0270519e+05 ... 5.9115831e+05\n  1.2280220e+07 1.0661625e+06]\n [1.8022064e+06 1.2357135e+06 3.0691770e+06 ... 1.0919821e+06\n  7.6176185e+06 7.0859512e+05]\n ...\n [7.2880394e+05 1.2240475e+06 7.7005469e+05 ... 3.8227785e+06\n  6.6730394e+05 1.4933042e+06]\n [3.9338581e+05 5.6093231e+05 1.0950296e+06 ... 1.3794489e+06\n  3.5838803e+05 1.1555466e+06]\n [6.9610838e+05 5.6770912e+05 5.5505481e+05 ... 1.0728046e+07\n  2.0006325e+06 9.2471369e+05]]', '[[1.0049663e+06 1.8432369e+06 6.7689938e+05 ... 1.1703766

I have the following set of data as a list of dictionaries for 1029 configurations that I'd like to explore visually in Python:

```
'risk_of_ruin': {<class 'dict'>} # Risk of Ruin snapshots at 5-year intervales (5, 10, 15, 20, 25)
'Ded': ['100000'] # Deductible
'X_Scale': ['0.5', '1', '1.5', '2'] # Scale parameter for the Generalized Pareto Distribution (GPD) tail
'X_Th_%le': ['0.0001', '0.0005', '0.001', 'None'] # Threshold percentile for the Generalized Pareto Distribution (GPD) tail
'growth_rate': <class 'float'> # Mean Growth Rate at the end of the simulation
'growth_rate_ci': {<class 'dict'>} # Growth Rate quantiles from 0.01 to 0.99 in increments of 0.01 at the end of the simulation
'Pol_Lim': ['100000000', '200000000', '50000000', '500000000']
'X_Shape': ['0', '1', '1.5', '2', '2.5'] # Shape parameter for the Generalized Pareto Distribution (GPD) tail
'LR': ['0.3', '0.4', '0.5', '0.6', '0.7'] # Loss Ratio (Claims / Premiums), lower implies a higher premium charge for the same expected losses
'Yrs': ['25'] # Simulations were run for 25 years only
'Sims': ['100000'] # Each configuration was run for 100,000 simulations
# There are also configurations without insurance for which there is no 'Pol_Lim', no 'Ded', and no 'LR' parameters. These scenarios are marked with `'NOINS': True`
```

For the first set of plots, I'd like to create an Efficiency Frontier plot with the following parameters:
x-axix: 'risk_of_ruin'
y-axis: 'growth_rate'
Display this curve for each tail configuration in the following setup:
row: 'X_Th_%le'
column: 'X_Shape'
Plot different graphs for each collection of 'X_Scale', which are transformations on the loss distribution with '1' representing a close match, '0.5' representing 0.5 loss density compared to default tail, '2' representing twice the density of the default tail, and so on.
Color represents percentiles, using the pallette "cividis"
Encode different limits as shapes for the median/mean/quantile, so for example, a triangle in different colors represents the same limit (50M)
Shapes are as follows:
'NOINS': plus sign, "P" shape code
'50000000': triangle, "v" shape code
'100000000': square, "s" shape code
'200000000': pentagon, "p" shape code
'500000000': hexagon, "H" shape code

There should be 4 plots (one for each 'X_Scale') with DPI set to 300px that get saved to individual PNG files.


In [9]:
ins_only_vals = [val for val in parsed_params_by_key.values() if val.get('NOINS', False) is False]
len(ins_only_vals)

207

In [10]:
[c for c in ins_only_vals if c['X_Shape'] == 0]


[{'Cap': 25000000,
  'Ded': 100000,
  'LR': 0.6,
  'Pol_Lim': 100000000,
  'X_Th_%le': 'None',
  'X_Shape': 0,
  'X_Scale': 1,
  'Sims': 250000,
  'Yrs': 25,
  'growth_rate': np.float64(0.013463694663405535),
  'growth_rate_ci': {'0.01': np.float64(0.012784306323155761),
   '0.02': np.float64(0.01288519162684679),
   '0.03': np.float64(0.012944726869463921),
   '0.04': np.float64(0.012989945635199547),
   '0.05': np.float64(0.013024454796686768),
   '0.060000000000000005': np.float64(0.013054451681673527),
   '0.06999999999999999': np.float64(0.013080934751778841),
   '0.08': np.float64(0.013103332631289958),
   '0.09': np.float64(0.013125470085069537),
   '0.09999999999999999': np.float64(0.01314465729519725),
   '0.11': np.float64(0.013163506053388119),
   '0.12': np.float64(0.013180948793888092),
   '0.13': np.float64(0.013196987304836512),
   '0.14': np.float64(0.013212349973618984),
   '0.15000000000000002': np.float64(0.013226890936493874),
   '0.16': np.float64(0.013240939080715

In [11]:
x_shapes = sorted(set(c['X_Shape'] for c in ins_only_vals))
x_shapes

[0, 1, 1.5, 2, 2.5]

In [12]:
[c for c in ins_only_vals if c['X_Shape'] == 0]

[{'Cap': 25000000,
  'Ded': 100000,
  'LR': 0.6,
  'Pol_Lim': 100000000,
  'X_Th_%le': 'None',
  'X_Shape': 0,
  'X_Scale': 1,
  'Sims': 250000,
  'Yrs': 25,
  'growth_rate': np.float64(0.013463694663405535),
  'growth_rate_ci': {'0.01': np.float64(0.012784306323155761),
   '0.02': np.float64(0.01288519162684679),
   '0.03': np.float64(0.012944726869463921),
   '0.04': np.float64(0.012989945635199547),
   '0.05': np.float64(0.013024454796686768),
   '0.060000000000000005': np.float64(0.013054451681673527),
   '0.06999999999999999': np.float64(0.013080934751778841),
   '0.08': np.float64(0.013103332631289958),
   '0.09': np.float64(0.013125470085069537),
   '0.09999999999999999': np.float64(0.01314465729519725),
   '0.11': np.float64(0.013163506053388119),
   '0.12': np.float64(0.013180948793888092),
   '0.13': np.float64(0.013196987304836512),
   '0.14': np.float64(0.013212349973618984),
   '0.15000000000000002': np.float64(0.013226890936493874),
   '0.16': np.float64(0.013240939080715

In [13]:
x_thresholds = sorted({(c['X_Th_%le'] if isinstance(c['X_Th_%le'], (float, np.floating)) else 0) for c in ins_only_vals})
x_thresholds

[0, 0.0001, 0.0005, 0.001]

In [14]:
lr = sorted(set(c['LR'] for c in ins_only_vals))
lr

[0.6]

## Plot Facets of Growth vs Limit

In [15]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from matplotlib.lines import Line2D

# Assuming your data is stored in a list called 'configurations'
# configurations = [{'risk_of_ruin': {...}, 'growth_rate': ..., 'X_Scale': ..., ...}, ...]


def create_efficiency_frontier_plots(configurations):
    """
    Create efficiency frontier plots for actuarial simulation results.

    Parameters:
    -----------
    configurations : list of dict
        List of configuration dictionaries with simulation results

    Output:
    -------
    Saves PNG files for each X_Scale with efficiency frontier plots.
    """

    # Define shape mappings
    shape_map = {
        # 'NOINS': 'P',      # Plus sign
        50000000: 'v',   # Triangle
        100000000: 's',  # Square
        200000000: 'p',  # Pentagon
        500000000: 'H'   # Hexagon
    }

    # Get unique values for separate charts
    x_scales = sorted(set(c['X_Scale'] for c in configurations))
    lr = sorted(set(c['LR'] for c in configurations))

    # Define percentiles for overlay
    highlight_percentiles = [0.50, 0.75, 0.90, 0.95, 0.99]
    legend_percentiles = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]

    # Create a plot for each X_Scale
    for scale in x_scales:
        for loss_ratio in lr:
            config_subset = [c for c in configurations if c['X_Scale'] == scale and c['LR'] == loss_ratio]
            # Get unique values for faceting
            x_shapes = sorted(set(float(c['X_Shape']) for c in config_subset))
            x_shapes = [s for s in x_shapes if s != 0] # Filter out 0
            x_thresholds = sorted({(c['X_Th_%le'] if isinstance(c['X_Th_%le'], (float, np.floating)) else 0.0) for c in config_subset})
            x_thresholds = [s for s in x_thresholds if s != 0] # Filter out 0

            # all_gr_min, all_gr_max = [], []
            # for val in ins_only_vals:
            #     val_min, val_max = min(val['growth_rate_ci'].values()), max(val['growth_rate_ci'].values())
            #     all_gr_min.append(val_min)
            #     all_gr_max.append(val_max)

            # x_min, x_max = min(all_gr_min), max(all_gr_max)

            all_ror_min, all_ror_max = [], []
            for val in config_subset:
                val_min, val_max = min(val['risk_of_ruin'].values()), max(
                    val['risk_of_ruin'].values())
                all_ror_min.append(val_min)
                all_ror_max.append(val_max)

            y_min, y_max = min(all_ror_min), max(all_ror_max)

            # Filter configurations for this scale
            scale_configs = config_subset

            # Create figure with subplots
            n_rows = len(x_thresholds)
            n_cols = len(x_shapes)

            fig, axes = plt.subplots(n_rows, n_cols,
                                    figsize=(5*n_cols, 4*n_rows),
                                    dpi=300,
                                    squeeze=False)

            # Set overall title
            scale_label = f"{float(scale):.1f}x" if scale != '1' else "Baseline"
            lr_pct = f"{loss_ratio*100:.0f}%"
            fig.suptitle(f'Efficiency Frontier - Scale: {scale_label} - Loss Ratio: {lr_pct}',
                        fontsize=16, fontweight='bold', y=0.995)

            # Create colormap for percentiles
            cmap = plt.cm.viridis

            # Plot each facet
            for i, threshold in enumerate(x_thresholds):
                for j, shape_param in enumerate(x_shapes):
                    ax = axes[i, j]

                    # Debug: Check what values we're comparing
                    if threshold == 0 or shape_param == 0:
                        print(
                            f"\n=== Debug for threshold={threshold}, shape_param={shape_param} ===")
                        print(
                            f"shape_param type: {type(shape_param)}, value: {repr(shape_param)}")
                        print(
                            f"threshold type: {type(threshold)}, value: {repr(threshold)}")

                        # Sample a few configs to see their actual values
                        for c in scale_configs[:3]:
                            print(
                                f"  X_Shape: {repr(c['X_Shape'])} (type: {type(c['X_Shape'])})")
                            print(
                                f"  X_Th_%le: {repr(c['X_Th_%le'])} (type: {type(c['X_Th_%le'])})")
                            print(
                                f"  float(c['X_Shape']) == shape_param: {float(c['X_Shape']) == shape_param}")
                            print(
                                f"  Threshold match: {c['X_Th_%le'] == threshold or (c['X_Th_%le'] is None and threshold == 0)}")
                            print()

                    # Filter for this facet
                    facet_configs = [c for c in scale_configs
                                    if (c['X_Th_%le'] == threshold or (c['X_Th_%le'] == 'None' and threshold == 0))
                                    and float(c['X_Shape']) == shape_param]

                    if (threshold == 0 or shape_param == 0):
                        print(len(facet_configs))

                    # First pass: Plot all percentiles as heatmap (background)
                    for config in facet_configs:
                        # Get risk of ruin at year 25
                        # ror_25 = config['risk_of_ruin'].get(25, config['risk_of_ruin'].get('25'))

                        # Calculate log of policy limit for x-axis
                        if config.get('NOINS', False):
                            x_value = 0  # or np.nan if you want to exclude NOINS from plot
                        else:
                            pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                            x_value = np.log10(float(pol_lim))

                        # Plot all percentiles from growth_rate_ci as small dots
                        if 'growth_rate_ci' in config and config['growth_rate_ci']:
                            for pct_key, growth_value in config['growth_rate_ci'].items():
                                # Convert key to float percentile
                                try:
                                    pct = float(pct_key)
                                    color = cmap(pct)

                                    # Plot as small dot for heatmap effect
                                    ax.scatter(x_value, growth_value,
                                                c=[color],
                                                s=15,
                                                alpha=0.6,
                                                edgecolors='none')
                                except (ValueError, TypeError):
                                    continue

                    # Second pass: Overlay specific percentiles with shapes
                    for config in facet_configs:
                        # Get risk of ruin at year 25
                        # ror_25 = config['risk_of_ruin'].get(25, config['risk_of_ruin'].get('25'))

                        # Calculate log of policy limit for x-axis
                        if config.get('NOINS', False):
                            x_value = 0  # or np.nan if you want to exclude NOINS from plot
                        else:
                            pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                            x_value = np.log10(float(pol_lim))

                        # Determine shape based on limit
                        if config.get('NOINS', False):
                            marker = shape_map['NOINS']
                            marker_size = 150
                        else:
                            pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                            marker = shape_map.get(pol_lim, 'x')
                            marker_size = 120

                        # Plot specific percentiles with shapes
                        if 'growth_rate_ci' in config and config['growth_rate_ci']:
                            for pct in highlight_percentiles:
                                pct_key = f"{pct:.2f}" if f"{pct:.2f}" in config['growth_rate_ci'] else pct
                                if pct_key in config['growth_rate_ci']:
                                    growth = config['growth_rate_ci'][pct_key]
                                    color = cmap(pct)

                                    ax.scatter(x_value, growth,
                                                marker=marker,
                                                c=[color],
                                                s=marker_size,
                                                alpha=0.6,
                                                edgecolors='black',
                                                linewidths=1.5)

                        # Plot mean in red
                        if 'growth_rate' in config:
                            growth_mean = config['growth_rate']
                            ax.scatter(x_value, growth_mean,
                                        marker=marker,
                                        c='red',
                                        s=marker_size,
                                        alpha=0.6,
                                        edgecolors='black',
                                        linewidths=1.5,
                                        zorder=10)

                    # Formatting
                    ax.set_xlabel('Policy Limit (log scale)', fontsize=10)
                    ax.set_ylabel('Growth Rate', fontsize=10)
                    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

                    # Set title for each subplot
                    thresh_label = threshold if threshold != 'None' else 'No Tail'
                    ax.set_title(f'Threshold: {thresh_label}\nShape: {shape_param}',
                                fontsize=9)

                    # Set axis limits
                    # ax.set_xlim(x_min, x_max)
                    ax.set_xlim(np.log10(50000000* 0.9),
                                np.log10(500000000 * 1.1))
                    ax.set_ylim(y_min, y_max)

                    # Set x-ticks to show actual policy limit values
                    ax.set_xticks([np.log10(50e6), np.log10(
                        100e6), np.log10(200e6), np.log10(500e6)])
                    ax.set_xticklabels(['$50M', '$100M', '$200M', '$500M'])

            # Create legends
            # Shape legend
            shape_elements = []
            for limit_name, limit_value in [  # ('No Insurance', 'NOINS'),
                    ('$50M', 50000000),
                    ('$100M', 100000000),
                    ('$200M', 200000000),
                    ('$500M', 500000000)]:
                shape_elements.append(Line2D([0], [0], marker=shape_map[limit_value],
                                            color='w', markerfacecolor='gray',
                                            markersize=10, label=limit_name,
                                            markeredgecolor='black', linewidth=1))

            # Color legend (percentiles + mean)
            color_elements = []
            for pct in legend_percentiles:
                color_elements.append(mpatches.Patch(color=cmap(pct),
                                                    label=f'{int(pct*100)}th %ile',
                                                    edgecolor='black',
                                                    linewidth=0.5))
            color_elements.append(mpatches.Patch(color='red',
                                                label='Mean',
                                                edgecolor='black',
                                                linewidth=0.5))

            # Add legends to the figure
            fig.legend(handles=shape_elements, loc='upper left',
                        bbox_to_anchor=(0.0, 0.0), title='Policy Limit',
                        frameon=False, fontsize=9, ncol=len(shape_elements))
            fig.legend(handles=color_elements, loc='upper right',
                        bbox_to_anchor=(1.0, 0.0), title='Percentile',
                        frameon=False, fontsize=9, ncol=len(color_elements))

            plt.tight_layout(rect=[0, 0, 1, 0.99])

            # Save figure
            filename = f'cache/efficiency_frontier_scale_{str(scale).replace(".", "p")}_lr_{str(loss_ratio).replace(".", "p")}.png'
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            print(f'Saved: {filename}')
            plt.close()


create_efficiency_frontier_plots(ins_only_vals)

  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.set_ylim(y_min, y_max)
  ax.scatter(x_value, growth,
  ax.scatter(x_value, growth_mean,
  ax.

Saved: cache/efficiency_frontier_scale_1_lr_0p6.png


## Plot Individual Graphs of Growth vs Limit

In [24]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np

from matplotlib.lines import Line2D
from scipy.interpolate import interp1d

# Assuming your data is sto#cc0000 in a list called 'configurations'
# configurations = [{'risk_of_ruin': {...}, 'growth_rate': ..., 'X_Scale': ..., ...}, ...]


def create_efficiency_frontier_plots_base_only(configurations):
    """
    Create efficiency frontier plots for actuarial simulation results.

    Parameters:
    -----------
    configurations : list of dict
        List of configuration dictionaries with simulation results

    Output:
    -------
    Saves PNG files for each X_Scale with efficiency frontier plots.
    """

    # Get unique values for separate charts
    x_scales = sorted(set(c['X_Scale'] for c in configurations))
    lr = sorted(set(c['LR'] for c in configurations))
    caps = sorted(set(c['Cap'] for c in configurations))

    # Define percentiles for overlay
    highlight_percentiles = [0.50, 0.75, 0.90, 0.95, 0.99]
    legend_percentiles = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
    all_percentiles = np.arange(0.01, 1.00, 0.01).tolist()

    for cap in caps:
        # Create a plot for each X_Scale
        for scale in x_scales:
            for loss_ratio in lr:
                if scale != 1.0 or loss_ratio != 0.6:
                    continue

                config_subset = [c for c in configurations if c['Cap'] == cap and c['X_Scale'] == scale and c['LR'] == loss_ratio]
                # Get unique values for faceting
                x_shapes = sorted(set(float(c['X_Shape']) for c in config_subset))
                x_shapes = [s for s in x_shapes if s != 0] # Filter out 0
                base_shape = 2.5
                x_shapes = [base_shape]
                x_thresholds = sorted({(c['X_Th_%le'] if isinstance(c['X_Th_%le'], (float, np.floating)) else 0.0) for c in config_subset})
                x_thresholds = [s for s in x_thresholds if s != 0] # Filter out 0
                base_threshold = 0.001
                x_thresholds = [base_threshold]

                # Filter configurations for this scale
                scale_configs = config_subset

                # Create figure with subplots
                n_rows = len(x_thresholds)
                n_cols = len(x_shapes)

                fig, axes = plt.subplots(n_rows, n_cols,
                                        figsize=(5*3, 4*2),
                                        dpi=300,
                                        squeeze=False)

                # Set overall title
                scale_label = f"{float(scale):.1f}x" if scale != '1' else "Baseline"
                lr_pct = f"{loss_ratio*100:.0f}%"
                threshold_pct = f"{base_threshold*100:.2f}%"
                fig.suptitle(f'Efficiency Frontier (Init Cap: {cap/1_000_000:.0f}M; Scale: {scale_label}; Loss Ratio: {lr_pct}; Tail Threshold: {threshold_pct}; Shape: {base_shape})',
                            fontsize=16, fontweight='bold', y=0.995)

                # Create colormap for percentiles
                cmap = plt.cm.viridis

                # Plot each facet
                for i, threshold in enumerate(x_thresholds):
                    for j, shape_param in enumerate(x_shapes):
                        ax = axes[i, j]

                        # Debug: Check what values we're comparing
                        if threshold == 0 or shape_param == 0:
                            print(
                                f"\n=== Debug for threshold={threshold}, shape_param={shape_param} ===")
                            print(
                                f"shape_param type: {type(shape_param)}, value: {repr(shape_param)}")
                            print(
                                f"threshold type: {type(threshold)}, value: {repr(threshold)}")

                            # Sample a few configs to see their actual values
                            for c in scale_configs[:3]:
                                print(
                                    f"  X_Shape: {repr(c['X_Shape'])} (type: {type(c['X_Shape'])})")
                                print(
                                    f"  X_Th_%le: {repr(c['X_Th_%le'])} (type: {type(c['X_Th_%le'])})")
                                print(
                                    f"  float(c['X_Shape']) == shape_param: {float(c['X_Shape']) == shape_param}")
                                print(
                                    f"  Threshold match: {c['X_Th_%le'] == threshold or (c['X_Th_%le'] is None and threshold == 0)}")
                                print()

                        # Filter for this facet
                        facet_configs = [c for c in scale_configs
                                        if (c['X_Th_%le'] == threshold or (c['X_Th_%le'] == 'None' and threshold == 0))
                                        and float(c['X_Shape']) == shape_param]

                        if (threshold == 0 or shape_param == 0):
                            print(len(facet_configs))

                        # Intermediate pass - Create smooth gradient using pcolormesh
                        # Organize data by percentile
                        percentile_data = {pct: [] for pct in all_percentiles}

                        for config in facet_configs:
                            # Calculate log of policy limit for x-axis
                            if config.get('NOINS', False):
                                continue  # Skip NOINS
                            else:
                                pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                    config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                                x_value = np.log10(float(pol_lim))
                            
                            # Collect percentile values
                            if 'growth_rate_ci' in config and config['growth_rate_ci']:
                                for pct in all_percentiles:
                                    pct_key = f"{pct:.2f}" if f"{pct:.2f}" in config['growth_rate_ci'] else pct
                                    if pct_key in config['growth_rate_ci']:
                                        growth = config['growth_rate_ci'][pct_key]
                                        percentile_data[pct].append((x_value, growth, float(pol_lim)))

                        # Get unique x values (policy limits) and sort
                        unique_x_values = sorted(set(p[0] for pct_list in percentile_data.values() for p in pct_list))

                        if len(unique_x_values) > 1:
                            # Create interpolation grid with many points for smooth gradient
                            x_interp = np.linspace(min(unique_x_values), max(unique_x_values), 200)
                            
                            # For each percentile, interpolate growth rates across policy limits
                            from scipy.interpolate import interp1d
                            
                            y_grid = []
                            valid_percentiles = []
                            
                            for pct in sorted(all_percentiles):
                                if len(percentile_data[pct]) > 1:
                                    # Sort by x value
                                    sorted_data = sorted(percentile_data[pct], key=lambda p: p[0])
                                    x_data = np.array([p[0] for p in sorted_data])
                                    y_data = np.array([p[1] for p in sorted_data])
                                    
                                    # Interpolate
                                    f = interp1d(x_data, y_data, kind='linear', fill_value='extrapolate')
                                    y_interp = f(x_interp)
                                    y_grid.append(y_interp)
                                    valid_percentiles.append(pct)
                            
                            if len(y_grid) > 1:
                                # Convert to 2D array: rows = percentiles, columns = x positions
                                y_grid = np.array(y_grid)
                                
                                # Create meshgrid for pcolormesh
                                X, Y = np.meshgrid(x_interp, valid_percentiles)
                                
                                # Use pcolormesh with the colormap
                                mesh = ax.pcolormesh(X, y_grid, Y, 
                                                    cmap=cmap, 
                                                    shading='gouraud',  # Smooth interpolation
                                                    vmin=0, vmax=1,
                                                    zorder=5)

                        # Second pass: Overlay specific percentiles with shapes
                        for config in facet_configs:
                            # Get risk of ruin at year 25
                            # ror_25 = config['risk_of_ruin'].get(25, config['risk_of_ruin'].get('25'))

                            # Calculate log of policy limit for x-axis
                            if config.get('NOINS', False):
                                x_value = 0  # or np.nan if you want to exclude NOINS from plot
                            else:
                                pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                    config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                                x_value = np.log10(float(pol_lim))

                            # Plot mean in #cc0000
                            if 'growth_rate' in config:
                                growth_mean = config['growth_rate']
                                ax.scatter(x_value, growth_mean,
                                            marker='o',
                                            c='#cc0000',
                                            alpha=1.0,
                                            edgecolors='#cc0000',
                                            linewidths=1.5,
                                            zorder=10)


                        ### Draw lines connecting means across limits #######
                        # Collect (x, growth_rate, raw_limit) tuples to connect mean growth rates
                        mean_points = []
                        
                        for config in facet_configs:
                            # Skip NOINS for connecting line (only plot actual policy limits)
                            if config.get('NOINS', False):
                                continue
                            pol_lim = config.get('Pol_Lim', [''])[0] if isinstance(
                                config.get('Pol_Lim'), list) else config.get('Pol_Lim', '')
                            try:
                                x_value = np.log10(float(pol_lim))
                            except (TypeError, ValueError):
                                continue
                            if 'growth_rate' in config and config['growth_rate'] is not None:
                                mean_points.append((x_value, float(config['growth_rate']), float(pol_lim)))
                        
                        if len(mean_points) > 1:
                            # Sort by actual (non-log) policy limit to ensure correct ordering
                            sorted_points = sorted(mean_points, key=lambda p: p[2])
                            x_coords = [p[0] for p in sorted_points]
                            y_coords = [p[1] for p in sorted_points]
                            ax.plot(x_coords, y_coords,
                                    color='#cc0000',
                                    linewidth=2,
                                    alpha=1.0,
                                    zorder=9)

                        # Formatting
                        ax.set_xlabel('Policy Limit (log scale)', fontsize=10)
                        ax.set_ylabel('Growth Rate', fontsize=10)
                        ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

                        # Set title for each subplot
                        # thresh_label = threshold if threshold != 'None' else 'No Tail'
                        # ax.set_title(f'Threshold: {thresh_label}\nShape: {shape_param}',
                        #             fontsize=9)

                        # Set axis limits
                        # ax.set_xlim(x_min, x_max)
                        ax.set_xlim(np.log10(25000000* 0.9),
                                    np.log10(500000000 * 1.1))
                        # ax.set_ylim(y_min, y_max)

                        # Set x-ticks to show actual policy limit values
                        ax.set_xticks([np.log10(25e6),
                                        np.log10(50e6),
                                        np.log10(75e6),
                                        np.log10(100e6), 
                                        np.log10(150e6),
                                        np.log10(250e6),
                                        np.log10(350e6),
                                        np.log10(500e6)])
                        ax.set_xticklabels(['$25M',
                                            '$50M',
                                            '$75M', 
                                            '$100M', 
                                            '$150M',
                                            '$250M', 
                                            '$350M',
                                            '$500M'])

                # Color legend (percentiles + mean)
                color_elements = []
                for pct in legend_percentiles:
                    color_elements.append(mpatches.Patch(color=cmap(pct),
                                                        label=f'{int(pct*100)}th %ile',
                                                        edgecolor='black',
                                                        linewidth=0.5))
                color_elements.append(mpatches.Patch(color='#cc0000',
                                                    label='Mean',
                                                    edgecolor='#cc0000',
                                                    linewidth=0.5))

                fig.legend(handles=color_elements, loc='upper center',
                            bbox_to_anchor=(0.5, 0.0), title='Percentile',
                            frameon=False, fontsize=9, ncol=len(color_elements))

                plt.tight_layout(rect=[0, 0, 1, 0.99])

                # Save figure
                filename = f'cache/efficiency_frontier_base_cap({cap/1_000_000:.0f}M).png'
                plt.savefig(filename, dpi=300, bbox_inches='tight')
                print(f'Saved: {filename}')
                plt.close()

                # Create histogram of annual aggregate losses (memory-efficient)
                total_count = 0
                mean = 0.0
                M2 = 0.0
                min_val = np.inf
                max_val = -np.inf

                # First pass: compute stats and data range without materializing all losses
                for config in facet_configs:
                    if 'annual_losses' in config and config['annual_losses'] is not None:
                        # annual_losses is a list of 250K scenarios, each with 25 years
                        for scenario in config['annual_losses']:
                            arr = np.log10(np.clip(np.asarray(scenario, dtype=np.float64), a_min=1, a_max=None))
                            if arr.size == 0:
                                continue

                            # Parallel Welford merge for batch stats
                            n_before = total_count
                            chunk_n = int(arr.size)
                            total_count = n_before + chunk_n
                            chunk_mean = float(arr.mean())
                            chunk_M2 = float(arr.var(ddof=0) * chunk_n)
                            delta = chunk_mean - mean
                            if total_count > 0:
                                mean += delta * (chunk_n / total_count)
                                M2 += chunk_M2 + (delta * delta) * (chunk_n * n_before / total_count)

                            # Range
                            vmin = float(arr.min())
                            vmax = float(arr.max())
                            if vmin < min_val:
                                min_val = vmin
                            if vmax > max_val:
                                max_val = vmax

                if np.isfinite(min_val) and np.isfinite(max_val) and total_count > 0:
                    # Create histogram figure
                    fig_hist, ax_hist = plt.subplots(1, 1, figsize=(12, 8), dpi=300)

                    # Define bins
                    bins = 100
                    if max_val == min_val:
                        max_val = min_val + 1.0
                    bin_edges = np.linspace(min_val, max_val, bins + 1)

                    # Second pass: accumulate histogram counts without loading all data at once
                    counts = np.zeros(bins, dtype=np.int64)
                    for config in facet_configs:
                        if 'annual_losses' in config and config['annual_losses'] is not None:
                            for scenario in config['annual_losses']:
                                arr = np.log10(np.clip(np.asarray(scenario, dtype=np.float64), a_min=1, a_max=None))
                                if arr.size == 0:
                                    continue
                                c, _ = np.histogram(arr, bins=bin_edges)
                                counts += c

                    # Plot histogram using precomputed counts
                    bin_widths = np.diff(bin_edges)
                    bin_centers = bin_edges[:-1] + bin_widths / 2.0
                    ax_hist.bar(bin_centers, counts, width=bin_widths, color='steelblue',
                                edgecolor='black', alpha=0.7, align='center')

                    # Formatting
                    ax_hist.set_xlabel('Annual Aggregate Loss', fontsize=12)
                    ax_hist.set_ylabel('Frequency', fontsize=12)
                    ax_hist.set_title(f'Distribution of Annual Aggregate Losses\nScale: {scale_label} - Loss Ratio: {lr_pct} - Cap: ${cap/1e6:.0f}M',
                                        fontsize=14, fontweight='bold')
                    ax_hist.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

                    # Compute median approximately from histogram
                    cumsum = np.cumsum(counts)
                    mid = 0.5 * total_count
                    median_val = bin_centers[0]
                    idx = int(np.searchsorted(cumsum, mid))
                    if 0 <= idx < bins:
                        left_count = cumsum[idx - 1] if idx > 0 else 0
                        within = counts[idx]
                        if within > 0:
                            frac = (mid - left_count) / within
                            frac = np.clip(frac, 0.0, 1.0)
                            median_val = bin_edges[idx] + frac * (bin_edges[idx + 1] - bin_edges[idx])
                        else:
                            median_val = bin_centers[idx]

                    # Add statistics text box
                    stats_text = f'N = {total_count:,}\n'
                    stats_text += f'Mean = ${mean:,.0f}\n'
                    std_val = np.sqrt(M2 / total_count) if total_count > 0 else 0.0
                    stats_text += f'Median = ${median_val:,.0f}\n'
                    stats_text += f'Std Dev = ${std_val:,.0f}\n'
                    stats_text += f'Max = ${max_val:,.0f}'

                    ax_hist.text(0.98, 0.97, stats_text,
                                transform=ax_hist.transAxes,
                                verticalalignment='top',
                                horizontalalignment='right',
                                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
                                fontsize=10)

                    plt.tight_layout()

                    # Save histogram
                    hist_filename = f'cache/histogram_base_cap({cap/1_000_000:.0f}M).png'
                    plt.savefig(hist_filename, dpi=300, bbox_inches='tight')
                    print(f'Saved: {hist_filename}')
                    plt.close()


create_efficiency_frontier_plots_base_only(ins_only_vals)

  color_elements.append(mpatches.Patch(color=cmap(pct),
  color_elements.append(mpatches.Patch(color='#cc0000',


Saved: cache/efficiency_frontier_base_cap(25M).png
Saved: cache/histogram_base_cap(25M).png


  color_elements.append(mpatches.Patch(color=cmap(pct),
  color_elements.append(mpatches.Patch(color='#cc0000',


Saved: cache/efficiency_frontier_base_cap(50M).png
Saved: cache/histogram_base_cap(50M).png
