# Code for Figures 10, 11, and 12

## Libraries

In [0]:
import pandas as pd
import xarray as xr
import pathlib as pl
import h5py
import time
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

## Settings

In [0]:
# --- Input / output locations -------------------------------------------------
ras_data_dir = "/dbfs/mnt/lwi-common/LWI_Production_forPurdue/LWI_Production_TCs_forPurdue/"
elev_data_file = "/dbfs/mnt/lwi-transition-zone/data/pilot_reanalysis/no/prcp/ras_output/Amite_20200114.p01.tmp.hdf"
out_data_dir = "/dbfs/mnt/lwi-transition-zone/data/pilot_reanalysis/prod_no_aleatory/"
# NOTE: despite the "_dir" suffix this is the path of a single CSV file.
prob_mass_dir = '/dbfs/FileStore/LWI_hydro/prob_masses/prob_masses.csv'
amite_geo_dir = '/Volumes/lwi/default/transition-zone-data/pilot_reanalysis/output_data'
# Maybe delete if not used!
hdf_data_dir = pl.Path('/dbfs/mnt/lwi-transition-zone/data/pilot_reanalysis/no/prcp/ras_output')

# --- Event-set parameters -----------------------------------------------------
# presumably antecedent-condition percentiles and their probabilities
# (ac_probs has one entry per element of antecedent_conds) -- TODO confirm
antecedent_conds = [5, 25, 50, 75, 95]
ac_probs = [0.12888, 0.274451, 0.194052, 0.294581, 0.108036]
events_per_ac = 100
# Rate used when annualizing the per-event CDF (see calc_annualized_cdf_val).
recurrence_rate = 1.184297
non_trop_recurrence_rate = 44 / 18.0

# --- Model bias statistics ----------------------------------------------------
# Absolute bias is (modelled - true).
abs_bias = 0.4223155
abs_sd = 2.020001
# Relative bias is the expectation of (modelled - true) / modelled.
rel_bias = -0.07017379
# presumably the standard deviation of the same relative error -- TODO confirm
rel_sd = 0.448964

# --- Storm probability masses -------------------------------------------------
prob_mass_file = pl.Path(prob_mass_dir)
prob_mass_df = pd.read_csv(prob_mass_file)
storm_ids = prob_mass_df["storm_id"].values




### Functions

In [0]:
def calc_annualized_cdf_val(in_df, recurrence_rate, cum_prob_col):
    """Return the annualized CDF value for each row of ``in_df``.

    Evaluates ``exp(-recurrence_rate * (1 - F))`` element-wise, where ``F`` is
    the cumulative-probability column named ``cum_prob_col``.
    """
    exceedance_prob = 1 - in_df[cum_prob_col]
    return np.exp(-recurrence_rate * exceedance_prob)

def get_coordinates_at_percentage(line, percentage):
    """Return the ``(x, y)`` of the point a given fraction of the way along ``line``.

    ``percentage`` is a fraction in ``[0, 1]`` (not 0-100). ``line`` must
    expose ``length`` and ``interpolate(distance)``.

    Raises:
        ValueError: if ``percentage`` is outside ``[0, 1]``.
    """
    within_unit_interval = 0 <= percentage <= 1
    if not within_unit_interval:
        raise ValueError("Percentage must be between 0 and 1.")

    located = line.interpolate(line.length * percentage)
    return located.x, located.y

def find_closest_ras_ids(geo_frame, coordinates_list):
    """Return, for each target coordinate, the ``ras_id`` of the nearest row.

    ``geo_frame`` needs ``x``, ``y`` and ``ras_id`` columns; nearness is the
    straight-line distance in those coordinates. The result has one entry per
    ``(x, y)`` pair in ``coordinates_list``, in the same order.
    """
    def _nearest_id(point_x, point_y):
        offset_x = geo_frame['x'] - point_x
        offset_y = geo_frame['y'] - point_y
        separation = np.sqrt(offset_x ** 2 + offset_y ** 2)
        # idxmin gives an index *label*, hence the .loc lookup.
        return geo_frame.loc[separation.idxmin(), 'ras_id']

    return [_nearest_id(px, py) for px, py in coordinates_list]

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde

def kde_quantile(data, quantile, prob_mass=None, bounds=(0, 100), bandwidth_method='scott',
                n_points=1000, reflection_padding=True, weight_method='resample', 
                n_resample=None, random_seed=None):
    """
    Estimate quantile from weighted KDE with tuning options
    
    Parameters:
    -----------
    data : array-like
        The data points
    quantile : float
        The quantile to estimate (between 0 and 1)
    prob_mass : array-like, optional
        Probability masses for each data point. If None, uniform weights are used.
        Will be normalized to sum to 1. Points with NaN or non-positive mass
        are dropped.
    bounds : tuple
        (min, max) bounds for the data; data are clipped to this range and the
        density is evaluated and renormalized on it.
    bandwidth_method : str or float
        Bandwidth method for KDE:
        - 'scott': Scott's rule (default)
        - 'silverman': Silverman's rule  
        - float: multiplier applied to Scott's factor (e.g., 0.5, 1.5).
          Both the resample and the direct path interpret it this way.
    n_points : int
        Number of evaluation points for the KDE (default: 1000)
        Higher values give smoother results but are slower
    reflection_padding : bool
        Whether to use reflection padding at boundaries (default: True)
        Helps with boundary effects but adds computation
    weight_method : str
        Method for handling weights:
        - 'resample': Resample data points based on weights (default)
        - 'direct': Directly weight the kernel contributions
        - 'hybrid': Average of the two densities above
    n_resample : int, optional
        Number of resampled points for 'resample' method. 
        If None, uses max(1000, len(data) * 10)
    random_seed : int, optional
        Random seed for reproducible resampling. A private generator is
        seeded with it; the global NumPy random state is left untouched.
        If None, the global NumPy random state is used.
    
    Returns:
    --------
    float : The estimated quantile value (NaN if there is no usable data)

    Raises:
    -------
    ValueError
        If prob_mass and data differ in length, or weight_method is unknown.
    """
    data = np.asarray(data, dtype=float)
    if data.size == 0 or np.all(np.isnan(data)):
        return np.nan

    # Handle probability masses
    if prob_mass is None:
        prob_mass = np.ones(len(data))
    else:
        prob_mass = np.asarray(prob_mass, dtype=float)
        if len(prob_mass) != len(data):
            raise ValueError("prob_mass must have same length as data")

    # Remove NaN values and points that carry no probability mass
    valid_mask = ~np.isnan(data) & ~np.isnan(prob_mass) & (prob_mass > 0)
    clean_data = data[valid_mask]
    clean_weights = prob_mass[valid_mask]
    if clean_data.size == 0:
        return np.nan

    clean_weights = clean_weights / np.sum(clean_weights)

    # Ensure data is within bounds
    lower, upper = bounds
    clean_data = np.clip(clean_data, lower, upper)

    # A KDE is undefined for zero-variance data; the value itself is the answer
    if np.std(clean_data) == 0:
        return clean_data[0]

    if reflection_padding:
        # Mirror the sample across both bounds. Because the data were clipped
        # to [lower, upper], every mirrored point lies within one range-width
        # of the bound, so no further filtering is needed.
        clean_data = np.concatenate([
            clean_data,
            lower - (clean_data - lower),
            upper + (upper - clean_data),
        ])
        clean_weights = np.tile(clean_weights, 3)
        clean_weights = clean_weights / np.sum(clean_weights)

    if weight_method not in ('resample', 'direct', 'hybrid'):
        raise ValueError("weight_method must be 'resample', 'direct', or 'hybrid'")

    # Trivial quantiles need no density estimate
    if quantile <= 0:
        return lower
    if quantile >= 1:
        return upper

    x_eval = np.linspace(lower, upper, n_points)

    # RandomState(seed) reproduces the stream np.random.seed(seed) would give,
    # without clobbering the caller's global random state.
    rng = np.random if random_seed is None else np.random.RandomState(random_seed)

    if weight_method == 'resample':
        pdf_vals = _resampled_kde_pdf(clean_data, clean_weights, x_eval,
                                      bandwidth_method, n_resample, rng)
    elif weight_method == 'direct':
        pdf_vals = _calculate_weighted_pdf(clean_data, clean_weights, x_eval, bandwidth_method)
    else:  # 'hybrid': average the two density estimates
        pdf_resample = _resampled_kde_pdf(clean_data, clean_weights, x_eval,
                                          bandwidth_method, n_resample, rng)
        pdf_direct = _calculate_weighted_pdf(clean_data, clean_weights, x_eval, bandwidth_method)
        pdf_vals = (pdf_resample + pdf_direct) / 2

    # Integrate with the trapezoid rule and use the SAME rule for the CDF, so
    # the CDF runs exactly from 0 at the lower bound to 1 at the upper bound.
    segment_mass = 0.5 * (pdf_vals[1:] + pdf_vals[:-1]) * np.diff(x_eval)
    total_mass = segment_mass.sum()
    if not np.isfinite(total_mass) or total_mass <= 0:
        # Density vanished on the evaluation grid; no quantile can be read off
        return np.nan
    cdf_vals = np.concatenate(([0.0], np.cumsum(segment_mass))) / total_mass

    # Find quantile by interpolation
    return np.interp(quantile, cdf_vals, x_eval)


def _resampled_kde_pdf(samples, weights, x_eval, bandwidth_method, n_resample, rng):
    """Evaluate an unweighted Gaussian KDE fitted to a weight-proportional resample.

    ``rng`` is anything exposing ``choice`` (``np.random`` or a ``RandomState``).
    """
    if n_resample is None:
        n_resample = max(1000, len(samples) * 10)

    picks = rng.choice(len(samples), size=n_resample, replace=True, p=weights)
    resampled = samples[picks]

    if isinstance(bandwidth_method, str):
        kde = gaussian_kde(resampled, bw_method=bandwidth_method)
    else:
        # Numeric value scales the default (Scott) factor
        kde = gaussian_kde(resampled)
        kde.set_bandwidth(kde.factor * bandwidth_method)

    return kde(x_eval)


def _calculate_weighted_pdf(data, weights, x_eval, bandwidth_method):
    """Helper function to calculate a weighted Gaussian-kernel PDF directly.

    The kernel bandwidth is ``factor * np.std(data)``. For a string
    ``bandwidth_method`` the factor comes from scipy's rule of that name; for
    a number it is that number times Scott's factor, matching the resample
    path. (Previously the number was both used as the factor and multiplied
    in again, i.e. squared, and Scott's factor was dropped.)
    """
    data = np.asarray(data, dtype=float)
    weights = np.asarray(weights, dtype=float)

    if isinstance(bandwidth_method, str):
        factor = gaussian_kde(data, bw_method=bandwidth_method).factor
    else:
        factor = bandwidth_method * gaussian_kde(data).factor
    bandwidth = factor * np.std(data)

    # Accumulate one weighted Gaussian bump per data point
    pdf_vals = np.zeros_like(x_eval, dtype=float)
    for center, weight in zip(data, weights):
        pdf_vals += weight * np.exp(-0.5 * ((x_eval - center) / bandwidth) ** 2)

    return pdf_vals / (bandwidth * np.sqrt(2 * np.pi))


# Simplified version that matches your original function signature
def kde_quantile_simple(data, quantile, prob_mass=None, bounds=(0, 100), 
                       bandwidth_method='scott', n_points=1000, reflection_padding=True):
    """
    Convenience wrapper around ``kde_quantile`` with the reduced argument list.

    Forwards every argument unchanged and pins ``weight_method='resample'``;
    the remaining ``kde_quantile`` options keep their defaults.
    """
    forwarded = {
        'prob_mass': prob_mass,
        'bounds': bounds,
        'bandwidth_method': bandwidth_method,
        'n_points': n_points,
        'reflection_padding': reflection_padding,
    }
    return kde_quantile(data, quantile, weight_method='resample', **forwarded)


# Additional utility functions for weight handling
def validate_and_normalize_weights(data, weights, method='normalize'):
    """
    Check a weight vector against its data and rescale it to sum to 1.

    Parameters:
    -----------
    data : array-like
        The data points (only the length is used)
    weights : array-like
        The weights
    method : str
        - 'normalize': Divide by the sum (default)
        - 'clip': Replace negative weights with 0, then normalize
        - 'abs': Take absolute values, then normalize
        - 'strict': Raise if any weight is <= 0, otherwise normalize

    Returns:
    --------
    numpy.ndarray : the weights divided by their sum

    Raises:
    -------
    ValueError
        On a length mismatch, an unknown method, a non-positive weight under
        'strict', or a zero weight sum under the other methods.
    """
    points = np.asarray(data)
    w = np.asarray(weights)

    if len(w) != len(points):
        raise ValueError("Weights must have same length as data")

    if method not in ('strict', 'clip', 'abs', 'normalize'):
        raise ValueError("method must be 'normalize', 'clip', 'abs', or 'strict'")

    if method == 'strict':
        if np.any(w <= 0):
            raise ValueError("All weights must be positive")
        return w / np.sum(w)

    # The remaining methods differ only in the pre-transform and the message
    if method == 'clip':
        w = np.clip(w, 0, np.inf)
        zero_sum_message = "All weights are zero after clipping"
    elif method == 'abs':
        w = np.abs(w)
        zero_sum_message = "All weights are zero after taking absolute value"
    else:
        zero_sum_message = "Weights sum to zero"

    if np.sum(w) == 0:
        raise ValueError(zero_sum_message)
    return w / np.sum(w)


def effective_sample_size(weights):
    """
    Return the effective sample size of a weight vector.

    The weights are first scaled to sum to 1; the result is the reciprocal of
    the sum of their squares, i.e. (sum w)^2 / (sum w^2).
    """
    w = np.asarray(weights)
    shares = w / np.sum(w)
    sum_of_squares = np.sum(shares**2)
    return 1.0 / sum_of_squares


def weight_statistics(weights):
    """
    Summarize how evenly a weight vector is spread.

    All entries except ``'ess'`` are computed on the weights scaled to sum
    to 1. Returns a dict with the keys ``'ess'``, ``'weight_variance'``,
    ``'max_weight'``, ``'min_weight'``, ``'weight_entropy'`` and
    ``'concentration_ratio'`` (largest share divided by the mean share).
    """
    w = np.asarray(weights)
    shares = w / np.sum(w)

    summary = {}
    summary['ess'] = effective_sample_size(w)
    summary['weight_variance'] = np.var(shares)
    summary['max_weight'] = np.max(shares)
    summary['min_weight'] = np.min(shares)
    # The 1e-16 offset keeps log() finite for zero shares
    summary['weight_entropy'] = -np.sum(shares * np.log(shares + 1e-16))
    summary['concentration_ratio'] = np.max(shares) / np.mean(shares)
    return summary

## Load Data
1. Load the compound-flooding tropical cyclone data as `storm_data_w_id`.
2. Load the storm-surge-only data as `storm_data_surge_uncert`.

In [0]:
# Tropical cyclone depth data per ras_id (model grid points), with probability
# weight and storm id: first the compound-flooding run, then the surge-only run.
compound_parquet = f'{out_data_dir}/wse_dist_tc_no_uncert_vectorized_bartlett_edits.parquet'
surge_only_parquet = f'{out_data_dir}/wse_dist_tc_no_uncert_surge_only_vectorized_bartlett_edits.parquet'

storm_data_w_id = pd.read_parquet(compound_parquet)
storm_data_surge_uncert = pd.read_parquet(surge_only_parquet)

# Notebook display of the compound-flooding table
storm_data_w_id

### Elevations & Amite River Line

In [0]:
# Read the per-cell geometry of the 'AmiteMaurepas' 2D flow area. The file is
# opened read-only explicitly, and each dataset is pulled into memory with a
# single bulk read instead of being iterated row by row.
with h5py.File(elev_data_file, 'r') as f:
    flow_area = f['Geometry']['2D Flow Areas']['AmiteMaurepas']
    elevs = flow_area['Cells Minimum Elevation'][()]
    xy = flow_area['Cells Center Coordinate'][()]

# One row per cell; ras_id is the 1-based position of the cell in the HDF arrays.
geo_frame = pd.DataFrame(xy, columns = ['x','y'])
geo_frame['ras_id'] = range(1,len(elevs)+1)
geo_frame['elevs'] = elevs

# CRS the river line is reprojected into (Albers equal-area, units of US survey
# feet per the WKT) -- presumably the CRS of the cell-centre coordinates above;
# TODO confirm against the model geometry.
wkt_string = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-96.0],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_origin",23.0],UNIT["Foot_US",0.3048006096012192]]'
amite_line = gpd.read_file(f'{amite_geo_dir}/amite_line_pass_manchac_centered_userpoints.json').to_crs(wkt_string)

### Plot River Line
Plot the river line and calculate coordinates for every 5 km along the river. Also retrieve a list of coordinates for points at 5 km increments along the Amite River line, moving from upstream to downstream, through Lake Maurepas and its outlet, to the inlet at Lake Pontchartrain.

In [0]:

# The transect is the first geometry of amite_line; the factor converts its
# length (feet) to kilometres.
river_geom = amite_line.geometry.iloc[0]
total_length_km = river_geom.length * 0.0003048

# Fractions of the line that correspond to 0, 5, ..., 115 km from its start
percentages = [p / total_length_km for p in range(0, 116, 5)]
coordinates_list = [get_coordinates_at_percentage(river_geom, p) for p in percentages]

# Plot the line and the coordinates
plt.figure(figsize=(10, 6))
x, y = river_geom.xy
plt.plot(x, y, label='Amite Line')

for fraction, coords in zip(percentages, coordinates_list):
    plt.scatter(*coords, label=f'Point at {int(fraction*100)}%', zorder=5)

plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.title('Amite Line with Points at Specified Percentages')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.grid(True)
plt.text(
    1.05, 0.5, f'Total Length: {total_length_km:.2f} km',
    transform=plt.gca().transAxes, fontsize=12, verticalalignment='center',
)
plt.show()

# Notebook display of the sampled coordinates
coordinates_list


In [0]:

# Restrict the candidate cells to those that actually have storm results, so
# every transect point maps to a cell with data.
unique_ras_ids = storm_data_w_id['ras_id'].unique()
geo_frame_filtered = geo_frame[geo_frame['ras_id'].isin(unique_ras_ids)]

# Nearest cell (by cell-centre distance) for each transect point computed in
# the previous cell.
closest_ras_ids = find_closest_ras_ids(geo_frame_filtered, coordinates_list)
closest_ras_ids


Extract data for just the closest_ras_ids, those ras_ids (cells) intersected with the points of interest every 5 km along the Amite river transect.

In [0]:
# Keep only the rows of the transect cells and label the depth column as raw
is_transect_cell = storm_data_w_id['ras_id'].isin(closest_ras_ids)
storm_data_w_id_filtered = (
    storm_data_w_id.loc[is_transect_cell]
    .rename(columns={"depth": "depth_raw"})
)
storm_data_w_id_filtered


Calculate the CDF and the return period. Also calculate the bias adjusted depth, though this is not used later.

In [0]:
# Order events by depth within each cell (NaN depths first). A plain multi-key
# sort replaces groupby(...).apply(sort_values) + reset_index(drop=True): that
# pattern only works while apply() keeps the grouping column in its result,
# which newer pandas versions deprecate. Rows with a missing ras_id (which
# groupby would have dropped) cannot occur here because the table was filtered
# with isin(closest_ras_ids).
grouped_table = storm_data_w_id_filtered.sort_values(
    ['ras_id', 'depth_raw'], ascending=True, na_position='first'
).reset_index(drop=True)

# Same object under the name used from here on
storm_data_all_uncert = grouped_table

# Per-cell empirical CDF over events: running sum of the probability masses
storm_data_all_uncert['cum_prob'] = storm_data_all_uncert.groupby('ras_id')['prob'].cumsum()

# Annualize with the recurrence rate and convert to a return period
storm_data_all_uncert['annualized_cdf_val'] = calc_annualized_cdf_val(storm_data_all_uncert, recurrence_rate, 'cum_prob')
storm_data_all_uncert['return_period'] = 1/(1-storm_data_all_uncert['annualized_cdf_val'])

# Bias-adjusted depth: the correction is the smaller (in magnitude) of the
# absolute bias and the relative bias scaled by depth, carries the sign of the
# absolute bias, and the adjusted depth is floored at zero.
storm_data_all_uncert['bias_abs'] = abs_bias
storm_data_all_uncert['bias_rel'] = rel_bias*storm_data_all_uncert['depth_raw']
storm_data_all_uncert['bias_bounded'] = np.minimum(np.abs(storm_data_all_uncert['bias_abs']),np.abs(storm_data_all_uncert['bias_rel']))*np.sign(storm_data_all_uncert['bias_abs'])
storm_data_all_uncert['depth_adj'] = np.maximum(storm_data_all_uncert['depth_raw'] - storm_data_all_uncert['bias_bounded'],0)

Extract data for just the ras_ids coincident with the points of interest along the Amite River transect.

In [0]:
# One independent copy of the CDF table per transect point, in transect order
# (a ras_id that is nearest to several points appears once per point).
cdf_data_list = [
    storm_data_all_uncert.loc[storm_data_all_uncert['ras_id'].eq(ras_id)].copy()
    for ras_id in closest_ras_ids
]

Extract events within +/- 0.5 inches (roughly 0.042 ft) for a series of return-period events of interest.

In [0]:
delta = 0.042
return_periods = [10, 50, 100, 500]

# Surge-only depth per (storm, cell), renamed so it can sit next to the
# compound depth after the merge. Built once, outside the loops.
storm_merge = storm_data_surge_uncert[['storm_id', 'ras_id', 'depth_raw']].rename(
    columns={'depth_raw': 'depth_storm_raw'}
)

cdf_data_frames = []
for cell_ras_id, df in zip(closest_ras_ids, cdf_data_list):

    df_merged = df.merge(storm_merge, on=['storm_id', 'ras_id'], how='left', validate='m:1')

    # Smallest surge-only depth at this cell; subtracted below to make
    # everything relative to the 1-year event, approximately.
    surge_at_cell = storm_data_surge_uncert['ras_id'] == cell_ras_id
    min_depth_adj_row_surge = storm_data_surge_uncert.loc[surge_at_cell, 'depth_raw'].min()

    df_list = []
    for rp in return_periods:
        # Depth of the event whose return period is closest to the target
        idx_closest = (df_merged['return_period'] - rp).abs().idxmin()
        depth_raw_value = df_merged.loc[idx_closest, 'depth_raw']

        # All events whose depth lies within +/- delta of that depth (inclusive)
        in_window = df_merged['depth_raw'].between(depth_raw_value - delta, depth_raw_value + delta)
        cdf_data_rp = df_merged.loc[in_window].copy()

        # Depths relative to the cell's minimum surge-only depth
        depth_above_base = cdf_data_rp['depth_raw'] - min_depth_adj_row_surge
        cdf_data_rp['depth_base_raw'] = depth_above_base
        cdf_data_rp['depth_base_storm_raw'] = cdf_data_rp['depth_storm_raw'] - min_depth_adj_row_surge

        # Hydrologic part = compound minus surge-only depth, floored at zero;
        # the surge part is the remainder. Both as a percentage of the base depth.
        hydrologic_depth = np.maximum(cdf_data_rp['depth_raw'] - cdf_data_rp['depth_storm_raw'], 0)
        cdf_data_rp['depth_hydrologic'] = hydrologic_depth
        cdf_data_rp['depth_hydrologic_percentage'] = hydrologic_depth / depth_above_base * 100
        cdf_data_rp['depth_surge_percentage'] = (depth_above_base - hydrologic_depth) / depth_above_base * 100

        df_list.append(cdf_data_rp)

    cdf_data_frames.append(df_list)

# Figure 10

In [0]:
# --- Plot four panels (2x2 grid) ---
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# Define CFTZ extents (km ranges) for each return period
cftz_extents = {
    10: [(2.853293586573366, 79.08269891412961)],
    50: [(20.800533476964578, 80.4122950467338)],
    100: [(20.832551236178276, 105.70525390401936)],
    500: [(27.38825964096338, 80.08948887578502),
          (91.4973959474711, 109.16834368793245)]
}

# KDE TUNING PARAMETERS
# Resampling-based configuration (defined for reference; the statistics below
# use SMOOTH_KDE_CONFIG).
FULL_KDE_CONFIG = {
    # Original options
    'bandwidth_method': 'silverman',   # Bandwidth selection method
    'n_points': 1000,                  # KDE evaluation resolution
    'reflection_padding': False,       # Boundary reflection
    
    # Weight handling options  
    'weight_method': 'resample',       # How to handle weights
    'n_resample': None,                # Resample count (None = auto)
    'random_seed': 42,                 # Reproducibility seed
}

SMOOTH_KDE_CONFIG = {
    'bounds': (0, 100),
    'bandwidth_method': .75,           # Numeric bandwidth setting
    'n_points': 3000,                  # Higher resolution
    'reflection_padding': False,       # Boundary reflection is switched OFF
    'weight_method': 'direct',         # Weighted kernels, no resampling
    'n_resample': None,                # Not used with 'direct'
    'random_seed': 42,
}

# Define return periods and colors
panel1_rps = [10, 50]
panel2_rps = [100, 500]
# Keep original color scheme for left/right panels
panel1_colors = {10: 'tab:red', 50: 'tab:green'}  # 10-yr=red, 50-yr=green (was 100-yr color)
panel2_colors = {100: 'tab:orange', 500: 'tab:blue'}  # 100-yr=orange (was 50-yr color), 500-yr=blue
line_styles = {10: '-', 50: '-', 100: '-', 500: '-'}  # All solid lines

# Calculate distances along the river
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048


def _contribution_quantile_stats(value_col, rp_order=(10, 50, 100, 500)):
    """Quartile statistics of one percentage column along the transect.

    For every return period in ``rp_order`` and every transect point in the
    global ``cdf_data_frames`` this computes the 25th/50th/75th percentiles of
    ``value_col`` twice: probability-weighted via ``kde_quantile`` with
    ``SMOOTH_KDE_CONFIG``, and unweighted via pandas (kept for comparison).
    ``rp_order`` must list the return periods in the order in which the
    per-point frames were stored.

    Returns ``(kde_stats, discrete_stats)``: two dicts keyed by return period
    whose values are DataFrames with columns ``i``, ``distance_km``,
    ``median``, ``q25`` and ``q75``. Points with no events get NaN.
    """
    kde_stats, discrete_stats = {}, {}

    for rp_pos, rp in enumerate(rp_order):
        xs_list, distance_list = [], []
        median_list, q25_list, q75_list = [], [], []
        discrete_median_list, discrete_q25_list, discrete_q75_list = [], [], []

        for i, rp_dfs in enumerate(cdf_data_frames):
            df = rp_dfs[rp_pos]

            # NOTE(review): the transect points were sampled every 5 km, so
            # point i sits at i * 5 km; the extra total_length_km / 115 factor
            # only equals 1 for a 115 km line. Left unchanged because the
            # hard-coded CFTZ extents may have been derived on this axis.
            distance_km = (i * 5) * total_length_km / 115

            if not df.empty:
                data = df[value_col].values
                weights = df['prob']

                # KDE-based (probability-weighted) quantiles
                q25 = kde_quantile(data, 0.25, weights, **SMOOTH_KDE_CONFIG)
                median_val = kde_quantile(data, 0.50, weights, **SMOOTH_KDE_CONFIG)
                q75 = kde_quantile(data, 0.75, weights, **SMOOTH_KDE_CONFIG)

                # Discrete (unweighted) quantiles for comparison
                discrete_q25 = df[value_col].quantile(0.25)
                discrete_median = df[value_col].median()
                discrete_q75 = df[value_col].quantile(0.75)
            else:
                q25 = median_val = q75 = np.nan
                discrete_q25 = discrete_median = discrete_q75 = np.nan

            xs_list.append(i)
            distance_list.append(distance_km)
            median_list.append(median_val)
            q25_list.append(q25)
            q75_list.append(q75)
            discrete_median_list.append(discrete_median)
            discrete_q25_list.append(discrete_q25)
            discrete_q75_list.append(discrete_q75)

        kde_stats[rp] = pd.DataFrame({
            'i': xs_list,
            'distance_km': distance_list,
            'median': median_list,
            'q25': q25_list,
            'q75': q75_list
        })
        discrete_stats[rp] = pd.DataFrame({
            'i': xs_list,
            'distance_km': distance_list,
            'median': discrete_median_list,
            'q25': discrete_q25_list,
            'q75': discrete_q75_list
        })

    return kde_stats, discrete_stats


# --- KDE-based and discrete stats for HYDROLOGIC data ---
kde_stats_hydro, discrete_stats_hydro = _contribution_quantile_stats('depth_hydrologic_percentage')

# --- KDE-based and discrete stats for SURGE data ---
kde_stats_surge, discrete_stats_surge = _contribution_quantile_stats('depth_surge_percentage')

def add_cftz_bracket(ax, df_rp, rp, position="top", extents=None):
    """Draw horizontal bracket at top or bottom with vertical connectors to curve.

    Parameters
    ----------
    ax : axes-like
        Target axes; must provide ``hlines``, ``vlines``, ``text`` and ``get_ylim``.
    df_rp : DataFrame
        Curve data with ``distance_km`` and ``median`` columns.
    rp : int
        Return period; selects the extents and appears in the label.
    position : str
        ``"top"`` puts the bracket at y = 105 and connects it to the curve
        maximum; any other value puts it at y = -10 and connects it to the
        curve minimum.
    extents : dict, optional
        Mapping ``rp -> [(xmin, xmax), ...]``. Defaults to the module-level
        ``cftz_extents``.

    Extents containing no curve points are skipped. If the curve is NaN over
    a whole extent, the bracket and label are drawn without connectors.
    """
    if extents is None:
        extents = cftz_extents

    y_vals = df_rp['median'].values
    at_top = position == "top"

    for (xmin, xmax) in extents[rp]:
        # curve points inside this extent
        mask = ((df_rp['distance_km'] >= xmin) & (df_rp['distance_km'] <= xmax)).to_numpy()
        if not mask.any():
            continue
        y_section = y_vals[mask]
        finite_section = y_section[np.isfinite(y_section)]

        if at_top:
            y_line = 100*1.05  # fixed position just above the 0-100 % range
            va, offset = "bottom", .5
        else:
            y_line = -10  # fixed position just below the 0-100 % range
            va, offset = "top", -.5

        # draw horizontal bracket line
        ax.hlines(y_line, xmin, xmax, colors="black", linestyles="-", alpha=0.5)

        # vertical connectors from the curve extreme to the bracket; skipped
        # when there is no finite curve value to connect to
        if finite_section.size:
            y_anchor = finite_section.max() if at_top else finite_section.min()
            ax.vlines([xmin, xmax], [y_anchor, y_anchor],
                      y_line, colors="gray", linestyles="-", alpha=0.5)

        # label in middle of bracket, nudged away from it by 0.5 % of the y-range
        ax.text((xmin + xmax)/2, y_line + offset*(ax.get_ylim()[1]-ax.get_ylim()[0])/100,
                f"CFTZ ({rp}-yr)", color="k", ha="center", va=va, fontsize=10)

def _draw_contribution_panel(ax, stats_by_rp, rps, colors, title,
                             xlabel=None, ylabel=None, legend_loc=None):
    """Draw one Figure 10 panel: median curves with interquartile bands.

    ``stats_by_rp`` maps return period to a DataFrame with ``distance_km``,
    ``median``, ``q25`` and ``q75``. ``rps`` holds two return periods: the
    first is drawn with square markers and gets its CFTZ bracket on top, the
    second with circles and its bracket at the bottom. Axis labels and the
    legend location are only set when given.
    """
    for marker, rp in zip(['s', 'o'], rps):
        df_rp = stats_by_rp[rp]
        ax.plot(df_rp['distance_km'], df_rp['median'], color=colors[rp],
                linestyle=line_styles[rp], marker=marker, markerfacecolor='none',
                label=f'{rp}-yr', markersize=7)
        ax.fill_between(df_rp['distance_km'], df_rp['q25'], df_rp['q75'],
                        color=colors[rp], alpha=0.2)

    add_cftz_bracket(ax, stats_by_rp[rps[0]], rps[0], position="top")
    add_cftz_bracket(ax, stats_by_rp[rps[1]], rps[1], position="bottom")

    if xlabel is not None:
        ax.set_xlabel(xlabel, fontsize=16)
    if ylabel is not None:
        ax.set_ylabel(ylabel, fontsize=16)

    ax.grid(True, linestyle='--', alpha=0.6)
    legend_kwargs = {'frameon': False, 'fontsize': 12}
    if legend_loc is not None:
        legend_kwargs['loc'] = legend_loc
    ax.legend(**legend_kwargs)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.set_title(title, fontsize=14, fontweight='bold')


# ---------------- TOP LEFT Panel: Hydrologic 10-yr and 50-yr ----------------
_draw_contribution_panel(
    axes[0, 0], kde_stats_hydro, panel1_rps, panel1_colors,
    title='Hydrologic Contribution',
    ylabel='Hydrologic-attributed Depth (%):\n Median and 25th–75th interquartile range',
)

# ---------------- TOP RIGHT Panel: Hydrologic 100-yr and 500-yr ----------------
_draw_contribution_panel(
    axes[0, 1], kde_stats_hydro, panel2_rps, panel2_colors,
    title='Hydrologic Contribution',
    legend_loc='lower left',
)

# ---------------- BOTTOM LEFT Panel: Surge 10-yr and 50-yr ----------------
_draw_contribution_panel(
    axes[1, 0], kde_stats_surge, panel1_rps, panel1_colors,
    title='Surge Contribution',
    xlabel='Distance Along River (km)',
    ylabel='Surge-attributed Depth (%):\n Median and 25th–75th interquartile range',
    legend_loc='lower right',
)

# ---------------- BOTTOM RIGHT Panel: Surge 100-yr and 500-yr ----------------
_draw_contribution_panel(
    axes[1, 1], kde_stats_surge, panel2_rps, panel2_colors,
    title='Surge Contribution',
    xlabel='Distance Along River (km)',
    legend_loc='lower left',
)

# ---------------- Shared formatting ----------------
# Share y-axes within rows
axes[0, 0].sharey(axes[0, 1])  # Top row shares y-axis
axes[1, 0].sharey(axes[1, 1])  # Bottom row shares y-axis

plt.tight_layout()
plt.savefig("hydrologic_and_surge_contribution_with_CFTZ_brackets.pdf", bbox_inches="tight")
plt.show()

# Other Figure 10 Variations (NOT USED IN PAPER)

In [0]:
# --- Plot two panels ---
# Setup for the single-row (1x2) variant of Figure 10. The settings below
# repeat, value for value, those of the four-panel figure.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Define CFTZ extents (km ranges) for each return period
# (the 500-yr entry has two separate extents)
cftz_extents = {
    10: [(2.853293586573366, 79.08269891412961)],
    50: [(20.800533476964578, 80.4122950467338)],
    100: [(20.832551236178276, 105.70525390401936)],
    500: [(27.38825964096338, 80.08948887578502),
          (91.4973959474711, 109.16834368793245)]
}


# KDE TUNING PARAMETERS
# (earlier, superseded configuration kept commented out)
#KDE_CONFIG = {
#    'bandwidth_method': 'silverman',  # 'scott', 'silverman', or float (e.g., 0.5, 1.5)
#    'n_points': 1000,            # Resolution of KDE evaluation
#    'reflection_padding': False,   # Use reflection at boundaries
#}

# Resampling-based configuration for kde_quantile
FULL_KDE_CONFIG = {
    # Original options
    'bandwidth_method': 'silverman',   # Bandwidth selection method
    'n_points': 1000,                  # KDE evaluation resolution
    'reflection_padding': False,       # Boundary reflection
    
    # Weight handling options  
    'weight_method': 'resample',       # How to handle weights
    'n_resample': None,                # Resample count (None = auto)
    'random_seed': 42,                 # Reproducibility seed
}

# Direct-weighting configuration for kde_quantile
SMOOTH_KDE_CONFIG = {
    'bounds': (0, 100),
    'bandwidth_method': .75,           # Numeric bandwidth setting
    'n_points': 3000,                  # Higher resolution
    'reflection_padding': False,        # Boundary reflection is switched OFF
    'weight_method': 'direct',         # Weighted kernels, no resampling
    'n_resample': None,                # Not used with 'direct'
    'random_seed': 42,
}

# Define return periods and colors
panel1_rps = [10, 50]
panel2_rps = [100, 500]
# Keep original color scheme for left/right panels
panel1_colors = {10: 'tab:red', 50: 'tab:green'}  # 10-yr=red, 50-yr=green (was 100-yr color)
panel2_colors = {100: 'tab:orange', 500: 'tab:blue'}  # 100-yr=orange (was 50-yr color), 500-yr=blue
line_styles = {10: '-', 50: '-', 100: '-', 500: '-'}  # All solid lines


# Calculate distances along the river
# (length of the first geometry of amite_line, scaled by 0.0003048 to get km)
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048

# --- Quartiles of hydrologic-attributed depth (%) per return period and transect point ---
kde_stats = {}
discrete_stats = {}  # plain (unweighted) sample quartiles, kept for comparison

for rp in [10, 50, 100, 500]:
    point_ids, along_km = [], []
    kde_q25, kde_q50, kde_q75 = [], [], []
    raw_q25, raw_q50, raw_q75 = [], [], []

    for i, rp_dfs in enumerate(cdf_data_frames):
        # Select the frame whose position in rp_dfs corresponds to this return period
        df = next(frame for frame, frame_rp in zip(rp_dfs, [10, 50, 100, 500]) if frame_rp == rp)

        point_ids.append(i)
        # Point index -> distance along the river (km)
        along_km.append((i * 5) * total_length_km / 115)

        if df.empty:
            # No events for this point/return period: leave gaps in every curve
            for column in (kde_q25, kde_q50, kde_q75, raw_q25, raw_q50, raw_q75):
                column.append(np.nan)
            continue

        pct = df['depth_hydrologic_percentage']
        data = pct.values
        weights = df['prob']

        # Probability-weighted quartiles from the smoothed KDE
        kde_q25.append(kde_quantile(data, 0.25, weights, **SMOOTH_KDE_CONFIG))
        kde_q50.append(kde_quantile(data, 0.50, weights, **SMOOTH_KDE_CONFIG))
        kde_q75.append(kde_quantile(data, 0.75, weights, **SMOOTH_KDE_CONFIG))

        # Unweighted sample quartiles of the same column
        raw_q25.append(pct.quantile(0.25))
        raw_q50.append(pct.median())
        raw_q75.append(pct.quantile(0.75))

    # One row per transect point, same column layout for both tables
    kde_stats[rp] = pd.DataFrame({
        'i': point_ids,
        'distance_km': along_km,
        'median': kde_q50,
        'q25': kde_q25,
        'q75': kde_q75
    })
    discrete_stats[rp] = pd.DataFrame({
        'i': point_ids,
        'distance_km': along_km,
        'median': raw_q50,
        'q25': raw_q25,
        'q75': raw_q75
    })

def add_cftz_bracket(ax, df_rp, rp, position="top", extents=None):
    """Draw a labelled CFTZ bracket for one return period on ``ax``.

    For every (xmin, xmax) range registered for ``rp`` a horizontal line is
    drawn at a fixed y-value (105 for ``position="top"``, -10 otherwise), two
    vertical connectors run from that line to the median curve, and a
    "CFTZ (<rp>-yr)" label is centred on the bracket.

    Parameters
    ----------
    ax : object exposing ``hlines``, ``vlines``, ``text`` and ``get_ylim``
        (a matplotlib Axes).
    df_rp : pandas.DataFrame
        Must contain the columns ``'distance_km'`` and ``'median'``.
    rp : hashable
        Key into ``extents`` and the number shown in the label.
    position : str
        ``"top"`` places the bracket above the curve; any other value places
        it below.
    extents : dict, optional
        Mapping ``rp -> list of (xmin, xmax)``. Defaults to the notebook-level
        ``cftz_extents`` so existing calls keep working.

    Ranges that contain no point of ``df_rp`` are skipped.
    """
    if extents is None:
        extents = cftz_extents

    distances = df_rp['distance_km']
    y_vals = df_rp['median'].values
    is_top = position == "top"

    for (xmin, xmax) in extents[rp]:
        # Points of the curve that fall inside this bracket
        mask = ((distances >= xmin) & (distances <= xmax)).to_numpy()
        if not mask.any():
            continue
        y_section = y_vals[mask]

        if is_top:
            y_line = 100 * 1.05  # fixed height just above the 0-100 % range
            va, offset = "bottom", .5
            y_anchor = np.nanmax(y_section)  # connectors start at the curve's peak
        else:
            y_line = -10  # fixed height just below the 0-100 % range
            va, offset = "top", -.5
            y_anchor = np.nanmin(y_section)  # connectors start at the curve's trough

        # draw horizontal bracket line
        ax.hlines(y_line, xmin, xmax, colors="black", linestyles="-", alpha=0.5)

        # vertical connectors at both ends of the bracket
        ax.vlines([xmin, xmax], [y_anchor, y_anchor],
                  y_line, colors="gray", linestyles="-", alpha=0.5)

        # label in middle of bracket; the y-limits are read only now because
        # the lines drawn above may already have changed them
        y_lo, y_hi = ax.get_ylim()
        ax.text((xmin + xmax)/2, y_line + offset*(y_hi - y_lo)/100,
                f"CFTZ ({rp}-yr)", color="k", ha="center", va=va, fontsize=10)

# ---------------- Panel 1: 10-yr and 50-yr ----------------
# Median curve plus shaded q25–q75 band for each return period in panel1_rps,
# taken from kde_stats.
ax = axes[0]
for i, rp in enumerate(panel1_rps):
    df_rp = kde_stats[rp]
    # Marker is picked by position in panel1_rps (first: square, second: circle)
    markers = ['s', 'o']
    ax.plot(df_rp['distance_km'], df_rp['median'], color=panel1_colors[rp],
            linestyle=line_styles[rp], marker=markers[i], markerfacecolor='none',
            label=f'{rp}-yr', markersize=7)
    ax.fill_between(df_rp['distance_km'], df_rp['q25'], df_rp['q75'],
                    color=panel1_colors[rp], alpha=0.2)

# CFTZ brackets: 10-yr bracket at the top, 50-yr bracket at the bottom
add_cftz_bracket(ax, kde_stats[10], 10, position="top")
add_cftz_bracket(ax, kde_stats[50], 50, position="bottom")

ax.set_xlabel('Distance Along River (km)', fontsize=16)
ax.set_ylabel(
    'Hydrologic-attributed Depth (%):\n Median and 25th–75th interquartile range',
    fontsize=16
)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=12)
ax.tick_params(axis='both', which='major', labelsize=14)

# ---------------- Panel 2: 100-yr and 500-yr ----------------
# Same layout as panel 1; no y-label is set because the y-axis is shared below.
ax = axes[1]
for i, rp in enumerate(panel2_rps):
    df_rp = kde_stats[rp]
    markers = ['s', 'o']
    ax.plot(df_rp['distance_km'], df_rp['median'], color=panel2_colors[rp],
            linestyle=line_styles[rp], marker=markers[i], markerfacecolor='none',
            label=f'{rp}-yr', markersize=7)
    ax.fill_between(df_rp['distance_km'], df_rp['q25'], df_rp['q75'],
                    color=panel2_colors[rp], alpha=0.2)

# CFTZ brackets: 100-yr bracket at the top, 500-yr bracket at the bottom
add_cftz_bracket(ax, kde_stats[100], 100, position="top")
add_cftz_bracket(ax, kde_stats[500], 500, position="bottom")

ax.set_xlabel('Distance Along River (km)', fontsize=16)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=12, loc='lower left')
ax.tick_params(axis='both', which='major', labelsize=14)

# ---------------- Shared formatting ----------------
# Link the two panels' y-axes after both have been drawn
axes[0].sharey(axes[1])
plt.tight_layout()
# Written to the current working directory
plt.savefig("hydrologic_contribution_with_CFTZ_brackets.pdf", bbox_inches="tight")
plt.show()

In [0]:
import matplotlib.pyplot as plt
import numpy as np

# Return periods (zipped positionally with each entry of cdf_data_frames) and line colors
return_periods = [10, 50, 100, 500]
colors = ['tab:red', 'tab:orange', 'tab:green', 'tab:blue']

# rp -> list of (point index, probability-weighted median)
medians_by_rp = {rp: [] for rp in return_periods}

for i, rp_dfs in enumerate(cdf_data_frames):
    for rp, df in zip(return_periods, rp_dfs):
        if df.empty:
            # keep a gap in the curve where no events exist
            medians_by_rp[rp].append((i, np.nan))
            continue

        # Weighted median: sort by value, build the normalized cumulative
        # probability, and read the value where it reaches 0.5
        ranked = df.sort_values(by='depth_hydrologic_percentage')
        cumulative = ranked['prob'].cumsum() / ranked['prob'].sum()
        weighted_median = np.interp(0.5, cumulative, ranked['depth_hydrologic_percentage'])
        medians_by_rp[rp].append((i, weighted_median))

# --- Plot ---
plt.figure(figsize=(10,6))
for rp, color in zip(return_periods, colors):
    series = medians_by_rp[rp]
    plt.plot([point for point, _ in series], [value for _, value in series],
             marker='o', color=color, label=f'{rp}-yr')

plt.xlabel('Point Index (i)', fontsize=12)
plt.ylabel('Median of Hydrologic-attributed Depth (%)', fontsize=12)
plt.title('Medians of Hydrologic-attributed Depth % Across Transect Points', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(frameon=False, fontsize=10)
plt.tight_layout()
plt.show()



In [0]:
import matplotlib.pyplot as plt
import numpy as np

# Return periods (zipped positionally with each entry of cdf_data_frames) and line colors
return_periods = [10, 50, 100, 500]
colors = ['tab:red', 'tab:orange', 'tab:green', 'tab:blue']

# rp -> list of (point index, probability-weighted median of surge-attributed depth %)
medians_by_rp = {rp: [] for rp in return_periods}

# loop through each i (point index)
for i, rp_dfs in enumerate(cdf_data_frames):
    for rp, df in zip(return_periods, rp_dfs):
        if not df.empty:
            # Weighted median: sort by value, accumulate normalized probability,
            # then interpolate the value at cumulative probability 0.5
            df = df.sort_values(by='depth_surge_percentage').reset_index(drop=True)
            df['normalized_prob'] = df['prob'].cumsum() / df['prob'].sum()

            median_val = np.interp(0.5, df['normalized_prob'], df['depth_surge_percentage'])
            medians_by_rp[rp].append((i, median_val))
        else:
            # keep a gap in the curve where no events exist
            medians_by_rp[rp].append((i, np.nan))

# --- Plot ---
plt.figure(figsize=(10,6))
for rp, color in zip(return_periods, colors):
    xs = [pt[0] for pt in medians_by_rp[rp]]
    ys = [pt[1] for pt in medians_by_rp[rp]]
    plt.plot(xs, ys, marker='o', color=color, label=f'{rp}-yr')

# The plotted column is depth_surge_percentage, so the labels name surge
# (they previously said "Hydrologic-attributed", copied from the cell above).
plt.xlabel('Point Index (i)', fontsize=12)
plt.ylabel('Median of Surge-attributed Depth (%)', fontsize=12)
plt.title('Medians of Surge-attributed Depth % Across Transect Points', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(frameon=False, fontsize=10)
plt.tight_layout()
plt.show()

In [0]:
import matplotlib.pyplot as plt
import numpy as np

# Plot/legend order (largest return period first) and the color of each curve
return_periods = [500, 100, 50, 10]
colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red']

# Positional order of the frames inside each cdf_data_frames entry.
# NOTE(review): the other cells in this notebook pair the frames with
# [10, 50, 100, 500] (e.g. `df_10, df_50, df_100, df_500 = cdf_data_frames[idx]`).
# Zipping them with the descending plot order attached every curve to the wrong
# return period, so the data order is kept separate from the plot order here.
frame_rp_order = [10, 50, 100, 500]

# rp -> list of (point index, unweighted sample median)
medians_by_rp = {rp: [] for rp in return_periods}

# loop through each i (point index)
for i, rp_dfs in enumerate(cdf_data_frames):
    for rp, df in zip(frame_rp_order, rp_dfs):
        # NaN keeps a gap in the curve where no events exist
        median_val = df['depth_hydrologic_percentage'].median() if not df.empty else np.nan
        medians_by_rp[rp].append((i, median_val))

# --- Plot ---
plt.figure(figsize=(10,6))
for rp, color in zip(return_periods, colors):
    xs = [pt[0] for pt in medians_by_rp[rp]]
    ys = [pt[1] for pt in medians_by_rp[rp]]
    plt.plot(xs, ys, marker='o', color=color, label=f'{rp}-yr')

plt.xlabel('Point Index (i)', fontsize=12)
plt.ylabel('Median of Hydrologic-attributed Depth (%)', fontsize=12)
plt.title('Medians of Hydrologic-attributed Depth % Across Transect Points', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(frameon=False, fontsize=10)
plt.tight_layout()
plt.show()


In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
from scipy.optimize import minimize_scalar

def kde_quantile(data, quantile, bounds=(0, 100)):
    """Estimate a quantile from a Gaussian KDE restricted to a bounded interval.

    Parameters
    ----------
    data : array-like of float
        Sample values (list, NumPy array or pandas Series). NaNs are dropped
        and the remaining values are clipped into ``bounds``.
    quantile : float
        Probability level. Values <= 0 return ``bounds[0]`` and values >= 1
        return ``bounds[1]`` (when the sample is not constant).
    bounds : tuple (lower, upper)
        Interval on which the density is evaluated and renormalized.

    Returns
    -------
    float
        ``nan`` when no finite data remain; the common value when all data are
        identical; otherwise the interpolated quantile within ``bounds``.

    Notes
    -----
    The CDF is built with the trapezoid rule so that it is exactly 0 at the
    lower bound and exactly 1 at the upper bound. A plain cumulative sum of
    ``pdf * dx`` starts above 0 and ends above 1, which shifts every quantile
    towards the lower bound by roughly half a grid step.
    """
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        return np.nan

    lower, upper = bounds[0], bounds[1]

    # Ensure data is within bounds
    values = np.clip(values, lower, upper)

    # A constant sample has no spread to fit a KDE to
    if np.std(values) == 0:
        return values[0]

    # Degenerate probability levels need no density estimate
    if quantile <= 0:
        return lower
    if quantile >= 1:
        return upper

    # Density on a regular grid covering the bounded domain
    kde = gaussian_kde(values)
    x_eval = np.linspace(lower, upper, 1000)
    pdf_vals = kde(x_eval)

    # Trapezoid-rule CDF, normalized so the mass inside the bounds is 1
    increments = 0.5 * (pdf_vals[1:] + pdf_vals[:-1]) * np.diff(x_eval)
    cdf_vals = np.concatenate(([0.0], np.cumsum(increments)))
    cdf_vals = cdf_vals / cdf_vals[-1]

    # Invert the CDF by linear interpolation
    return np.interp(quantile, cdf_vals, x_eval)

# Plot/legend order (largest return period first) and the color of each curve
return_periods = [500, 100, 50, 10]
colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red']

# Positional order of the frames inside each cdf_data_frames entry.
# NOTE(review): the other cells in this notebook pair the frames with
# [10, 50, 100, 500] (e.g. `df_10, df_50, df_100, df_500 = cdf_data_frames[idx]`).
# Zipping them with the descending plot order attached every curve to the wrong
# return period, so the data order is kept separate from the plot order here.
frame_rp_order = [10, 50, 100, 500]

# rp -> list of (point index, q25, median, q75)
stats_by_rp = {rp: [] for rp in return_periods}

# loop through each i (point index)
for i, rp_dfs in enumerate(cdf_data_frames):
    for rp, df in zip(frame_rp_order, rp_dfs):
        if not df.empty:
            data = df['depth_hydrologic_percentage'].values

            # Use KDE-based quantiles (unweighted: 'prob' is not used here)
            q25 = kde_quantile(data, 0.25)
            median_val = kde_quantile(data, 0.50)
            q75 = kde_quantile(data, 0.75)

            stats_by_rp[rp].append((i, q25, median_val, q75))
        else:
            # keep a gap in the curve where no events exist
            stats_by_rp[rp].append((i, np.nan, np.nan, np.nan))

# --- Plot ---
plt.figure(figsize=(10,6))
for rp, color in zip(return_periods, colors):
    xs = [pt[0] for pt in stats_by_rp[rp]]
    medians = [pt[2] for pt in stats_by_rp[rp]]
    q25s = [pt[1] for pt in stats_by_rp[rp]]
    q75s = [pt[3] for pt in stats_by_rp[rp]]

    # line for medians
    plt.plot(xs, medians, marker='o', color=color, label=f'{rp}-yr')

    # shaded interquartile range
    plt.fill_between(xs, q25s, q75s, color=color, alpha=0.2)

plt.xlabel('Point Index (i)', fontsize=12)
plt.ylabel('Median of Hydrologic-attributed Depth (%)', fontsize=12)
plt.title('KDE-Smoothed Medians with Interquartile Range of Hydrologic-attributed Depth %', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(frameon=False, fontsize=10)
plt.tight_layout()
plt.show()

# Figure 11
Examine the distribution of the hydrologic and storm surge attribution

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Straight-line regression helper ---
def fit_and_predict(x, y):
    """Fit ``LinearRegression`` of y on x and evaluate it at the sorted x values.

    ``x`` and ``y`` are 1-D NumPy arrays of equal length. Returns
    ``(x sorted ascending, fitted values in the same order, fitted model)``.
    """
    features = x.reshape(-1, 1)  # single-feature design matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Simple weighted line function ---
def plot_weighted_line(ax, x, y, weights, color, label):
    """Draw a regression line whose opacity follows the local weight density.

    The line is fitted with ``fit_and_predict`` and drawn between the smallest
    and largest x value (no padding) as 99 short segments of constant width.
    Each segment's alpha runs from 0.2 to 1.0 according to the summed
    ``weights`` of the data points lying within 10 % of the x-range of the
    segment's end points. A thin black centre line and an empty legend proxy
    carrying ``label`` are added on top.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    x, y : array-like, 1-D, equal length
        Predictor and response of the regression.
    weights : array-like, same length as ``x``
        Non-negative weight of each point.
    color : matplotlib color for the shaded line and legend entry.
    label : str
        Legend text; also used in the diagnostic print.

    Returns
    -------
    (x_sorted, y_pred, model) exactly as returned by ``fit_and_predict``.
    """
    # Accept lists / pandas Series as well as arrays
    x = np.asarray(x)
    y = np.asarray(y)
    weights = np.asarray(weights)

    # Fit regression
    x_sorted, y_pred, model = fit_and_predict(x, y)

    # The line spans exactly the data range; no buffer is added
    x_start = x_sorted.min()
    x_end = x_sorted.max()
    x_range = x_end - x_start

    # Create many points along the line for smooth shading
    n_points = 100
    x_line = np.linspace(x_start, x_end, n_points)
    y_line = model.predict(x_line.reshape(-1, 1))

    # Local density: summed weight of the points within +/- 10 % of the data
    # range around each line position (an empty window sums to 0)
    window_size = x_range * 0.1
    densities = np.array([
        np.sum(weights[np.abs(x - x_val) <= window_size]) for x_val in x_line
    ])

    # Normalize densities to [0, 1] for alpha mapping
    spread = densities.max() - densities.min()
    if spread > 0:
        densities_norm = (densities - densities.min()) / spread
    else:
        # Flat density: use a uniform mid-range opacity
        densities_norm = np.ones_like(densities) * 0.5

    print(f"{label}: density range {densities_norm.min():.3f} to {densities_norm.max():.3f}")

    # Plot line segments with varying alpha
    for i in range(len(x_line) - 1):
        # Alpha based on average density of this segment
        avg_density = (densities_norm[i] + densities_norm[i + 1]) / 2
        alpha = 0.2 + 0.8 * avg_density  # Range from 0.2 to 1.0

        ax.plot([x_line[i], x_line[i + 1]], [y_line[i], y_line[i + 1]],
                color=color, linewidth=8, alpha=alpha,
                solid_capstyle='round', zorder=5)

    # Plot thin black centerline over the entire line
    ax.plot(x_line, y_line, color='black', linewidth=0.5, alpha=1.0,
            solid_capstyle='round', zorder=10)

    # Empty proxy artist so the legend shows one uniform swatch per line
    ax.plot([], [], color=color, linewidth=12, alpha=0.6, label=label)

    return x_sorted, y_pred, model

# Figure 11 driver: a 2x2 grid, one panel per selected transect point, each
# showing one density-shaded regression line per return period (in metres).
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Define total length in km
# 0.0003048 is the feet-to-kilometres factor; assumes the geometry length is
# in feet — TODO confirm against the CRS of amite_line.
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048

# Loop through the four panels
for idx, ax in zip([4, 9, 12, 20], axes.flatten()):
    # Point index -> distance along the river; int() truncates towards zero
    distance_km = int((idx * 5) * total_length_km / 115)

    #ax.set_aspect('equal', adjustable='box')
    
    # Define datasets with labels + colors
    df_10, df_50, df_100, df_500 = cdf_data_frames[idx]
    
    # Build datasets list
    # (drawn largest return period first, which also sets the legend order)
    datasets = [
        (df_500, '500-yr', 'tab:blue'),
        (df_100, '100-yr', 'tab:green'),
        (df_50, '50-yr', 'tab:orange'),
        (df_10, '10-yr', 'tab:red'),
    ]
    
    # (x_sorted, y_pred, model, label, color, df) per line, reused for the labels below
    lines_info = []
    
    for df, label, color in datasets:
        x = df['depth_hydrologic'].values*0.3048 #ft to meters
        y = df['depth_base_storm_raw'].values*0.3048 #ft to meters
        
        # Normalize weights to create a proper PDF (sum to 1)
        raw_weights = df['prob'].values
        weights = raw_weights / raw_weights.sum()  # This is now a proper PDF
        
        print(f"{label}: PDF sum = {weights.sum():.6f}")  # Should be 1.0
        
        # Plot weighted line
        x_sorted, y_pred, model = plot_weighted_line(ax, x, y, weights, color, label)
        lines_info.append((x_sorted, y_pred, model, label, color, df))
    
    # Set axis limits with proper buffering
    # all_x / all_y hold the raw column values (not yet multiplied by 0.3048)
    all_x = np.concatenate([df['depth_hydrologic'].values for df, _, _ in datasets])
    all_y = np.concatenate([df['depth_base_storm_raw'].values for df, _, _ in datasets])
    
    # NOTE(review): x_range / y_range are only referenced by the commented-out
    # limits further down.
    x_range = (all_x.max() - all_x.min())
    y_range = (all_y.max() - all_y.min())

    # Fixed-size windows: lower edge at -0.95*0.3048, upper edge at
    # (minimum + 17) resp. (minimum + 10), converted with the 0.3048 factor
    ax.set_xlim(0 - 0.95*0.3048 , (all_x.min()+17)*0.3048)
    ax.set_ylim(0 - 0.95*0.3048, (all_y.min()+10)*0.3048)

    #ax.set_xlim(all_x.min() - 0.1 * x_range, all_x.max() + 0.1 * x_range)
    #ax.set_ylim(all_y.min() - 0.1 * y_range, all_y.max() + 0.1 * y_range)
    #ax.set_ylim(-.25, 12)
    
    # Set up the plot
    ax.set_xlabel('Hydrologic-attributed Depth, m', fontsize=17)
    ax.set_ylabel('Surge-attributed Depth, m', fontsize=17)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.set_title(f'Design events at {distance_km:} km along the Amite River', fontsize=18)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(frameon=False, fontsize=12, loc='upper right')
    
    # Add rotated text annotations
    for x_sorted, y_pred, model, label, color, df in lines_info:
        # Despite the name, the label is anchored at the LEFT end of the line
        x_mid = x_sorted[0] #(x_sorted[0] + x_sorted[-1]) / 2
        y_mid = np.interp(x_mid, x_sorted, y_pred)
        
        # Slope of the fitted line in data units
        data_slope = (y_pred[-1] - y_pred[0]) / (x_sorted[-1] - x_sorted[0])
        
        # Convert the data slope to an on-screen angle using the current axis
        # limits and the axes' pixel size. This is evaluated before
        # plt.tight_layout() runs, so the pixel size may still change.
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        bbox = ax.get_window_extent()
        width_display = bbox.width
        height_display = bbox.height
        
        x_scale = width_display / (xlim[1] - xlim[0])
        y_scale = height_display / (ylim[1] - ylim[0])
        
        visual_slope = data_slope * (y_scale / x_scale)
        # +2.5 degrees is a manual adjustment on top of the computed angle
        angle_deg = np.degrees(np.arctan(visual_slope))+2.5
        
        # Unweighted mean of both depth columns, summed and scaled by 0.3048
        avg_depth = (np.mean(df['depth_hydrologic'].values) + np.mean(df['depth_base_storm_raw'].values))*0.3048
        offset = 0.3*0.3048
        
        ax.text(
            x_mid,
            y_mid + offset,
            f'Compound Flood Depth {avg_depth:.2f} m',
            color='black',
            fontsize=12,
            rotation=angle_deg,
            ha='left',
            va='top',
            bbox=dict(facecolor='white', alpha=0.0, edgecolor='none'),
            zorder=11
        )

plt.tight_layout()
# Written to the current working directory
plt.savefig("design_storms_weighted.pdf", bbox_inches="tight")
plt.show()

### Other Variations of Figure 11 (NOT USED IN PAPER)

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Straight-line regression helper ---
def fit_and_predict(x, y):
    """Fit ``LinearRegression`` of y on x and evaluate it at the sorted x values.

    ``x`` and ``y`` are 1-D NumPy arrays of equal length. Returns
    ``(x sorted ascending, fitted values in the same order, fitted model)``.
    """
    features = x.reshape(-1, 1)  # single-feature design matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model


# Scatter + dashed regression variant of Figure 11: 2x2 grid, one panel per
# selected transect point; values are plotted as stored and labelled in ft.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Define total length in km
# (0.0003048 is the feet-to-kilometres factor; assumes the geometry length is
# in feet — TODO confirm)
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048
# Loop through the four panels
for idx, ax in zip([4, 9, 12, 20], axes.flatten()):
    distance_km = int((idx * 5) * total_length_km / 115) # Convert point index to distance; int() truncates towards zero (it does not round)
    # Define datasets with labels + colors
    df_10, df_50, df_100, df_500 = cdf_data_frames[idx]
    # build datasets list
    datasets = [
        (df_500, '500-yr', 'tab:blue'),
        (df_100, '100-yr', 'tab:green'),
        (df_50, '50-yr', 'tab:orange'),
        (df_10, '10-yr', 'tab:red'),
    ]
    # (x_sorted, y_pred, model, label, color, df) per line, reused for the labels below
    lines_info = []
    for df, label, color in datasets:
        x = df['depth_hydrologic'].values
        y = df['depth_base_storm_raw'].values
        # Scatter
        ax.scatter(x, y, color=color, alpha=0.4, s=20)
        # Regression
        x_sorted, y_pred, model = fit_and_predict(x, y)
        ax.plot(x_sorted, y_pred, color=color, linewidth=2, linestyle='--', label=label)
        lines_info.append((x_sorted, y_pred, model, label, color, df))
    # Set up the plot completely first
    ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=16)
    ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.set_title(f'Design events at {distance_km:} km along the Amite River', fontsize=17)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(frameon=False, fontsize=12, loc='upper right')
    # Now that the plot is fully set up, calculate angles
    for x_sorted, y_pred, model, label, color, df in lines_info:
        # midpoint in data coordinates
        x_mid = (x_sorted[0] + x_sorted[-1]) / 2
        y_mid = np.interp(x_mid, x_sorted, y_pred)
        # Get the actual slope of the line in data coordinates
        data_slope = (y_pred[-1] - y_pred[0]) / (x_sorted[-1] - x_sorted[0])
        # Convert slope to visual angle accounting for axis scaling
        # Read the current axis limits
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        # Get the axes size in display coordinates (evaluated before
        # plt.tight_layout(), so the final size may differ)
        bbox = ax.get_window_extent()
        width_display = bbox.width
        height_display = bbox.height
        # Calculate scaling factors
        x_scale = width_display / (xlim[1] - xlim[0])
        y_scale = height_display / (ylim[1] - ylim[0])
        # Adjust slope for display coordinates
        visual_slope = data_slope * (y_scale / x_scale)
        angle_deg = np.degrees(np.arctan(visual_slope))
        # Unweighted mean of both depth columns, summed
        avg_depth = np.mean(df['depth_hydrologic'].values) + np.mean(df['depth_base_storm_raw'].values)
        offset = 0.3  # Smaller vertical offset in data units - closer to line
        ax.text(
            x_mid,
            y_mid + offset,
            f'Compound Flood Depth {avg_depth:.2f} ft',
            color='black',
            fontsize=12,
            rotation=angle_deg,
            ha='center',
            va='center',
            bbox=dict(facecolor='white', alpha=0.5, edgecolor='none')
        )
plt.tight_layout()
# Written to the current working directory
plt.savefig("design_storms.pdf", bbox_inches="tight")
plt.show()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Straight-line regression helper ---
def fit_and_predict(x, y):
    """Fit ``LinearRegression`` of y on x and evaluate it at the sorted x values.

    ``x`` and ``y`` are 1-D NumPy arrays of equal length. Returns
    ``(x sorted ascending, fitted values in the same order, fitted model)``.
    """
    features = x.reshape(-1, 1)  # single-feature design matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Density-shaded regression line ---
def plot_weighted_line(ax, x, y, weights, color, label):
    """Plot the fitted line of y on x with opacity driven by local weight density.

    The line covers the data's x-extent in 99 equal segments of fixed width.
    A segment's alpha lies between 0.2 and 1.0 and reflects the total weight
    of the observations within 10 % of the x-range of its end points. A black
    centre line and an empty legend proxy labelled ``label`` are drawn as well.
    Returns the ``(x_sorted, y_pred, model)`` triple from ``fit_and_predict``.
    """
    x_sorted, y_pred, model = fit_and_predict(x, y)

    # Extent of the line; the buffer factor is zero, so no padding is applied
    x_range = x_sorted.max() - x_sorted.min()
    x_buffer = x_range * 0.00
    x_start = x_sorted.min() - x_buffer
    x_end = x_sorted.max() + x_buffer

    # Dense grid along the line for smooth shading
    n_points = 100
    x_line = np.linspace(x_start, x_end, n_points)
    y_line = model.predict(x_line.reshape(-1, 1))

    # Summed weight of the observations inside a +/- 10 % window at each grid
    # position; an empty window contributes 0
    window_size = x_range * 0.1
    densities = np.array([
        np.sum(weights[np.abs(x - grid_x) <= window_size]) for grid_x in x_line
    ])

    # Rescale to [0, 1]; a flat profile maps to a uniform 0.5
    lowest, highest = densities.min(), densities.max()
    if highest > lowest:
        densities_norm = (densities - lowest) / (highest - lowest)
    else:
        densities_norm = np.ones_like(densities) * 0.5

    print(f"{label}: density range {densities_norm.min():.3f} to {densities_norm.max():.3f}")

    # One short segment per grid interval, alpha from the mean of its end densities
    segment_density = (densities_norm[:-1] + densities_norm[1:]) / 2
    segment_alpha = 0.2 + 0.8 * segment_density  # 0.2 (sparse) .. 1.0 (dense)
    for k, alpha in enumerate(segment_alpha):
        ax.plot([x_line[k], x_line[k + 1]], [y_line[k], y_line[k + 1]],
                color=color, linewidth=8, alpha=alpha,
                solid_capstyle='round', zorder=5)

    # Solid black centre line on top of the shaded band
    ax.plot(x_line, y_line, color='black', linewidth=1.5, alpha=1.0,
            solid_capstyle='round', zorder=10)

    # Empty proxy artist that supplies the legend entry
    ax.plot([], [], color=color, linewidth=8, alpha=0.6, label=label)

    return x_sorted, y_pred, model

# Feet-unit variant of the weighted design-storm figure: 2x2 grid, one panel
# per selected transect point, one density-shaded regression line per return period.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Define total length in km
# (0.0003048 is the feet-to-kilometres factor; assumes the geometry length is
# in feet — TODO confirm)
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048

# Loop through the four panels
for idx, ax in zip([4, 9, 12, 20], axes.flatten()):
    # Point index -> distance along the river; int() truncates towards zero
    distance_km = int((idx * 5) * total_length_km / 115)

    # Define datasets with labels + colors
    df_10, df_50, df_100, df_500 = cdf_data_frames[idx]

    # Build datasets list (drawn largest return period first)
    datasets = [
        (df_500, '500-yr', 'tab:blue'),
        (df_100, '100-yr', 'tab:green'),
        (df_50, '50-yr', 'tab:orange'),
        (df_10, '10-yr', 'tab:red'),
    ]

    # (x_sorted, y_pred, model, label, color, df) per line, reused for the labels below
    lines_info = []

    for df, label, color in datasets:
        x = df['depth_hydrologic'].values
        y = df['depth_base_storm_raw'].values

        # Normalize weights to create a proper PDF (sum to 1)
        raw_weights = df['prob'].values
        weights = raw_weights / raw_weights.sum()  # This is now a proper PDF

        print(f"{label}: PDF sum = {weights.sum():.6f}")  # Should be 1.0

        # Plot weighted line
        x_sorted, y_pred, model = plot_weighted_line(ax, x, y, weights, color, label)
        lines_info.append((x_sorted, y_pred, model, label, color, df))

    # Set axis limits: pooled data range of all four return periods, padded by 10 %
    all_x = np.concatenate([df['depth_hydrologic'].values for df, _, _ in datasets])
    all_y = np.concatenate([df['depth_base_storm_raw'].values for df, _, _ in datasets])

    x_range = all_x.max() - all_x.min()
    y_range = all_y.max() - all_y.min()
    ax.set_xlim(all_x.min() - 0.1 * x_range, all_x.max() + 0.1 * x_range)
    ax.set_ylim(all_y.min() - 0.1 * y_range, all_y.max() + 0.1 * y_range)

    # Set up the plot
    ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=16)
    ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.set_title(f'Design events at {distance_km:} km along the Amite River', fontsize=17)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(frameon=False, fontsize=12, loc='upper right')

    # Add rotated text annotations at the midpoint of each line
    for x_sorted, y_pred, model, label, color, df in lines_info:
        x_mid = (x_sorted[0] + x_sorted[-1]) / 2
        y_mid = np.interp(x_mid, x_sorted, y_pred)

        # Slope of the fitted line in data units
        data_slope = (y_pred[-1] - y_pred[0]) / (x_sorted[-1] - x_sorted[0])

        # Convert the data slope to an on-screen angle using the current axis
        # limits and the axes' pixel size (evaluated before tight_layout)
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        bbox = ax.get_window_extent()
        width_display = bbox.width
        height_display = bbox.height

        x_scale = width_display / (xlim[1] - xlim[0])
        y_scale = height_display / (ylim[1] - ylim[0])

        visual_slope = data_slope * (y_scale / x_scale)
        angle_deg = np.degrees(np.arctan(visual_slope))

        # Unweighted mean of both depth columns, summed
        avg_depth = np.mean(df['depth_hydrologic'].values) + np.mean(df['depth_base_storm_raw'].values)
        offset = 0.3

        ax.text(
            x_mid,
            y_mid + offset,
            f'Compound Flood Depth {avg_depth:.2f} ft',
            color='black',
            fontsize=12,
            rotation=angle_deg,
            ha='center',
            va='center',
            bbox=dict(facecolor='white', alpha=0.5, edgecolor='none')
        )

plt.tight_layout()
# Saved under its own name: the paper-figure cell writes
# "design_storms_weighted.pdf", and reusing that name here would overwrite the
# paper figure with this feet-unit variant.
plt.savefig("design_storms_weighted_ft.pdf", bbox_inches="tight")
plt.show()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Straight-line regression helper ---
def fit_and_predict(x, y):
    """Fit ``LinearRegression`` of y on x and evaluate it at the sorted x values.

    ``x`` and ``y`` are 1-D NumPy arrays of equal length. Returns
    ``(x sorted ascending, fitted values in the same order, fitted model)``.
    """
    features = x.reshape(-1, 1)  # single-feature design matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Data definitions ---
# Unpack the four DataFrames stored for transect point 4.
# NOTE(review): the Figure 10/11 cells treat each cdf_data_frames entry as
# ordered [10, 50, 100, 500] (`df_10, df_50, df_100, df_500 = cdf_data_frames[idx]`).
# This cell unpacked the entry in the reverse order, which attached every
# label and color to the wrong return period; the unpack order now matches the
# rest of the notebook.
df_10, df_50, df_100, df_500 = cdf_data_frames[4]

# build datasets list (drawn largest return period first)
datasets = [
    (df_500, '500-yr', 'tab:blue'),
    (df_100, '100-yr', 'tab:green'),
    (df_50,  '50-yr',  'tab:orange'),
    (df_10,  '10-yr',  'tab:red'),
]

# --- Create side-by-side panels ---
fig, axes = plt.subplots(1, 2, figsize=(14,6))

# -------- Panel (a): Regression --------
# Scatter of the two depth columns with a dashed least-squares line per return period
ax = axes[0]
for df, label, color in datasets:
    x = df['depth_hydrologic'].values
    y = df['depth_base_storm_raw'].values

    # scatter
    ax.scatter(x, y, color=color, alpha=0.4, s=20, label=f'{label} data')

    # regression line
    x_sorted, y_pred, model = fit_and_predict(x, y)
    ax.plot(x_sorted, y_pred, color=color, linewidth=2, linestyle='--', label=f'{label} fit')

ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=12)
ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=12)
# NOTE(review): the distance in this title is hard-coded, not derived from the
# point index used above — confirm it matches index 4.
ax.set_title('At 35 km Along the Amite River Transect', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

# -------- Panel (b): KDE distributions --------
# Unweighted seaborn KDE of the hydrologic share (%) for each return period
ax = axes[1]
sns.kdeplot(df_500['depth_hydrologic_percentage'], fill=True, alpha=0.3, label='500-yr', color='tab:blue', ax=ax)
sns.kdeplot(df_100['depth_hydrologic_percentage'], fill=True, alpha=0.3, label='100-yr', color='tab:green', ax=ax)
sns.kdeplot(df_50['depth_hydrologic_percentage'],  fill=True, alpha=0.3, label='50-yr',  color='tab:orange', ax=ax)
sns.kdeplot(df_10['depth_hydrologic_percentage'],  fill=True, alpha=0.3, label='10-yr',  color='tab:red', ax=ax)

ax.set_xlabel('Hydrologic-attributed Depth (%)', fontsize=12)
ax.set_ylabel('Density', fontsize=12)
#ax.set_title('(b) Distribution of Hydrologic-attributed Depth', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

plt.tight_layout()
plt.show()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Helper for regression fitting ---
def fit_and_predict(x, y):
    """Fit a LinearRegression of y on the 1-D array x.

    Returns a tuple of (x in ascending order, fitted y values in that same
    order, the fitted model).
    """
    features = x.reshape(-1, 1)  # the estimator expects a 2-D feature matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Data definitions ---
# Index of the transect location shown in this figure
location_idx = 12

# Unpack the four return-period DataFrames in ascending order (10-, 50-, 100-, 500-yr).
# The design-event figure cells unpack cdf_data_frames[idx] as
# (df_10, df_50, df_100, df_500) and the 500-yr storm figure reads element [3],
# so the same order is used here to keep labels attached to the right data.
# NOTE(review): confirm against the code that builds cdf_data_frames.
df_10, df_50, df_100, df_500 = cdf_data_frames[location_idx]

# Plotting order (largest return period first) with labels + colors
datasets = [
    (df_500, '500-yr', 'tab:blue'),
    (df_100, '100-yr', 'tab:green'),
    (df_50,  '50-yr',  'tab:orange'),
    (df_10,  '10-yr',  'tab:red'),
]

# Distance along the transect, computed with the same expression as the
# design-event figure so the title follows location_idx instead of a fixed value.
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048
distance_km = int((location_idx * 5) * total_length_km / 115)

# --- Create side-by-side panels ---
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# -------- Panel (a): Regression --------
ax = axes[0]
for df, label, color in datasets:
    x = df['depth_hydrologic'].values
    y = df['depth_base_storm_raw'].values

    # scatter
    ax.scatter(x, y, color=color, alpha=0.4, s=20, label=f'{label} data')

    # regression line
    x_sorted, y_pred, model = fit_and_predict(x, y)
    ax.plot(x_sorted, y_pred, color=color, linewidth=2, linestyle='--', label=f'{label} fit')

ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=12)
ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=12)
ax.set_title(f'At {distance_km} km Along the Amite River Transect', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

# -------- Panel (b): KDE distributions --------
ax = axes[1]
for df, label, color in datasets:
    sns.kdeplot(df['depth_hydrologic_percentage'], fill=True, alpha=0.3,
                label=label, color=color, ax=ax)

ax.set_xlabel('Hydrologic-attributed Depth (%)', fontsize=12)
ax.set_ylabel('Density', fontsize=12)
#ax.set_title('(b) Distribution of Hydrologic-attributed Depth', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

plt.tight_layout()
plt.show()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Helper for regression fitting ---
def fit_and_predict(x, y):
    """Fit a LinearRegression of y on the 1-D array x.

    Returns a tuple of (x in ascending order, fitted y values in that same
    order, the fitted model).
    """
    features = x.reshape(-1, 1)  # the estimator expects a 2-D feature matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Data definitions ---
# Index of the transect location shown in this figure
location_idx = 20

# Unpack the four return-period DataFrames in ascending order (10-, 50-, 100-, 500-yr).
# The design-event figure cells unpack cdf_data_frames[idx] as
# (df_10, df_50, df_100, df_500) and the 500-yr storm figure reads element [3],
# so the same order is used here to keep labels attached to the right data.
# NOTE(review): confirm against the code that builds cdf_data_frames.
df_10, df_50, df_100, df_500 = cdf_data_frames[location_idx]

# Plotting order (largest return period first) with labels + colors
datasets = [
    (df_500, '500-yr', 'tab:blue'),
    (df_100, '100-yr', 'tab:green'),
    (df_50,  '50-yr',  'tab:orange'),
    (df_10,  '10-yr',  'tab:red'),
]

# Distance along the transect, computed with the same expression as the
# design-event figure so the title follows location_idx instead of a fixed value.
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048
distance_km = int((location_idx * 5) * total_length_km / 115)

# --- Create side-by-side panels ---
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# -------- Panel (a): Scatter only (no regression line at this location) --------
ax = axes[0]
for df, label, color in datasets:
    x = df['depth_hydrologic'].values
    y = df['depth_base_storm_raw'].values

    # scatter
    ax.scatter(x, y, color=color, alpha=0.4, s=20, label=f'{label} data')


ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=12)
ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=12)
ax.set_title(f'At {distance_km} km Along the Amite River Transect', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

# -------- Panel (b): KDE distributions --------
ax = axes[1]
for df, label, color in datasets:
    sns.kdeplot(df['depth_hydrologic_percentage'], fill=True, alpha=0.3,
                label=label, color=color, ax=ax)

ax.set_xlabel('Hydrologic-attributed Depth (%)', fontsize=12)
ax.set_ylabel('Density', fontsize=12)
#ax.set_title('(b) Distribution of Hydrologic-attributed Depth', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend(frameon=False, fontsize=10)

plt.tight_layout()
plt.show()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

# --- Helper for regression fitting ---
def fit_and_predict(x, y):
    """Fit a LinearRegression of y on the 1-D array x.

    Returns a tuple of (x in ascending order, fitted y values in that same
    order, the fitted model).
    """
    features = x.reshape(-1, 1)  # the estimator expects a 2-D feature matrix
    model = LinearRegression()
    model.fit(features, y)
    fitted = model.predict(features)
    ascending = np.argsort(x)
    return x[ascending], fitted[ascending], model

# --- Simple weighted line function ---
def plot_weighted_line(ax, x, y, weights, color, label, linestyle='-', linewidth=8):
    """
    Draw the fitted regression line as a constant-width band whose opacity
    follows the summed weight of the data points near each position.

    The line covers the x extent of the data plus a 5% margin on each side.
    A thin black centerline is drawn on top, and an empty proxy line carrying
    ``label`` is added for the legend. Returns the tuple produced by
    fit_and_predict: (sorted x, fitted y in that order, model).
    """
    x_sorted, y_pred, model = fit_and_predict(x, y)

    # Line extent: data range padded by 5% on both ends
    x_low, x_high = x_sorted.min(), x_sorted.max()
    x_range = x_high - x_low
    x_buffer = x_range * 0.05
    n_points = 100
    x_line = np.linspace(x_low - x_buffer, x_high + x_buffer, n_points)
    y_line = model.predict(x_line.reshape(-1, 1))

    # Local density: total weight of the points within 10% of the data range
    # of each sample position (an empty neighbourhood sums to zero)
    window_size = x_range * 0.1
    densities = np.array([
        np.sum(weights[np.abs(x - x_val) <= window_size])
        for x_val in x_line
    ])

    # Rescale densities to [0, 1]; a flat profile maps to a uniform 0.5
    d_low, d_high = densities.min(), densities.max()
    if d_high > d_low:
        densities_norm = (densities - d_low) / (d_high - d_low)
    else:
        densities_norm = np.ones_like(densities) * 0.5

    print(f"{label}: density range {densities_norm.min():.3f} to {densities_norm.max():.3f}")

    # One short segment per pair of neighbouring samples; alpha in [0.2, 1.0]
    # from the mean normalized density of the segment's two end points
    segment_alphas = 0.2 + 0.8 * ((densities_norm[:-1] + densities_norm[1:]) / 2)
    for start, alpha in enumerate(segment_alphas):
        ax.plot(x_line[start:start + 2], y_line[start:start + 2],
                color=color, linewidth=linewidth, alpha=alpha, linestyle=linestyle,
                solid_capstyle='round', zorder=5)

    # Thin black centerline over the full extent
    ax.plot(x_line, y_line, color='black', linewidth=0.5, alpha=1.0,
            linestyle=linestyle, solid_capstyle='round', zorder=10)

    # Empty proxy artist so the legend shows a solid swatch for this label
    if linewidth == 8:
        legend_linewidth = 12
    else:
        legend_linewidth = 8
    ax.plot([], [], color=color, linewidth=legend_linewidth, alpha=0.6,
            linestyle=linestyle, label=label)

    return x_sorted, y_pred, model

# --- Simple regression line function for top panel ---
def plot_simple_regression_line(ax, x, y, weights, color, linestyle, linewidth=3):
    """
    Draw a uniform-opacity regression line of y on x.

    The line covers the x extent of the data plus a 5% margin on each side.
    ``weights`` is accepted but not used by this function. Returns the tuple
    produced by fit_and_predict: (sorted x, fitted y in that order, model).
    """
    fit_result = fit_and_predict(x, y)
    x_sorted, _, model = fit_result

    # Line extent: data range padded by 5% on both ends
    x_low, x_high = x_sorted.min(), x_sorted.max()
    x_buffer = (x_high - x_low) * 0.05
    x_line = np.linspace(x_low - x_buffer, x_high + x_buffer, 100)

    ax.plot(x_line, model.predict(x_line.reshape(-1, 1)),
            color=color, linewidth=linewidth,
            linestyle=linestyle, alpha=0.8, solid_capstyle='round')

    return fit_result

# --- Five-panel figure: one full-width panel on top, a 2x2 grid underneath ---
fig = plt.figure(figsize=(16, 16))

# 3 equal-height rows x 2 columns; the top row is merged into a single axes
gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 1], hspace=0.3, wspace=0.3)
ax_top = fig.add_subplot(gs[0, :])

# Lower panels in reading order: row 1 left/right, then row 2 left/right
axes = [fig.add_subplot(gs[row, col]) for row in (1, 2) for col in (0, 1)]

# Transect length in km
# (0.0003048 km per foot; presumably the geometry length is in feet -- TODO confirm)
total_length_km = amite_line.geometry.iloc[0].length * 0.0003048

# Location indices into cdf_data_frames, each paired with its own line style:
# solid, dashed, dash-dot, dotted
locations = [4, 9, 12, 20]
location_styles = dict(zip(locations, ['-', '--', '-.', ':']))

# One color per return period
rp_colors = dict(zip(
    ['10-yr', '50-yr', '100-yr', '500-yr'],
    ['tab:red', 'tab:orange', 'tab:green', 'tab:blue'],
))

# ================ TOP PANEL: All data combined ================
print("Processing top panel...")

# Collect all data for axis limits
all_x_combined = []
all_y_combined = []

# Distance (km) of each location along the transect, reused for the legend
location_distances_km = {}

# Plot all locations and return periods on the top panel
for idx in locations:
    distance_km = int((idx * 5) * total_length_km / 115)
    location_distances_km[idx] = distance_km
    linestyle = location_styles[idx]

    # Get datasets for this location
    df_10, df_50, df_100, df_500 = cdf_data_frames[idx]

    datasets = [
        (df_10, '10-yr', rp_colors['10-yr']),
        (df_50, '50-yr', rp_colors['50-yr']),
        (df_100, '100-yr', rp_colors['100-yr']),
        (df_500, '500-yr', rp_colors['500-yr']),
    ]

    for df, rp_label, color in datasets:
        x = df['depth_hydrologic'].values
        y = df['depth_base_storm_raw'].values

        # Normalize weights so they sum to 1
        raw_weights = df['prob'].values
        weights = raw_weights / raw_weights.sum()

        # Plot simple regression line
        plot_simple_regression_line(ax_top, x, y, weights, color, linestyle)

        # Collect data for axis limits
        all_x_combined.extend(x)
        all_y_combined.extend(y)

# Set axis limits for top panel: data extent padded by 10% on every side
all_x_combined = np.array(all_x_combined)
all_y_combined = np.array(all_y_combined)
x_range = all_x_combined.max() - all_x_combined.min()
y_range = all_y_combined.max() - all_y_combined.min()
ax_top.set_xlim(all_x_combined.min() - 0.1 * x_range, all_x_combined.max() + 0.1 * x_range)
ax_top.set_ylim(all_y_combined.min() - 0.1 * y_range, all_y_combined.max() + 0.1 * y_range)

# Format top panel
ax_top.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=16)
ax_top.set_ylabel('Surge-attributed Depth (ft)', fontsize=16)
ax_top.tick_params(axis='both', which='major', labelsize=14)
ax_top.set_title('Combined View: All Return Periods and Locations', fontsize=18, fontweight='bold', pad=20)
ax_top.grid(True, linestyle='--', alpha=0.6)

# Create custom legend for top panel
legend_elements = []
# Return period colors
for rp in ['10-yr', '50-yr', '100-yr', '500-yr']:
    legend_elements.append(plt.Line2D([0], [0], color=rp_colors[rp], lw=4, alpha=0.8, label=rp))

# Location line styles, labelled with the computed distance along the transect
# (the location index itself is not a distance in km)
linestyle_names = {'-': 'solid', '--': 'dashed', '-.': 'dash-dot', ':': 'dotted'}
for loc_idx in locations:
    loc_style = location_styles[loc_idx]
    loc_label = f'{location_distances_km[loc_idx]} km ({linestyle_names.get(loc_style, loc_style)})'
    legend_elements.append(plt.Line2D([0], [0], color='gray', lw=3, linestyle=loc_style, label=loc_label))

ax_top.legend(handles=legend_elements, frameon=False, fontsize=11, ncol=2, loc='upper right')

# ============== ORIGINAL FOUR PANELS BELOW ================
print("Processing individual panels...")

# One panel per location: weighted regression bands for the four return periods
for panel_idx, (idx, ax) in enumerate(zip(locations, axes)):
    distance_km = int((idx * 5) * total_length_km / 115)

    print(f"Processing panel {panel_idx + 1}: {distance_km}km")

    # Return-period DataFrames for this location
    df_10, df_50, df_100, df_500 = cdf_data_frames[idx]

    # Plotting order (largest return period first) with labels + colors
    datasets = [
        (df_500, '500-yr', rp_colors['500-yr']),
        (df_100, '100-yr', rp_colors['100-yr']),
        (df_50, '50-yr', rp_colors['50-yr']),
        (df_10, '10-yr', rp_colors['10-yr']),
    ]

    lines_info = []

    for df, label, color in datasets:
        x = df['depth_hydrologic'].values
        y = df['depth_base_storm_raw'].values

        # Normalize weights so they sum to 1
        raw_weights = df['prob'].values
        weights = raw_weights / raw_weights.sum()

        print(f"{label}: PDF sum = {weights.sum():.6f}")  # Should be 1.0

        # Plot weighted line
        x_sorted, y_pred, model = plot_weighted_line(ax, x, y, weights, color, label)
        lines_info.append((x_sorted, y_pred, model, label, color, df))

    # Axis limits: data extent of all four datasets padded by 10% on every side
    all_x = np.concatenate([df['depth_hydrologic'].values for df, _, _ in datasets])
    all_y = np.concatenate([df['depth_base_storm_raw'].values for df, _, _ in datasets])

    x_range = all_x.max() - all_x.min()
    y_range = all_y.max() - all_y.min()
    ax.set_xlim(all_x.min() - 0.1 * x_range, all_x.max() + 0.1 * x_range)
    ax.set_ylim(all_y.min() - 0.1 * y_range, all_y.max() + 0.1 * y_range)

    # Set up the plot
    ax.set_xlabel('Hydrologic-attributed Depth (ft)', fontsize=16)
    ax.set_ylabel('Surge-attributed Depth (ft)', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.set_title(f'Design events at {distance_km} km along the Amite River', fontsize=17)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(frameon=False, fontsize=12, loc='upper right')

    # Add text annotations that run parallel to each regression line
    for x_sorted, y_pred, model, label, color, df in lines_info:
        x_mid = (x_sorted[0] + x_sorted[-1]) / 2
        y_mid = np.interp(x_mid, x_sorted, y_pred)

        # Slope of the fitted line in data units, taken from the model so a
        # zero x-range cannot cause a division by zero
        data_slope = float(np.ravel(model.coef_)[0])
        angle_deg = np.degrees(np.arctan(data_slope))

        avg_depth = np.mean(df['depth_hydrologic'].values) + np.mean(df['depth_base_storm_raw'].values)
        offset = 0.3

        # transform_rotates_text makes Matplotlib interpret the rotation in
        # data coordinates at draw time, so the label stays parallel to the
        # line even after tight_layout() resizes the axes below.
        ax.text(
            x_mid,
            y_mid + offset,
            f'Compound Flood Depth {avg_depth:.2f} ft',
            color='black',
            fontsize=12,
            rotation=angle_deg,
            rotation_mode='anchor',
            transform_rotates_text=True,
            ha='center',
            va='center',
            bbox=dict(facecolor='white', alpha=0.0, edgecolor='none'),
            zorder=11
        )

plt.tight_layout()
plt.savefig("design_storms_weighted_combined.pdf", bbox_inches="tight")
plt.show()


# Figure 12

In [0]:
# Location index used for Figure 12 and its distance along the transect (km)
idx = 9
distance_km = (idx * 5) * total_length_km / 115

# RAS cell id at that location; every frame below is filtered to this one cell
target_ras_id = closest_ras_ids[idx]

geo_frame_filtered_to_id = geo_frame.loc[geo_frame['ras_id'] == target_ras_id]
storm_data_all_uncert_by_id = storm_data_all_uncert.loc[
    storm_data_all_uncert['ras_id'] == target_ras_id
]
storm_data_surge_uncert_by_id = storm_data_surge_uncert.loc[
    storm_data_surge_uncert['ras_id'] == target_ras_id
]

In [0]:
# Per storm: mean of 'depth_raw' and summed 'prob', ordered by increasing mean depth
storm_data_aggregated = (
    storm_data_all_uncert_by_id
    .groupby('storm_id')
    .agg(mean_depth=('depth_raw', 'mean'), total_prob=('prob', 'sum'))
    .reset_index()
    .sort_values(by='mean_depth')
)

# Running probability total over the depth-ordered storms
storm_data_aggregated['cum_prob'] = storm_data_aggregated['total_prob'].cumsum()

# Annualized CDF value, exp(-recurrence_rate * (1 - cum_prob)), and the return
# period derived from it as 1 / (1 - annualized CDF)
annualized_cdf = calc_annualized_cdf_val(storm_data_aggregated, recurrence_rate, 'cum_prob')
storm_data_aggregated['annualized_cdf_val'] = annualized_cdf
storm_data_aggregated['return_period'] = 1 / (1 - storm_data_aggregated['annualized_cdf_val'])

display(storm_data_aggregated)

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
import matplotlib.gridspec as gridspec

# Publication-quality styling: reset to Matplotlib's stock style, then override
plt.style.use('default')

publication_rc = {
    # Text sizes
    'font.size': 12,
    'axes.titlesize': 15,
    'axes.labelsize': 15,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 12,
    # Resolution
    'figure.dpi': 300,
    # Axes frame: left/bottom spines only
    'axes.linewidth': 1.0,
    'axes.spines.top': False,
    'axes.spines.right': False,
    # Light grid drawn underneath the data
    'axes.grid': True,
    'axes.axisbelow': True,
    'grid.alpha': 0.3,
    'grid.linewidth': 0.5,
}
# Serif font overrides, currently disabled:
#    'font.family': 'serif',
#    'font.serif': ['Times New Roman', 'DejaVu Serif'],
plt.rcParams.update(publication_rc)

# Elevation added to every depth to obtain a water surface elevation (WSE)
# (presumably the ground elevation of the selected cell -- TODO confirm)
cell_elev = geo_frame_filtered_to_id['elevs'].values[0]

# Calculate the 500-year storm WSE: row whose return period is closest to 500
closest_row = storm_data_all_uncert_by_id.iloc[(storm_data_all_uncert_by_id['return_period'] - 500).abs().argsort()[:1]]
storm_data_all_500_wse = closest_row['depth_raw'].values[0] + cell_elev

# Get unique storm IDs
unique_storm_ids = cdf_data_frames[idx][3]['storm_id'].unique()

# Collect data for each storm
storm_data_collection = []

# Total of the per-storm probabilities (first 'prob' row of each selected storm),
# used to renormalize each storm's probability within this subset
total_prob = (storm_data_surge_uncert[storm_data_surge_uncert['storm_id'].isin(unique_storm_ids)]
              .groupby('storm_id')['prob'].first().sum())

# Filter both frames to the selected cell once instead of once per storm
target_ras_id = closest_ras_ids[idx]
compound_at_cell = storm_data_all_uncert[storm_data_all_uncert['ras_id'] == target_ras_id]
surge_at_cell = storm_data_surge_uncert[storm_data_surge_uncert['ras_id'] == target_ras_id]

for storm_id in unique_storm_ids:
    # Get compound flooding data (with rainfall)
    compound_data = compound_at_cell[compound_at_cell['storm_id'] == storm_id]

    # Get surge-only data (without rainfall)
    surge_data = surge_at_cell[surge_at_cell['storm_id'] == storm_id]

    if len(compound_data) > 5 and len(surge_data) > 0:  # Require sufficient data
        compound_wse = compound_data['depth_raw'] + cell_elev
        surge_wse = surge_data['depth_raw'] + cell_elev

        # WSE values are multiplied by 0.3048 (feet -> metres); the percentage
        # is a ratio and needs no conversion
        storm_info = {
            'storm_id': storm_id,
            'prob':  float(surge_data['prob'].values[0]/total_prob),
            'compound_wse_values': compound_wse.values*0.3048,
            'compound_prob_values': compound_data['prob'].values,
            'compound_wse_min': np.min(compound_wse)*0.3048,
            'compound_mean': np.mean(compound_wse)*0.3048,
            'surge_mean': np.mean(surge_wse)*0.3048,
            'surge_wse_values': surge_wse.values*0.3048,
            'hydrologic_contribution_percent': (np.mean(compound_wse) - np.mean(surge_wse)) / np.mean(compound_wse) * 100
        }
        storm_data_collection.append(storm_info)

# Sort storms by their minimum compound WSE
storm_data_collection.sort(key=lambda x: x['compound_wse_min'])

# ========================
# Create design storm sets based on surge means
# ========================
percentiles = [0, 25, 50, 75, 100]
percentile_ranges = {}

# Extract storm_id and surge_mean
surge_means = [(storm['storm_id'], storm['surge_mean']) for storm in storm_data_collection]
surge_means.sort(key=lambda x: x[1])  # sort by surge_mean
surge_values = [x[1] for x in surge_means]

# Compute cutoff values
cutoffs = np.percentile(surge_values, percentiles)

# Assign storms to bins. Bins are half-open [low, high) so a storm that sits
# exactly on a cutoff lands in one bin only; the last bin also includes its
# upper edge so the maximum is not dropped.
last_bin = len(percentiles) - 2
for i in range(len(percentiles)-1):
    low, high = cutoffs[i], cutoffs[i+1]
    storms_in_range = [
        sid for sid, val in surge_means
        if low <= val and (val < high or i == last_bin)
    ]
    percentile_ranges[f"{percentiles[i]}-{percentiles[i+1]}"] = storms_in_range

# Create a mapping from storm_id to design set
storm_to_set = {}
set_names = ['Storm Set 1', 'Storm Set 2', 'Storm Set 3', 'Storm Set 4', 'Storm Set 5']

##########
###########
# (storm id as string, storm probability, running probability total) for each
# storm in its current order; the running total is built in a single pass
cumulative_probs = np.cumsum([storm['prob'] for storm in storm_data_collection])
storm_probs = [
    (f"{storm['storm_id']}", storm['prob'], cum_prob)
    for storm, cum_prob in zip(storm_data_collection, cumulative_probs)
]

# Initialize groups
groups = {
    '0-0.2': [],
    '0.2-0.4': [],
    '0.4-0.6': [],
    '0.6-0.8': [],
    '0.8-1.0': []
}

# Inclusive upper bound of every group except the last, which takes the rest
group_upper_bounds = [
    (0.2, '0-0.2'),
    (0.4, '0.2-0.4'),
    (0.6, '0.4-0.6'),
    (0.8, '0.6-0.8'),
]

# Group storms based on cumulative probability
for storm_id, prob, cum_prob in storm_probs:
    group_name = next(
        (name for bound, name in group_upper_bounds if cum_prob <= bound),
        '0.8-1.0',
    )
    groups[group_name].append((storm_id, prob, cum_prob))

# Create storm_id to group number mapping
storm_to_set = {}
group_mapping = {
    '0-0.2': 0,
    '0.2-0.4': 1,
    '0.4-0.6': 2,
    '0.6-0.8': 3,
    '0.8-1.0': 4
}

for group_name, storms in groups.items():
    group_num = group_mapping[group_name]
    for storm_id, prob, cum_prob in storms:
        # storm_id was formatted as a string above; store it back as an int key
        storm_to_set[int(storm_id)] = group_num

print("Storm ID to Group Number mapping:")
print(storm_to_set)
#################
##################

# Create the main figure: tall main panel over a thin classification strip
fig = plt.figure(figsize=(14, 10))
gs = gridspec.GridSpec(2, 1, height_ratios=[20, 1], hspace=0.01)

# Main plot
ax_main = fig.add_subplot(gs[0])

# Parameters
bar_width = 0.75
storm_positions = np.arange(len(storm_data_collection))

# ================================
# Refined violin plot colors with blue reference
# ================================

# Main plot colors
surge_color = 'tab:blue'            # Bars
compound_color = 'tab:purple'       # Violins

# Whiskers and outlines
whisker_color = 'black'
violin_outline_color = 'black'

# Reference line color
reference_color = '#777777'

# Design set colors: one distinct color per storm set. There are five sets,
# so five colors are needed; with only four, the fifth set wrapped around
# (index % len) and shared the first set's color.
set_colors = ['tab:blue', 'tab:green', 'tab:purple', 'tab:red', 'tab:orange']

# Reference line for 500-year WSE (value multiplied by 0.3048, feet -> metres)
ax_main.axhline(y=storm_data_all_500_wse*0.3048, color=reference_color, linestyle='--', 
                linewidth=2.5, label=f'500-year Return Period WSE ({storm_data_all_500_wse*0.3048:.1f} m)', 
                alpha=0.9, zorder=10)

# Plot each storm: a baseline bar plus a violin of the compound WSE values
for i, storm_info in enumerate(storm_data_collection):
    compound_wse_values = storm_info['compound_wse_values']
    x_pos = storm_positions[i]

    # 1. Baseline bar. Its height is the minimum compound WSE for the storm
    #    (not the surge-only mean), shown in the legend as the surge component.
    ax_main.bar(x_pos, np.min(compound_wse_values), width=bar_width,
                color=surge_color, alpha=0.8, edgecolor='white', linewidth=0.5,
                label='Storm Surge Component' if i == 0 else "")

    # 2. Violin for compound WSE variability (unweighted KDE of the values)
    violin_data = compound_wse_values

    # gaussian_kde needs spread in the data: identical values give a singular
    # covariance, so the violin is skipped in that case instead of failing.
    if len(violin_data) > 3 and np.ptp(violin_data) > 0:
        # Calculate statistics
        p25, p50, p75 = np.percentile(violin_data, [25, 50, 75])
        data_min = np.min(violin_data)
        data_max = np.max(violin_data)

        # Create density estimation
        kde = gaussian_kde(violin_data)
        density_range = np.linspace(data_min, data_max, 150)
        density_values = kde(density_range)

        # Normalize density so the widest point is bar_width / 3.5 per side
        max_density = np.max(density_values)
        density_norm = (density_values / max_density) * (bar_width / 3.5)

        # Plot violin (compound flooding variability)
        ax_main.fill_betweenx(density_range,
                              x_pos - density_norm,
                              x_pos + density_norm,
                              alpha=0.7, color=compound_color,
                              label='Variable Hydrologic Component (Rainfall Driven)' if i == 0 else "")

        # Add violin outline
        ax_main.plot(x_pos - density_norm, density_range, color=violin_outline_color, linewidth=0.8, alpha=0.9)
        ax_main.plot(x_pos + density_norm, density_range, color=violin_outline_color, linewidth=0.8, alpha=0.9)

        # Half-width shared by the median tick and the min/max ticks
        marker_width = bar_width / 5

        # Median line (white for visibility)
        ax_main.plot([x_pos - marker_width, x_pos + marker_width],
                     [p50, p50], 'white', linewidth=2.75, alpha=0.95)

        # Whiskers (connecting the quartiles to the extremes)
        ax_main.plot([x_pos, x_pos], [data_min, p25], color=whisker_color,
                     linewidth=1.2, alpha=0.7, linestyle='-')
        ax_main.plot([x_pos, x_pos], [p75, data_max], color=whisker_color,
                     linewidth=1.2, alpha=0.7, linestyle='-')

        # Quartile ticks (subtle)
        iqr_width = bar_width / 8
        ax_main.plot([x_pos - iqr_width, x_pos + iqr_width],
                     [p25, p25], 'white', linewidth=1.75, alpha=0.95)
        ax_main.plot([x_pos - iqr_width, x_pos + iqr_width],
                     [p75, p75], 'white', linewidth=1.75, alpha=0.95)

        # Add min/max markers
        ax_main.plot([x_pos - marker_width, x_pos + marker_width],
                     [data_min, data_min], violin_outline_color, linewidth=2, alpha=0.7)
        ax_main.plot([x_pos - marker_width, x_pos + marker_width],
                     [data_max, data_max], violin_outline_color, linewidth=2, alpha=0.7)



# Customize main plot
ax_main.set_ylabel('Water Surface Elevation (m NAVD88)') #fontweight='bold'

# Title distance follows idx, using the same expression (and integer
# truncation) as the design-event panel titles, instead of a fixed number.
title_distance_km = int((idx * 5) * total_length_km / 115)
ax_main.set_title(f'Amite River at {title_distance_km} km', pad=15)

# Set x-axis (labels are drawn by the bottom panel)
ax_main.set_xticks(storm_positions)
ax_main.set_xticklabels([])  # Remove x-labels from main plot

# Professional legend
legend = ax_main.legend(loc='upper left', frameon=True, fancybox=False, 
                       shadow=False, edgecolor='black', facecolor='white', 
                       framealpha=0.95, borderpad=0.8)
legend.get_frame().set_linewidth(0.8)

# Enhanced grid
ax_main.grid(True, alpha=0.4, linestyle='-', linewidth=0.5)
ax_main.set_axisbelow(True)

# ========================
# Design Storm Set Classification (bottom panel)
# ========================
ax_sets = fig.add_subplot(gs[1])

# x positions of the storms belonging to each design storm set
# (storms missing from storm_to_set fall back to set 0)
set_positions = {}
for i, storm_info in enumerate(storm_data_collection):
    set_index = storm_to_set.get(storm_info['storm_id'], 0)
    set_positions.setdefault(set_index, []).append(storm_positions[i])

# Draw one labelled, shaded rectangle per design storm set
rect_height = 0.16
rect_y = -rect_height/2  # centered on y = 0
for set_index, positions in set_positions.items():
    color = set_colors[set_index % len(set_colors)]

    # Rectangle spans from the left edge of the first bar to the right edge of the last
    left_pos = min(positions) - bar_width/2
    right_pos = max(positions) + bar_width/2
    rect_width = right_pos - left_pos

    rect = Rectangle((left_pos, rect_y), rect_width, rect_height, 
                    facecolor=color, alpha=0.15, edgecolor=color, 
                    linewidth=1.5)
    ax_sets.add_patch(rect)

    # Set label - centered on the set's storms
    center_pos = np.mean(positions)
    ax_sets.text(center_pos, 0, set_names[set_index], 
                ha='center', va='center', fontweight='bold',
                fontsize=10, color=color)

# Configure bottom panel: share the main panel's x-range
ax_sets.set_xlim(ax_main.get_xlim())
ax_sets.set_ylim(-0.1,0.1)
ax_sets.set_xlabel('Storm ID (JPM Storm Suite)') #fontweight='bold'

# Storm ID labels
storm_labels = [f"{storm['storm_id']}" for storm in storm_data_collection]
ax_sets.set_xticks(storm_positions)
ax_sets.set_xticklabels(storm_labels, rotation=45, ha='right')
ax_sets.set_yticks([])

# Remove spines for clean look
for spine in ax_sets.spines.values():
    spine.set_visible(False)

plt.tight_layout()
plt.subplots_adjust(bottom=0.12)  # Extra space for rotated labels
plt.savefig("design_storm_500_yr.pdf", bbox_inches="tight")

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.patches as patches

# Water surface elevation (WSE) of the record whose return period is closest
# to 500 years. WSE is computed as depth_raw plus the first 'elevs' value of
# geo_frame_filtered_to_id (presumably the ground elevation at the selected
# location -- confirm against where that frame is built).
closest_row = storm_data_all_uncert_by_id.iloc[(storm_data_all_uncert_by_id['return_period'] - 500).abs().argsort()[:1]]
storm_data_all_500_wse = closest_row['depth_raw'].values[0] + geo_frame_filtered_to_id['elevs'].values[0]

# Get unique storm IDs
unique_storm_ids = cdf_data_frames[idx][3]['storm_id'].unique()

# Collect one summary record per storm
storm_data_collection = []

for storm_id in unique_storm_ids:
    # Get compound flooding data (with rainfall)
    compound_data = storm_data_all_uncert[
        (storm_data_all_uncert['ras_id'] == closest_ras_ids[idx]) &
        (storm_data_all_uncert['storm_id'] == storm_id)
    ]

    # Get surge-only data (without rainfall)
    surge_data = storm_data_surge_uncert[
        (storm_data_surge_uncert['ras_id'] == closest_ras_ids[idx]) &
        (storm_data_surge_uncert['storm_id'] == storm_id)
    ]

    if len(compound_data) > 5 and len(surge_data) > 0:  # Require sufficient data
        compound_wse = compound_data['depth_raw'] + geo_frame_filtered_to_id['elevs'].values[0]
        surge_wse = surge_data['depth_raw'] + geo_frame_filtered_to_id['elevs'].values[0]

        compound_mean = np.mean(compound_wse)
        surge_mean = np.mean(surge_wse)

        storm_info = {
            'storm_id': storm_id,
            'compound_wse_values': compound_wse.values,
            'compound_wse_min': np.min(compound_wse),
            'compound_mean': compound_mean,
            'surge_mean': surge_mean,
            'surge_wse_values': surge_wse.values,
            # Share of the mean compound WSE that is not explained by surge.
            # The difference must be parenthesised: without the parentheses
            # the division and multiplication bind first and the result is
            # not a percentage.
            'hydrologic_contribution_percent': (compound_mean - surge_mean) / compound_mean * 100
        }
        storm_data_collection.append(storm_info)

# Sort storms by their minimum compound WSE (ascending)
storm_data_collection.sort(key=lambda x: x['compound_wse_min'])

# ========================
# Create design storm sets based on surge means
# ========================
# Storms are split into four sets at the quartiles of their mean surge WSE.
percentiles = [0, 25, 50, 75, 100]
percentile_ranges = {}

# Extract storm_id and surge_mean
surge_means = [(storm['storm_id'], storm['surge_mean']) for storm in storm_data_collection]
surge_means.sort(key=lambda x: x[1])  # sort by surge_mean
surge_values = [x[1] for x in surge_means]

# Compute cutoff values
cutoffs = np.percentile(surge_values, percentiles)

# Assign storms to bins. Bins are half-open [low, high) so that a storm whose
# surge mean equals an interior cutoff belongs to exactly one set (the upper
# one); only the last bin also includes its upper bound so the maximum is
# kept. With bins closed on both ends such a storm was listed in two sets,
# which disagreed with storm_to_set below (where the later set wins).
last_bin = len(percentiles) - 2
for i in range(len(percentiles)-1):
    low, high = cutoffs[i], cutoffs[i+1]
    if i == last_bin:
        storms_in_range = [sid for sid, val in surge_means if low <= val <= high]
    else:
        storms_in_range = [sid for sid, val in surge_means if low <= val < high]
    percentile_ranges[f"{percentiles[i]}-{percentiles[i+1]}"] = storms_in_range

# Create a mapping from storm_id to design set (index into set_names)
storm_to_set = {}
set_names = ['Design Storm Set 1', 'Design Storm Set 2', 'Design Storm Set 3', 'Design Storm Set 4']

for i, (percentile_range, storm_list) in enumerate(percentile_ranges.items()):
    for storm_id in storm_list:
        storm_to_set[storm_id] = i

# Create the figure and a single axes for the storm-by-storm plot.
fig, ax = plt.subplots(figsize=(16, 14))

# Plot parameters
bar_width = 0.6  # in x data units; storms are placed at integer x positions
storm_positions = np.arange(len(storm_data_collection))
# One viridis colour per storm; not referenced again in the code below.
colors = plt.cm.viridis(np.linspace(0, 1, len(storm_data_collection)))

# Plot each storm: a bar plus a mirrored density ("violin") of its compound WSE values
for i, storm_info in enumerate(storm_data_collection):
    storm_id = storm_info['storm_id']
    compound_wse_values = storm_info['compound_wse_values']
    surge_mean = storm_info['surge_mean']
    compound_mean = storm_info['compound_mean']
    
    x_pos = storm_positions[i]
    
    # 1. Bar from 0 up to the smallest compound WSE of this storm.
    # NOTE(review): the legend calls this 'Surge Contribution', but the bar
    # height is np.min(compound_wse_values), not surge_mean -- confirm that
    # this is the intended quantity.
    # Only the first bar carries a label so the legend gets a single entry.
    surge_bar = ax.bar(x_pos, np.min(compound_wse_values), width=bar_width, 
                      color='lightblue', alpha=0.8, edgecolor='navy', linewidth=1,
                      label='Surge Contribution' if i == 0 else "")
    
    # 2. Density of the compound WSE values, evaluated from surge_mean up to
    # the largest compound value.
    violin_data = compound_wse_values
    
    if len(violin_data) > 3:
        # Summary statistics of the compound WSE values. Only p50, p75,
        # data_min and data_max are used below; p25, p10 and p90 are computed
        # but not drawn.
        p25 = np.percentile(violin_data, 25)
        p50 = np.percentile(violin_data, 50)  # median
        p75 = np.percentile(violin_data, 75)
        p10 = np.percentile(violin_data, 10)
        p90 = np.percentile(violin_data, 90)
        data_min = np.min(violin_data)
        data_max = np.max(violin_data)
        
        # Gaussian KDE of the values, sampled at 100 points between
        # surge_mean and data_max.
        kde = gaussian_kde(violin_data)
        density_range = np.linspace(surge_mean, data_max, 100)  # Start from surge mean
        density_values = kde(density_range)
        
        # Scale the density so its peak has a half-width of bar_width / 3.
        max_density = np.max(density_values)
        density_norm = (density_values / max_density) * (bar_width / 3)  # Narrower than bar
        
        # Fill the mirrored density around x_pos.
        ax.fill_betweenx(density_range, 
                        x_pos - density_norm, 
                        x_pos + density_norm,
                        alpha=0.7, color='red', 
                        label='Hydrologic variability' if i == 0 else "")
        
        # Add outline for the density
        ax.plot(x_pos - density_norm, density_range, 'darkred', linewidth=1.5, alpha=0.8)
        ax.plot(x_pos + density_norm, density_range, 'darkred', linewidth=1.5, alpha=0.8)
        
        # Median marker: short white horizontal line at p50.
        percentile_width = bar_width / 4
        ax.plot([x_pos - percentile_width, x_pos + percentile_width], 
               [p50, p50], 'white', linewidth=2, alpha=0.9)  # Median line
        
        # Dotted whiskers: data_min to surge_mean, and data_max down to p75.
        ax.plot([x_pos, x_pos], [data_min, surge_mean], 'red', linewidth=2, alpha=0.6, linestyle=':')
        ax.plot([x_pos, x_pos], [data_max, p75], 'red', linewidth=2, alpha=0.6, linestyle=':')
        
        # Short horizontal caps at the minimum and maximum values.
        marker_width = bar_width / 6
        ax.plot([x_pos - marker_width, x_pos + marker_width], 
               [data_min, data_min], 'red', linewidth=2, alpha=0.7)
        ax.plot([x_pos - marker_width, x_pos + marker_width], 
               [data_max, data_max], 'red', linewidth=2, alpha=0.7)

# Horizontal reference line at the 500-year WSE computed earlier in this cell.
ax.axhline(y=storm_data_all_500_wse, color='orange', linestyle='--', linewidth=3,
           label=f'500-Year WSE ({storm_data_all_500_wse:.2f} ft)', alpha=0.9)

# ========================
# Prepare the design storm set brackets drawn below the x-axis
# ========================
# Vertical extent of the main plot, used to size the bracket parameters.
y_min, y_max = ax.get_ylim()
y_range = y_max - y_min

# Bracket styling parameters, each a fixed fraction of the y-range:
# line height, spacing between bracket levels, and offset below the x-axis.
bracket_height, bracket_spacing, bracket_start_offset = (
    y_range * fraction for fraction in (0.015, 0.03, 0.05)
)

# One colour per design storm set: blue, orange, green, red.
set_colors = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
]

# Group the x positions of the storms by design storm set; a storm without
# an assigned set falls back to set 0.
set_positions = {}
for position, storm_info in zip(storm_positions, storm_data_collection):
    storm_id = storm_info['storm_id']
    set_index = storm_to_set.get(storm_id, 0)
    set_positions.setdefault(set_index, []).append(position)



# Axis labels and title of the main plot
ax.set_xlabel('Storm ID (from the 645 JPM storm set)', fontsize=14, fontweight='bold')
ax.set_ylabel('Water Surface Elevation (WSE) ft', fontsize=14, fontweight='bold')
ax.set_title('At 45 km along the Amite River', 
             fontsize=16, fontweight='bold', pad=20)

# Set x-axis labels to storm IDs (two-line labels: "Storm" / id)
storm_labels = [f"Storm\n{storm['storm_id']}" for storm in storm_data_collection]
ax.set_xticks(storm_positions)
ax.set_xticklabels(storm_labels, fontsize=10)

# Add legend for the main plot elements
ax.legend(loc='upper left', fontsize=12, frameon=True, fancybox=True, shadow=True)

# Horizontal grid lines only, drawn behind the plotted artists
ax.grid(True, alpha=0.3, axis='y')
ax.set_axisbelow(True)

# Style improvements: hide top/right spines, thicken left/bottom spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_linewidth(1.2)
ax.spines['bottom'].set_linewidth(1.2)

# Place the bottom spine at y = 0 in data coordinates.
ax.spines['bottom'].set_position(('data', 0))

# Blended transform for the brackets: x in data coordinates, y in axes
# coordinates (0 = bottom of the axes, negative values lie below it).
trans = ax.get_xaxis_transform()

# Draw one bracket and one label per design storm set below the axes.
# clip_on=False is required because the artists lie outside the axes area.
for set_index, positions in set_positions.items():
    if len(positions) > 0:
        # Bracket spans from the left edge of the first bar to the right
        # edge of the last bar of the set.
        left_pos = min(positions) - bar_width/2
        right_pos = max(positions) + bar_width/2
        center_pos = np.mean(positions)
        
        color = set_colors[set_index % len(set_colors)]
        
        # Bracket positions in axes coordinates (below the plot)
        bracket_y_top = -0.06  # 6% of the axes height below the axes
        bracket_y_bottom = -0.08  # 8% of the axes height below the axes
        
        # Main horizontal line
        ax.plot([left_pos, right_pos], [bracket_y_bottom, bracket_y_bottom], 
               color=color, linewidth=3, solid_capstyle='round', transform=trans, clip_on=False)
        
        # Left vertical connector
        ax.plot([left_pos, left_pos], [bracket_y_top, bracket_y_bottom], 
               color=color, linewidth=2.5, solid_capstyle='round', transform=trans, clip_on=False)
        
        # Right vertical connector  
        ax.plot([right_pos, right_pos], [bracket_y_top, bracket_y_bottom], 
               color=color, linewidth=2.5, solid_capstyle='round', transform=trans, clip_on=False)
        
        # Short horizontal caps at the top of both vertical connectors
        end_cap_size = bar_width * 0.1
        ax.plot([left_pos - end_cap_size, left_pos + end_cap_size], 
               [bracket_y_top, bracket_y_top], color=color, linewidth=2.5, transform=trans, clip_on=False)
        ax.plot([right_pos - end_cap_size, right_pos + end_cap_size], 
               [bracket_y_top, bracket_y_top], color=color, linewidth=2.5, transform=trans, clip_on=False)
        
        # Set name in a rounded white box, centred under the bracket
        label_y = -0.1  # 10% of the axes height below the axes
        ax.text(center_pos, label_y, 
               set_names[set_index], 
               ha='center', va='center', 
               fontsize=11, fontweight='bold',
               color=color, transform=trans,
               bbox=dict(boxstyle="round,pad=0.3", facecolor='white', 
                        edgecolor=color, alpha=0.8))

plt.tight_layout()
plt.show()

# ========================
# Text summary of the storm-by-storm decomposition
# ========================
def _rainfall_delta(storm):
    """Return mean compound WSE minus mean surge-only WSE for one storm record."""
    return storm['compound_mean'] - storm['surge_mean']


banner = "=" * 90
rule = "-" * 105

print(banner)
print("STORM-BY-STORM WSE DECOMPOSITION ANALYSIS")
print(banner)
print(f"500-Year Target WSE: {storm_data_all_500_wse:.3f} ft")
print()
print("Storm ID | Design Set | Surge Mean | Compound Mean | Rainfall Contrib | Rainfall Range | Exceeds 500-yr")
print(rule)

# Running totals over all storms: number of realizations, and how many of
# them reach or exceed the 500-year WSE.
total_realizations = 0
total_exceeds_500 = 0

# One table row per storm
for storm_info in storm_data_collection:
    storm_id = storm_info['storm_id']
    surge_mean = storm_info['surge_mean']
    compound_mean = storm_info['compound_mean']
    compound_values = storm_info['compound_wse_values']
    set_index = storm_to_set.get(storm_id, 0)

    rainfall_contrib = _rainfall_delta(storm_info)
    rainfall_range = f"{np.min(compound_values):.2f}-{np.max(compound_values):.2f}"

    exceeds_500 = (compound_values >= storm_data_all_500_wse).sum()
    total_exceeds_500 += exceeds_500
    total_realizations += len(compound_values)

    print(f"   {storm_id:4d}  |    Set {set_index+1}    |   {surge_mean:7.2f}  |    {compound_mean:8.2f}  |      {rainfall_contrib:6.2f}    | {rainfall_range:>13} |  {exceeds_500:3d}/{len(compound_values):3d} ({exceeds_500/len(compound_values)*100:4.1f}%)")

print(rule)
print(f"OVERALL: {total_exceeds_500}/{total_realizations} realizations exceed 500-year WSE ({total_exceeds_500/total_realizations*100:.1f}%)")

# Storms with the largest mean surge, mean compound WSE and rainfall delta
max_surge_storm = max(storm_data_collection, key=lambda s: s['surge_mean'])
max_compound_storm = max(storm_data_collection, key=lambda s: s['compound_mean'])
max_rainfall_contrib_storm = max(storm_data_collection, key=_rainfall_delta)

print(f"\nKEY INSIGHTS:")
print(f"• Highest surge contribution: Storm {max_surge_storm['storm_id']} ({max_surge_storm['surge_mean']:.2f} ft)")
print(f"• Highest compound WSE: Storm {max_compound_storm['storm_id']} ({max_compound_storm['compound_mean']:.2f} ft)")
print(f"• Largest rainfall contribution: Storm {max_rainfall_contrib_storm['storm_id']} ({_rainfall_delta(max_rainfall_contrib_storm):.2f} ft)")

# Across-storm statistics of the per-storm means
all_surge_means = [storm['surge_mean'] for storm in storm_data_collection]
all_compound_means = [storm['compound_mean'] for storm in storm_data_collection]
all_rainfall_contribs = [_rainfall_delta(storm) for storm in storm_data_collection]

print(f"\nOVERALL STATISTICS:")
print(f"• Average surge contribution: {np.mean(all_surge_means):.2f} ± {np.std(all_surge_means):.2f} ft")
print(f"• Average compound WSE: {np.mean(all_compound_means):.2f} ± {np.std(all_compound_means):.2f} ft")
print(f"• Average rainfall contribution: {np.mean(all_rainfall_contribs):.2f} ± {np.std(all_rainfall_contribs):.2f} ft")
print(f"• Rainfall contribution range: {np.min(all_rainfall_contribs):.2f} to {np.max(all_rainfall_contribs):.2f} ft")

# Members of each design storm set
print(f"\nDESIGN STORM SET SUMMARY:")
for set_name, storm_list in zip(set_names, percentile_ranges.values()):
    print(f"• {set_name}: {len(storm_list)} storms (Storm IDs: {sorted(storm_list)})")

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
import matplotlib.gridspec as gridspec

# Start from matplotlib's default style, then override the settings used for
# the publication figure, one rc group at a time.
plt.style.use('default')
plt.rc('font', family='serif', serif=['Times New Roman', 'DejaVu Serif'], size=10)
plt.rc('axes', titlesize=12, labelsize=11, linewidth=1.0, grid=True, axisbelow=True)
plt.rc('axes.spines', top=False, right=False)
plt.rc('xtick', labelsize=9)
plt.rc('ytick', labelsize=9)
plt.rc('legend', fontsize=9)
plt.rc('figure', dpi=300)
plt.rc('grid', alpha=0.3, linewidth=0.5)

# Reference level: WSE of the record whose return period is nearest to 500
# years. WSE is depth_raw plus the first 'elevs' value of
# geo_frame_filtered_to_id.
nearest_500 = (storm_data_all_uncert_by_id['return_period'] - 500).abs().argsort()[:1]
closest_row = storm_data_all_uncert_by_id.iloc[nearest_500]
storm_data_all_500_wse = closest_row['depth_raw'].values[0] + geo_frame_filtered_to_id['elevs'].values[0]

# Storm IDs to summarise
unique_storm_ids = cdf_data_frames[idx][3]['storm_id'].unique()

# Build one summary record per storm that has enough data
storm_data_collection = []

for storm_id in unique_storm_ids:
    # Rows of the compound (with rainfall) runs for this storm and location
    compound_mask = (
        (storm_data_all_uncert['ras_id'] == closest_ras_ids[idx])
        & (storm_data_all_uncert['storm_id'] == storm_id)
    )
    compound_data = storm_data_all_uncert[compound_mask]

    # Rows of the surge-only (without rainfall) runs for the same storm and location
    surge_mask = (
        (storm_data_surge_uncert['ras_id'] == closest_ras_ids[idx])
        & (storm_data_surge_uncert['storm_id'] == storm_id)
    )
    surge_data = storm_data_surge_uncert[surge_mask]

    # Skip storms with five or fewer compound rows or no surge-only rows
    if len(compound_data) <= 5 or len(surge_data) == 0:
        continue

    compound_wse = compound_data['depth_raw'] + geo_frame_filtered_to_id['elevs'].values[0]
    surge_wse = surge_data['depth_raw'] + geo_frame_filtered_to_id['elevs'].values[0]

    storm_info = {
        'storm_id': storm_id,
        'compound_wse_values': compound_wse.values,
        'compound_wse_min': np.min(compound_wse),
        'compound_mean': np.mean(compound_wse),
        'surge_mean': np.mean(surge_wse),
        'surge_wse_values': surge_wse.values,
        # Percentage of the mean compound WSE not explained by the mean surge WSE
        'hydrologic_contribution_percent': (
            (np.mean(compound_wse) - np.mean(surge_wse)) / np.mean(compound_wse) * 100
        ),
    }
    storm_data_collection.append(storm_info)

# Order the storms by their minimum compound WSE (ascending)
storm_data_collection.sort(key=lambda record: record['compound_wse_min'])

# ========================
# Create design storm sets based on surge means
# ========================
# Storms are split into four sets at the quartiles of their mean surge WSE.
percentiles = [0, 25, 50, 75, 100]
percentile_ranges = {}

# Extract storm_id and surge_mean
surge_means = [(storm['storm_id'], storm['surge_mean']) for storm in storm_data_collection]
surge_means.sort(key=lambda x: x[1])  # sort by surge_mean
surge_values = [x[1] for x in surge_means]

# Compute cutoff values
cutoffs = np.percentile(surge_values, percentiles)

# Assign storms to bins. Bins are half-open [low, high) so that a storm whose
# surge mean equals an interior cutoff belongs to exactly one set (the upper
# one); only the last bin also includes its upper bound so the maximum is
# kept. With bins closed on both ends such a storm was listed in two sets,
# which disagreed with storm_to_set below (where the later set wins).
# A set can be empty when there are very few storms or tied surge means.
last_bin = len(percentiles) - 2
for i in range(len(percentiles)-1):
    low, high = cutoffs[i], cutoffs[i+1]
    if i == last_bin:
        storms_in_range = [sid for sid, val in surge_means if low <= val <= high]
    else:
        storms_in_range = [sid for sid, val in surge_means if low <= val < high]
    percentile_ranges[f"{percentiles[i]}-{percentiles[i+1]}"] = storms_in_range

# Create a mapping from storm_id to design set (index into set_names)
storm_to_set = {}
set_names = ['Design Storm Set 1', 'Design Storm Set 2', 'Design Storm Set 3', 'Design Storm Set 4']

for i, (percentile_range, storm_list) in enumerate(percentile_ranges.items()):
    for storm_id in storm_list:
        storm_to_set[storm_id] = i

# Two stacked panels: the main plot (4/5 of the height) and a thin panel
# below it for the design storm set bands.
fig = plt.figure(figsize=(14, 10))
gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1], hspace=0.05)

# Main plot
ax_main = fig.add_subplot(gs[0])

# Parameters
bar_width = 0.65  # in x data units; storms are placed at integer x positions
storm_positions = np.arange(len(storm_data_collection))

# Colour scheme
surge_color = '#2E86AB'      # Deep blue
compound_color = '#A23B72'    # Deep magenta/burgundy
reference_color = '#F18F01'   # Orange
set_colors = ['#1F4E79', '#8B4513', '#2F5233', '#8B0000']  # One darker tone per design storm set

# Plot each storm: a surge bar plus a mirrored density ("violin") of its compound WSE values
for i, storm_info in enumerate(storm_data_collection):
    storm_id = storm_info['storm_id']
    compound_wse_values = storm_info['compound_wse_values']
    surge_mean = storm_info['surge_mean']
    compound_mean = storm_info['compound_mean']
    
    x_pos = storm_positions[i]
    
    # 1. Bar from 0 up to the mean surge-only WSE of this storm.
    # Only the first bar carries a label so the legend gets a single entry.
    surge_bar = ax_main.bar(x_pos, surge_mean, width=bar_width, 
                           color=surge_color, alpha=0.8, edgecolor='white', linewidth=0.5,
                           label='Storm Surge Component' if i == 0 else "")
    
    # 2. Violin of the compound WSE values
    violin_data = compound_wse_values
    
    if len(violin_data) > 3:
        # Quartiles and extremes of the compound WSE values
        p25 = np.percentile(violin_data, 25)
        p50 = np.percentile(violin_data, 50)  # median
        p75 = np.percentile(violin_data, 75)
        data_min = np.min(violin_data)
        data_max = np.max(violin_data)
        
        # Gaussian KDE sampled at 150 points between the minimum and maximum
        kde = gaussian_kde(violin_data)
        density_range = np.linspace(data_min, data_max, 150)
        density_values = kde(density_range)
        
        # Scale the density so its peak has a half-width of bar_width / 3.5
        max_density = np.max(density_values)
        density_norm = (density_values / max_density) * (bar_width / 3.5)
        
        # Fill the mirrored density around x_pos
        ax_main.fill_betweenx(density_range, 
                             x_pos - density_norm, 
                             x_pos + density_norm,
                             alpha=0.7, color=compound_color, 
                             label='Compound Flooding Variability' if i == 0 else "")
        
        # Add violin outline
        ax_main.plot(x_pos - density_norm, density_range, color='darkred', linewidth=0.8, alpha=0.9)
        ax_main.plot(x_pos + density_norm, density_range, color='darkred', linewidth=0.8, alpha=0.9)
        
        # Add statistical markers
        marker_width = bar_width / 5
        # Median line (white for visibility)
        ax_main.plot([x_pos - marker_width, x_pos + marker_width], 
                    [p50, p50], 'white', linewidth=2.5, alpha=0.95)
        
        # Shorter white ticks at the 25th and 75th percentiles
        iqr_width = bar_width / 8
        ax_main.plot([x_pos - iqr_width, x_pos + iqr_width], 
                    [p25, p25], 'white', linewidth=1.5, alpha=0.8)
        ax_main.plot([x_pos - iqr_width, x_pos + iqr_width], 
                    [p75, p75], 'white', linewidth=1.5, alpha=0.8)
        
        # Whiskers from the quartiles out to the minimum and maximum
        ax_main.plot([x_pos, x_pos], [data_min, p25], color='darkred', 
                    linewidth=1.2, alpha=0.7, linestyle='-')
        ax_main.plot([x_pos, x_pos], [p75, data_max], color='darkred', 
                    linewidth=1.2, alpha=0.7, linestyle='-')

# Reference line for 500-year WSE (zorder=10 keeps it above the bars and violins)
ax_main.axhline(y=storm_data_all_500_wse, color=reference_color, linestyle='--', 
                linewidth=2.5, label=f'500-year Return Period WSE ({storm_data_all_500_wse:.1f} ft)', 
                alpha=0.9, zorder=10)

# Customize main plot
ax_main.set_ylabel('Water Surface Elevation (ft NAVD88)', fontweight='bold')
ax_main.set_title('Storm-by-Storm Water Surface Elevation Analysis\nAmite River at 45 km Upstream', 
                 fontweight='bold', pad=15)

# Keep one tick per storm but hide the labels; the storm IDs are shown on
# the bottom panel instead.
ax_main.set_xticks(storm_positions)
ax_main.set_xticklabels([])  # Remove x-labels from main plot

# Legend with a plain, nearly opaque white frame
legend = ax_main.legend(loc='upper left', frameon=True, fancybox=False, 
                       shadow=False, edgecolor='black', facecolor='white', 
                       framealpha=0.95, borderpad=0.8)
legend.get_frame().set_linewidth(0.8)

# Grid on both axes, drawn behind the plotted artists
ax_main.grid(True, alpha=0.4, linestyle='-', linewidth=0.5)
ax_main.set_axisbelow(True)

# ========================
# Design Storm Set Classification (bottom panel)
# ========================
ax_sets = fig.add_subplot(gs[1])

# Group the x positions of the storms by design storm set; a storm without
# an assigned set falls back to set 0.
set_positions = {}
for position, storm_info in zip(storm_positions, storm_data_collection):
    storm_id = storm_info['storm_id']
    set_index = storm_to_set.get(storm_id, 0)
    set_positions.setdefault(set_index, []).append(position)

# One translucent band per set, with the set name centred inside it.
for set_index, positions in set_positions.items():
    if not positions:
        continue  # a set without storms has nothing to draw

    color = set_colors[set_index % len(set_colors)]

    # The band spans from the left edge of the set's first bar to the right
    # edge of its last bar.
    left_pos = min(positions) - bar_width/2
    right_pos = max(positions) + bar_width/2
    rect_width = right_pos - left_pos

    # The band is centred vertically on y = 0.
    rect_height = 0.08
    rect_y = -rect_height/2

    band_style = dict(facecolor=color, alpha=0.15, edgecolor=color, linewidth=1.5)
    rect = Rectangle((left_pos, rect_y), rect_width, rect_height, **band_style)
    ax_sets.add_patch(rect)

    center_pos = np.mean(positions)
    label_style = dict(ha='center', va='center', fontweight='bold',
                       fontsize=10, color=color)
    ax_sets.text(center_pos, 0, set_names[set_index], **label_style)

# Share the x-range of the main axes so the bands line up with the bars.
ax_sets.set_xlim(ax_main.get_xlim())
ax_sets.set_ylim(-0.1, 0.1)
ax_sets.set_xlabel('Storm ID (JPM Storm Suite)', fontweight='bold')

# One tick per storm, labelled with its storm ID; no y ticks.
storm_labels = [str(storm['storm_id']) for storm in storm_data_collection]
ax_sets.set_xticks(storm_positions)
ax_sets.set_xticklabels(storm_labels, rotation=45, ha='right')
ax_sets.set_yticks([])

# Hide every spine of the bottom panel; no separator line is drawn between
# the two panels.
for spine in ax_sets.spines.values():
    spine.set_visible(False)

# tight_layout runs first; subplots_adjust then overrides the bottom margin
# to leave room for the rotated tick labels.
plt.tight_layout()
plt.subplots_adjust(bottom=0.12)

# ========================
# Text summary of the storm-by-storm decomposition
# ========================
print("=" * 100)
print("STORM-BY-STORM WATER SURFACE ELEVATION DECOMPOSITION ANALYSIS")
print("Location: Amite River, 45 km upstream")
print("=" * 100)
print(f"Reference 500-Year WSE: {storm_data_all_500_wse:.2f} ft NAVD88")
print()

# Table header
print(f"{'Storm ID':<8} | {'Design Set':<12} | {'Surge WSE':<10} | {'Compound WSE':<13} | {'Rainfall Δ':<11} | {'Variability Range':<16} | {'Exceedance':<12}")
print("-" * 105)

# Running totals over all storms: number of realizations, and how many of
# them reach or exceed the 500-year WSE.
total_realizations = 0
total_exceeds_500 = 0

# One table row per storm
for storm_info in storm_data_collection:
    storm_id = storm_info['storm_id']
    surge_mean = storm_info['surge_mean']
    compound_mean = storm_info['compound_mean']
    compound_values = storm_info['compound_wse_values']
    set_index = storm_to_set.get(storm_id, 0)

    # Rainfall contribution: mean compound WSE minus mean surge-only WSE
    rainfall_contrib = compound_mean - surge_mean
    variability_range = f"{np.min(compound_values):.1f}–{np.max(compound_values):.1f} ft"

    exceeds_500 = np.sum(compound_values >= storm_data_all_500_wse)
    total_exceeds_500 += exceeds_500
    total_realizations += len(compound_values)

    exceedance_pct = exceeds_500/len(compound_values)*100

    print(f"{storm_id:<8} | {'Set ' + str(set_index+1):<12} | {surge_mean:>8.2f} ft | {compound_mean:>11.2f} ft | {rainfall_contrib:>9.2f} ft | {variability_range:<16} | {exceeds_500:>3}/{len(compound_values):<3} ({exceedance_pct:4.1f}%)")

print("-" * 105)
print(f"TOTAL ANALYSIS: {total_exceeds_500:,}/{total_realizations:,} realizations exceed 500-year WSE ({total_exceeds_500/total_realizations*100:.1f}%)")

# Storms with the largest mean surge, mean compound WSE and rainfall contribution
print(f"\nKEY FINDINGS:")
max_surge_storm = max(storm_data_collection, key=lambda x: x['surge_mean'])
max_compound_storm = max(storm_data_collection, key=lambda x: x['compound_mean'])
max_rainfall_contrib_storm = max(storm_data_collection, key=lambda x: x['compound_mean'] - x['surge_mean'])

print(f"• Maximum storm surge component: Storm {max_surge_storm['storm_id']} ({max_surge_storm['surge_mean']:.2f} ft)")
print(f"• Maximum compound WSE: Storm {max_compound_storm['storm_id']} ({max_compound_storm['compound_mean']:.2f} ft)")
print(f"• Maximum rainfall contribution: Storm {max_rainfall_contrib_storm['storm_id']} (+{max_rainfall_contrib_storm['compound_mean'] - max_rainfall_contrib_storm['surge_mean']:.2f} ft)")

# Across-storm statistics of the per-storm means
all_surge_means = [storm['surge_mean'] for storm in storm_data_collection]
all_compound_means = [storm['compound_mean'] for storm in storm_data_collection]
all_rainfall_contribs = [storm['compound_mean'] - storm['surge_mean'] for storm in storm_data_collection]

print(f"\nSTATISTICAL SUMMARY (n={len(storm_data_collection)} storms):")
print(f"• Storm surge component: {np.mean(all_surge_means):.2f} ± {np.std(all_surge_means):.2f} ft")
print(f"• Compound WSE: {np.mean(all_compound_means):.2f} ± {np.std(all_compound_means):.2f} ft")
print(f"• Rainfall contribution: {np.mean(all_rainfall_contribs):.2f} ± {np.std(all_rainfall_contribs):.2f} ft")
print(f"• Rainfall contribution range: {np.min(all_rainfall_contribs):.2f} to {np.max(all_rainfall_contribs):.2f} ft")

print(f"\nDESIGN STORM CLASSIFICATION:")
for i, (percentile_range, storm_list) in enumerate(percentile_ranges.items()):
    members = set(storm_list)  # set membership instead of repeated list scans
    surge_range = [storm['surge_mean'] for storm in storm_data_collection if storm['storm_id'] in members]
    if not surge_range:
        # The quartile cutoffs are interpolated, so with few storms a set can
        # be empty; min()/max() would raise ValueError on an empty list.
        print(f"• {set_names[i]}: 0 storms | Surge range: n/a")
        continue
    print(f"• {set_names[i]}: {len(storm_list)} storms | Surge range: {min(surge_range):.1f}–{max(surge_range):.1f} ft")

plt.show()

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pareto

# WSE of the record whose return period is closest to 500 years
# (depth_raw plus the first 'elevs' value of geo_frame_filtered_to_id).
closest_row = storm_data_all_uncert_by_id.iloc[(storm_data_all_uncert_by_id['return_period'] - 500).abs().argsort()[:1]]
storm_data_all_500_wse = closest_row['depth_raw'].values[0]+geo_frame_filtered_to_id['elevs'].values[0]

# Storm whose realizations are fitted below. Keeping the ID in one variable
# lets the title report the storm that is actually plotted (the title used to
# hard-code a different storm ID).
storm_id = 33

# Select the rows for this storm at the selected RAS location
storm_data_all_uncert_by_all_id = storm_data_all_uncert[(storm_data_all_uncert['ras_id'] == 
closest_ras_ids[idx]) & (storm_data_all_uncert['storm_id'] == storm_id) ]

# Convert depth to water surface elevation
wse_data = storm_data_all_uncert_by_all_id['depth_raw']+geo_frame_filtered_to_id['elevs'].values[0]

# Fit a Pareto distribution to the data (returns shape, location, scale)
shape, loc, scale = pareto.fit(wse_data)

# Density-normalised histogram of the WSE values
plt.figure(figsize=(10, 6))
plt.hist(wse_data, bins=15, color='blue', edgecolor='black', density=True, alpha=0.6, label='Data')

# Overlay the fitted Pareto PDF across the x-range chosen by the histogram
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = pareto.pdf(x, shape, loc, scale)
plt.plot(x, p, 'k', linewidth=2, label=f'Fit: shape={shape:.2f}, loc={loc:.2f}, scale={scale:.2f}')

# The plotted quantity is WSE (depth + elevation), so label it as such.
plt.xlabel('WSE (ft)')
plt.ylabel('Probability Density')
plt.title(f'PDF of WSE for Storm ID {storm_id} at RAS ID {closest_ras_ids[idx]}')
plt.legend()
plt.grid(True)
plt.show()

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pareto

# Storm whose WSE realizations are fitted below
storm_id = 80

# Select the rows for this storm at the selected RAS location
storm_data_all_uncert_by_all_id = storm_data_all_uncert[(storm_data_all_uncert['ras_id'] == 
closest_ras_ids[idx]) & (storm_data_all_uncert['storm_id'] == storm_id) ]

# Convert depth to water surface elevation (depth_raw plus the first 'elevs'
# value of geo_frame_filtered_to_id)
wse_data = storm_data_all_uncert_by_all_id['depth_raw']+geo_frame_filtered_to_id['elevs'].values[0]

# Fit a Pareto distribution to the data (returns shape, location, scale)
shape, loc, scale = pareto.fit(wse_data)

# Density-normalised histogram of the WSE values
plt.figure(figsize=(10, 6))
plt.hist(wse_data, bins=15, color='blue', edgecolor='black', density=True, alpha=0.6, label='Data')

# Overlay the fitted Pareto PDF across the x-range chosen by the histogram
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = pareto.pdf(x, shape, loc, scale)
plt.plot(x, p, 'k', linewidth=2, label=f'Fit: shape={shape:.2f}, loc={loc:.2f}, scale={scale:.2f}')

# The plotted quantity is WSE (depth + elevation), and the title reports the
# storm that is actually selected above (it used to hard-code another ID).
plt.xlabel('WSE (ft)')
plt.ylabel('Probability Density')
plt.title(f'PDF of WSE for Storm ID {storm_id} at RAS ID {closest_ras_ids[idx]}')
plt.legend()
plt.grid(True)
plt.show()

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde

# Storm IDs of interest, taken from cdf_data_frames[idx][3]
unique_storm_ids = cdf_data_frames[idx][3]['storm_id'].unique()

# Surge-only rows at the selected RAS location for those storms
at_location = storm_data_surge_uncert['ras_id'] == closest_ras_ids[idx]
in_storm_set = storm_data_surge_uncert['storm_id'].isin(unique_storm_ids)
filtered_storm_data = storm_data_surge_uncert[at_location & in_storm_set]

if len(filtered_storm_data) > 0:
    print(f"Found {len(filtered_storm_data)} records for {len(unique_storm_ids)} storms")

    # Drop missing depths, then add the first 'elevs' value to obtain WSE
    depth_raw_values = filtered_storm_data['depth_raw'].dropna().values+geo_frame_filtered_to_id['elevs'].values[0]

    if len(depth_raw_values) > 0:
        # Gaussian KDE of the pooled values, sampled at 1000 points between
        # the smallest and largest value
        kde = gaussian_kde(depth_raw_values)
        x_range = np.linspace(depth_raw_values.min(), depth_raw_values.max(), 1000)
        pdf_values = kde(x_range)

        # Plot the estimated density
        plt.figure(figsize=(10, 6))
        plt.plot(x_range, pdf_values, label=f'All Storms (n={len(unique_storm_ids)})', linewidth=2)
        plt.xlabel('WSE (ft)', fontsize=12)
        plt.ylabel('Probability Density', fontsize=12)
        plt.title(f'PDF of Depth Raw for {len(unique_storm_ids)} Storms at RAS ID {closest_ras_ids[idx]}', fontsize=14)
        plt.grid(True, alpha=0.3)
        plt.legend()

        # Summary statistics in a text box anchored to the top-left corner
        # of the axes
        stats_text = (f'Mean: {np.mean(depth_raw_values):.2f} ft\n'
                      f'Std: {np.std(depth_raw_values):.2f} ft\n'
                      f'Samples: {len(depth_raw_values)}')
        stats_box = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
        plt.text(0.02, 0.98, stats_text,
                 transform=plt.gca().transAxes, verticalalignment='top',
                 bbox=stats_box)

        plt.tight_layout()
        plt.show()

        print(f"Storm IDs included: {unique_storm_ids}")
        print(f"WSE range: {depth_raw_values.min():.3f} - {depth_raw_values.max():.3f} ft")
    else:
        print("No valid depth_raw values found")
else:
    print("No data found for the specified ras_id and storm_ids")

In [0]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde

# depth_adj values of the surge-only data at the first selected RAS location
at_first_location = storm_data_surge_uncert['ras_id'] == closest_ras_ids[0]
data_to_plot = storm_data_surge_uncert.loc[at_first_location, 'depth_adj']

# Gaussian KDE of those values, sampled at 1000 points across their range
kde = gaussian_kde(data_to_plot)
x_range = np.linspace(data_to_plot.min(), data_to_plot.max(), 1000)
pdf_values = kde(x_range)

# Draw the estimated density on a fresh figure
pdf_fig, pdf_ax = plt.subplots(figsize=(10, 6))
pdf_ax.plot(x_range, pdf_values, color='blue', label='PDF of depth_adj')
pdf_ax.set_xlabel('Depth Adjusted')
pdf_ax.set_ylabel('Probability Density')
pdf_ax.set_title(f'PDF of Depth Adjusted for RAS ID {closest_ras_ids[0]}')
pdf_ax.grid(True)
pdf_ax.legend()
plt.show()