# Correlating well log pairs: Complex Dynamic Time Warping with boundary constraints

## Introduction to dynamic time warping

In [1]:
# Data manipulation and analysis
import os
import gc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
import glob
from IPython.display import Image as IPImage, display
warnings.filterwarnings('ignore')

from pyCoreRelator import (
    run_comprehensive_dtw_analysis,
    find_complete_core_paths,
    diagnose_chain_breaks,
    calculate_interpolated_ages,
    visualize_combined_segments,
    visualize_dtw_results_from_csv,
    load_log_data,
    plot_core_data,
    plot_dtw_matrix_with_paths,
    plot_correlation_distribution,
    find_best_mappings
)

<hr>

### Test with Cascadia hi-res MS logs

### Define core pairs

In [2]:
# Core pair to correlate. CORE_A / CORE_B are interpolated into the log, image
# and picked-depth file paths and are passed as plot/core labels, so each value
# must match the naming used by the data files.
# Previously used pairings are kept commented out for quick switching;
# only the uncommented CORE_A / CORE_B assignments take effect.
# CORE_A = "M9907-22PC"
# CORE_B = "M9907-23PC"
CORE_A = "M9907-23PC"
# CORE_B = "M9907-25PC"
# CORE_B = "M9907-12PC"
CORE_B = "RR0207-56PC"

#### Data structures and core images

In [None]:
# Define log columns to extract
# LOG_COLUMNS = ['hiresMS', 'CT', 'R', 'G', 'B']  # Choose which logs to include
# LOG_COLUMNS = ['hiresMS', 'CT', 'Lumin']  # Choose which logs to include
LOG_COLUMNS = ['hiresMS']  # Choose which logs to include
DEPTH_COLUMN = 'SB_DEPTH_cm'

# Root directory of the compiled core dataset. Set the CASCADIA_DATA_DIR
# environment variable to point at a different copy of the data; when it is
# unset the original local path is used, so existing runs are unaffected.
# os.path.join(..., '') guarantees the trailing separator that the f-string
# paths below rely on.
mother_dir = os.path.join(
    os.environ.get(
        'CASCADIA_DATA_DIR',
        '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/'
    ),
    ''
)

# Log name -> tag used in the file name of the ML-filled CSV holding that log.
# Lumin, R, G and B all come from the same RGB file.
_LOG_FILE_TAGS = {
    'hiresMS': 'hiresMS',
    'CT': 'CT',
    'Lumin': 'RGB',
    'R': 'RGB',
    'G': 'RGB',
    'B': 'RGB',
    'Den_gm/cc': 'MST'
}


def _core_log_paths(core_name):
    """Return a {log name: CSV path} dict for one core under ``mother_dir``."""
    return {
        log_name: f'{mother_dir}_compiled_logs/{core_name}/ML_filled/{core_name}_{tag}_MLfilled.csv'
        for log_name, tag in _LOG_FILE_TAGS.items()
    }


def _core_image_path(core_name, image_tag):
    """Return the TIFF path of one core image (``image_tag`` is 'RGB' or 'CT')."""
    return f"{mother_dir}_compiled_logs/{core_name}/{core_name}_{image_tag}.tiff"


def _print_load_summary(core_label, var_name, log, available_columns):
    """Print the debug summary of a loaded log array.

    Args:
        core_label: Label shown in the banner (e.g. 'A').
        var_name: Variable name shown in the messages (e.g. 'log_a').
        log: Loaded log data as returned by load_log_data.
        available_columns: Column names reported by load_log_data.
    """
    print(f"\n=== DEBUG: Core {core_label} Loading ===")
    print(f"LOG_COLUMNS requested: {LOG_COLUMNS}")
    print(f"Available columns loaded: {available_columns}")
    print(f"Shape of {var_name}: {log.shape}")
    print(f"Type of {var_name}: {type(log)}")
    if hasattr(log, 'ndim'):
        print(f"{var_name} dimensions: {log.ndim}")
        if log.ndim > 1:
            print(f"{var_name} has {log.shape[1]} columns\n")
        else:
            print(f"{var_name} is 1D (single column)\n")


# Define paths for Core A
core_a_log_paths = _core_log_paths(CORE_A)
core_a_rgb_img_path = _core_image_path(CORE_A, 'RGB')
core_a_ct_img_path = _core_image_path(CORE_A, 'CT')

# Define paths for Core B
core_b_log_paths = _core_log_paths(CORE_B)
core_b_rgb_img_path = _core_image_path(CORE_B, 'RGB')
core_b_ct_img_path = _core_image_path(CORE_B, 'CT')

# Define column mapping for alternative column names
column_alternatives = {
    'hiresMS': ['MS'],
    'CT': ['CT_value'],
    'R': ['R', 'red', 'Red'],
    'G': ['G', 'green', 'Green'],
    'B': ['B', 'blue', 'Blue'],
    'Lumin': ['luminance', 'Luminance'],
    'Den_gm/cc': ['Density', 'density']
}

# Load data for Core A
log_a, md_a, available_columns_a, rgb_img_a, ct_img_a = load_log_data(
    core_a_log_paths,
    {'rgb': core_a_rgb_img_path, 'ct': core_a_ct_img_path},
    LOG_COLUMNS,
    depth_column=DEPTH_COLUMN,
    normalize=True,
    column_alternatives=column_alternatives
)
_print_load_summary('A', 'log_a', log_a, available_columns_a)

# Load data for Core B
log_b, md_b, available_columns_b, rgb_img_b, ct_img_b = load_log_data(
    core_b_log_paths,
    {'rgb': core_b_rgb_img_path, 'ct': core_b_ct_img_path},
    LOG_COLUMNS,
    depth_column=DEPTH_COLUMN,
    normalize=True,
    column_alternatives=column_alternatives
)
_print_load_summary('B', 'log_b', log_b, available_columns_b)

In [None]:
%matplotlib inline

# Locations of the per-core picked-depth tables (relative to the working directory)
pickeddepth_a_csv = f'pickeddepth/{CORE_A}_pickeddepth.csv'
pickeddepth_b_csv = f'pickeddepth/{CORE_B}_pickeddepth.csv'

# Core B: read the picks as (depth, category) tuples, or fall back to no picks
if not os.path.exists(pickeddepth_b_csv):
    print(f"Warning: {pickeddepth_b_csv} not found. Using empty list for picked_b.")
    picked_b = []
else:
    picked_data_b = pd.read_csv(pickeddepth_b_csv)
    depth_values_b = picked_data_b['picked_depths_cm'].values.tolist()
    category_values_b = picked_data_b['category'].values.tolist()
    picked_b = list(zip(depth_values_b, category_values_b))
    print(f"Loaded {len(picked_b)} picked depths for {CORE_B}")

# Core A: same procedure
if not os.path.exists(pickeddepth_a_csv):
    print(f"Warning: {pickeddepth_a_csv} not found. Using empty list for picked_a.")
    picked_a = []
else:
    picked_data_a = pd.read_csv(pickeddepth_a_csv)
    depth_values_a = picked_data_a['picked_depths_cm'].values.tolist()
    category_values_a = picked_data_a['category'].values.tolist()
    picked_a = list(zip(depth_values_a, category_values_a))
    print(f"Loaded {len(picked_a)} picked depths for {CORE_A}")

# One uncertainty value of 1 per pick.
# NOTE(review): the previous comment described the uncertainty size as 2 cm;
# presumably that means +/-1 cm around each pick - confirm the intended value.
picked_uncertainty_b = [1 for _ in picked_b]
picked_uncertainty_a = [1 for _ in picked_a]

In [None]:
# Unzip the (depth, category) picks into parallel lists; no picks -> empty lists
picked_depths_a = [pick[0] for pick in picked_a] if picked_a else []
picked_categories_a = [pick[1] for pick in picked_a] if picked_a else []

picked_depths_b = [pick[0] for pick in picked_b] if picked_b else []
picked_categories_b = [pick[1] for pick in picked_b] if picked_b else []

# Core A: log curve(s), core images and the category-1 picks with bed numbers
is_multilog = (log_a.ndim > 1) and (log_a.shape[1] > 1)   # True only for 2-D data with several columns
fig_a, ax_a = plot_core_data(
    md_a, log_a, f"{CORE_A}",                       # depth array, log data, core name
    rgb_img=rgb_img_a, ct_img=ct_img_a,             # core images
    available_columns=available_columns_a,
    is_multilog=is_multilog,
    picked_depths=picked_depths_a,
    picked_categories=picked_categories_a,
    picked_uncertainties=picked_uncertainty_a,
    figsize=(20, 4),                                # (width, height)
    show_category=[1],
    show_bed_number=True,
)

# Core B: identical layout
is_multilog = (log_b.ndim > 1) and (log_b.shape[1] > 1)
fig_b, ax_b = plot_core_data(
    md_b, log_b, f"{CORE_B}",                       # depth array, log data, core name
    rgb_img=rgb_img_b, ct_img=ct_img_b,             # core images
    available_columns=available_columns_b,
    is_multilog=is_multilog,
    picked_depths=picked_depths_b,
    picked_categories=picked_categories_b,
    picked_uncertainties=picked_uncertainty_b,
    figsize=(20, 4),                                # (width, height)
    show_category=[1],
    show_bed_number=True,
)

plt.tight_layout()
plt.show()

In [6]:
# # OPTIONAL: Extract the top DEPTH_CUTOFF cm of log data, images, and picked depths, and rename the cores
# # Leave this cell commented out to use the full core data with the original names

# # Define depth cutoff
# DEPTH_CUTOFF = 250.0  # cm

# # First, need to reset the original core names for CSV file loading
# # (since the previous cell may have already renamed them)
# ORIGINAL_CORE_A = "M9907-23PC"
# ORIGINAL_CORE_B = "M9907-25PC"

# # Reload the original log data before clipping
# mother_dir = '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/'
# # LOG_COLUMNS = ['hiresMS']  
# LOG_COLUMNS = ['hiresMS', 'CT', 'Lumin'] 
# DEPTH_COLUMN = 'SB_DEPTH_cm'

# # Define paths for original cores
# core_a_log_paths = {
#     'hiresMS': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_hiresMS_MLfilled.csv',
#     'CT': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_CT_MLfilled.csv',
#     'Lumin': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_RGB_MLfilled.csv',
#     'R': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_RGB_MLfilled.csv',
#     'G': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_RGB_MLfilled.csv',
#     'B': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_RGB_MLfilled.csv',
#     'Den_gm/cc': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/ML_filled/{ORIGINAL_CORE_A}_MST_MLfilled.csv'
# }

# core_b_log_paths = {
#     'hiresMS': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_hiresMS_MLfilled.csv',
#     'CT': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_CT_MLfilled.csv',
#     'Lumin': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_RGB_MLfilled.csv',
#     'R': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_RGB_MLfilled.csv',
#     'G': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_RGB_MLfilled.csv',
#     'B': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_RGB_MLfilled.csv',
#     'Den_gm/cc': f'{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/ML_filled/{ORIGINAL_CORE_B}_MST_MLfilled.csv'
# }

# core_a_rgb_img_path = f"{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/{ORIGINAL_CORE_A}_RGB.tiff"
# core_a_ct_img_path = f"{mother_dir}_compiled_logs/{ORIGINAL_CORE_A}/{ORIGINAL_CORE_A}_CT.tiff"
# core_b_rgb_img_path = f"{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/{ORIGINAL_CORE_B}_RGB.tiff"
# core_b_ct_img_path = f"{mother_dir}_compiled_logs/{ORIGINAL_CORE_B}/{ORIGINAL_CORE_B}_CT.tiff"

# column_alternatives = {
#     'hiresMS': ['MS'],
#     'CT': ['CT_value'],
#     'R': ['R', 'red', 'Red'],
#     'G': ['G', 'green', 'Green'],
#     'B': ['B', 'blue', 'Blue'],
#     'Lumin': ['luminance', 'Luminance'],
#     'Den_gm/cc': ['Density', 'density']
# }

# # Reload original full data (with normalize=False to get raw data)
# log_a_orig, md_a_orig, available_columns_a, rgb_img_a_orig, ct_img_a_orig = load_log_data(
#     core_a_log_paths,
#     {'rgb': core_a_rgb_img_path, 'ct': core_a_ct_img_path},
#     LOG_COLUMNS,
#     depth_column=DEPTH_COLUMN,
#     normalize=False,
#     column_alternatives=column_alternatives
# )

# log_b_orig, md_b_orig, available_columns_b, rgb_img_b_orig, ct_img_b_orig = load_log_data(
#     core_b_log_paths,
#     {'rgb': core_b_rgb_img_path, 'ct': core_b_ct_img_path},
#     LOG_COLUMNS,
#     depth_column=DEPTH_COLUMN,
#     normalize=False,
#     column_alternatives=column_alternatives
# )

# # Extract top 200 cm for Core A
# cutoff_mask_a = md_a_orig <= DEPTH_CUTOFF
# log_a_raw = log_a_orig[cutoff_mask_a]
# md_a = md_a_orig[cutoff_mask_a]

# # Extract top 200 cm for Core B
# cutoff_mask_b = md_b_orig <= DEPTH_CUTOFF
# log_b_raw = log_b_orig[cutoff_mask_b]
# md_b = md_b_orig[cutoff_mask_b]

# # Renormalize the clipped log signals
# from sklearn.preprocessing import MinMaxScaler

# # Initialize scaler
# scaler = MinMaxScaler()

# # For multi-column data
# if log_a_raw.ndim > 1:
#     log_a = scaler.fit_transform(log_a_raw)
# else:
#     log_a = scaler.fit_transform(log_a_raw.reshape(-1, 1)).flatten()

# if log_b_raw.ndim > 1:
#     log_b = scaler.fit_transform(log_b_raw)
# else:
#     log_b = scaler.fit_transform(log_b_raw.reshape(-1, 1)).flatten()

# # Clip images proportionally
# def clip_image(img, max_depth_orig, depth_cutoff):
#     if img is not None:
#         clip_ratio = min(1.0, depth_cutoff / max_depth_orig)
#         if img.ndim == 3:  # Color image
#             clip_height = int(img.shape[0] * clip_ratio)
#             return img[:clip_height, :, :]
#         elif img.ndim == 2:  # Grayscale image
#             clip_height = int(img.shape[0] * clip_ratio)
#             return img[:clip_height, :]
#     return img

# # Clip images for both cores
# rgb_img_a = clip_image(rgb_img_a_orig, md_a_orig.max(), DEPTH_CUTOFF)
# ct_img_a = clip_image(ct_img_a_orig, md_a_orig.max(), DEPTH_CUTOFF)
# rgb_img_b = clip_image(rgb_img_b_orig, md_b_orig.max(), DEPTH_CUTOFF)
# ct_img_b = clip_image(ct_img_b_orig, md_b_orig.max(), DEPTH_CUTOFF)

# # Reload picked depths using original core names
# pickeddepth_a_csv = f'pickeddepth/{ORIGINAL_CORE_A}_pickeddepth.csv'
# pickeddepth_b_csv = f'pickeddepth/{ORIGINAL_CORE_B}_pickeddepth.csv'

# # Load and filter picked depths for Core A
# if os.path.exists(pickeddepth_a_csv):
#     picked_data_a = pd.read_csv(pickeddepth_a_csv)
#     # Filter to depths within cutoff
#     picked_data_a_filtered = picked_data_a[picked_data_a['picked_depths_cm'] <= DEPTH_CUTOFF]
#     picked_a = list(zip(picked_data_a_filtered['picked_depths_cm'].values.tolist(), 
#                         picked_data_a_filtered['category'].values.tolist()))
#     print(f"Loaded {len(picked_a)} picked depths (≤{DEPTH_CUTOFF} cm) for {ORIGINAL_CORE_A}")
# else:
#     print(f"Warning: {pickeddepth_a_csv} not found. Using empty list for picked_a.")
#     picked_a = []

# # Load and filter picked depths for Core B
# if os.path.exists(pickeddepth_b_csv):
#     picked_data_b = pd.read_csv(pickeddepth_b_csv)
#     # Filter to depths within cutoff
#     picked_data_b_filtered = picked_data_b[picked_data_b['picked_depths_cm'] <= DEPTH_CUTOFF]
#     picked_b = list(zip(picked_data_b_filtered['picked_depths_cm'].values.tolist(), 
#                         picked_data_b_filtered['category'].values.tolist()))
#     print(f"Loaded {len(picked_b)} picked depths (≤{DEPTH_CUTOFF} cm) for {ORIGINAL_CORE_B}")
# else:
#     print(f"Warning: {pickeddepth_b_csv} not found. Using empty list for picked_b.")
#     picked_b = []

# # Update uncertainty arrays
# picked_uncertainty_a = [1] * len(picked_a)
# picked_uncertainty_b = [1] * len(picked_b)

# # Extract filtered depths and categories
# picked_depths_a = [depth for depth, category in picked_a] if picked_a else []
# picked_categories_a = [category for depth, category in picked_a] if picked_a else []
# picked_depths_b = [depth for depth, category in picked_b] if picked_b else []
# picked_categories_b = [category for depth, category in picked_b] if picked_b else []

# # Rename cores
# CORE_A = "Core_A"
# CORE_B = "Core_B"

# print(f"\n=== TOP 200 CM EXTRACTION COMPLETE ===")
# print(f"Core A: Extracted {len(md_a)} data points (0 to {md_a.max():.1f} cm)")
# print(f"Core B: Extracted {len(md_b)} data points (0 to {md_b.max():.1f} cm)")
# print(f"Cores renamed to: {CORE_A} and {CORE_B}")
# print(f"Log A shape after clipping and renormalization: {log_a.shape}")
# print(f"Log B shape after clipping and renormalization: {log_b.shape}")
# print(f"Log A range after renormalization: [{log_a.min():.3f}, {log_a.max():.3f}]")
# print(f"Log B range after renormalization: [{log_b.min():.3f}, {log_b.max():.3f}]")
# print(f"Picked depths A: {len(picked_depths_a)} depths")
# print(f"Picked depths B: {len(picked_depths_b)} depths")
# if rgb_img_a is not None:
#     print(f"RGB image A shape after clipping: {rgb_img_a.shape}")
# if rgb_img_b is not None:
#     print(f"RGB image B shape after clipping: {rgb_img_b.shape}")
# if ct_img_a is not None:
#     print(f"CT image A shape after clipping: {ct_img_a.shape}")
# if ct_img_b is not None:
#     print(f"CT image B shape after clipping: {ct_img_b.shape}")
# print("=== Comment out this cell to use full core data ===\n")


<hr>

# Usage Examples and Executions

In [7]:
# Keep only the category-1 picks of each core, stored as float32 depth arrays
all_depths_a_cat1 = np.array(
    [pick[0] for pick in picked_a if pick[1] == 1]
).astype('float32')
all_depths_b_cat1 = np.array(
    [pick[0] for pick in picked_b if pick[1] == 1]
).astype('float32')

#### Extract ages

In [8]:
def load_age_constraints(core_name, consider_adjacent_core=False, base_path=None):
    """
    Load age constraints for a specific core, optionally including data from adjacent cores.
    
    Reads ``<base_path>/<core_name>_age.csv`` and, when requested, every other
    ``*_age.csv`` in the same directory whose name starts with ``core_name``
    minus its last two characters. Rows with a missing value in any required
    column are discarded.
    
    Args:
        core_name: Name of the core to load data for
        consider_adjacent_core: If True, also load data from cores with similar names
        base_path: Directory holding the ``*_age.csv`` files. Defaults to the
            original hard-coded Goldfinger2012 directory when None.
    
    Returns:
        Dictionary containing all age constraint data. ``'depths'`` is a pandas
        Series (midpoint of mindepth_cm/maxdepth_cm) with a fresh 0..n-1 index
        when data was loaded; every other entry is a plain list. When no file
        is found, all entries (including ``'depths'``) are empty lists.
    
    Raises:
        KeyError: If a loaded CSV lacks one of the required columns.
    """
    if base_path is None:
        base_path = '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/Age constraints/Goldfinger2012'
    
    # Columns that must be non-null for a row to be usable
    required_columns = ['calib502_agebp', 'calib502_2sigma_pos', 'calib502_2sigma_neg', 
                      'mindepth_cm', 'maxdepth_cm', 'in_sequence', 'core', 'interpreted_bed']
    
    # Primary core CSV always comes first
    csv_files = [f'{base_path}/{core_name}_age.csv']
    
    # Add adjacent core CSVs if specified
    if consider_adjacent_core and os.path.exists(base_path):
        # Base part of the core name (without the last two characters)
        core_base = core_name[:-2]
        # sorted() makes the load order independent of the file system
        for file_name in sorted(os.listdir(base_path)):
            if file_name.endswith('_age.csv') and file_name.startswith(f'{core_base}'):
                potential_core = file_name.split('_age.csv')[0]
                if potential_core != core_name:  # Skip the primary core
                    csv_files.append(f'{base_path}/{file_name}')
    
    # Initialize result containers
    result = {
        'depths': [],
        'ages': [],
        'pos_errors': [],
        'neg_errors': [],
        'in_sequence_flags': [],
        'in_sequence_depths': [],
        'in_sequence_ages': [],
        'in_sequence_pos_errors': [],
        'in_sequence_neg_errors': [],
        'out_sequence_depths': [],
        'out_sequence_ages': [],
        'out_sequence_pos_errors': [],
        'out_sequence_neg_errors': [],
        'core': [],
        'interpreted_bed': []
    }
    
    # Read every existing file, keeping only rows with all required values
    frames = []
    for csv_file in csv_files:
        if os.path.exists(csv_file):
            frames.append(pd.read_csv(csv_file).dropna(subset=required_columns))
    
    if not frames:
        print(f"Warning: No age constraint files found for {core_name}")
        return result
    
    # Concatenate once instead of growing a frame inside the loop
    all_data = pd.concat(frames)
    print(f"Loaded {len(all_data)} age constraints for {core_name}")
    
    # Order by depth when several cores were combined (stable: ties keep load order)
    if consider_adjacent_core:
        all_data = all_data.sort_values(by='mindepth_cm', kind='stable')
    
    # dropna/concat/sort leave gaps, duplicates and a permuted order in the index;
    # rebuild it so label-based and positional access to 'depths' agree
    all_data = all_data.reset_index(drop=True)
    
    # Extract all age constraints
    result['depths'] = (all_data['mindepth_cm'] + all_data['maxdepth_cm']) / 2
    result['ages'] = all_data['calib502_agebp'].tolist()
    result['pos_errors'] = all_data['calib502_2sigma_pos'].tolist()
    result['neg_errors'] = all_data['calib502_2sigma_neg'].tolist()
    result['in_sequence_flags'] = all_data['in_sequence'].tolist()
    result['core'] = all_data['core'].tolist()
    result['interpreted_bed'] = all_data['interpreted_bed'].tolist()
    
    # Separate in-sequence (flag == 1) and out-of-sequence constraints
    rows = zip(result['depths'].tolist(), result['ages'], result['pos_errors'],
               result['neg_errors'], result['in_sequence_flags'])
    for depth, age, pos_error, neg_error, flag in rows:
        prefix = 'in_sequence' if flag == 1 else 'out_sequence'
        result[f'{prefix}_depths'].append(depth)
        result[f'{prefix}_ages'].append(age)
        result[f'{prefix}_pos_errors'].append(pos_error)
        result[f'{prefix}_neg_errors'].append(neg_error)
    
    return result

In [None]:
# Age constraints for Core A, then Core B; the flag is passed through to
# load_age_constraints so similarly named cores are included as well
consider_adjacent_core = True
age_data_a, age_data_b = [
    load_age_constraints(core_id, consider_adjacent_core)
    for core_id in (CORE_A, CORE_B)
]

In [None]:
# Interpolate ages at the category-1 picks of Core A
interp_kwargs_a = dict(
    # Input data
    picked_depths=all_depths_a_cat1,                                     # depths to interpolate ages for
    age_constraints_depths=age_data_a['depths'],                         # age constraint depths
    age_constraints_ages=age_data_a['ages'],                             # age constraint ages
    age_constraints_pos_errors=age_data_a['pos_errors'],                 # positive errors
    age_constraints_neg_errors=age_data_a['neg_errors'],                 # negative errors
    age_constraints_in_sequence_flags=age_data_a['in_sequence_flags'],   # in-sequence flags
    age_constraint_source_core=age_data_a['core'],                       # source core of each constraint
    # Core boundaries
    top_bottom=True,                                                     # include top and bottom depths/ages
    top_depth=0.0,                                                       # top of core depth
    bottom_depth=md_a[-1],                                               # last depth sample of core A
    top_age=0,                                                           # age assigned to the core top
    top_age_pos_error=75,                                                # positive uncertainty of the top age
    top_age_neg_error=75,                                                # negative uncertainty of the top age
    # Uncertainty calculation
    uncertainty_method='MonteCarlo',                                     # uncertainty calculation method
    n_monte_carlo=10000,                                                 # number of Monte Carlo iterations
    # Visualization and output
    show_plot=True,                                                      # display plot
    core_name=CORE_A,                                                    # core name for plot title
    export_csv=True,                                                     # export results to CSV
)
pickeddepth_ages_a = calculate_interpolated_ages(**interp_kwargs_a)

# Report the raw age constraints of Core A
print("\nAge Constraints for Core A:")
constraint_depths_a = age_data_a['depths']
if len(constraint_depths_a) == 0:
    print(f"No age constraints available in {CORE_A}")
else:
    # 'depths' may be a pandas Series (read positionally) or a plain list
    depths_are_series_a = isinstance(constraint_depths_a, pd.Series)
    for idx in range(len(constraint_depths_a)):
        if depths_are_series_a:
            depth_val = constraint_depths_a.iloc[idx]
        else:
            depth_val = constraint_depths_a[idx]
        age_val = age_data_a['ages'][idx]
        pos_err_val = age_data_a['pos_errors'][idx]
        neg_err_val = age_data_a['neg_errors'][idx]
        in_seq = age_data_a['in_sequence_flags'][idx]

        # Source core and interpreted bed are appended only when an entry exists
        source_core_info = ""
        if idx < len(age_data_a['core']):
            source_core_info = f", Source Core: {age_data_a['core'][idx]}"
        bed_info = ""
        if idx < len(age_data_a['interpreted_bed']):
            bed_info = f", Interpreted Bed: {age_data_a['interpreted_bed'][idx]}"

        print(f"Depth: {depth_val:.2f} cm, Age: {age_val:.1f} years BP (+{pos_err_val:.1f} ; -{neg_err_val:.1f}), In Sequence: {in_seq}{source_core_info}{bed_info}")

# Report the interpolated ages
print(f"\nEstimated Ages for picked depths in {CORE_A}:")
for idx, depth in enumerate(pickeddepth_ages_a['depths']):
    est_age = pickeddepth_ages_a['ages'][idx]
    est_pos = pickeddepth_ages_a['pos_uncertainties'][idx]
    est_neg = pickeddepth_ages_a['neg_uncertainties'][idx]
    print(f"Depth: {depth:.2f} cm, Age: {est_age:.1f} years BP (+{est_pos:.1f} ; -{est_neg:.1f})")

In [None]:
# Interpolate ages at the category-1 picks of Core B
interp_kwargs_b = dict(
    # Input data
    picked_depths=all_depths_b_cat1,                                     # depths to interpolate ages for
    age_constraints_depths=age_data_b['depths'],                         # age constraint depths
    age_constraints_ages=age_data_b['ages'],                             # age constraint ages
    age_constraints_pos_errors=age_data_b['pos_errors'],                 # positive errors
    age_constraints_neg_errors=age_data_b['neg_errors'],                 # negative errors
    age_constraints_in_sequence_flags=age_data_b['in_sequence_flags'],   # in-sequence flags
    age_constraint_source_core=age_data_b['core'],                       # source core of each constraint
    # Core boundaries
    top_bottom=True,                                                     # include top and bottom depths/ages
    top_depth=0.0,                                                       # top of core depth
    bottom_depth=md_b[-1],                                               # last depth sample of core B
    top_age=0,                                                           # age assigned to the core top
    top_age_pos_error=75,                                                # positive uncertainty of the top age
    top_age_neg_error=75,                                                # negative uncertainty of the top age
    # Uncertainty calculation
    uncertainty_method='MonteCarlo',                                     # 'MonteCarlo', 'Linear', or 'Gaussian'
    n_monte_carlo=10000,                                                 # number of Monte Carlo sampling iterations
    # Visualization and output
    show_plot=True,                                                      # display plot
    core_name=CORE_B,                                                    # core name for plot title
    export_csv=True,                                                     # export results to CSV
)
pickeddepth_ages_b = calculate_interpolated_ages(**interp_kwargs_b)

# Report the raw age constraints of Core B
print("\nAge Constraints for Core B:")
constraint_depths_b = age_data_b['depths']
if len(constraint_depths_b) == 0:
    print(f"No age constraints available in {CORE_B}")
else:
    # 'depths' may be a pandas Series (read positionally) or a plain list
    depths_are_series_b = isinstance(constraint_depths_b, pd.Series)
    for idx in range(len(constraint_depths_b)):
        if depths_are_series_b:
            depth_val = constraint_depths_b.iloc[idx]
        else:
            depth_val = constraint_depths_b[idx]
        age_val = age_data_b['ages'][idx]
        pos_err_val = age_data_b['pos_errors'][idx]
        neg_err_val = age_data_b['neg_errors'][idx]
        in_seq = age_data_b['in_sequence_flags'][idx]

        # Source core and interpreted bed are appended only when an entry exists
        source_core_info = ""
        if idx < len(age_data_b['core']):
            source_core_info = f", Source Core: {age_data_b['core'][idx]}"
        bed_info = ""
        if idx < len(age_data_b['interpreted_bed']):
            bed_info = f", Interpreted Bed: {age_data_b['interpreted_bed'][idx]}"

        print(f"Depth: {depth_val:.2f} cm, Age: {age_val:.1f} years BP (+{pos_err_val:.1f} ; -{neg_err_val:.1f}), In Sequence: {in_seq}{source_core_info}{bed_info}")

# Report the interpolated ages
print(f"\nEstimated Ages for picked depths in {CORE_B}:")
for idx, depth in enumerate(pickeddepth_ages_b['depths']):
    est_age = pickeddepth_ages_b['ages'][idx]
    est_pos = pickeddepth_ages_b['pos_uncertainties'][idx]
    est_neg = pickeddepth_ages_b['neg_uncertainties'][idx]
    print(f"Depth: {depth:.2f} cm, Age: {est_age:.1f} years BP (+{est_pos:.1f} ; -{est_neg:.1f})")

In [None]:
# Reload the per-pick age estimates from their CSV files (looked up in the
# working directory). When a file is missing, only a warning is printed and
# the corresponding pickeddepth_ages_* variable is left untouched.

# Core A
core_a_age_csv = f"{CORE_A}_pickeddepth_age.csv"
if not os.path.exists(core_a_age_csv):
    print(f"Warning: Could not find age data CSV for {CORE_A}")
else:
    df_ages_a = pd.read_csv(core_a_age_csv)
    # result key <- CSV column, each converted to a list of float32-rounded values
    pickeddepth_ages_a = {
        key: df_ages_a[column].values.astype('float32').tolist()
        for key, column in (
            ('depths', 'picked_depths_cm'),
            ('ages', 'est_age'),
            ('pos_uncertainties', 'est_age_poserr'),
            ('neg_uncertainties', 'est_age_negerr'),
        )
    }
    print(f"Loaded age data for {CORE_A} from CSV file")

# Core B
core_b_age_csv = f"{CORE_B}_pickeddepth_age.csv"
if not os.path.exists(core_b_age_csv):
    print(f"Warning: Could not find age data CSV for {CORE_B}")
else:
    df_ages_b = pd.read_csv(core_b_age_csv)
    # result key <- CSV column, each converted to a list of float32-rounded values
    pickeddepth_ages_b = {
        key: df_ages_b[column].values.astype('float32').tolist()
        for key, column in (
            ('depths', 'picked_depths_cm'),
            ('ages', 'est_age'),
            ('pos_uncertainties', 'est_age_poserr'),
            ('neg_uncertainties', 'est_age_negerr'),
        )
    }
    print(f"Loaded age data for {CORE_B} from CSV file")


#### Find all segment pairs between boundaries

In [13]:
# Switches for the correlation run, plus the string tags derived from them
age_consideration = True
restricted_age_correlation = True
shortest_path_search = True

# Tag describing how age information is used:
# 'no_age' when ages are ignored, otherwise 'restricted_age' or 'loose_age'
if not age_consideration:
    YES_NO_AGE = 'no_age'
elif restricted_age_correlation:
    YES_NO_AGE = 'restricted_age'
else:
    YES_NO_AGE = 'loose_age'

# Tag describing the path search strategy
SEARCH_METHOD = 'optimal' if shortest_path_search else 'random'

# DTW mode: False (default) performs dependent DTW, True performs independent DTW
independent_dtw = False

In [None]:
%matplotlib inline

# Example usage:
# Set picked_depths_a and picked_depths_b to None to use auto-segmentation

# Folder that receives the per-segment-pair DTW frame images
frames_folder = "outputs/SegmentPair_DTW_frames"

# Start from a clean folder: remove leftover PNG frames, or create the folder
if os.path.exists(frames_folder):
    # Get all PNG files in the folder
    png_files = glob.glob(os.path.join(frames_folder, "*.png"))
    
    # Delete each PNG file; a failure is reported and the remaining files are still processed
    deleted_count = 0
    for png_file in png_files:
        try:
            os.remove(png_file)
        except OSError as e:
            print(f"Error deleting {png_file}: {e}")
        else:
            deleted_count += 1
    
    # Report the files actually removed, not merely the ones found
    print(f"Cleaned up {deleted_count} PNG files from {frames_folder}")
else:
    print(f"Folder '{frames_folder}' does not exist. Creating it...")
    os.makedirs(frames_folder, exist_ok=True)
    
# Age-related arguments for both cores, grouped so the call below stays short.
dtw_age_kwargs = dict(
    age_consideration=age_consideration,                    # Include age constraints
    restricted_age_correlation=restricted_age_correlation,  # Use strict age correlation
    ages_a=pickeddepth_ages_a,                              # Age data for core A depths
    ages_b=pickeddepth_ages_b,                              # Age data for core B depths
    all_constraint_ages_a=age_data_a['in_sequence_ages'],
    all_constraint_ages_b=age_data_b['in_sequence_ages'],
    all_constraint_depths_a=age_data_a['in_sequence_depths'],
    all_constraint_depths_b=age_data_b['in_sequence_depths'],
    all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'],
    all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'],
    all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'],
    all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'],
    age_constraint_a_source_cores=age_data_a['core'],
    age_constraint_b_source_cores=age_data_b['core'],
)

# Plot / animation options; the output names carry the core names and run tags.
dtw_plot_kwargs = dict(
    visualize_pairs=True,                                   # Create pair visualizations
    visualize_segment_labels=False,                         # Show segment labels in plots
    create_dtw_matrix=True,                                 # Generate DTW distance matrix
    dtwmatrix_output_filename=f'SegmentPair_DTW_matrix_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.png',
    creategif=True,                                         # Create animated GIF
    gif_output_filename=f'SegmentPair_DTW_animation_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.gif',
    max_frames=50,                                          # Maximum frames in animation
    color_interval_size=5,                                  # Color coding interval size
    keep_frames=True,                                       # Save individual frames
)

# Run the comprehensive DTW analysis; the seven results are reused by the cells below.
(
    dtw_results,
    valid_dtw_pairs,
    segments_a,
    segments_b,
    depth_boundaries_a,
    depth_boundaries_b,
    dtw_distance_matrix_full,
) = run_comprehensive_dtw_analysis(
    log_a, log_b,                        # Log data for core A / core B
    md_a, md_b,                          # Measured depth for core A / core B
    picked_depths_a=all_depths_a_cat1,   # Selected depths for core A
    picked_depths_b=all_depths_b_cat1,   # Selected depths for core B
    core_a_name=CORE_A,
    core_b_name=CORE_B,
    top_bottom=True,                     # Include top and bottom boundaries
    top_depth=0.0,                       # Starting depth for analysis
    independent_dtw=independent_dtw,     # Use independent DTW if True
    exclude_deadend=True,                # Exclude dead-end segments
    **dtw_age_kwargs,
    **dtw_plot_kwargs,
    debug=False,                         # Enable debug output
)

In [None]:
# Run the chain-break diagnostic on the outputs of the DTW analysis cell:
# valid segment pairs, segment definitions and depth boundaries of both cores.
diagnostic_result = diagnose_chain_breaks(
    valid_dtw_pairs,
    segments_a, segments_b,                    # Segment definitions for core A / core B
    depth_boundaries_a, depth_boundaries_b,    # Depth boundaries for core A / core B
)

<hr>

## Search complete DTW paths

In [None]:
# Name of the mappings CSV; later cells read it back from the outputs/ folder.
mappings_csv_name = f"mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.csv"

# Search and processing options for the complete-path search.
path_search_options = dict(
    start_from_top_only=True,                   # Start path search from top segments only
    shortest_path_search=shortest_path_search,  # Use shortest path search algorithm
    shortest_path_level=2,                      # Path level preference (higher = more segments)
    max_search_path=100000,                     # Maximum paths per segment pair to avoid memory issues
    batch_size=1000,                            # Processing batch size
    n_jobs=-1,                                  # Number of CPU cores (-1 uses all available)
    debug=False,                                # Enable debug output
)

complete_path_search_result = find_complete_core_paths(
    valid_dtw_pairs,                            # Valid DTW segment pairs from analysis
    segments_a, segments_b,                     # Segment definitions for core A / core B
    log_a, log_b,                               # Log data for core A / core B
    depth_boundaries_a, depth_boundaries_b,     # Depth boundaries for core A / core B
    dtw_results,                                # DTW analysis results
    output_csv=mappings_csv_name,
    **path_search_options,
)

In [None]:
%matplotlib inline

# File names of the two animated GIFs; they are loaded from outputs/ at the end of this cell.
correlation_save_path = f'CombinedDTW_correlation_mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.gif'
matrix_save_path = f'CombinedDTW_matrix_mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.gif'

# Step 1: locate the mappings CSV (same name tags as the output_csv of the path-search cell).
sequential_mappings_csv = f"outputs/mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.csv"

# GIF output settings.
mapping_gif_kwargs = dict(
    creategif=True,                                         # Create animated GIF output
    correlation_gif_output_filename=correlation_save_path,  # Output filename for correlation GIF
    matrix_gif_output_filename=matrix_save_path,            # Output filename for matrix GIF
    max_frames=50,                                          # Maximum number of frames in GIF
    keep_frames=True,                                       # Keep individual frames after GIF creation
)

# Age-constraint inputs for both cores.
mapping_age_kwargs = dict(
    mark_ages=age_consideration,                            # Mark age constraints in visualization
    ages_a=pickeddepth_ages_a,
    ages_b=pickeddepth_ages_b,
    all_constraint_depths_a=age_data_a['in_sequence_depths'],
    all_constraint_depths_b=age_data_b['in_sequence_depths'],
    all_constraint_ages_a=age_data_a['in_sequence_ages'],
    all_constraint_ages_b=age_data_b['in_sequence_ages'],
    all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'],
    all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'],
    all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'],
    all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'],
    age_constraint_a_source_cores=age_data_a['core'],
    age_constraint_b_source_cores=age_data_b['core'],
)

# Step 2: visualize the mappings stored in the CSV.
visualize_dtw_results_from_csv(
    sequential_mappings_csv,                    # CSV file with sequential mappings
    log_a, log_b,                               # Log data for core A / core B
    md_a, md_b,                                 # Measured depth data for core A / core B
    dtw_results,                                # DTW analysis results
    valid_dtw_pairs,                            # Valid DTW segment pairs
    segments_a, segments_b,                     # Segment definitions for core A / core B
    depth_boundaries_a, depth_boundaries_b,     # Depth boundaries for core A / core B
    dtw_distance_matrix_full,                   # Full DTW distance matrix
    core_a_name=CORE_A,
    core_b_name=CORE_B,
    color_interval_size=5,                      # Color interval size for visualization
    debug=False,                                # Enable debug output
    visualize_pairs=False,                      # Show DTW pairs in visualization
    visualize_segment_labels=False,             # Show segment labels in visualization
    mark_depths=True,                           # Mark depth points in visualization
    **mapping_gif_kwargs,
    **mapping_age_kwargs,
)

# Step 3: display both GIFs, each under its own caption.
for gif_caption, gif_name in (
    ("DTW Correlation Mappings GIF:", correlation_save_path),
    ("DTW Matrix Mappings GIF:", matrix_save_path),
):
    print(gif_caption)
    display(IPImage(f"outputs/{gif_name}"))

In [None]:
# Load the saved DTW results
sequential_mappings_csv = f'outputs/mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.csv'
output_matrix_png_filename = f'CombinedDTW_matrix_mappings_colored_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.png'

%matplotlib inline

_ = plot_dtw_matrix_with_paths(
    # Input data
    dtw_distance_matrix_full,                                                       # Full DTW distance matrix
    sequential_mappings_csv=sequential_mappings_csv,                                # CSV file with sequential mappings
    # Core identifiers
    core_a_name=CORE_A,                                                             # Name identifier for core A
    core_b_name=CORE_B,                                                             # Name identifier for core B
    md_a=md_a,                                                                      # Metadata for core A
    md_b=md_b,                                                                      # Metadata for core B
    # Visualization settings
    mode='all_paths_colored',                                                       # Visualization mode
    color_metric='perc_diag',                                                       # Metric used for coloring paths
                                                                                    # Available options: 'corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag', 
                                                                                    # 'variance_deviation', 'match_min', 'match_mean', 'perc_age_overlap', None (uses mapping_id)
    output_filename=output_matrix_png_filename,                                     # Output filename for the plot
    # Age constraint data
    age_constraint_a_depths=age_data_a['in_sequence_depths'] if age_consideration else None,  # Depth constraints for core A
    age_constraint_a_ages=age_data_a['in_sequence_ages'] if age_consideration else None,      # Age constraints for core A
    age_constraint_a_source_cores=age_data_a['core'] if age_consideration else None,          # Source cores for age constraints A
    age_constraint_b_depths=age_data_b['in_sequence_depths'] if age_consideration else None,  # Depth constraints for core B
    age_constraint_b_ages=age_data_b['in_sequence_ages'] if age_consideration else None,      # Age constraints for core B
    age_constraint_b_source_cores=age_data_b['core'] if age_consideration else None,          # Source cores for age constraints B
    # Performance settings
    n_jobs=-1                                                                       # Number of parallel jobs (-1 means use all processors)
)

In [None]:
# Select the best mappings from the saved CSV using custom metric weights.
sequential_mappings_csv = f'outputs/mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.csv'
dtw_results_df = pd.read_csv(sequential_mappings_csv)  # loaded here; not passed to the call below

# Weight given to each metric.
# NOTE(review): 'wrapping_deviation' and 'mean_matching_function' do not appear
# among the metric names listed elsewhere in this notebook ('variance_deviation',
# 'match_min', 'match_mean', ...) — confirm the keys find_best_mappings expects.
custom_weights = dict(
    corr_coef=3.0,
    perc_diag=1.0,
    norm_dtw=1.0,
    dtw_ratio=0.0,
    perc_age_overlap=1.0,
    wrapping_deviation=0.0,
    mean_matching_function=0.0,
)

(
    top_mapping_ids,
    top_mapping_pairs,
    top_mappings_df,
) = find_best_mappings(
    csv_file_path=sequential_mappings_csv,
    top_n=5,
    filter_shortest_dtw=True,
    metric_weight=custom_weights,
)


In [None]:
%matplotlib inline

# View selection: the per-pair view also turns on segment labels and depth
# markers; the full-path view turns both off.
visualize_pairs = False
visualize_type, visualize_segment_labels, mark_depths = (
    ('pairs', True, True) if visualize_pairs else ('fullpath', False, False)
)

# First entry of the mapping lists produced by the find_best_mappings cell.
best_mapping_pairs = top_mapping_pairs[0]
best_mapping_id = top_mapping_ids[0]

# Suffix shared by both output file names.
combined_output_tag = f'{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}_{best_mapping_id}_{visualize_type}'

# Age-constraint arguments: age data is only read when ages are in use;
# otherwise every age argument is passed as None.
if age_consideration:
    combined_age_kwargs = dict(
        ages_a=pickeddepth_ages_a,
        ages_b=pickeddepth_ages_b,
        all_constraint_ages_a=age_data_a['in_sequence_ages'],
        all_constraint_ages_b=age_data_b['in_sequence_ages'],
        all_constraint_depths_a=age_data_a['in_sequence_depths'],
        all_constraint_depths_b=age_data_b['in_sequence_depths'],
        all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'],
        all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'],
        all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'],
        all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'],
        age_constraint_a_source_cores=age_data_a['core'],
        age_constraint_b_source_cores=age_data_b['core'],
    )
else:
    combined_age_kwargs = dict.fromkeys(
        (
            'ages_a',
            'ages_b',
            'all_constraint_ages_a',
            'all_constraint_ages_b',
            'all_constraint_depths_a',
            'all_constraint_depths_b',
            'all_constraint_pos_errors_a',
            'all_constraint_pos_errors_b',
            'all_constraint_neg_errors_a',
            'all_constraint_neg_errors_b',
            'age_constraint_a_source_cores',
            'age_constraint_b_source_cores',
        ),
        None,
    )

# Visualize the combined segments of the selected mapping (four return values, all discarded).
_, _, _, _ = visualize_combined_segments(
    log_a=log_a,                                        # Core A log data
    log_b=log_b,                                        # Core B log data
    md_a=md_a,                                          # Core A measured depths
    md_b=md_b,                                          # Core B measured depths
    dtw_results=dtw_results,                            # DTW alignment results
    valid_dtw_pairs=valid_dtw_pairs,                    # Valid DTW pairs
    segments_a=segments_a,                              # Core A segments
    segments_b=segments_b,                              # Core B segments
    depth_boundaries_a=depth_boundaries_a,              # Core A depth boundaries
    depth_boundaries_b=depth_boundaries_b,              # Core B depth boundaries
    dtw_distance_matrix_full=dtw_distance_matrix_full,  # Full DTW distance matrix
    segment_pairs_to_combine=best_mapping_pairs,        # Segment pairs to combine
    color_interval_size=5,                              # Size of color intervals
    visualize_pairs=visualize_pairs,
    visualize_segment_labels=visualize_segment_labels,
    mark_depths=mark_depths,
    correlation_save_path=f'CombinedDTW_correlation_{combined_output_tag}.png',
    matrix_save_path=f'CombinedDTW_matrix_{combined_output_tag}.png',
    mark_ages=age_consideration,                        # Whether to mark ages (True/False)
    **combined_age_kwargs,
    core_a_name=CORE_A,
    core_b_name=CORE_B,
)

In [None]:
# Quality index whose distribution is plotted. Available quality indices:
# 'corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag', 'variance_deviation',
# 'match_min', 'match_mean', 'perc_age_overlap'
targeted_quality_index = 'corr_coef'

# 'corr_coef' is labelled "r-values" in the output file name; any other index uses its own name.
if targeted_quality_index == "corr_coef":
    distribution_label = "r-values"
else:
    distribution_label = targeted_quality_index

plot_correlation_distribution(
    csv_file=f'outputs/mappings_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.csv',  # Path to mappings CSV file
    target_mapping_id=top_mapping_ids[0],       # ID of mapping to analyze
    quality_index=targeted_quality_index,       # Quality metric to plot
    no_bins=None,                               # Number of bins (auto if None)
    save_png=True,                              # Whether to save plot as PNG
    png_filename=f'{distribution_label}_distribution_{CORE_A}_{CORE_B}_{YES_NO_AGE}_{SEARCH_METHOD}.png',
    pdf_method='normal',                        # PDF fitting method: 'KDE', 'skew-normal', or 'normal'
    kde_bandwidth=0.05                          # Bandwidth for KDE method
)