# Correlating well log pairs: Complex Dynamic Time Warping with boundary constraints

## Introduction to dynamic time warping

In [None]:
# Data manipulation and analysis
import os
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from IPython.display import Image as IPImage, display
warnings.filterwarnings('ignore')

from pyCoreRelator import (
    run_comprehensive_dtw_analysis,
    find_complete_core_paths,
    diagnose_chain_breaks,
    calculate_interpolated_ages,
    load_pickeddepth_ages_from_csv,
    visualize_combined_segments,
    visualize_dtw_results_from_csv,
    load_core_log_data,
    plot_dtw_matrix_with_paths,
    plot_correlation_distribution,
    find_best_mappings,
    load_core_age_constraints
)

## Extract and show each category of picked depths ###
%matplotlib inline

<hr>

### Test with Cascadia hi-res MS logs

### Define core pairs

In [None]:
# First core of the pair; this name is interpolated into the Core A data paths below.
CORE_A = "M9907-25PC"
# Alternative choice for Core A:
# CORE_A = "M9907-23PC"

In [None]:
# Second core of the pair; this name is interpolated into the Core B data paths below.
CORE_B = "M9907-23PC"
# Alternative choice for Core B:
# CORE_B = "M9907-11PC"

#### Data structures and core images

In [None]:
# Define log columns to extract.
# LOG_COLUMNS is passed to load_core_log_data as log_columns and is also joined
# with "_" to name the analytical output sub-folder.
# Single-log alternative:
# LOG_COLUMNS = ['hiresMS']  # Choose which logs to include
LOG_COLUMNS = ['hiresMS', 'CT', 'Lumin']  # Choose which logs to include
# Name of the depth column, passed to load_core_log_data as depth_column
DEPTH_COLUMN = 'SB_DEPTH_cm'

In [None]:
# Input files for Core A.
# Each log name maps to its CSV; note that the 'Lumin' log is read from the
# file tagged 'RGB'.
core_a_log_paths = {
    log_name: f'example_data/processed_data/{CORE_A}/{CORE_A}_{file_tag}_MLfilled.csv'
    for log_name, file_tag in (('hiresMS', 'hiresMS'), ('CT', 'CT'), ('Lumin', 'RGB'))
}

# Core images (RGB scan and CT scan) for Core A
core_a_rgb_img = "example_data/processed_data/{0}/{0}_RGB.tiff".format(CORE_A)
core_a_ct_img = "example_data/processed_data/{0}/{0}_CT.tiff".format(CORE_A)

In [None]:
# Load the Core A logs and picked depths through load_core_log_data.
# The call returns the log data, the measured depths, the picked depths and
# the interpreted bed names, in that order.
log_a, md_a, picked_depths_a, interpreted_bed_a = load_core_log_data(
    # Log inputs
    log_paths=core_a_log_paths,
    log_columns=LOG_COLUMNS,
    depth_column=DEPTH_COLUMN,
    normalize=True,
    # Picked datums
    picked_datum=f'example_data/picked_datum/{CORE_A}_pickeddepth.csv',
    categories=[1],
    show_bed_number=False,
    # Figure
    core_name=CORE_A,
    core_img_1=core_a_rgb_img,
    core_img_2=core_a_ct_img,
    figsize=(20, 4),
    show_fig=True,  # Display the figure
)

In [None]:
# Input files for Core B.
# Each log name maps to its CSV; note that the 'Lumin' log is read from the
# file tagged 'RGB'.
core_b_log_paths = {
    log_name: f'example_data/processed_data/{CORE_B}/{CORE_B}_{file_tag}_MLfilled.csv'
    for log_name, file_tag in (('hiresMS', 'hiresMS'), ('CT', 'CT'), ('Lumin', 'RGB'))
}
# Core images (RGB scan and CT scan) for Core B
core_b_rgb_img = "example_data/processed_data/{0}/{0}_RGB.tiff".format(CORE_B)
core_b_ct_img = "example_data/processed_data/{0}/{0}_CT.tiff".format(CORE_B)

In [None]:
# Load the Core B logs and picked depths through load_core_log_data.
# The call returns the log data, the measured depths, the picked depths and
# the interpreted bed names, in that order.
log_b, md_b, picked_depths_b, interpreted_bed_b = load_core_log_data(
    # Log inputs
    log_paths=core_b_log_paths,
    log_columns=LOG_COLUMNS,
    depth_column=DEPTH_COLUMN,
    normalize=True,
    # Picked datums
    picked_datum=f'example_data/picked_datum/{CORE_B}_pickeddepth.csv',
    categories=[1],
    show_bed_number=False,
    # Figure
    core_name=CORE_B,
    core_img_1=core_b_rgb_img,
    core_img_2=core_b_ct_img,
    figsize=(20, 4),
    show_fig=True,  # Display the figure
)

<hr>

# Usage Examples and Executions

#### Extract ages

In [None]:
# Column-name mapping handed to load_core_age_constraints.
# Keys are the field roles; values are presumably the column headers of the
# age tables (confirm against the loader).
data_columns = dict(
    age='calib810_agebp',
    pos_error='calib810_2sigma_pos',
    neg_error='calib810_2sigma_neg',
    min_depth='mindepth_cm',
    max_depth='maxdepth_cm',
    in_sequence='in_sequence',
    core='core',
    interpreted_bed='interpreted_bed',
)

# Folder holding the age data
age_base_path = 'example_data/raw_data/C14age_data'

# Age constraints of each core
age_data_a = load_core_age_constraints(
    CORE_A, age_base_path, data_columns, consider_adjacent_core=False
)
age_data_b = load_core_age_constraints(
    CORE_B, age_base_path, data_columns, consider_adjacent_core=False
)

# Uncertainty method shared by both cores: 'MonteCarlo', 'Linear', or 'Gaussian'
uncertainty_method = 'MonteCarlo'

In [None]:
# Interpolate ages at the picked depths of Core A from its age constraints
# (calculate_interpolated_ages). The exported CSV is the file that
# load_pickeddepth_ages_from_csv reads back later in this notebook.
estimated_datum_ages_a = calculate_interpolated_ages(
    # Input data
    picked_depths=picked_depths_a,                                       # depths to interpolate ages for
    age_constraints_depths=age_data_a['depths'],                         # age constraint depths
    age_constraints_ages=age_data_a['ages'],                             # age constraint ages
    age_constraints_pos_errors=age_data_a['pos_errors'],                 # positive errors
    age_constraints_neg_errors=age_data_a['neg_errors'],                 # negative errors
    age_constraints_in_sequence_flags=age_data_a['in_sequence_flags'],   # in-sequence flags
    age_constraint_source_core=age_data_a['core'],                       # source core for each constraint
    # Core boundaries
    top_bottom=True,                                                     # include top and bottom depths/ages
    top_depth=0.0,                                                       # top of core depth
    bottom_depth=md_a[-1],                                               # last measured depth of Core A (assumed to be the deepest)
    top_age=0,                                                           # default age at top of core
    top_age_pos_error=75,                                                # default positive uncertainty of top age
    top_age_neg_error=75,                                                # default negative uncertainty of top age
    # Uncertainty calculation
    uncertainty_method=uncertainty_method,                               # uncertainty calculation method: 'MonteCarlo', 'Linear', or 'Gaussian'
    n_monte_carlo=10000,                                                 # number of Monte Carlo sampling iterations
    # Visualization and output
    show_plot=True,                                                      # display plot
    core_name=CORE_A,                                                    # core name for plot title
    export_csv=True,                                                     # export results to CSV
    csv_filename=f'example_data/picked_datum/{CORE_A}_pickeddepth_ages_{uncertainty_method}.csv',  # CSV filename for results
    mute_mode=False                                                      # presumably keeps console output on (confirm in pyCoreRelator docs)
)

def _value_at(values, position):
    """Return the item at integer ``position`` of a pandas Series or a plain sequence.

    ``Series[int]`` is a label lookup, so a Series whose index is not 0..n-1
    must be read with ``.iloc``; lists and arrays are indexed directly.
    """
    if isinstance(values, pd.Series):
        return values.iloc[position]
    return values[position]


# Print the age constraint data for Core A
print("\nAge Constraints for Core A:")
# len() rather than truthiness: a pandas Series has no unambiguous truth value
if len(age_data_a['depths']) > 0:
    for i, depth_val in enumerate(age_data_a['depths']):
        # Read every field positionally so all columns are handled the same way
        age_val = _value_at(age_data_a['ages'], i)
        pos_err_val = _value_at(age_data_a['pos_errors'], i)
        neg_err_val = _value_at(age_data_a['neg_errors'], i)
        in_seq = _value_at(age_data_a['in_sequence_flags'], i)

        # Add source core and interpreted bed info if they exist
        source_core_info = f", Source Core: {_value_at(age_data_a['core'], i)}" if i < len(age_data_a['core']) else ""
        bed_info = f", Interpreted Bed: {_value_at(age_data_a['interpreted_bed'], i)}" if i < len(age_data_a['interpreted_bed']) else ""

        print(f"Depth: {depth_val:.2f} cm, Age: {age_val:.1f} years BP (+{pos_err_val:.1f} ; -{neg_err_val:.1f}), In Sequence: {in_seq}{source_core_info}{bed_info}")
else:
    print(f"No age constraints available in {CORE_A}")

# Print the interpolated ages
print(f"\nEstimated Ages for picked depths in {CORE_A}:")
for i, depth in enumerate(estimated_datum_ages_a['depths']):
    est_age = _value_at(estimated_datum_ages_a['ages'], i)
    est_pos = _value_at(estimated_datum_ages_a['pos_uncertainties'], i)
    est_neg = _value_at(estimated_datum_ages_a['neg_uncertainties'], i)
    print(f"Depth: {depth:.2f} cm, Age: {est_age:.1f} years BP (+{est_pos:.1f} ; -{est_neg:.1f})")

In [None]:
# Interpolate ages at the picked depths of Core B from its age constraints
# (calculate_interpolated_ages). The exported CSV is the file that
# load_pickeddepth_ages_from_csv reads back later in this notebook.
estimated_datum_ages_b = calculate_interpolated_ages(
    # Input data
    picked_depths=picked_depths_b,                                       # depths to interpolate ages for
    age_constraints_depths=age_data_b['depths'],                         # age constraint depths
    age_constraints_ages=age_data_b['ages'],                             # age constraint ages
    age_constraints_pos_errors=age_data_b['pos_errors'],                 # positive errors
    age_constraints_neg_errors=age_data_b['neg_errors'],                 # negative errors
    age_constraints_in_sequence_flags=age_data_b['in_sequence_flags'],   # in-sequence flags
    age_constraint_source_core=age_data_b['core'],                       # source core for each constraint
    # Core boundaries
    top_bottom=True,                                                     # include top and bottom depths/ages
    top_depth=0.0,                                                       # top of core depth
    bottom_depth=md_b[-1],                                               # last measured depth of Core B (assumed to be the deepest)
    top_age=0,                                                           # default age at top of core
    top_age_pos_error=75,                                                # default positive uncertainty of top age
    top_age_neg_error=75,                                                # default negative uncertainty of top age
    # Uncertainty calculation
    uncertainty_method=uncertainty_method,                               # uncertainty calculation method: 'MonteCarlo', 'Linear', or 'Gaussian'
    n_monte_carlo=10000,                                                 # number of Monte Carlo sampling iterations
    # Visualization and output
    show_plot=True,                                                      # display plot
    core_name=CORE_B,                                                    # core name for plot title
    export_csv=True,                                                     # export results to CSV
    csv_filename=f'example_data/picked_datum/{CORE_B}_pickeddepth_ages_{uncertainty_method}.csv',  # CSV filename for results
    mute_mode=False                                                      # presumably keeps console output on (confirm in pyCoreRelator docs)
)

def _value_at(values, position):
    """Return the item at integer ``position`` of a pandas Series or a plain sequence.

    ``Series[int]`` is a label lookup, so a Series whose index is not 0..n-1
    must be read with ``.iloc``; lists and arrays are indexed directly.
    """
    if isinstance(values, pd.Series):
        return values.iloc[position]
    return values[position]


# Print the age constraint data for Core B
print("\nAge Constraints for Core B:")
# len() rather than truthiness: a pandas Series has no unambiguous truth value
if len(age_data_b['depths']) > 0:
    for i, depth_val in enumerate(age_data_b['depths']):
        # Read every field positionally so all columns are handled the same way
        age_val = _value_at(age_data_b['ages'], i)
        pos_err_val = _value_at(age_data_b['pos_errors'], i)
        neg_err_val = _value_at(age_data_b['neg_errors'], i)
        in_seq = _value_at(age_data_b['in_sequence_flags'], i)

        # Add source core and interpreted bed info if they exist
        source_core_info = f", Source Core: {_value_at(age_data_b['core'], i)}" if i < len(age_data_b['core']) else ""
        bed_info = f", Interpreted Bed: {_value_at(age_data_b['interpreted_bed'], i)}" if i < len(age_data_b['interpreted_bed']) else ""

        print(f"Depth: {depth_val:.2f} cm, Age: {age_val:.1f} years BP (+{pos_err_val:.1f} ; -{neg_err_val:.1f}), In Sequence: {in_seq}{source_core_info}{bed_info}")
else:
    print(f"No age constraints available in {CORE_B}")

# Print the interpolated ages
print(f"\nEstimated Ages for picked depths in {CORE_B}:")
for i, depth in enumerate(estimated_datum_ages_b['depths']):
    est_age = _value_at(estimated_datum_ages_b['ages'], i)
    est_pos = _value_at(estimated_datum_ages_b['pos_uncertainties'], i)
    est_neg = _value_at(estimated_datum_ages_b['neg_uncertainties'], i)
    print(f"Depth: {depth:.2f} cm, Age: {est_age:.1f} years BP (+{est_pos:.1f} ; -{est_neg:.1f})")

In [None]:
# Shortcut: instead of re-running the age interpolation, read the previously
# exported estimated-age CSV for Core A with load_pickeddepth_ages_from_csv.
estimated_datum_ages_a = load_pickeddepth_ages_from_csv(
    pickeddepth_age_csv=f"example_data/picked_datum/{CORE_A}_pickeddepth_ages_{uncertainty_method}.csv",
)

In [None]:
# Read the previously exported estimated-age CSV for Core B
estimated_datum_ages_b = load_pickeddepth_ages_from_csv(
    pickeddepth_age_csv=f"example_data/picked_datum/{CORE_B}_pickeddepth_ages_{uncertainty_method}.csv",
)

#### Find out all segment pairs among boundaries

In [None]:
# Decide whether ages take part in the correlation and derive the tag
# (YES_NO_AGE) that is embedded in the output file names.

# Last estimated age of each core; NaN stands in when a core has no ages at all
last_age_a = float('nan') if len(estimated_datum_ages_a['ages']) == 0 else estimated_datum_ages_a['ages'][-1]
last_age_b = float('nan') if len(estimated_datum_ages_b['ages']) == 0 else estimated_datum_ages_b['ages'][-1]

# Ages are considered only when neither core ends on a NaN age
age_consideration = not any(pd.isna(last_age) for last_age in (last_age_a, last_age_b))
# age_consideration = False

restricted_age_correlation = True

if not age_consideration:
    YES_NO_AGE = 'no_age'
elif restricted_age_correlation:
    YES_NO_AGE = 'restricted_age'
else:
    YES_NO_AGE = 'loose_age'

In [None]:
# Run comprehensive DTW analysis on the two cores.
# The seven returned objects are reused by the later path-search and plotting cells.
dtw_results, valid_dtw_pairs, segments_a, segments_b, depth_boundaries_a, depth_boundaries_b, dtw_distance_matrix_full = run_comprehensive_dtw_analysis(
    # Input data
    log_a,                                                      # Core A log data
    log_b,                                                      # Core B log data
    md_a,                                                       # Core A measured depth
    md_b,                                                       # Core B measured depth
    picked_depths_a=picked_depths_a,                            # Selected depths for core A
    picked_depths_b=picked_depths_b,                            # Selected depths for core B
    core_a_name=CORE_A,                                         # Name identifier for core A
    core_b_name=CORE_B,                                         # Name identifier for core B
    # Analysis parameters
    top_bottom=True,                                            # Include top and bottom boundaries
    top_depth=0.0,                                              # Starting depth for analysis
    independent_dtw=False,                                      # Use independent DTW if True
    exclude_deadend=True,                                       # Exclude dead-end segments
    pca_for_dependent_dtw=False,                                # Use PCA for dependent DTW
    # Age constraints
    age_consideration=age_consideration,                        # Include age constraints
    ages_a=estimated_datum_ages_a,                              # Age data for core A depths
    ages_b=estimated_datum_ages_b,                              # Age data for core B depths
    restricted_age_correlation=restricted_age_correlation,      # Use strict age correlation
    all_constraint_ages_a=age_data_a['in_sequence_ages'],      # All age constraints for core A
    all_constraint_ages_b=age_data_b['in_sequence_ages'],      # All age constraints for core B
    all_constraint_depths_a=age_data_a['in_sequence_depths'],  # All depth constraints for core A
    all_constraint_depths_b=age_data_b['in_sequence_depths'],  # All depth constraints for core B
    all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'], # Positive age errors for core A
    all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'], # Positive age errors for core B
    all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'], # Negative age errors for core A
    all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'], # Negative age errors for core B
    age_constraint_a_source_cores=age_data_a['core'],          # Source cores for age constraints A
    age_constraint_b_source_cores=age_data_b['core'],          # Source cores for age constraints B
    # Visualization
    visualize_pairs=True,                                       # Create pair visualizations
    visualize_segment_labels=False,                             # Show segment labels in plots
    create_dtw_matrix=True,                                     # Generate DTW distance matrix
    dtwmatrix_output_filename=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/SegmentPair_DTW_matrix_{YES_NO_AGE}.png', # Matrix plot filename
    creategif=True,                                             # Create animated GIF
    gif_output_filename=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/SegmentPair_DTW_animation_{YES_NO_AGE}.gif', # GIF filename
    max_frames=50,                                              # Maximum frames in animation
    color_interval_size=5,                                      # Color coding interval size
    keep_frames=False,                                          # Save individual frames
    # Debug and processing
    debug=False                                                 # Enable debug output
)

## Mathematical Estimation of the Number of Possible Solutions (Total Complete Paths)

### 1. Segment Generation Formula

Given $n$ units for each core, the **number of valid segments per core** follows:

$$S = 2n + 1$$

### 2. Valid Segment Pairs

**Theoretical maximum number of segment pairs**: $P_{\max} = S_A \times S_B$  

However, because the 'optimal' search filters pairs using the shortest-DTW-path search method (which minimizes pinch-outs), not all segment pairs $(S_A \times S_B)$ are valid, so $P_{\mathrm{valid}} < P_{\max}$.

Based on 97 Cascadia core pairs, we found that the typical retention rate is ~70-75%: that fraction of the theoretical maximum pairs remains valid after DTW filtering.

$$P_{\mathrm{valid}} \approx 0.745 \times (S_A \times S_B)$$

### 3. Solution Count Formula

The relationship between valid pairs and total solutions follows a **quadratic-in-log-space** pattern:

$$C \approx e^{4.395 (\ln P_{\mathrm{valid}})^2 - 43.179 \ln P_{\mathrm{valid}} + 116.872}$$

This empirical formula was fitted to 97 core pair analyses with $R^2 > 0.999$.

### 4. Practical Examples

Based on actual data from 97 core pair analyses:

| Units per core ($n$) | Valid segments per core ($S$) | Valid pairs ($P_{\mathrm{valid}}$) | Est. solutions ($C$) |
|----------------|----------------|-------------|---------------------|
| 6-8 | 13-17 | 150-700 | $10^{4}$ - $10^{8}$ |
| 11-14 | 23-29 | 250-1,100 | $10^{5}$ - $10^{13}$ |
| 18-22 | 37-45 | 1,100-1,900 | $10^{13}$ - $10^{18}$ |
| 24-28 | 49-57 | 1,400-2,700 | $10^{15}$ - $10^{22}$ |
| 30-31 | 61-63 | 2,400-2,900 | $10^{20}$ - $10^{24}$ |

**Key finding**: The quadratic-in-log-space relationship means solution count grows rapidly with problem size. Small increases in the number of units per core lead to dramatic increases in computational complexity.

In [None]:
# Run diagnose_chain_breaks on the valid segment pairs before searching for complete paths
diagnostic_result = diagnose_chain_breaks(
    # Input data
    valid_dtw_pairs,                                        # Valid DTW segment pairs from analysis
    segments_a,                                             # Segment definitions for core A
    segments_b,                                             # Segment definitions for core B
    depth_boundaries_a,                                     # Depth boundaries for core A segments
    depth_boundaries_b                                      # Depth boundaries for core B segments
)

<hr>

## Search complete DTW paths

In [None]:
# Path-search strategy switch; SEARCH_METHOD is the tag embedded in output file names
shortest_path_search = True
SEARCH_METHOD = 'optimal' if shortest_path_search else 'random'

In [None]:
# Search for complete DTW paths and write the resulting mappings to a CSV,
# which the later visualization and ranking cells read back from the same path.
complete_path_search_result = find_complete_core_paths(
    # Input data
    valid_dtw_pairs,                                                                # Valid DTW segment pairs from analysis
    segments_a,                                                                     # Segment definitions for core A
    segments_b,                                                                     # Segment definitions for core B
    log_a,                                                                          # Log data for core A
    log_b,                                                                          # Log data for core B
    depth_boundaries_a,                                                             # Depth boundaries for core A segments
    depth_boundaries_b,                                                             # Depth boundaries for core B segments
    dtw_results,                                                                    # DTW analysis results
    dtw_distance_matrix_full,                                                       # Full DTW distance matrix
    # Output settings
    # Single-quoted outer f-string: reusing the same quote type inside an
    # f-string is a SyntaxError before Python 3.12.
    output_csv=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/mappings_{YES_NO_AGE}_{SEARCH_METHOD}.csv',     # Output CSV filename for mappings
    # Search parameters
    start_from_top_only=True,                                                       # Start path search from top segments only
    shortest_path_search=shortest_path_search,                                      # Use shortest path search algorithm
    shortest_path_level=2,                                                          # Path level preference (higher = more segments)
    max_search_path=10000,                                                          # Maximum paths per segment pair to avoid memory issues
    # Processing settings
    batch_size=1000,                                                                # Processing batch size
    n_jobs=-1,                                                                      # Number of CPU cores (-1 uses all available)
    debug=False,                                                                    # Enable debug output
    pca_for_dependent_dtw=False                                                     # Use PCA for dependent DTW (same value as in the DTW analysis call)
)

In [None]:
%matplotlib inline

# Output GIF files for the correlation view and the DTW-matrix view
correlation_gif_name = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/CombinedDTW_correlation_mappings_{YES_NO_AGE}_{SEARCH_METHOD}.gif'
matrix_gif_name = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/CombinedDTW_matrix_mappings_{YES_NO_AGE}_{SEARCH_METHOD}.gif'

# Mappings CSV: the same path that find_complete_core_paths writes via output_csv.
# Single-quoted outer f-string: reusing the same quote type inside an f-string
# is a SyntaxError before Python 3.12.
sequential_mappings_csv = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/mappings_{YES_NO_AGE}_{SEARCH_METHOD}.csv'

# Visualize a representative subset of the mappings
visualize_dtw_results_from_csv(
    # Input data
    sequential_mappings_csv,                                                        # CSV file with sequential mappings
    log_a,                                                                          # Log data for core A
    log_b,                                                                          # Log data for core B
    md_a,                                                                           # Measured depth data for core A
    md_b,                                                                           # Measured depth data for core B
    dtw_results,                                                                    # DTW analysis results
    valid_dtw_pairs,                                                                # Valid DTW segment pairs
    segments_a,                                                                     # Segment definitions for core A
    segments_b,                                                                     # Segment definitions for core B
    depth_boundaries_a,                                                             # Depth boundaries for core A segments
    depth_boundaries_b,                                                             # Depth boundaries for core B segments
    dtw_distance_matrix_full,                                                       # Full DTW distance matrix
    # Core identifiers
    core_a_name=CORE_A,                                                             # Name identifier for core A
    core_b_name=CORE_B,                                                             # Name identifier for core B
    # Visualization settings
    color_interval_size=10,                                                         # Color interval size for visualization
    debug=False,                                                                    # Enable debug output
    visualize_pairs=False,                                                          # Show DTW pairs in visualization
    visualize_segment_labels=False,                                                 # Show segment labels in visualization
    mark_depths=False,                                                              # Mark depth points in visualization
    # GIF output settings
    creategif=True,                                                                 # Create animated GIF output
    correlation_gif_output_filename=correlation_gif_name,                           # Output filename for correlation GIF
    matrix_gif_output_filename=matrix_gif_name,                                     # Output filename for matrix GIF
    max_frames=50,                                                                  # Maximum number of frames in GIF
    keep_frames=False,                                                              # Keep individual frames after GIF creation
    # Age constraints
    mark_ages=age_consideration,                                                    # Mark age constraints in visualization
    ages_a=estimated_datum_ages_a,                                                  # Age data for core A
    ages_b=estimated_datum_ages_b,                                                  # Age data for core B
    all_constraint_depths_a=age_data_a['in_sequence_depths'],                       # Depth constraints for core A
    all_constraint_depths_b=age_data_b['in_sequence_depths'],                       # Depth constraints for core B
    all_constraint_ages_a=age_data_a['in_sequence_ages'],                           # Age constraints for core A
    all_constraint_ages_b=age_data_b['in_sequence_ages'],                           # Age constraints for core B
    all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'],               # Positive age errors for core A
    all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'],               # Positive age errors for core B
    all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'],               # Negative age errors for core A
    all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'],               # Negative age errors for core B
    age_constraint_a_source_cores=age_data_a['core'],                               # Source cores for age constraints A
    age_constraint_b_source_cores=age_data_b['core'],                               # Source cores for age constraints B
    # Interpreted bed correlation
    interpreted_bed_a=interpreted_bed_a,                                            # Interpreted bed name for Core A
    interpreted_bed_b=interpreted_bed_b                                             # Interpreted bed name for Core B
)

# Display the GIFs
print("DTW Correlation Mappings GIF:")
display(IPImage(correlation_gif_name))

print("DTW Matrix Mappings GIF:")
display(IPImage(matrix_gif_name))

In [None]:
# Path of the saved mappings CSV (input) and of the colored DTW-matrix figure (output).
# Nothing is read here; the files are opened by plot_dtw_matrix_with_paths.
sequential_mappings_csv = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/mappings_{YES_NO_AGE}_{SEARCH_METHOD}.csv'
output_matrix_png_filename = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/CombinedDTW_matrix_mappings_colored_{YES_NO_AGE}_{SEARCH_METHOD}.png'

%matplotlib inline

# Plot the DTW matrix with the mapped paths; the return value is discarded.
# Age constraints are passed only when age_consideration is True.
_ = plot_dtw_matrix_with_paths(
    # Input data
    dtw_distance_matrix_full,                                                       # Full DTW distance matrix
    sequential_mappings_csv=sequential_mappings_csv,                                # CSV file with sequential mappings
    # Core identifiers
    core_a_name=CORE_A,                                                             # Name identifier for core A
    core_b_name=CORE_B,                                                             # Name identifier for core B
    md_a=md_a,                                                                      # Measured depth data for core A
    md_b=md_b,                                                                      # Measured depth data for core B
    # Visualization settings
    mode='all_paths_colored',                                                       # Visualization mode
    color_metric='norm_dtw',                                                        # Metric used for coloring paths
                                                                                    # Available options: 'corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag', 'dtw_warp_eff', 'perc_age_overlap', None (uses mapping_id)
    output_filename=output_matrix_png_filename,                                     # Output filename for the plot
    # Age constraint data
    age_constraint_a_depths=age_data_a['in_sequence_depths'] if age_consideration else None,  # Depth constraints for core A
    age_constraint_a_ages=age_data_a['in_sequence_ages'] if age_consideration else None,      # Age constraints for core A
    age_constraint_a_source_cores=age_data_a['core'] if age_consideration else None,          # Source cores for age constraints A
    age_constraint_b_depths=age_data_b['in_sequence_depths'] if age_consideration else None,  # Depth constraints for core B
    age_constraint_b_ages=age_data_b['in_sequence_ages'] if age_consideration else None,      # Age constraints for core B
    age_constraint_b_source_cores=age_data_b['core'] if age_consideration else None,          # Source cores for age constraints B
    # Performance settings
    n_jobs=-1                                                                       # Number of parallel jobs (-1 means use all processors)
)

In [None]:
### Find the best mapping ###
# Path of the mappings CSV that find_best_mappings reads
sequential_mappings_csv = f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/mappings_{YES_NO_AGE}_{SEARCH_METHOD}.csv'

# Use custom metric weights: only 'corr_coef' and 'norm_dtw' carry weight (1.0);
# every other metric is weighted 0.0
custom_weights = {
    'corr_coef': 1.0,
    'perc_diag': 0.0,
    'norm_dtw': 1.0,
    'dtw_ratio': 0.0,
    'perc_age_overlap': 0.0,
    'dtw_warp_eff': 0.0
}

# Alternative usage: find the best-scored mappings only, without the
# interpreted-bed inputs
# top_mapping_ids, top_mapping_pairs, top_mappings_df = find_best_mappings(
#     csv_file_path=sequential_mappings_csv,
#     top_n=10,
#     filter_shortest_dtw=True,
#     metric_weight=custom_weights
# )

### Find the best-scored mappings that comply with the interpreted bed correlation
top_mapping_ids, top_mapping_pairs, top_mappings_df = find_best_mappings(
    csv_file_path=sequential_mappings_csv,
    metric_weight=custom_weights,
    picked_depths_a_cat1=picked_depths_a,
    picked_depths_b_cat1=picked_depths_b,
    interpreted_bed_a=interpreted_bed_a,
    interpreted_bed_b=interpreted_bed_b,
    valid_dtw_pairs=valid_dtw_pairs,
    segments_a=segments_a,
    segments_b=segments_b
)

In [None]:
%matplotlib inline

# Plot-mode switch: True draws the individual segment pairs, False the full path
visualize_pairs = False

# The remaining flags follow from the switch; visualize_type also tags the output file names
visualize_type = 'pairs' if visualize_pairs else 'fullpath'
visualize_segment_labels = False
mark_depths = True if visualize_pairs else False

# Visualize the combined segments
_, _, _, _ = visualize_combined_segments(
    # Input data
    log_a=log_a,                                # Core A log data
    log_b=log_b,                                # Core B log data
    md_a=md_a,                                  # Core A measured depths
    md_b=md_b,                                  # Core B measured depths
    dtw_results=dtw_results,                    # DTW alignment results
    valid_dtw_pairs=valid_dtw_pairs,            # Valid DTW pairs
    segments_a=segments_a,                      # Core A segments
    segments_b=segments_b,                      # Core B segments
    depth_boundaries_a=depth_boundaries_a,      # Core A depth boundaries
    depth_boundaries_b=depth_boundaries_b,      # Core B depth boundaries
    dtw_distance_matrix_full=dtw_distance_matrix_full,       # Full DTW distance matrix
    segment_pairs_to_combine=top_mapping_pairs[0],           # Valid pairs to combine
    # Visualization options
    color_interval_size=10,                     # Size of color intervals
    visualize_pairs=visualize_pairs,            # Whether to visualize pairs (True/False)
    visualize_segment_labels=visualize_segment_labels, # Whether to show segment labels (True/False)
    mark_depths=mark_depths,                    # Whether to mark depths (True/False)
    # Output paths
    correlation_save_path=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/CombinedDTW_correlation_{YES_NO_AGE}_{SEARCH_METHOD}_{top_mapping_ids[0]}_{visualize_type}.png',
    matrix_save_path=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/CombinedDTW_matrix_{YES_NO_AGE}_{SEARCH_METHOD}_{top_mapping_ids[0]}_{visualize_type}.png',
    # Age constraint parameters
    mark_ages=age_consideration,                # Whether to mark ages (True/False)
    ages_a=estimated_datum_ages_a if age_consideration else None, # Core A ages
    ages_b=estimated_datum_ages_b if age_consideration else None, # Core B ages
    all_constraint_ages_a=age_data_a['in_sequence_ages'] if age_consideration else None, # Core A constraint ages
    all_constraint_ages_b=age_data_b['in_sequence_ages'] if age_consideration else None, # Core B constraint ages
    all_constraint_depths_a=age_data_a['in_sequence_depths'] if age_consideration else None, # Core A constraint depths
    all_constraint_depths_b=age_data_b['in_sequence_depths'] if age_consideration else None, # Core B constraint depths
    all_constraint_pos_errors_a=age_data_a['in_sequence_pos_errors'] if age_consideration else None, # Core A positive errors
    all_constraint_pos_errors_b=age_data_b['in_sequence_pos_errors'] if age_consideration else None, # Core B positive errors
    all_constraint_neg_errors_a=age_data_a['in_sequence_neg_errors'] if age_consideration else None, # Core A negative errors
    all_constraint_neg_errors_b=age_data_b['in_sequence_neg_errors'] if age_consideration else None, # Core B negative errors
    age_constraint_a_source_cores=age_data_a['core'] if age_consideration else None, # Core A source cores
    age_constraint_b_source_cores=age_data_b['core'] if age_consideration else None, # Core B source cores
    # Core identifiers
    core_a_name=CORE_A,                         # Name of Core A
    core_b_name=CORE_B,                         # Name of Core B
    # Interpreted bed correlation
    interpreted_bed_a=interpreted_bed_a,         # Interpreted bed name for Core A
    interpreted_bed_b=interpreted_bed_b          # Interpreted bed name for Core B
)

In [None]:
# Quality metrics whose distributions should be plotted.
# Available names: 'corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag',
# 'dtw_warp_eff', 'perc_age_overlap'.
# Either a single string or a list of strings may be given here.
targeted_quality_index = ['corr_coef', 'norm_dtw']

# Wrap a single metric name in a list so the loop below always sees a list
targeted_quality_index = (
    [targeted_quality_index]
    if isinstance(targeted_quality_index, str)
    else targeted_quality_index
)

# One distribution plot per requested metric
for quality_idx in targeted_quality_index:
    plot_correlation_distribution(
        # --- Which mapping and which metric ---
        csv_file=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/mappings_{YES_NO_AGE}_{SEARCH_METHOD}.csv',  # Mappings CSV to read
        target_mapping_id=top_mapping_ids[0],   # Mapping ID to analyze
        quality_index=quality_idx,              # Metric plotted in this iteration
        # --- Core identifiers ---
        core_a_name=CORE_A,
        core_b_name=CORE_B,
        # --- Histogram and fitted distribution ---
        bin_width=None,                         # None lets the bin width be chosen automatically
        pdf_method='normal',                    # One of 'KDE', 'skew-normal', 'normal'
        kde_bandwidth=0.05,                     # Bandwidth for the KDE method
        # --- Output ---
        save_png=True,                          # Save the plot as a PNG
        # The 'corr_coef' metric is written under the name "r-values";
        # every other metric keeps its own name in the file name.
        png_filename=f'example_data/analytical_outputs/{CORE_A}_{CORE_B}/{"_".join(LOG_COLUMNS)}/{"r-values" if quality_idx == "corr_coef" else quality_idx}_distribution_{YES_NO_AGE}_{SEARCH_METHOD}.png',
        mute_mode=False,                        # False keeps print statements enabled
    )