In [None]:
# Cell 1: Import Required Packages
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Cell 2: Extract Core Lengths and Load Segment Pool
from pyCoreRelator import load_segment_pool

# Define directory paths
# Project root that holds the `Cascadia_core_data` and `pyCoreRelator` folders
# referenced below. Set the PYCORERELATOR_PROJECT_DIR environment variable to
# run the notebook on another machine; when it is unset, the original local
# path is used unchanged.
mother_dir = os.environ.get(
    'PYCORERELATOR_PROJECT_DIR',
    '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites'
)

# Function to extract core length from depth data
def get_core_length(core_name, depth_column='SB_DEPTH_cm', log_column=None):
    """Return the maximum value of the depth column in a core's ML-filled log CSV.

    The file that is read is
    ``{mother_dir}/Cascadia_core_data/OSU_dataset/_compiled_logs/{core_name}/ML_filled/{core_name}_{log_column}_MLfilled.csv``,
    where ``mother_dir`` is the notebook-level project directory.

    Parameters
    ----------
    core_name : str
        Core identifier used for both the folder and the file name.
    depth_column : str, default 'SB_DEPTH_cm'
        Name of the CSV column whose maximum is returned.
    log_column : str or None, default None
        Log-type tag in the file name. ``None`` falls back to ``'hiresMS'``
        (the most common file); previously ``None`` was formatted into the
        path literally and could never match a file.

    Returns
    -------
    The maximum of ``depth_column``, or ``None`` (after printing a warning)
    when the file cannot be read or the column is missing.
    """
    # Imported locally: the notebook's import cell does not load pandas, and the
    # old bare `except` hid the resulting NameError, so None was always returned.
    import pandas as pd

    if log_column is None:
        log_column = 'hiresMS'

    depth_file = f'{mother_dir}/Cascadia_core_data/OSU_dataset/_compiled_logs/{core_name}/ML_filled/{core_name}_{log_column}_MLfilled.csv'
    try:
        df = pd.read_csv(depth_file)
        return df[depth_column].max()
    except (OSError, KeyError, ValueError) as exc:
        # OSError: missing/unreadable file; KeyError: missing depth column;
        # ValueError: pandas' empty-data and parser errors derive from it.
        print(f"Warning: Could not read depth from {depth_file} ({type(exc).__name__}: {exc})")
        return None

# ---------------------------------------------------------------------------
# Segment-pool configuration
# ---------------------------------------------------------------------------

# Log types used for the segment pool. Other combinations that can be swapped
# in: ['hiresMS'], ['CT'], ['Lumin'], ['hiresMS', 'CT'], ['hiresMS', 'Lumin'],
# ['CT', 'Lumin'].
LOG_COLUMNS = ['hiresMS', 'CT', 'Lumin']

# Depth column passed to the loader
DEPTH_COLUMN = 'SB_DEPTH_cm'

# DTW method switch for multidimensional DTW; handed to the DTW calls later on
pca_for_dependent_dtw = False

# Cores that contribute segments to the pool
SEGMENT_POOL_CORES = ["M9907-11PC", "M9907-23PC", "M9907-25PC"]


def _core_log_csvs(core_name):
    """Map each log type to the ML-filled CSV of one core (Lumin uses the RGB file)."""
    return {
        'hiresMS': f'{mother_dir}/Cascadia_core_data/OSU_dataset/_compiled_logs/{core_name}/ML_filled/{core_name}_hiresMS_MLfilled.csv',
        'CT': f'{mother_dir}/Cascadia_core_data/OSU_dataset/_compiled_logs/{core_name}/ML_filled/{core_name}_CT_MLfilled.csv',
        'Lumin': f'{mother_dir}/Cascadia_core_data/OSU_dataset/_compiled_logs/{core_name}/ML_filled/{core_name}_RGB_MLfilled.csv',
    }


def _picked_depth_csv(core_name):
    """Return the picked-depth CSV path of one core."""
    return f'{mother_dir}/pyCoreRelator/pickeddepth/{core_name}_pickeddepth.csv'


# Per-core log file paths, keyed by core name and then by log type
CORE_LOG_PATHS = {name: _core_log_csvs(name) for name in SEGMENT_POOL_CORES}

# Alternative column names supplied to the loader for each log type
COLUMN_ALTERNATIVES = {
    'hiresMS': ['MS'],
    'CT': ['CT_value'],
    'Lumin': ['luminance', 'Luminance'],
}

# Per-core picked-depth file paths
PICKED_DEPTH_PATHS = {name: _picked_depth_csv(name) for name in SEGMENT_POOL_CORES}

# Load the segment pool with pyCoreRelator; the third return value is not used
seg_logs, seg_depths, _ = load_segment_pool(
    core_names=SEGMENT_POOL_CORES,
    core_log_paths=CORE_LOG_PATHS,
    picked_depth_paths=PICKED_DEPTH_PATHS,
    log_column_names=LOG_COLUMNS,
    depth_column=DEPTH_COLUMN,
    alternative_column_names=COLUMN_ALTERNATIVES,
    boundary_category=1,
    neglect_topbottom=True,
)


In [None]:
# Cell 3: Plot All Turbidite Segments from Pool
from pyCoreRelator import plot_segment_pool

# Display options for the full segment pool (nothing is written to disk)
pool_plot_settings = {
    'segment_logs': seg_logs,
    'segment_depths': seg_depths,
    'log_column_names': LOG_COLUMNS,
    'n_cols': 10,
    'figsize_per_row': 3,
    'plot_segments': True,
    'save_plot': False,
    'plot_filename': None,
}

# Draw the pool; both return values are discarded
_, _ = plot_segment_pool(**pool_plot_settings)

In [None]:
# Cell 3.5: Remove unwanted segments from Pool
from pyCoreRelator import modify_segment_pool

# Segment numbers to drop from the pool: 18 through 26, plus 50 and 51
exclude_segs = [*range(18, 27), 50, 51]

# Build the reduced pool without those segments
mod_seg_logs, mod_seg_depths = modify_segment_pool(
    seg_logs,
    seg_depths,
    remove_list=exclude_segs
)

# Display options for the reduced pool (nothing is written to disk)
modified_pool_plot_settings = {
    'segment_logs': mod_seg_logs,
    'segment_depths': mod_seg_depths,
    'log_column_names': LOG_COLUMNS,
    'n_cols': 10,
    'figsize_per_row': 3,
    'plot_segments': True,
    'save_plot': False,
    'plot_filename': None,
}

# Draw the reduced pool; both return values are discarded
_, _ = plot_segment_pool(**modified_pool_plot_settings)

In [None]:
# Cell 4: Create and Plot Synthetic Core Pair
from pyCoreRelator import create_synthetic_log, plot_synthetic_log

# Settings shared by both synthetic cores
synthetic_core_settings = dict(
    target_thickness=300,
    segment_logs=mod_seg_logs,
    segment_depths=mod_seg_depths,
    exclude_inds=None,
    repetition=False,          # If True: allow reselecting the same layer from the pool
)
synthetic_plot_settings = dict(
    log_column_names=LOG_COLUMNS,
    save_plot=False,
    plot_filename=None,
)

# --- Synthetic core A: generate, then plot ---
syn_log_a, syn_md_a, syn_pickeddepth_a, inds_a = create_synthetic_log(**synthetic_core_settings)

# Keep only the first element (the depth) of each picked-depth entry for plotting
syn_depth_a = [picked[0] for picked in syn_pickeddepth_a]

plot_synthetic_log(
    synthetic_log=syn_log_a,
    synthetic_md=syn_md_a,
    synthetic_picked_depths=syn_depth_a,
    title=f'Synthetic Core A\n({len(inds_a)} layers)',
    **synthetic_plot_settings
)

# --- Synthetic core B: generate, then plot ---
syn_log_b, syn_md_b, syn_pickeddepth_b, inds_b = create_synthetic_log(**synthetic_core_settings)

# Keep only the first element (the depth) of each picked-depth entry for plotting
syn_depth_b = [picked[0] for picked in syn_pickeddepth_b]

plot_synthetic_log(
    synthetic_log=syn_log_b,
    synthetic_md=syn_md_b,
    synthetic_picked_depths=syn_depth_b,
    title=f'Synthetic Core B\n({len(inds_b)} layers)',
    **synthetic_plot_settings
)

In [None]:
# Cell 5: DTW Analysis on Synthetic Pair
from pyCoreRelator import run_comprehensive_dtw_analysis, find_complete_core_paths

# Temporary CSV that receives the metrics of this synthetic pair.
# The log tag is joined outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
log_tag = "_".join(LOG_COLUMNS)
temp_metrics_csv = f"example_outputs/temp_synthetic_{log_tag}_core_pair_metrics.csv"

# Run DTW analysis
dtw_results, valid_dtw_pairs, segments_a, segments_b, _, _, dtw_distance_matrix_full = run_comprehensive_dtw_analysis(
    syn_log_a, syn_log_b, syn_md_a, syn_md_b,
    picked_depths_a = syn_depth_a,
    picked_depths_b = syn_depth_b,
    independent_dtw = False,
    pca_for_dependent_dtw = pca_for_dependent_dtw,
    top_bottom = False,
    mute_mode = False
)

# Find complete core paths and extract r-values; the metrics are written to
# `temp_metrics_csv` and the return value is discarded
_ = find_complete_core_paths(
    valid_dtw_pairs,
    segments_a, 
    segments_b, 
    syn_log_a, 
    syn_log_b,
    syn_depth_a, 
    syn_depth_b,
    dtw_results,
    dtw_distance_matrix_full,
    output_csv=temp_metrics_csv,
    output_metric_only=True,
    shortest_path_search=True,
    shortest_path_level=2,
    max_search_path=100000,
    mute_mode=False,
    pca_for_dependent_dtw=pca_for_dependent_dtw
)

In [None]:
# Cell 6: Plot R-Values Distribution from Synthetic Pair
from pyCoreRelator import plot_correlation_distribution

# Temporary metrics CSV produced for the synthetic pair. The path is built once,
# with the join outside the f-string: nesting double quotes inside a
# double-quoted f-string (as the old cleanup line did) is a SyntaxError on
# Python versions before 3.12.
log_tag = "_".join(LOG_COLUMNS)
temp_metrics_csv = f'example_outputs/temp_synthetic_{log_tag}_core_pair_metrics.csv'

# Plot the distribution of each quality index in turn
# available metrics: corr_coef, norm_dtw, dtw_ratio, perc_diag, dtw_warp_eff, perc_age_overlap
for metric_name in ('corr_coef', 'norm_dtw'):
    _, _, _ = plot_correlation_distribution(
        csv_file=temp_metrics_csv,
        quality_index=metric_name,
        save_png=False,
        pdf_method='normal',         # 'KDE', 'skew-normal', 'normal'
        kde_bandwidth=0.05,
        mute_mode=False
    )

# Remove the temporary CSV file once both distributions have been plotted
if os.path.exists(temp_metrics_csv):
    os.remove(temp_metrics_csv)

In [None]:
# Cell 8: Run Multiple Iterations for Synthetic Logs R-Value Findings
from pyCoreRelator import synthetic_correlation_quality

# Settings for the repeated synthetic-pair experiment
synthetic_run_settings = dict(
    # Quality indices to iterate through
    # (norm_dtw, dtw_ratio, perc_diag, corr_coef, dtw_warp_eff, perc_age_overlap)
    quality_indices=['corr_coef', 'norm_dtw'],
    number_of_iterations=100,                     # Number of iterations to run
    core_a_length=600,
    core_b_length=600,
    # True: allow reselecting turbidite segments; False: each segment can only be selected once
    repetition=False,
    pca_for_dependent_dtw=pca_for_dependent_dtw,
    output_csv_dir='example_outputs',             # Directory for the output CSV files (optional)
    mute_mode=True,
)

# Run the synthetic correlation quality analysis on the modified segment pool
synthetic_correlation_quality(
    mod_seg_logs=mod_seg_logs,
    mod_seg_depths=mod_seg_depths,
    log_column_names=LOG_COLUMNS,
    **synthetic_run_settings
)

In [None]:
# Cell 9: Plot all distribution curves for each quality index
from pyCoreRelator import plot_synthetic_correlation_quality

# Quality indices to plot. Full set of options:
# ['corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag', 'dtw_warp_eff', 'perc_age_overlap']
quality_indices = ['corr_coef', 'norm_dtw']

# Path templates: the doubled braces leave a literal "{quality_index}" in each
# string (presumably filled in per index by the plotting function - TODO confirm)
individual_pdf_csv = f'example_outputs/synthetic_PDFs_{"_".join(LOG_COLUMNS)}_{{quality_index}}.csv'
individual_pdf_png = f'example_outputs/every_synthetic_iterations_{"_".join(LOG_COLUMNS)}_{{quality_index}}.png'

# Overlay the individual PDF curve of every iteration and save the figure
plot_synthetic_correlation_quality(
    input_csv=individual_pdf_csv,
    quality_indices=quality_indices,
    bin_width=None,                  # If not specified, corr_coef=0.025, norm_dtw=0.0025
    plot_individual_pdf=True,        # True: overlay individual PDFs; False: combined distribution
    save_plot=True,
    plot_filename=individual_pdf_png  # Output file template, used because save_plot=True
)

In [None]:
# Cell 10: Combine all binned data and recalculate distribution for each quality index
from pyCoreRelator import plot_synthetic_correlation_quality

# Quality indices to iterate through (matching Cell 8). Full set of options:
# ['corr_coef', 'norm_dtw', 'dtw_ratio', 'perc_diag', 'dtw_warp_eff', 'perc_age_overlap']
quality_indices = ['corr_coef', 'norm_dtw']

# Path templates: the doubled braces leave a literal "{quality_index}" in each
# string (presumably filled in per index by the plotting function - TODO confirm)
combined_pdf_csv = f'example_outputs/synthetic_PDFs_{"_".join(LOG_COLUMNS)}_{{quality_index}}.csv'
combined_pdf_png = f'example_outputs/combined_synthetic_distribution_{"_".join(LOG_COLUMNS)}_{{quality_index}}.png'

# Plot the combined distribution across all iterations and save the figure
plot_synthetic_correlation_quality(
    input_csv=combined_pdf_csv,
    quality_indices=quality_indices,
    bin_width=None,                  # If not specified, corr_coef=0.025, norm_dtw=0.0025
    plot_individual_pdf=False,       # False: combined distribution; True: overlay individual PDFs
    save_plot=True,
    plot_filename=combined_pdf_png   # Output file template, used because save_plot=True
)