# Manual core depth picking

In [None]:
# Data manipulation and analysis
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt

# Image and file handling
import os

# Utilities
import warnings
warnings.filterwarnings('ignore')

from pyCoreRelator import plot_core_data, pick_stratigraphic_levels


### Define the core to be analyzed

In [None]:
# Select the core to analyze: keep exactly one CORE_NAME assignment active and
# leave the others commented out. CORE_NAME is interpolated into the log, image
# and picked-depth file paths as well as the plot titles in the cells below.
# CORE_NAME = "M9907-01PC"
# CORE_NAME = "M9907-02TC"
# CORE_NAME = "M9907-03PC"
# CORE_NAME = "M9907-05TC"
# CORE_NAME = "M9907-06PC"
# CORE_NAME = "M9907-07PC"
# CORE_NAME = "M9907-07TC"
# CORE_NAME = "M9907-08PC"
# CORE_NAME = "M9907-09PC"
# CORE_NAME = "M9907-09TC"
# CORE_NAME = "M9907-10PC"
# CORE_NAME = "M9907-11PC"
# CORE_NAME = "M9907-12PC"
# CORE_NAME = "M9907-13PC"
# CORE_NAME = "M9907-14PC"
# CORE_NAME = "M9907-14TC"
# CORE_NAME = "M9907-15PC"
# CORE_NAME = "M9907-16PC"
# CORE_NAME = "M9907-17PC"
CORE_NAME = "M9907-19PC"

# CORE_NAME = "M9907-22PC"
# CORE_NAME = "M9907-23PC"
# CORE_NAME = "M9907-25PC"
# CORE_NAME = "M9907-30PC"
# CORE_NAME = "M9907-31PC"
# CORE_NAME = "RR0207-56PC"

#### Load core data and images

In [None]:
# Logs to pull from the dataset, and the depth column every log file shares
# LOG_COLUMNS = ['hiresMS']  # Choose which logs to include
LOG_COLUMNS = ['hiresMS', 'CT', 'Lumin', 'Den_gm/cc', 'MS']  # Choose which logs to include
DEPTH_COLUMN = 'SB_DEPTH_cm'

# Root directory of the dataset
mother_dir = '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/'

# Folder holding the compiled logs and images of the selected core
core_dir = f'{mother_dir}_compiled_logs/{CORE_NAME}'

# Which ML-filled CSV each log is read from (several logs share one file)
log_source_tags = {
    'hiresMS': 'hiresMS',
    'CT': 'CT',
    'Lumin': 'RGB',
    'R': 'RGB',
    'G': 'RGB',
    'B': 'RGB',
    'Den_gm/cc': 'MST',
    'MS': 'MST',
}

# Full CSV path for every log, built from the single shared file-name pattern
core_log_paths = {
    log: f'{core_dir}/ML_filled/{CORE_NAME}_{tag}_MLfilled.csv'
    for log, tag in log_source_tags.items()
}

# Core images live next to the ML_filled folder
rgb_img_path = f"{core_dir}/{CORE_NAME}_RGB.tiff"
ct_img_path = f"{core_dir}/{CORE_NAME}_CT.tiff"

# Alternative column names to try when a log's own name is not a column in its CSV
column_alternatives = {
    'hiresMS': ['hiresMS'],
    'CT': ['CT_value'],
    'R': ['R', 'red', 'Red'],
    'G': ['G', 'green', 'Green'],
    'B': ['B', 'blue', 'Blue'],
    'Lumin': ['luminance', 'Luminance'],
    'Den_gm/cc': ['Density', 'density'],
    'MS': ['MS'],
}

# Read each core image when its file is present; a missing image stays None
# and is reported so the plotting cells can leave it out.
rgb_img, ct_img = None, None

if not os.path.exists(rgb_img_path):
    print(f"RGB image not found: {rgb_img_path}")
else:
    rgb_img = plt.imread(rgb_img_path)

if not os.path.exists(ct_img_path):
    print(f"CT image not found: {ct_img_path}")
else:
    ct_img = plt.imread(ct_img_path)

# Load log data from separate files.
# Each requested log ends up in `dfs` as a two-column frame (depth + value, with
# the value column renamed to the log name). Logs that cannot be loaded are
# reported and skipped so the remaining logs can still be used.
dfs = {}
csv_cache = {}  # several logs share one CSV file; read each file only once
for log in LOG_COLUMNS:
    try:
        log_path = core_log_paths[log]
        if log_path not in csv_cache:
            csv_cache[log_path] = pd.read_csv(log_path)
        df = csv_cache[log_path]
    except Exception as e:
        # Best effort: an unknown log or a missing/unreadable file must not stop the others
        print(f"Error loading {log}: {e}")
        continue

    # Check if DEPTH_COLUMN exists in the dataframe
    if DEPTH_COLUMN not in df.columns:
        print(f"Skipping {log}: {DEPTH_COLUMN} column not found")
        continue

    # Prefer the log's own name, then fall back to the known alternative names
    candidates = [log, *column_alternatives.get(log, [])]
    col_name = next((name for name in candidates if name in df.columns), None)
    if col_name is None:
        # Say exactly what was searched for instead of surfacing a bare KeyError
        print(f"Skipping {log}: none of the columns {candidates} found")
        continue

    dfs[log] = df[[DEPTH_COLUMN, col_name]].rename(columns={col_name: log})

# Merge dataframes on depth column
if dfs:
    # Get list of successfully loaded logs
    available_logs = list(dfs.keys())
    print(f"Successfully loaded logs: {available_logs}")
    
    # Start with the first available log
    merged_df = dfs[available_logs[0]]
    
    # Merge with remaining available logs
    for log in available_logs[1:]:
        merged_df = pd.merge(merged_df, dfs[log], on=DEPTH_COLUMN, how='outer')
    
    # Sort by depth and handle missing values
    merged_df = merged_df.sort_values(by=DEPTH_COLUMN).fillna(method='ffill').fillna(method='bfill')
    
    # Extract data only for available logs
    log_data = np.array(merged_df[available_logs])
    log_data = (log_data - np.min(log_data, axis=0)) / (np.max(log_data, axis=0) - np.min(log_data, axis=0))  # normalize to 0-1
    measured_depth = np.array(merged_df[DEPTH_COLUMN])  # measured depth
    
    %matplotlib inline
    
    # Call plot_core_data with conditional image parameters
    kwargs = {}
    
    if rgb_img is not None:
        kwargs['core_img_1'] = rgb_img
    if ct_img is not None:
        kwargs['core_img_2'] = ct_img
    
    plot_core_data(
        measured_depth, 
        log_data, 
        CORE_NAME, 
        label_name=available_logs,
        **kwargs
        )
    plt.show()
else:
    print("No data could be loaded for the specified logs.")


### Interactive picking for stratigraphic levels

In [None]:
%matplotlib widget 
# should install ipympl

# Pick stratigraphic levels for the core
print(f"Please pick stratigraphic levels for {CORE_NAME}")
picked_depths, picked_categories = pick_stratigraphic_levels(
    measured_depth, 
    log_data, 
    core_name=CORE_NAME,
    csv_filename=f"pickeddepth/{CORE_NAME}_pickeddepth.csv",
    **kwargs
)

### Sort and save picked depths


In [None]:
# Sort the picked-depths CSV by category, then depth, and write it back in place.
# Rows whose category or depth is not numeric cannot be sorted and are removed;
# the number of removed rows is reported so the loss is never silent.
csv_file = f'pickeddepth/{CORE_NAME}_pickeddepth.csv'
sort_keys = ['category', 'picked_depths_cm']

if not os.path.exists(csv_file):
    print(f"File {csv_file} not found.")
else:
    try:
        df = pd.read_csv(csv_file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header for pandas to parse; treat it as empty
        df = pd.DataFrame()

    missing_keys = [key for key in sort_keys if key not in df.columns]

    if df.empty:
        print(f"File {csv_file} is empty. Skipping sorting.")
    elif missing_keys:
        print(f"File {csv_file} has no column(s) {missing_keys}. Skipping sorting.")
    else:
        # Convert columns to numeric types to ensure correct (non-lexical) sorting
        for key in sort_keys:
            df[key] = pd.to_numeric(df[key], errors='coerce')

        # Drop rows with conversion issues, and tell the user how many were lost
        rows_before = len(df)
        df = df.dropna(subset=sort_keys)
        rows_dropped = rows_before - len(df)
        if rows_dropped:
            print(f"Warning: dropped {rows_dropped} row(s) with non-numeric category or depth.")

        # Sort first by category, then by picked_depths_cm
        df_sorted = df.sort_values(by=sort_keys)
        # Save the sorted DataFrame back to the CSV file
        df_sorted.to_csv(csv_file, index=False)
        print(f"Sorted and saved {csv_file} with {len(df_sorted)} records.")

### Visualize picked boundaries & assign bed names


In [None]:
# Interactive datum naming for picked depths.
# Loads the picked-depths CSV, plots the core with the picked boundaries colored
# by category, then shows widgets to assign an 'interpreted_bed' name to each
# pick and save the result back to the same CSV.
csv_file = f'pickeddepth/{CORE_NAME}_pickeddepth.csv'

if os.path.exists(csv_file):
    # Imported first so that `display` is bound before its first use below
    from ipywidgets import interact, widgets, VBox, HBox, Button, Output
    from IPython.display import display, clear_output

    df = pd.read_csv(csv_file)

    # Add 'interpreted_bed' column if it doesn't exist
    if 'interpreted_bed' not in df.columns:
        df['interpreted_bed'] = ''

    # Handle NaN values by converting them to empty strings
    df['interpreted_bed'] = df['interpreted_bed'].fillna('')

    print(f"Interactive datum naming for {CORE_NAME}")
    print("Current data:")
    display(df[['category', 'picked_depths_cm', 'interpreted_bed']])

    # (depth, category) pairs, each drawn with a fixed +/-1 uncertainty band
    picked_points = list(zip(df['picked_depths_cm'].tolist(),
                             df['category'].tolist()))
    picked_uncertainty = [1] * len(picked_points)

    # Define colors for different categories
    category_colors = {
        1: 'red',
        2: 'blue',
        3: 'green',
        4: 'purple',
        5: 'orange',
        6: 'cyan',
        7: 'magenta',
        8: 'yellow',
        9: 'black'
    }

    # Pass only the images that were actually loaded, as the first plotting cell does
    image_kwargs = {
        name: img
        for name, img in (('core_img_1', rgb_img), ('core_img_2', ct_img))
        if img is not None
    }

    # Plot core with picked boundaries colored by category
    fig = plot_core_data(measured_depth,
                         log_data,
                         f"{CORE_NAME} with Picked Boundaries",
                         figsize=(20, 4),
                         **image_kwargs)

    # Add colored uncertainty shading and boundaries
    seen_labels = set()
    for (depth, category), uncertainty in zip(picked_points, picked_uncertainty):
        color = category_colors.get(category, 'red')  # Default to red if category not in dictionary
        # Add transparent shading covering the uncertainty interval
        plt.axvspan(depth - uncertainty,
                    depth + uncertainty,
                    color=color,
                    alpha=0.1)
        # Label only the first line of each category so the legend gets one entry per category
        label = f'#{category}'
        if label in seen_labels:
            label = ""
        else:
            seen_labels.add(label)
        # Add the picked depth line on top
        plt.axvline(x=depth,
                    color=color,
                    linestyle='--',
                    linewidth=1.2,
                    label=label)

    # Add legend with unique category entries
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    if by_label:
        # Skipped when nothing is labelled, to avoid requesting a zero-column legend
        plt.legend(by_label.values(),
                   by_label.keys(),
                   loc='upper left',
                   ncol=len(by_label))
    plt.title(f"{CORE_NAME} with {len(picked_points)} Picked Boundaries")

    plt.tight_layout()
    plt.show()

    # Create widgets
    row_selector = widgets.Dropdown(
        # Option values are index labels, matching the .loc lookups in the handlers
        options=[(f"Row {i}: Depth {df.loc[i, 'picked_depths_cm']} cm, Cat {df.loc[i, 'category']}", i)
                 for i in df.index],
        description='Select Row:',
        style={'description_width': 'initial'}
    )

    name_input = widgets.Text(
        description='Bed Name:',
        placeholder='Enter bed name (e.g., Bed_A)'
    )

    update_button = widgets.Button(
        description='Update Name',
        button_style='info'
    )

    save_button = widgets.Button(
        description='Save All Changes',
        button_style='success'
    )

    output_area = widgets.Output()

    # Event handlers
    def on_row_change(change):
        """Show the stored bed name of the newly selected row in the text box."""
        if change['new'] is not None:
            current_name = df.loc[change['new'], 'interpreted_bed']
            name_input.value = str(current_name) if pd.notna(current_name) else ''

    def on_update_click(b):
        """Store the text box content as the bed name of the selected row (in memory only)."""
        row_idx = row_selector.value
        bed_name = name_input.value.strip()

        with output_area:
            clear_output()
            if row_idx is None:
                print("Please select a row")
                return
            df.loc[row_idx, 'interpreted_bed'] = bed_name
            print(f"Updated row {row_idx} with name '{bed_name}'")
            print(f"Depth: {df.loc[row_idx, 'picked_depths_cm']} cm, Category: {df.loc[row_idx, 'category']}")

    def on_save_click(b):
        """Write all names to the CSV, then reload the file and re-plot with the bed names."""
        # Save data to CSV
        df.to_csv(csv_file, index=False)

        with output_area:
            clear_output()
            print(f"Saved updated data to {csv_file}")

            # Reload the data from the saved CSV file to show what was actually written
            updated_df = pd.read_csv(csv_file)
            # Keep empty cells as empty strings instead of NaN
            updated_df['interpreted_bed'] = updated_df['interpreted_bed'].fillna('')
            print("Final data:")
            display(updated_df[['category', 'picked_depths_cm', 'interpreted_bed']])

            # Plot using the plot_core_data function with interpreted bed names
            fig, plot_ax = plot_core_data(
                measured_depth,
                log_data,
                f"{CORE_NAME} with Named Boundaries",
                figsize=(20, 4),
                picked_depths=updated_df['picked_depths_cm'].tolist(),
                picked_categories=updated_df['category'].tolist(),
                picked_uncertainties=[1] * len(updated_df),
                show_interpreted_bed_name=updated_df['interpreted_bed'].tolist(),
                **image_kwargs
            )

            plt.tight_layout()
            plt.show()

    # Connect event handlers
    row_selector.observe(on_row_change, names='value')
    update_button.on_click(on_update_click)
    save_button.on_click(on_save_click)

    # Pre-fill the text box for the initially selected row: `observe` only fires on
    # later changes, and an empty box would let "Update Name" blank an existing name.
    if row_selector.value is not None:
        on_row_change({'new': row_selector.value})

    # Display widgets
    controls = VBox([
        row_selector,
        name_input,
        HBox([update_button, save_button]),
        output_area
    ])

    display(controls)

else:
    print(f"File {csv_file} not found. Please pick depths first.")
