# Manual core depth picking

In [1]:
# Data manipulation and analysis
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt

# Image and file handling
import os

# Utilities
import warnings
warnings.filterwarnings('ignore')

from pyCoreRelator import plot_core_data, pick_stratigraphic_levels


### Define the core to be analyzed

In [None]:
# Name of the core to analyze. Later cells interpolate CORE_NAME into the
# log/image file paths and into the picked-depth CSV filename.
# To switch cores, comment out the active line and uncomment one of these:
# CORE_NAME = "M9907-31PC"
# CORE_NAME = "M9907-30PC"
# CORE_NAME = "M9907-23PC"
# CORE_NAME = "M9907-14TC"
# CORE_NAME = "RR0207-56PC"
CORE_NAME = "M9907-01PC"

### Load core data and images

In [3]:
# Define column names to extract from dataset
# LOG_COLUMNS = ['hiresMS']  # Choose which logs to include
LOG_COLUMNS = ['hiresMS','CT', 'Lumin']  # Choose which logs to include
DEPTH_COLUMN = 'SB_DEPTH_cm'

# Define directory paths
mother_dir = '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/'

# Define paths for the core
core_log_paths = {
    'hiresMS': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_hiresMS_MLfilled.csv',
    'CT': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_CT_MLfilled.csv',
    'Lumin': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_RGB_MLfilled.csv',
    'R': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_RGB_MLfilled.csv',
    'G': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_RGB_MLfilled.csv',
    'B': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_RGB_MLfilled.csv',
    'Den_gm/cc': f'{mother_dir}_compiled_logs/{CORE_NAME}/ML_filled/{CORE_NAME}_MST_MLfilled.csv'
}

rgb_img_path = f"{mother_dir}_compiled_logs/{CORE_NAME}/{CORE_NAME}_RGB.tiff"
ct_img_path = f"{mother_dir}_compiled_logs/{CORE_NAME}/{CORE_NAME}_CT.tiff"

# Define column mapping for alternative column names
column_alternatives = {
    'hiresMS': ['MS'],
    'CT': ['CT_value'],
    'R': ['R', 'red', 'Red'],
    'G': ['G', 'green', 'Green'],
    'B': ['B', 'blue', 'Blue'],
    'Lumin': ['luminance', 'Luminance'],
    'Den_gm/cc': ['Density', 'density']
}

# Load images
rgb_img = plt.imread(rgb_img_path)
ct_img = plt.imread(ct_img_path)

# Load log data from separate files
dfs = {}
for log in LOG_COLUMNS:
    try:
        df = pd.read_csv(core_log_paths[log])
        # Find the correct column name using alternatives if needed
        col_name = log
        if log not in df.columns:
            for alt in column_alternatives.get(log, []):
                if alt in df.columns:
                    col_name = alt
                    break
        dfs[log] = df[[DEPTH_COLUMN, col_name]].rename(columns={col_name: log})
    except Exception as e:
        print(f"Error loading {log}: {e}")

# Merge dataframes on depth column
if dfs:
    merged_df = dfs[LOG_COLUMNS[0]]
    for log in LOG_COLUMNS[1:]:
        if log in dfs:
            merged_df = pd.merge(merged_df, dfs[log], on=DEPTH_COLUMN, how='outer')
    
    # Sort by depth and handle missing values
    merged_df = merged_df.sort_values(by=DEPTH_COLUMN).fillna(method='ffill').fillna(method='bfill')
    
    # Extract data
    log_data = np.array(merged_df[LOG_COLUMNS])
    log_data = (log_data - np.min(log_data, axis=0)) / (np.max(log_data, axis=0) - np.min(log_data, axis=0))  # normalize to 0-1
    measured_depth = np.array(merged_df[DEPTH_COLUMN])  # measured depth
    
    %matplotlib inline
    
    # Plot core with RGB image, CT image, and log curve
    plot_core_data(
        measured_depth, 
        log_data, 
        CORE_NAME, 
        core_img_1=rgb_img, 
        core_img_2=ct_img, 
        label_name=LOG_COLUMNS
    )
    plt.show()
else:
    print("No data could be loaded for the specified logs.")


Error loading hiresMS: [Errno 2] No such file or directory: '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/_compiled_logs/M9907-01PC/ML_filled/M9907-01PC_hiresMS_MLfilled.csv'
Error loading CT: [Errno 2] No such file or directory: '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/_compiled_logs/M9907-01PC/ML_filled/M9907-01PC_CT_MLfilled.csv'
Error loading Lumin: [Errno 2] No such file or directory: '/Users/larryslai/Library/CloudStorage/Dropbox/My Documents/University of Texas Austin/(Project) NWP turbidites/Cascadia_core_data/OSU_dataset/_compiled_logs/M9907-01PC/ML_filled/M9907-01PC_RGB_MLfilled.csv'
No data could be loaded for the specified logs.


### Interactive picking for stratigraphic levels

In [4]:
%matplotlib widget 
# should install ipympl

# Pick stratigraphic levels for the core
print(f"Please pick stratigraphic levels for {CORE_NAME}")
picked_depths, picked_categories = pick_stratigraphic_levels(
    measured_depth, 
    log_data, 
    core_img_1=rgb_img, 
    core_img_2=ct_img, 
    core_name=CORE_NAME,
    csv_filename=f"pickeddepth/{CORE_NAME}_pickeddepth.csv"
)

Please pick stratigraphic levels for M9907-01PC


NameError: name 'measured_depth' is not defined

### Sort and save picked depths


In [None]:
# Sort the picked-depths CSV by category, then depth, and write it back in place.
csv_file = f'pickeddepth/{CORE_NAME}_pickeddepth.csv'

if os.path.exists(csv_file):
    try:
        df = pd.read_csv(csv_file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like an empty table
        # so it reaches the "is empty" branch below instead of raising.
        df = pd.DataFrame()

    if not df.empty:
        # Convert columns to numeric types to ensure correct (non-lexical) sorting;
        # values that cannot be parsed become NaN.
        df['category'] = pd.to_numeric(df['category'], errors='coerce')
        df['picked_depths_cm'] = pd.to_numeric(df['picked_depths_cm'], errors='coerce')

        # Drop rows with conversion issues. These rows are removed from the file
        # on save, so report how many were discarded.
        n_before = len(df)
        df = df.dropna(subset=['category', 'picked_depths_cm'])
        n_dropped = n_before - len(df)
        if n_dropped:
            print(f"Dropped {n_dropped} row(s) with non-numeric category or depth.")

        # Sort first by category, then by picked_depths_cm
        df_sorted = df.sort_values(by=['category', 'picked_depths_cm'])
        # Save the sorted DataFrame back to the CSV file
        df_sorted.to_csv(csv_file, index=False)
        print(f"Sorted and saved {csv_file} with {len(df_sorted)} records.")
    else:
        print(f"File {csv_file} is empty. Skipping sorting.")
else:
    print(f"File {csv_file} not found.")

### Visualize picked boundaries


In [None]:
%matplotlib inline

# Load picked depths and categories from CSV file
pickeddepth_csv = f'pickeddepth/{CORE_NAME}_pickeddepth.csv'

if os.path.exists(pickeddepth_csv):
    picked_data = pd.read_csv(pickeddepth_csv)
    # Combine depths and categories into tuples
    picked_points = list(zip(picked_data['picked_depths_cm'].values.tolist(), 
                            picked_data['category'].values.tolist()))
    print(f"Loaded {len(picked_points)} picked depths for {CORE_NAME}")
else:
    print(f"Warning: {pickeddepth_csv} not found. Using empty list for picked points.")
    picked_points = []

# Create uncertainty array (assuming uncertainty size is 1 cm)
picked_uncertainty = [1] * len(picked_points)

# Define colors for different categories
category_colors = {
    1: 'red',
    2: 'blue',
    3: 'green',
    4: 'purple',
    5: 'orange',
    6: 'cyan',
    7: 'magenta',
    8: 'yellow',
    9: 'black'
}

# Plot core with picked boundaries colored by category
fig = plot_core_data(measured_depth, 
                     log_data, 
                     f"{CORE_NAME} with Picked Boundaries", 
                     core_img_1=rgb_img, 
                     core_img_2=ct_img,
                     figsize=(20, 4))

# Add colored uncertainty shading and boundaries
for (depth, category), uncertainty in zip(picked_points, picked_uncertainty):
    color = category_colors.get(category, 'red')  # Default to red if category not in dictionary
    # Add transparent shading covering the uncertainty interval
    plt.axvspan(depth - uncertainty, 
                depth + uncertainty, 
                color=color, 
                alpha=0.1)
    # Add the picked depth line on top
    plt.axvline(x=depth, 
                color=color, 
                linestyle='--', 
                linewidth=1.2, 
                label=f'#{category}' if f'#{category}' not in plt.gca().get_legend_handles_labels()[1] else "")

# Add legend with unique category entries
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
plt.legend(by_label.values(), 
           by_label.keys(), 
           loc='upper left', 
           ncol=len(by_label))
plt.title(f"{CORE_NAME} with {len(picked_points)} Picked Boundaries")

plt.tight_layout()
plt.show()
