# Welcome to the 210Pb age model script v2.4.0 - BATCH PROCESSING!

### <div style="text-align: right"> Last modified by A.A. Lehrmann 1 December 2025 </div>


### This script will process MULTIPLE cores at once!
### It extracts radioisotope data from Canberra PDFs, runs the age model, and plots for each core

**Expected folder structure:**
```
parent_folder/
├── NBP1902_BC28_gamma spec data/
│   ├── PDFs/
│   │   └── (PDF files)
│   └── weights.csv
├── NBP2202_MC12_gamma spec data/
│   ├── PDFs/
│   │   └── (PDF files)
│   └── weights.csv
└── ...
```
(Each core has its own folder containing PDFs and weights file)


## Import libraries and setup

In [1]:
#Cell 1: import required libraries
import os
import pandas as pd
from PyPDF2 import PdfReader
from pathlib import Path
from datetime import datetime
import re
import traceback

In [2]:
# Cell 2: --- Helper functions (same as original) ---
import os, re, numpy as np, pandas as pd

# Capture depth AND optional version (e.g., 21-24, 21-24cm, 21-24_v2, 21-24cm_v3, etc.)
DEPTH_RE = re.compile(r'(\d{1,3}-\d{1,3})(?:cm)?(?:_v(\d+))?(?=\.|_|$)', re.IGNORECASE)

def _depth_and_version(s: str):
    base = os.path.basename(str(s))
    m = DEPTH_RE.search(base)
    if not m:
        return None, 0
    depth = m.group(1)                 # standardized like "21-24"
    version = int(m.group(2)) if m.group(2) else 0
    return depth, version

def reshape_canberra_with_ptsrc_mixed(df_in: pd.DataFrame,
                                      write_to: str | None = None,
                                      ptsrc_cols: tuple[str,str] = ("Pb-210", "Pb-210 error")) -> pd.DataFrame:
    """
    Works on a single mixed table (Canberra + PtSrc rows).
    - For any duplicate depths on either side, keeps the **highest _vN**.
    - Adds H/I/J = ['ptsrc_pb210','ptsrc_pb210 error','file ptsrc'] right after 'Pb-214 error'.
    - Preserves your original header names (no renaming of 'error' columns).
    """
    if "File" not in df_in.columns:
        raise KeyError("Input dataframe must contain a 'File' column.")

    df = df_in.copy()

    # Split rows by prefix
    is_ptsrc = df["File"].astype(str).str.startswith("PtSrc_")
    can_df = df.loc[~is_ptsrc].copy()
    pt_df  = df.loc[ is_ptsrc].copy()

    # Extract (depth, version) for both sides
    can_df[["__depth__","__ver__"]] = can_df["File"].apply(lambda s: pd.Series(_depth_and_version(s)))
    pt_df[["__depth__","__ver__"]]  = pt_df["File"].apply(lambda s: pd.Series(_depth_and_version(s)))

    # **Prefer highest version for Canberra** when duplicates share the same depth
    can_df = can_df.sort_values("__ver__").drop_duplicates(subset="__depth__", keep="last")

    # Pick value/error columns in PtSrc rows
    val_col, err_col = ptsrc_cols
    if val_col not in pt_df.columns or err_col not in pt_df.columns:
        # Fallback: first two numeric columns
        num_cols = [c for c in pt_df.columns if c != "File" and np.issubdtype(pt_df[c].dtype, np.number)]
        if len(num_cols) < 2:
            for c in pt_df.columns:
                if c != "File":
                    pt_df[c] = pd.to_numeric(pt_df[c], errors="coerce")
            num_cols = [c for c in pt_df.columns if c != "File" and np.issubdtype(pt_df[c].dtype, np.number)]
        val_col, err_col = num_cols[:2]

    # Reduce PtSrc to needed columns and **prefer highest version per depth**
    q = pt_df.loc[:, ["__depth__", "__ver__", "File", val_col, err_col]].copy()
    q.rename(columns={
        "File": "file ptsrc",
        val_col: "ptsrc_pb210",
        err_col: "ptsrc_pb210 error"
    }, inplace=True)
    q["file ptsrc"] = q["file ptsrc"].map(lambda x: os.path.basename(str(x)))
    q = q.sort_values("__ver__").drop_duplicates(subset="__depth__", keep="last")

    # Merge and clean
    out = can_df.merge(q.drop(columns="__ver__"), on="__depth__", how="left")
    out.drop(columns=["__depth__","__ver__"], errors="ignore", inplace=True)

    # Desired column order (keep original header names)
    base = ["File","Pb-210","Pb-210 error","Bi-214","Bi-214 error","Pb-214","Pb-214 error"]
    hij  = ["ptsrc_pb210","ptsrc_pb210 error","file ptsrc"]
    others = [col for col in out.columns if col not in base + hij]
    out = out[[c for c in base if c in out.columns] + hij + others]

    if write_to:
        out.to_csv(write_to, index=False)
    return out


## Batch Processing Setup

This cell will:
1. Ask for the parent directory containing all core folders
2. Detect all subdirectories as potential cores
3. Let you configure global settings
4. Process each core automatically

In [3]:
# Cell 3: Batch processing configuration
SCRIPT_VERSION = "v2.4.0_batch"
OPERATOR_NAME = input("Operator name: ").strip()
_default = datetime.today().strftime("%Y%m%d")
_run = input(f"Run date [YYYYMMDD] (Enter for {_default}): ").strip() or _default
RUN_DATE = _run

# Function to auto-detect year from cruise name
def get_year_from_cruise_name(core_name: str) -> int | None:
    """
    Extract year from NBP cruise name.
    NBP2202 -> 2022
    NBP2002 -> 2020
    NBP1902 -> 2019
    """
    match = re.search(r'NBP(\d{2})(\d{2})', core_name, re.IGNORECASE)
    if match:
        year_prefix = match.group(1)  # e.g., '22', '20', '19'
        return 2000 + int(year_prefix)
    return None

# Get parent directory containing all core folders
parent_dir = input("Enter the parent folder path containing all core folders: ").strip()
parent_path = Path(parent_dir)

if not parent_path.exists():
    raise FileNotFoundError(f"Directory not found: {parent_path}")

# Detect all subdirectories as potential cores
# Core folders are typically named like NBP1902_BC28_gamma spec data
all_folders = [d for d in parent_path.iterdir() if d.is_dir()]
core_folders = all_folders  # All subdirectories are potential cores
print(f"\nFound {len(core_folders)} core folders:")

# Auto-detect years from core names
core_years = {}
for folder in core_folders:
    detected_year = get_year_from_cruise_name(folder.name)
    core_years[folder.name] = detected_year
    year_str = f" (detected year: {detected_year})" if detected_year else " (year unknown)"
    print(f"  • {folder.name}{year_str}")

# Check if all years were detected
all_detected = all(year is not None for year in core_years.values())
unique_years = set(y for y in core_years.values() if y is not None)

# Get global settings
print("\n=== Global Settings ===")
if all_detected and len(unique_years) == 1:
    default_year = list(unique_years)[0]
    print(f"All cores detected as year {default_year}")
    year_input = input(f"Use {default_year} for all cores? (Enter to confirm, or type different year): ").strip()
    default_year_of_core = int(year_input) if year_input else default_year
elif all_detected:
    print(f"Multiple years detected: {sorted(unique_years)}")
    print("Will use individual years for each core based on cruise name.")
    default_year_of_core = None  # Will use per-core years
else:
    print("Some cores have unknown years. Will ask for each core individually.")
    default_year_of_core = None

# Ask about depth labeling preference
label_choice = input("\nLabel depths with calendar years? (all/none/ask): ").strip().lower()
if label_choice not in ['all', 'none', 'ask']:
    label_choice = 'ask'

# Ask about missing depths
missing_choice = input("Any intervals with undetectable radioisotopes? (yes/no/ask): ").strip().lower()
if missing_choice not in ['yes', 'no', 'ask']:
    missing_choice = 'ask'

print(f"\nWill process {len(core_folders)} cores with global settings:")
print(f"  Operator: {OPERATOR_NAME}")
print(f"  Run date: {RUN_DATE}")
if default_year_of_core:
    print(f"  Year of core: {default_year_of_core} (applies to all)")
else:
    print(f"  Year of core: Will use individual years per core")
print(f"  Depth labeling: {label_choice}")
print(f"  Missing depths handling: {missing_choice}")

proceed = input("\nProceed with batch processing? (yes/no): ").strip().lower()
if proceed != 'yes':
    raise SystemExit("Batch processing cancelled by user.")

Operator name:  Asmara A. Lehrmann
Run date [YYYYMMDD] (Enter for 20251201):  
Enter the parent folder path containing all core folders:  D:\210Pb_thismachine\cores



Found 21 core folders:
  • NBP1902_BC28_gamma spec data (detected year: 2019)
  • NBP1902_JPC17_gamma spec data (detected year: 2019)
  • NBP2002 MC30 (detected year: 2020)
  • NBP2002_KC72_gamma spec data (detected year: 2020)
  • NBP2202 MC12 (detected year: 2022)
  • NBP2202_KC-24_gamma spec data (detected year: 2022)
  • NBP2202_KC04_gamma spec data (detected year: 2022)
  • NBP2202_KC06_gamma spec data (detected year: 2022)
  • NBP2202_KC15_gamma spec data (detected year: 2022)
  • NBP2202_KC17_gamma spec data (detected year: 2022)
  • NBP2202_KC20_gamma spec data (detected year: 2022)
  • NBP2202_KC21_gamma spec data (detected year: 2022)
  • NBP2202_KC22_gamma spec data (detected year: 2022)
  • NBP2202_KC23_gamma spec data (detected year: 2022)
  • NBP2202_KC25_gamma spec data (detected year: 2022)
  • NBP2202_KC26_gamma spec data (detected year: 2022)
  • NBP2202_MC05_gamma spec data (detected year: 2022)
  • NBP2202_MC12_gamma spec data (detected year: 2022)
  • NBP2202_MC19


Label depths with calendar years? (all/none/ask):  all
Any intervals with undetectable radioisotopes? (yes/no/ask):  no



Will process 21 cores with global settings:
  Operator: Asmara A. Lehrmann
  Run date: 20251201
  Year of core: Will use individual years per core
  Depth labeling: all
  Missing depths handling: no



Proceed with batch processing? (yes/no):  yes


## Core Processing Functions

These cells contain all the processing logic from the original script, wrapped in functions for batch processing.

In [4]:
# Cell 4: PDF extraction functions (from original script cells 6-10)

def process_ptsrc_pdf(file_path, filename):
    """Process PtSrc PDF files (checks pages 3 and 4)"""
    try:
        reader = PdfReader(file_path)
        if len(reader.pages) < 3:
            print(f"PDF file '{filename}' has less than 3 pages. Skipping.")
            return None
        
        # Try page 3 first (index 2), then page 4 (index 3)
        pages_to_check = [2]  # Start with page 3
        if len(reader.pages) >= 4:
            pages_to_check.append(3)  # Add page 4 if it exists
        
        for page_idx in pages_to_check:
            page = reader.pages[page_idx]
            text = page.extract_text()
            lines = text.split('\n')
            for line in lines:
                if 'Pb-210' in line:
                    ptsrc_pb210, PtSrc_Pb210error = line.split()[-2:]
                    return {
                        'File': filename,
                        'Pb-210': float(ptsrc_pb210),
                        'Pb-210 error': float(PtSrc_Pb210error)
                    }
        
        return None
    except Exception as e:
        print(f"Error processing PtSrc PDF '{filename}': {e}")
        return None

def process_regular_pdf(file_path, filename):
    """Process regular PDF files (checks pages 3 and 4)"""
    pb210 = pb210error = Bi214 = Bi214error = Pb214 = Pb214error = None
    try:
        reader = PdfReader(file_path)
        if len(reader.pages) < 3:
            print(f"PDF file '{filename}' has less than 3 pages. Skipping.")
            return None
        
        # Try page 3 first (index 2), then page 4 (index 3)
        pages_to_check = [2]  # Start with page 3
        if len(reader.pages) >= 4:
            pages_to_check.append(3)  # Add page 4 if it exists
        
        data_found = False
        for page_idx in pages_to_check:
            page = reader.pages[page_idx]
            text = page.extract_text()
            lines = text.split('\n')
            for line in lines:
                if 'Pb-210' in line:
                    pb210, pb210error = line.split()[-2:]
                elif 'Bi-214' in line:
                    Bi214, Bi214error = line.split()[-2:]
                elif 'Pb-214' in line:
                    Pb214, Pb214error = line.split()[-2:]
            
            # Check if we found all required data
            if pb210 is not None and Bi214 is not None:
                data_found = True
                break
        
        if not data_found:
            if pb210 is None or pb210error is None:
                return None
            if Bi214 is None or Bi214error is None:
                return None
        
        if Pb214 is None or Pb214error is None:
            Pb214 = Pb214error = 'NaN'
        
        return {
            'File': filename,
            'Pb-210': float(pb210),
            'Pb-210 error': float(pb210error),
            'Bi-214': float(Bi214),
            'Bi-214 error': float(Bi214error),
            'Pb-214': float(Pb214) if Pb214 != 'NaN' else float('nan'),
            'Pb-214 error': float(Pb214error) if Pb214error != 'NaN' else float('nan')
        }
    
    except Exception as e:
        print(f"Error processing PDF file '{filename}': {e}")
        return None

def process_pdf_file(file_path, filename):
    """Route to appropriate processor based on filename"""
    if filename.startswith("PtSrc_"):
        return process_ptsrc_pdf(file_path, filename)
    else:
        return process_regular_pdf(file_path, filename)

def extract_isotope_data_from_pdfs(folder_path):
    """
    Extract radioisotope data from all PDFs in the specified folder.
    Returns a list of dictionaries with isotope data.
    """
    combined_data = []
    folder_path = Path(folder_path)
    
    for pdf_file in folder_path.glob("*.pdf"):
        data = process_pdf_file(str(pdf_file), pdf_file.name)
        if data is not None:
            combined_data.append(data)
    
    return combined_data

In [5]:
# Cell 5: Age model calculation functions (from original script cells 17-24)
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def calculate_age_model(merged_data, year_of_core, missing_depths=[]):
    """
    Calculate the age model from merged isotope and weight data.
    Based on the Wellner Lab Group excel model (Appleby, 2001; Boldt et al., 2013)
    """
    data = merged_data.copy()
    
    # Calculate interval properties
    data['Center point of interval'] = (data['Top of interval (cm)'] + data['Base of interval (cm)']) / 2
    data['Interval thickness (cm)'] = data['Base of interval (cm)'] - data['Top of interval (cm)']
    
    # Calculate activities
    data['Pb-210 activity (Bq/g)'] = data['Pb-210'] / data['Weight (g)']
    data['Pb-210 activity Uncertainty (Bq-g)'] = data['Pb-210 error'] / data['Weight (g)']
    
    data['Bi-214 activity (Bq/g)'] = data['Bi-214'] / data['Weight (g)']
    data['Pb-214 activity (Bq/g)'] = data['Pb-214'] / data['Weight (g)']
    
    # Calculate supported activity
    data['Averaged supported activity of Bi-214 and Pb-214 (Bq/g)'] = (
        data['Bi-214 activity (Bq/g)'] + data['Pb-214 activity (Bq/g)']
    ) / 2
    
    # Calculate uncertainties for background activity
    data['Bi-214 uncertainty'] = data['Bi-214 error'] / data['Weight (g)']
    data['Pb-214 uncertainty'] = data['Pb-214 error'] / data['Weight (g)']
    data['Background activity uncertainty (Bq/g)'] = np.sqrt(
        data['Bi-214 uncertainty']**2 + data['Pb-214 uncertainty']**2
    ) / 2
    
    # Calculate excess Pb-210
    data['Excess Pb-210 (Bq/g)'] = (
        data['Pb-210 activity (Bq/g)'] - 
        data['Averaged supported activity of Bi-214 and Pb-214 (Bq/g)']
    )
    
    # Handle missing/negative values
    data.loc[data['Excess Pb-210 (Bq/g)'] < 0, 'Excess Pb-210 (Bq/g)'] = np.nan
    
    # CRS age model calculations
    lambda_pb210 = np.log(2) / 22.3  # decay constant
    
    # Calculate cumulative inventory from bottom up
    data['Excess Pb-210 inventory (Bq/cm2)'] = (
        data['Excess Pb-210 (Bq/g)'] * 
        data['Weight (g)'] / 
        data['Interval thickness (cm)']
    )
    
    data['Cumulative inventory from bottom'] = (
        data['Excess Pb-210 inventory (Bq/cm2)'][::-1].cumsum()[::-1]
    )
    
    # Calculate ages
    total_inventory = data['Cumulative inventory from bottom'].iloc[0]
    data['Age (years)'] = (1 / lambda_pb210) * np.log(
        total_inventory / data['Cumulative inventory from bottom']
    )
    
    # Calculate calendar years
    data['calendar years pre year of core'] = year_of_core - data['Age (years)']
    
    # Calculate sedimentation rate
    data['Sedimentation rate (cm/yr)'] = data['Interval thickness (cm)'] / data['Age (years)'].diff()
    
    return data

def compose_output_name(basename: str, ext: str, directory: Path | str, suffix: str) -> Path:
    """
    Returns a full Path like <directory>/<basename>_<suffix><ext>
    """
    directory = Path(directory)
    base = f"{basename}{suffix}"
    if not ext.startswith("."):
        ext = "." + ext
    return directory / f"{base}{ext}"

def write_readme(filepath: Path, content: str):
    """Write a README file next to the output file"""
    readme_path = filepath.parent / f"{filepath.stem}_README.txt"
    with open(readme_path, 'w') as f:
        f.write(content)

In [6]:
# Cell 6: Plotting functions (from original script cells 27-30)

def save_figure(fig, basename: str, directory: Path, ext: str, dpi: int, 
                meta: dict, readme_content: str = ""):
    """
    Save figure with metadata footer and optional README.
    """
    # Remove any existing footer text
    for ax in fig.axes:
        for t in ax.texts:
            if hasattr(t, 'get_gid') and t.get_gid() == 'footer':
                try:
                    t.remove()
                except Exception:
                    pass
    
    # Add footer to first axes
    ax = fig.axes[0]
    footer = ax.text(
        1.01, 0.5, f"Created by {meta['operator']} with 210PbAgeModelScript {meta['version']}",
        ha="left", va="center", rotation=270, fontsize=9,
        color="lightgrey", transform=ax.transAxes
    )
    footer.set_gid("footer")
    
    # Save figure
    suffix = f"_{meta['core']}_{meta['date']}"
    out_path = compose_output_name(basename, ext, directory, suffix)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
    
    print(f"Figure saved -> {out_path}")
    if readme_content:
        write_readme(out_path, readme_content)
    
    return out_path

def plot_uncorrected_activity(data, core_name, missing_depths, save_location, meta):
    """
    Plot Pb-210 uncorrected activity.
    """
    # Color scheme
    excess_pb210_color = '#3B5BA5'
    excess_pb210_error_color = '#6B8DD6'
    
    plt.figure(figsize=(3, 5))
    plt.errorbar(
        data['Pb-210 activity (Bq/g)'], data['Center point of interval'], 
        xerr=data['Pb-210 activity Uncertainty (Bq-g)'], fmt='-', 
        color=excess_pb210_color, label='Pb-210 activity (Bq/unit)', 
        capsize=5, linewidth=1, ecolor=excess_pb210_error_color
    )
    plt.xscale('log')
    plt.xlim(0.01, 10)
    
    # Highlight missing intervals
    for y in missing_depths:
        plt.axhspan(y - 0.5, y + 0.5, alpha=0.5, color='brown', 
                    label='Undetectable radioisotope' if y == missing_depths[0] else None)
    
    plt.title(f"{core_name} 210 Pb Uncorrected Activity", fontsize=18)
    plt.xlabel("Bq/g", fontsize=14)
    plt.ylabel("Depth (cm)", fontsize=14)
    plt.gca().invert_yaxis()
    plt.grid(True, which='both', linestyle='-', linewidth=0.5, color='lightgray')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=1)
    plt.tight_layout()
    
    fig = plt.gcf()
    save_figure(fig, basename="UncorrectedActivity", directory=save_location, 
                ext=".pdf", dpi=300, meta=meta)
    plt.close()

def plot_age_model(data, core_name, missing_depths, depths_to_label, save_location, meta):
    """
    Plot the full age model with excess Pb-210 and background activity.
    """
    # Color scheme
    excess_pb210_color = '#3B5BA5'
    excess_pb210_error_color = '#6B8DD6'
    supported_activity_color = '#C5462D'
    supported_activity_error_color = '#E67E6B'
    
    plt.figure(figsize=(5, 10))
    
    # Calculate errors
    yerr = np.abs(data['Center point of interval'] - data['Top of interval (cm)'])
    xerr = data['Pb-210 activity Uncertainty (Bq-g)']
    
    # Plot Excess Pb-210 with connecting line
    valid_mask_excess = ~data['Excess Pb-210 (Bq/g)'].isna()
    plt.plot(
        data.loc[valid_mask_excess, 'Excess Pb-210 (Bq/g)'], 
        data.loc[valid_mask_excess, 'Center point of interval'],
        color=excess_pb210_color, linewidth=1, zorder=1
    )
    
    # Draw error boxes for Excess Pb-210
    for i in range(len(data)):
        x = data['Excess Pb-210 (Bq/g)'].iloc[i]
        y = data['Center point of interval'].iloc[i]
        
        if pd.isna(x):
            continue
        
        width = xerr.iloc[i] * 2
        height = yerr.iloc[i] * 2
        rect = patches.Rectangle(
            (x - xerr.iloc[i], y - yerr.iloc[i]), width, height,
            linewidth=0.5, edgecolor='grey', facecolor='none'
        )
        plt.gca().add_patch(rect)
    
    # Plot background activity
    valid_mask_bg = ~data['Averaged supported activity of Bi-214 and Pb-214 (Bq/g)'].isna()
    plt.errorbar(
        data.loc[valid_mask_bg, 'Averaged supported activity of Bi-214 and Pb-214 (Bq/g)'],
        data.loc[valid_mask_bg, 'Center point of interval'],
        xerr=data.loc[valid_mask_bg, 'Background activity uncertainty (Bq/g)'],
        fmt='-', color=supported_activity_color, label='Background Activity',
        capsize=5, linewidth=1, ecolor=supported_activity_error_color
    )
    
    # Highlight missing intervals
    for y in missing_depths:
        plt.axhspan(y - 0.5, y + 0.5, alpha=0.5, color='brown',
                    label='Undetectable radioisotope' if y == missing_depths[0] else None)
    
    # Annotate selected depths with calendar years
    for i, depth in enumerate(data['Center point of interval']):
        if depth in depths_to_label:
            year_value = data['calendar years pre year of core'].iloc[i]
            if not pd.isna(year_value):
                plt.text(
                    data['Excess Pb-210 (Bq/g)'].iloc[i] + 0.05, depth,
                    f'{int(np.ceil(year_value))}',
                    fontsize=14, color='black', verticalalignment='center'
                )
    
    plt.title(f"{core_name} Age Model", fontsize=18)
    plt.xlabel("Bq/unit", fontsize=14)
    plt.ylabel("Depth (cm)", fontsize=14)
    plt.xscale('log')
    plt.xlim(0.01, 2)
    plt.gca().invert_yaxis()
    plt.grid(True, which='both', linestyle='-', linewidth=0.5, color='lightgray')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=1)
    plt.tight_layout()
    
    fig = plt.gcf()
    save_figure(fig, basename="AgeModelPlot", directory=save_location, 
                ext=".pdf", dpi=300, meta=meta)
    plt.close()

## Main Batch Processing Loop

This cell processes each core folder automatically.

In [7]:
# Cell 7: Main batch processing loop

# Track results
results = []
failed_cores = []

for core_idx, core_folder in enumerate(core_folders, 1):
    print(f"\n{'='*80}")
    print(f"Processing core {core_idx}/{len(core_folders)}: {core_folder.name}")
    print(f"{'='*80}")
    
    try:
        # Extract core name from folder name
        CORE_NAME_RAW = core_folder.name
        CORE_NAME = re.sub(r"[^A-Za-z0-9]+", "", CORE_NAME_RAW)
        SUFFIX = f"_{CORE_NAME}_{RUN_DATE}"
        
        META = {
            "operator": OPERATOR_NAME, 
            "date": RUN_DATE, 
            "core": CORE_NAME, 
            "version": SCRIPT_VERSION, 
            "suffix": SUFFIX
        }
        
        print(f"Core name: {CORE_NAME}")
        
        # Find PDFs folder inside this core folder, or use core folder directly
        # Exclude output files like AgeModelPlot
        pdf_folders = [d for d in core_folder.glob("*[Pp][Dd][Ff]*") 
                      if d.is_dir()]  # Only directories
        
        if pdf_folders:
            folder_path = pdf_folders[0]
            print(f"  PDF folder: {folder_path.name}")
        else:
            # Check if PDFs are directly in the core folder
            # Look for source PDFs (exclude output PDFs like AgeModelPlot)
            pdf_files = [f for f in core_folder.glob("*.pdf") 
                        if not f.name.startswith(('AgeModelPlot', 'CanberraData'))]
            if pdf_files:
                folder_path = core_folder
                print(f"  PDFs found in root of core folder ({len(pdf_files)} files)")
            else:
                print(f"  WARNING: No source PDFs found in {CORE_NAME_RAW}, skipping")
                failed_cores.append((CORE_NAME, "No source PDFs found"))
                continue
        
        # Extract isotope data from PDFs
        print(f"  Extracting isotope data from PDFs...")
        print(f"  DEBUG: Looking for PDFs in: {folder_path}")
        pdf_files = list(Path(folder_path).glob("*.pdf"))
        print(f"  DEBUG: Found {len(pdf_files)} PDF files")
        if pdf_files and len(pdf_files) <= 3:
            print(f"  DEBUG: PDF files: {[p.name for p in pdf_files]}")
        isotope_data = extract_isotope_data_from_pdfs(folder_path)
        
        if not isotope_data:
            print(f"  WARNING: No isotope data extracted, skipping {CORE_NAME}")
            failed_cores.append((CORE_NAME, "No isotope data extracted"))
            continue
        
        df_isotopes = pd.DataFrame(isotope_data)
        print(f"  Extracted data from {len(df_isotopes)} PDFs")
        
        # Process with PtSrc if available
        output_csv_path = compose_output_name("CanberraData", ".csv", folder_path, SUFFIX)
        df_processed = reshape_canberra_with_ptsrc_mixed(df_isotopes, write_to=output_csv_path)
        print(f"  Saved Canberra data to {output_csv_path.name}")
        
        # Find weights file inside the core folder
        weights_pattern = ['*weights*.csv', '*_weights.csv', '*-weights.csv']
        weights_files = []
        for pattern in weights_pattern:
            weights_files.extend(core_folder.glob(pattern))
        
        if not weights_files:
            print(f"  WARNING: No weights file found in {CORE_NAME_RAW}, skipping")
            failed_cores.append((CORE_NAME, "No weights file found"))
            continue
        
        weights_path = weights_files[0]
        print(f"  Weights file: {weights_path.name}")
        
        # Load and merge data
        df_weights = pd.read_csv(weights_path)
        
        print(f"  DEBUG: Weights file has {len(df_weights)} rows")
        print(f"  DEBUG: Weights columns: {list(df_weights.columns)}")
        
        # Extract top and base depths from PDF filenames
        # e.g., "NBP1902_BC28_9-10cm.PDF" -> top=9, base=10
        depth_extract = df_processed['File'].str.extract(r'(\d+)-(\d+)')
        df_processed['top'] = pd.to_numeric(depth_extract[0])
        df_processed['base'] = pd.to_numeric(depth_extract[1])
        
        print(f"  DEBUG: Sample processed depths: top={df_processed['top'].head(3).tolist()}, base={df_processed['base'].head(3).tolist()}")
        
        # Identify weight file columns (assuming columns B, C, D are top, center, base)
        # Column A is core name, so we use columns 1, 2, 3 (0-indexed)
        if len(df_weights.columns) >= 4:
            df_weights['top'] = pd.to_numeric(df_weights.iloc[:, 1])
            df_weights['center'] = pd.to_numeric(df_weights.iloc[:, 2])
            df_weights['base'] = pd.to_numeric(df_weights.iloc[:, 3])
            if len(df_weights.columns) >= 5:
                df_weights['Weight (g)'] = pd.to_numeric(df_weights.iloc[:, 4])
            print(f"  DEBUG: Sample weight tops: {df_weights['top'].head(3).tolist()}")
            print(f"  DEBUG: Sample weight bases: {df_weights['base'].head(3).tolist()}")
        else:
            print(f"  WARNING: Weights file doesn't have expected format, skipping {CORE_NAME}")
            failed_cores.append((CORE_NAME, "Weights file format incorrect"))
            continue
        
        # Merge on top and base depths
        merged = df_processed.merge(df_weights[['top', 'base', 'Weight (g)']], 
                                    on=['top', 'base'], how='inner')
        
        # Add other required columns from weights if present
        if 'center' in df_weights.columns:
            # Re-merge to get center column
            merged = df_processed.merge(df_weights[['top', 'base', 'center', 'Weight (g)']], 
                                        on=['top', 'base'], how='inner')
            merged['Top of interval (cm)'] = merged['top']
            merged['Base of interval (cm)'] = merged['base']
            merged['Center point of interval'] = merged['center']
        
        print(f"  DEBUG: After merge: {len(merged)} rows")
        
        if len(merged) == 0:
            print(f"  WARNING: No data after merging, skipping {CORE_NAME}")
            failed_cores.append((CORE_NAME, "No data after merging weights"))
            continue
        
        print(f"  Merged {len(merged)} samples")
        
        # Handle missing depths
        if missing_choice == 'ask':
            has_missing = input(f"  Does {CORE_NAME} have undetectable radioisotopes? (yes/no): ").strip().lower()
            if has_missing == 'yes':
                missing_input = input(f"    Enter depths (comma-separated): ").strip()
                missing_depths = [float(x.strip()) for x in missing_input.split(',')]
            else:
                missing_depths = []
        elif missing_choice == 'yes':
            missing_input = input(f"  Enter missing depths for {CORE_NAME} (comma-separated): ").strip()
            missing_depths = [float(x.strip()) for x in missing_input.split(',')] if missing_input else []
        else:
            missing_depths = []
        
        # Determine year for this core
        if default_year_of_core:
            # Use global year
            year_of_core = default_year_of_core
            print(f"  Using year: {year_of_core}")
        else:
            # Use detected year or ask
            detected_year = core_years.get(CORE_NAME_RAW)
            if detected_year:
                year_of_core = detected_year
                print(f"  Using detected year: {year_of_core}")
            else:
                year_of_core = int(input(f"  Enter year for {CORE_NAME}: "))
        
        # Calculate age model
        print(f"  Calculating age model...")
        data = calculate_age_model(merged, year_of_core, missing_depths)
        
        # Save calculated data
        calc_csv_path = compose_output_name("AgeModelData", ".csv", core_folder, SUFFIX)
        data.to_csv(calc_csv_path, index=False)
        print(f"  Saved age model data to {calc_csv_path.name}")
        
        # Handle depth labeling
        if label_choice == 'all':
            depths_to_label = data['Center point of interval'].tolist()
        elif label_choice == 'ask':
            label_input = input(f"  Label depths for {CORE_NAME}? (all/none/specific): ").strip().lower()
            if label_input == 'all':
                depths_to_label = data['Center point of interval'].tolist()
            elif label_input == 'specific':
                depths_input = input(f"    Enter depths (comma-separated): ").strip()
                depths_to_label = [float(x.strip()) for x in depths_input.split(',')]
            else:
                depths_to_label = []
        else:
            depths_to_label = []
        
        # Create plots
        print(f"  Creating plots...")
        save_location = core_folder / "outputs"
        save_location.mkdir(exist_ok=True)
        
        plot_uncorrected_activity(data, CORE_NAME, missing_depths, save_location, META)
        plot_age_model(data, CORE_NAME, missing_depths, depths_to_label, save_location, META)
        
        print(f"  ✓ Successfully processed {CORE_NAME}")
        results.append((CORE_NAME, "Success", str(save_location)))
        
    except Exception as e:
        print(f"  ✗ ERROR processing {core_folder.name}:")
        print(f"    {str(e)}")
        traceback.print_exc()
        failed_cores.append((core_folder.name, str(e)))
        continue

# Print summary
print(f"\n{'='*80}")
print("BATCH PROCESSING COMPLETE")
print(f"{'='*80}")
print(f"\nSuccessfully processed: {len(results)} cores")
for core_name, status, location in results:
    print(f"  ✓ {core_name} -> {location}")

if failed_cores:
    print(f"\nFailed to process: {len(failed_cores)} cores")
    for core_name, error in failed_cores:
        print(f"  ✗ {core_name}: {error}")
else:
    print(f"\nAll cores processed successfully!")


Processing core 1/21: NBP1902_BC28_gamma spec data
Core name: NBP1902BC28gammaspecdata
  PDFs found in root of core folder (34 files)
  Extracting isotope data from PDFs...
  DEBUG: Looking for PDFs in: D:\210Pb_thismachine\cores\NBP1902_BC28_gamma spec data
  DEBUG: Found 35 PDF files
PDF file 'AgeModelPlot_NBP1902BC28_20250911.pdf' has less than 3 pages. Skipping.
  Extracted data from 32 PDFs
  ✗ ERROR processing NBP1902_BC28_gamma spec data:
    [Errno 13] Permission denied: 'D:\\210Pb_thismachine\\cores\\NBP1902_BC28_gamma spec data\\CanberraData_NBP1902BC28gammaspecdata_20251201.csv'

Processing core 2/21: NBP1902_JPC17_gamma spec data
Core name: NBP1902JPC17gammaspecdata
  PDFs found in root of core folder (12 files)
  Extracting isotope data from PDFs...
  DEBUG: Looking for PDFs in: D:\210Pb_thismachine\cores\NBP1902_JPC17_gamma spec data
  DEBUG: Found 12 PDF files


Traceback (most recent call last):
  File "C:\Users\aalehrma\AppData\Local\Temp\ipykernel_14052\3434979222.py", line 68, in <module>
    df_processed = reshape_canberra_with_ptsrc_mixed(df_isotopes, write_to=output_csv_path)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\aalehrma\AppData\Local\Temp\ipykernel_14052\519063308.py", line 75, in reshape_canberra_with_ptsrc_mixed
    out.to_csv(write_to, index=False)
  File "C:\Users\aalehrma\anaconda3\envs\myenv\Lib\site-packages\pandas\util\_decorators.py", line 333, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\aalehrma\anaconda3\envs\myenv\Lib\site-packages\pandas\core\generic.py", line 3986, in to_csv
    return DataFrameRenderer(formatter).to_csv(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\aalehrma\anaconda3\envs\myenv\Lib\site-packages\pandas\io\formats\format.py", line 1014, in to_csv
    csv_formatter.s

  Extracted data from 12 PDFs
  Saved Canberra data to CanberraData_NBP1902JPC17gammaspecdata_20251201.csv

Processing core 3/21: NBP2002 MC30
Core name: NBP2002MC30
  PDFs found in root of core folder (30 files)
  Extracting isotope data from PDFs...
  DEBUG: Looking for PDFs in: D:\210Pb_thismachine\cores\NBP2002 MC30
  DEBUG: Found 30 PDF files
  Extracted data from 30 PDFs
  Saved Canberra data to CanberraData_NBP2002MC30_20251201.csv
  Weights file: NBP2002MC30_weights_aal.csv
  DEBUG: Weights file has 15 rows
  DEBUG: Weights columns: ['Core', 'top of interval (cm)', 'Center point of interval', 'Base of interval (cm)', 'Sediment weight (g)']
  DEBUG: Sample processed depths: top=[0, 10, 12], base=[1, 11, 13]
  DEBUG: Sample weight tops: [0, 2, 3]
  DEBUG: Sample weight bases: [1, 3, 4]
  DEBUG: After merge: 15 rows
  Merged 15 samples
  Using detected year: 2020
  Calculating age model...
  Saved age model data to AgeModelData_NBP2002MC30_20251201.csv
  Creating plots...
Figure s

# Batch Processing Complete!

All cores have been processed. Check the summary above for any errors.

#### When you've finished, go to Cell > All Output > Clear to be ready for the next batch run.