# Test Event Waveform Processing

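This notebook tests the event waveform processing script (the `event_waveform_processing` module) on a small subset of data. It requires the `pnwstore` package to be installed and the `event_waveform_processing` module to be importable; otherwise the import cell below fails.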

In [1]:
# Import required libraries
import sys
import os
import logging
import numpy as np
import pandas as pd
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from pnwstore.mseed import WaveformClient

# Import functions from the script
from event_waveform_processing import find_column, process_event

# Configure root logging once for the notebook: INFO and above,
# each record rendered as "<timestamp> <LEVEL>: <message>"
logging.basicConfig(
    format="%(asctime)s %(levelname)s: %(message)s",
    level=logging.INFO,
)

ModuleNotFoundError: No module named 'pnwstore'

## Load Data

Load the events and picks CSV files.

In [None]:
# Paths of the events catalog and its picks table (relative to the working directory)
events_path = '../data/Cascadia_relocated_catalog_ver_3.csv'
picks_path = '../data/Cascadia_relocated_catalog_picks_ver_3.csv'

# Read both CSV files into DataFrames
events_df = pd.read_csv(events_path)
picks_df = pd.read_csv(picks_path)

# Report table sizes and column names (only the first 10 picks columns are listed)
print(
    f"Events shape: {events_df.shape}",
    f"Picks shape: {picks_df.shape}",
    f"\nEvents columns: {events_df.columns.tolist()}",
    f"\nPicks columns: {picks_df.columns.tolist()[:10]}...",
    sep="\n",
)

In [None]:
# Preview the first five rows of the events table
events_df.head(n=5)

In [None]:
# Preview the first five rows of the picks table
picks_df.head(n=5)

## Test Column Detection

Test the `find_column` function to ensure it correctly identifies column names.

In [None]:
# Ask find_column for the column that corresponds to each set of candidate names
event_col = find_column(events_df, ("event id", "event_id", "event"))
origin_col = find_column(events_df, ("origin", "time", "origin time", "datetime"))
station_col = find_column(picks_df, ("station name", "station", "sta"))
phase_col = find_column(picks_df, ("phase", "type", "phase_hint"))

# Show which column was selected for each field, one per line
print("\n".join([
    f"Event ID column: {event_col}",
    f"Origin time column: {origin_col}",
    f"Station column: {station_col}",
    f"Phase column: {phase_col}",
]))

## Select Test Event

Choose a single event to test the processing workflow.

In [None]:
# Unique, non-null event IDs found in the events table
event_ids = (
    events_df[event_col]
    .dropna()
    .unique()
)

# Report how many there are and show the first ten
print(
    f"Total number of events: {len(event_ids)}",
    f"\nFirst 10 event IDs: {event_ids[:10]}",
    sep="\n",
)

In [None]:
# Use the first available event as the test case (change the index to test another one)
test_event_id = event_ids[0]
print(f"Testing with event ID: {test_event_id}")

# Catalog row for that event; the first match is taken if the ID occurs more than once
test_event = events_df.loc[events_df[event_col] == test_event_id].iloc[0]
print("\nEvent details:", test_event, sep="\n")

In [None]:
# The picks table has its own event-ID column; resolve it, then keep only the test event's rows
pick_event_col = find_column(picks_df, ("event id", "event_id", "event"))
test_picks = picks_df.loc[picks_df[pick_event_col] == test_event_id]

print(f"Number of picks for this event: {len(test_picks)}")
print("\nPicks:")

# Last expression of the cell: rich display of up to ten picks
test_picks.head(n=10)

## Test WaveformClient Connection

Verify that we can connect to the WaveformClient and fetch data.

In [None]:
# FDSN web-service client addressed by the 'IRIS' shortcut name
client = Client(base_url='IRIS')

# pnwstore waveform client, constructed with its default arguments
client_waveform = WaveformClient()

print("Clients initialized successfully")

In [None]:
# Smoke-test a waveform download for the station of the test event's first pick.
# Every path through this cell prints something, so a skipped check is visible.
if len(test_picks) > 0:
    # Station identifier of the first pick, as stored in the picks table
    test_pick = test_picks.iloc[0]
    raw_station = str(test_pick[station_col]).strip()
    parts = raw_station.split('.')

    if len(parts) == 2:
        # NOTE(review): the identifier is read as "<station>.<network>" — confirm
        # against the picks file, since the order is not checked here.
        station, network = parts[0].strip(), parts[1].strip()
        origin_time = UTCDateTime(test_event[origin_col])

        print(f"Testing waveform download for: {network}.{station}")
        print(f"Origin time: {origin_time}")

        try:
            # Request data for the calendar day of the origin time on ?H? and ?N? channels
            st_test = client_waveform.get_waveforms(
                network=network,
                station=station,
                channel="?H?,?N?",
                year=origin_time.strftime('%Y'),
                month=origin_time.strftime('%m'),
                day=origin_time.strftime('%d'),
            )
            print(f"\nSuccessfully downloaded {len(st_test)} traces")
            print(st_test)

            # Trim to the event window: 30 s before to 120 s after the origin time
            st_test.trim(starttime=origin_time - 30, endtime=origin_time + 120)
            print(f"\nAfter trimming: {len(st_test)} traces")
            print(st_test)

        except Exception as e:
            # Deliberately best-effort: report the failure and let the notebook continue
            print(f"Error downloading waveforms: {e}")
    else:
        # Previously silent: the identifier did not split into exactly two parts
        print(f"Skipping waveform test: unexpected station identifier '{raw_station}' "
              f"(expected two dot-separated parts)")
else:
    # Previously silent: nothing to test without at least one pick
    print("Skipping waveform test: the test event has no picks")

## Test Process Event Function

Run the main processing function on the test event.

In [None]:
# Run the main processing function on the single test event.
# NOTE(review): only the FDSN `client` is passed; the `client_waveform` created
# earlier is not handed over — confirm which client process_event expects.
_, measurements, origin = process_event(
    test_event_id,
    events_df,
    picks_df,
    client,
    sample_rate=100,
    highpass_freq=4.0,
    window_before=30,
    window_after=120,
)

print(f"\nOrigin time: {origin}")

# The following cell treats `measurements` as possibly falsy, so do not call
# len() on it blindly: a None result would raise TypeError here.
n_measured_stations = len(measurements) if measurements else 0
print(f"Number of stations with measurements: {n_measured_stations}")

In [None]:
# Print the per-station, per-component measurements of the test event
if not measurements:
    print("No measurements returned")
else:
    print("\nMeasurements by station:")
    for station_key, station_data in measurements.items():
        print(f"\n{station_key}:")
        for comp in ('Z', 'N', 'E'):
            comp_data = station_data[comp]
            # Components whose max_amp is NaN are omitted from the listing
            if np.isnan(comp_data['max_amp']):
                continue
            print(f"  {comp}: max_amp={comp_data['max_amp']:.2f} mm, "
                  f"min_amp={comp_data['min_amp']:.2f} mm, "
                  f"duration={comp_data['duration']:.2f} s")

## Update Picks DataFrame

Test updating the picks DataFrame with the computed measurements.

In [None]:
# Work on a copy so the picks table loaded from disk stays untouched
test_picks_df = picks_df.copy()

# Columns this cell actually creates (reported at the end)
new_cols = []

# Reuse the first existing column whose name contains "amplitude" (case-insensitive);
# str() keeps the check safe for non-string column labels
amplitude_col = next(
    (c for c in test_picks_df.columns if "amplitude" in str(c).strip().lower()),
    None,
)
if amplitude_col is None:
    amplitude_col = " Amplitude "
    test_picks_df[amplitude_col] = np.nan
    new_cols.append(amplitude_col)

# Component-specific columns, named " <component>_<measurement> " (with padding spaces)
component_cols = [
    f" {comp}_{mtype} "
    for comp in ['Z', 'N', 'E']
    for mtype in ['max_amp', 'min_amp', 'duration']
]
for col_name in component_cols:
    # Only create (and report) columns that are not already in the table
    if col_name not in test_picks_df.columns:
        test_picks_df[col_name] = np.nan
        new_cols.append(col_name)

print(f"Amplitude column: '{amplitude_col}'")
print(f"\nNew columns added: {new_cols}")

In [None]:
# Write the test event's measurements into the matching rows of the picks copy.
# The station column is resolved unconditionally because the display cell that
# follows reads `pick_station_col` even when no measurements were returned.
pick_station_col = find_column(test_picks_df, ("station name", "station", "sta"))

if measurements:
    # Loop-invariant parts of the row filter: rows of the test event, and the
    # whitespace-stripped station identifiers as strings
    is_test_event = test_picks_df[pick_event_col] == test_event_id
    pick_stations = test_picks_df[pick_station_col].astype(str).str.strip()

    for station_key, station_data in measurements.items():
        # Measurement keys are split into two dot-separated parts; the picks table
        # may store them in either order, so both spellings are accepted.
        # NOTE(review): presumably "<network>.<station>" — confirm against process_event.
        network, sta = station_key.split('.')
        candidates = [f"{sta}.{network}", f"{network}.{sta}"]

        # Rows of the test event recorded at this station
        mask = is_test_event & pick_stations.isin(candidates)

        # Copy every per-component measurement and collect the non-NaN maxima
        valid_max_amps = []
        for comp in ['Z', 'N', 'E']:
            comp_data = station_data[comp]
            for mtype in ['max_amp', 'min_amp', 'duration']:
                col_name = f" {comp}_{mtype} "
                test_picks_df.loc[mask, col_name] = comp_data[mtype]

            if not np.isnan(comp_data['max_amp']):
                valid_max_amps.append(comp_data['max_amp'])

        # Overall amplitude is the largest component maximum; it is left
        # untouched when every component's max_amp is NaN
        if valid_max_amps:
            test_picks_df.loc[mask, amplitude_col] = max(valid_max_amps)

    print("Updated picks DataFrame")
else:
    # Do not claim an update that did not happen
    print("No measurements to write; picks DataFrame left unchanged")

In [None]:
# Rows of the test event, then the subset that carries an amplitude value
updated_picks = test_picks_df.loc[test_picks_df[pick_event_col] == test_event_id]
updated_picks_with_amp = updated_picks.dropna(subset=[amplitude_col])

if len(updated_picks_with_amp) == 0:
    print("\nNo picks were updated with amplitudes")
else:
    print(f"\nUpdated picks with amplitudes ({len(updated_picks_with_amp)} rows):")
    # Station, overall amplitude, then the nine per-component measurement columns
    measurement_cols = [f" {c}_{m} " for c in ['Z', 'N', 'E']
                        for m in ['max_amp', 'min_amp', 'duration']]
    cols_to_show = [pick_station_col, amplitude_col] + measurement_cols
    display(updated_picks_with_amp[cols_to_show])

## Test with Multiple Events (Optional)

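Run `process_event` on the first few events in the catalog (including the test event used above) to check that the workflow runs end-to-end. The measurements are only reported here; they are not written back to the picks DataFrame.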

In [None]:
# Process the first few events as an end-to-end check of process_event
n_test_events = 3
test_event_ids = event_ids[:n_test_events]

# The catalog may hold fewer events than requested; report the number actually selected
n_selected = len(test_event_ids)

print(f"Processing {n_selected} events...\n")

for i, eid in enumerate(test_event_ids, start=1):
    print(f"[{i}/{n_selected}] Processing event {eid}")

    # Loop-local result name: the notebook-level `measurements` / `origin` of the
    # single test event stay intact, so earlier cells can be re-run safely
    _, eid_measurements, _ = process_event(
        eid,
        events_df,
        test_picks_df,
        client,
        sample_rate=100,
        highpass_freq=4.0,
        window_before=30,
        window_after=120,
    )

    if eid_measurements:
        print(f"  -> Got measurements for {len(eid_measurements)} stations")
    else:
        print("  -> No measurements returned")

print("\nDone!")

## Summary

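Check the overall results and data quality by counting how many rows of the test picks DataFrame have an amplitude value. Only the single test event was written back to the picks copy above, so coverage is expected to be low unless the catalog already contained amplitudes.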

In [None]:
# Summary statistics: how many pick rows carry an amplitude value
n_with_amp = test_picks_df[amplitude_col].notna().sum()
n_total = len(test_picks_df)

# Guard the percentage against an empty picks table (no division by zero)
pct_with_amp = 100 * n_with_amp / n_total if n_total else 0.0

print(f"Total picks: {n_total}")
print(f"Picks with amplitude measurements: {n_with_amp} ({pct_with_amp:.1f}%)")

# Distribution of the amplitude column, shown only when there is something to describe
if n_with_amp > 0:
    print("\nAmplitude statistics:")
    print(test_picks_df[amplitude_col].describe())