# Event Waveform Processing and Analysis

This notebook processes event waveforms from the Cascadia dataset:
1. Reads event and pick information from CSV files
2. Downloads waveforms from IRIS FDSN
3. Processes waveforms (resampling, filtering)
4. Calculates amplitudes for local magnitude estimation
5. Visualizes waveforms with picks

by Marine Denolle (mdenolle@uw.edu)

In [1]:
# Import required libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import obspy
import pandas as pd
from obspy import UTCDateTime
from obspy.clients.fdsn.client import Client
from obspy.signal.filter import highpass

In [2]:
import sys, subprocess

# Reinstall ObsPy with the pip that belongs to the interpreter running this
# kernel (sys.executable), first removing the installed copy and then
# installing the latest release.
# pip's combined stdout/stderr is captured; it is echoed when a step fails so
# the cause of a "FAILED" is visible instead of being silently discarded.
reinstall_ok = True
for cmd, action in [
    ([sys.executable, "-m", "pip", "uninstall", "-y", "obspy"], "Uninstalling"),
    ([sys.executable, "-m", "pip", "install", "--upgrade", "obspy"], "Installing latest")
]:
    print(f"{action} obspy...")
    r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if r.returncode == 0:
        print("OK")
    else:
        reinstall_ok = False
        print("FAILED")
        print(r.stdout)

# Only claim success when every pip step returned exit code 0.
if reinstall_ok:
    print("Done. Restart the kernel to use the new obspy.")
else:
    print("obspy reinstall did not complete; see the pip output above.")


Uninstalling obspy...
OK
Installing latest obspy...
OK
Done. Restart the kernel to use the new obspy.


In [3]:
# Set processing parameters.
# process_event reads these names (and `client`) from the notebook's global
# namespace, so this cell must run before process_event is called.
sample_rate = 100  # Hz; rate passed to tr.resample in process_event
highpass_freq = 4  # Hz; frequency passed to the 'highpass' filter in process_event
window_after = 120  # seconds after origin time (end of the requested waveform window)
window_before = 30  # seconds before origin time (start of the requested waveform window)

# Initialize FDSN client; every waveform request in process_event goes through it
client = Client("IRIS")

In [4]:
# Read event and pick data.
# Both catalogs are looked up in the shared ../data directory, which is also
# where the amplitude-augmented picks file is written at the end of the
# notebook. If the picks file has not been copied there yet, fall back to the
# original ~/Downloads location so existing setups keep working.
data_dir = Path('../data')
picks_filename = 'Cascadia_relocated_catalog_picks_ver_3.csv'

events_df = pd.read_csv(data_dir / 'Cascadia_relocated_catalog_ver_3.csv')

picks_path = data_dir / picks_filename
if not picks_path.exists():
    picks_path = Path('~/Downloads').expanduser() / picks_filename
picks_df = pd.read_csv(picks_path)

print("Event data summary:")
print(f"Number of events: {len(events_df)}")
print("\nFirst few events:")
print(events_df.head())

print("\nPick data summary:")
print(f"Number of picks: {len(picks_df)}")
print("\nFirst few picks:")
print(picks_df.head())

Event data summary:
Number of events: 63887

First few events:
   Latitude   Longitude   Depth (km)             Origin Time (UTC)  \
0  47.22533  -122.16895       56.111   2010-01-01T00:15:17.262282Z   
1  48.19518  -121.77276        3.820   2010-01-01T00:16:49.375360Z   
2  47.86208  -122.09903       17.799   2010-01-01T07:18:03.689209Z   
3  47.96435  -122.91906       21.286   2010-01-01T08:51:56.371091Z   
4  45.87262  -122.19180        9.822   2010-01-01T16:12:43.838660Z   

    Uncertainity (km)   Horizontal Uncertainity (km)   Geometric Std. (km)  \
0              10.223                         10.216                 0.790   
1               7.560                          3.786                 0.140   
2               5.118                          4.807                 0.195   
3               1.899                          1.884                 0.287   
4               2.842                          2.838                 0.229   

    Detection Value   Num. P   Num. S   RMS Res

In [21]:
def process_event(event_id, events_df, picks_df):
    """
    Download and process the waveforms of a single event.

    Every distinct station that has at least one pick for ``event_id`` is
    requested once from the FDSN ``client`` (any location, channels matching
    ``*H*``) for the window
    ``[origin_time - window_before, origin_time + window_after]``. Each
    returned trace is resampled to ``sample_rate``, highpass filtered at
    ``highpass_freq`` and appended to the output stream.

    ``client``, ``sample_rate``, ``highpass_freq``, ``window_before`` and
    ``window_after`` are read from the notebook's global namespace.

    A station whose name cannot be parsed, or whose download/processing
    raises, is reported with a printed message and skipped; the remaining
    stations are still processed.

    Parameters:
    -----------
    event_id : scalar
        Event identifier, compared with ``==`` against the event ID column
    events_df : pandas.DataFrame
        DataFrame containing event information. The event ID and origin time
        columns are the first columns whose names contain 'event' and
        'origin' (case-insensitive).
    picks_df : pandas.DataFrame
        DataFrame containing pick information. It must have an event ID
        column with the same name as in ``events_df`` and a column whose name
        contains 'station' (case-insensitive). Station names are split on '.'
        and unpacked as ``station, network`` -- NOTE(review): this assumes
        'STATION.NETWORK' ordering in the picks file; confirm against the data.

    Returns:
    --------
    st : obspy.Stream
        Processed waveforms of all stations that could be downloaded
    amplitudes : dict
        Maps 'NETWORK.STATION' to the mean, over the components among
        Z, N, E that are present, of the peak absolute amplitude of the first
        trace of that component
    origin_time : UTCDateTime
        Event origin time

    Raises:
    -------
    IndexError
        If a required column is missing or no row matches ``event_id``
    """
    # Locate the columns of interest by case-insensitive substring match
    event_cols = [col for col in events_df.columns if 'event' in col.lower()][0]
    origin_cols = [col for col in events_df.columns if 'origin' in col.lower()][0]
    station_cols = [col for col in picks_df.columns if 'station' in col.lower()][0]

    # Get event information
    event = events_df[events_df[event_cols] == event_id].iloc[0]
    origin_time = UTCDateTime(event[origin_cols])

    # Get associated picks (the event ID column name is shared with events_df)
    event_picks = picks_df[picks_df[event_cols] == event_id]

    # Storage for waveforms and amplitudes
    st = obspy.Stream()
    station_amplitudes = {}

    # A station usually has several picks (e.g. P and S). Remember which
    # stations were already handled so each one is downloaded only once and
    # its traces are not duplicated in the output stream.
    handled_stations = set()

    for station_label in event_picks[station_cols]:
        # Get network and station codes, without surrounding whitespace
        try:
            station, network = (part.strip() for part in str(station_label).split('.'))
        except ValueError as e:
            # The label does not consist of exactly two dot-separated parts
            print(f"Error processing {station_label}: {e}")
            continue

        station_key = network + '.' + station
        if station_key in handled_stations:
            continue
        handled_stations.add(station_key)

        try:
            # Download waveforms
            st_temp = client.get_waveforms(
                network=network,
                station=station,
                location="*",
                channel="*H*",
                starttime=origin_time - window_before,
                endtime=origin_time + window_after
            )

            # Process each trace in place, then collect it
            for tr in st_temp:
                tr.resample(sample_rate)
                tr.filter('highpass', freq=highpass_freq)
                st += tr

            # Calculate amplitude (average over the available components)
            station_amps = []
            for comp in ['Z', 'N', 'E']:
                comp_traces = st_temp.select(component=comp)
                if len(comp_traces) > 0:
                    station_amps.append(np.max(np.abs(comp_traces[0].data)))

            if station_amps:
                station_amplitudes[station_key] = np.mean(station_amps)

        except Exception as e:
            # Best effort: report the failing station and move on. The message
            # uses the station key directly; indexing the pick row with it
            # would raise inside this handler and abort the whole event.
            print(f"Error processing {station_key}: {e}")
            continue

    return st, station_amplitudes, origin_time

In [22]:
# Collect every event ID from the catalog as a plain Python list, then run
# the processing on the first event in that list.
event_id = events_df[' Event ID '].to_numpy().tolist()
st, amplitudes, origin_time = process_event(
    event_id[0],
    events_df,
    picks_df,
)

In [29]:
# Show the column labels of the picks table (note the leading/trailing spaces)
picks_df.columns

Index(['Pick Time (UTC)', ' Station Name', ' Phase Type', ' Residual (s)',
       ' Event ID ', ' Pick ID '],
      dtype='object')

In [None]:
# For each event, process the waveforms and store the station amplitudes in a
# new ' Amplitude ' column of picks_df.

# Station names with surrounding whitespace removed. This column is never
# modified by the loop, so it is computed once instead of once per station
# per event.
station_col = picks_df[' Station Name'].str.strip()

for eid in event_id:
    st, amplitudes, origin_time = process_event(eid, events_df, picks_df)

    # Rows belonging to this event; reused for every station and the printout
    event_rows = picks_df[' Event ID '] == eid

    # Store amplitudes in picks_df
    for station_key, amp in amplitudes.items():
        # Amplitude keys are 'NETWORK.STATION'; the picks table is matched
        # against the reversed 'STATION.NETWORK' form.
        network, station = station_key.split('.')
        picks_df.loc[event_rows & (station_col == station+"."+network), ' Amplitude '] = amp

    # print updated picks_df rows of this event
    print(picks_df[event_rows])

# save picks_df with amplitudes to a new csv file
picks_df.to_csv('../data/Cascadia_relocated_catalog_picks_with_amplitudes_ver_3.csv', index=False)