In [1]:
from obspy import read
from obspy import UTCDateTime
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import numpy as np

In [2]:
# Read the pick metadata, keeping only the columns used later in the notebook:
# the trace name (for matching files), the P/S pick times and the
# source-sensor distance.
needed_columns = [
    'trace_name_original_1',
    'trace_p_pick_time',
    'trace_s_pick_time',
    'source_sensor_distance',
]
pick_data = pd.read_csv("dataset_earthquakes/metadata.csv", usecols=needed_columns)

In [3]:
# Function to sanitize file names
def sanitize_filename(name):
    """Return ``name`` with every '.', ':', '-' and '_' character removed.

    Used to compare file names and metadata trace names independently of
    the separator characters they contain.
    """
    removed_chars = ".:-_"
    # A deletion-only translation table drops all four characters in one pass.
    return name.translate(str.maketrans("", "", removed_chars))

# Folder that is scanned for miniSEED files, and folder the SNR plots are written to
mseed_dir = "miniSEED_files"
output_dir = "output_snr_plots"
# Make sure the output folder (and any missing parents) exists; no error if it already does
Path(output_dir).mkdir(parents=True, exist_ok=True)


In [4]:
# SNR calculation function
def calculate_snr(data, p_pick_index, pre_window, post_window):
    """
    Calculate SNR in decibels as the ratio of signal energy to noise energy.

    The noise window is the ``pre_window`` samples immediately before the
    P pick; the signal window is the ``post_window`` samples starting at the
    P pick. Energy is the sum of squared samples in each window.

    Args:
        data (array-like): The trace data (integer or float samples).
        p_pick_index (int): Index of the P-wave pick in the data array.
        pre_window (int): Window length (samples) for noise before the P pick.
        post_window (int): Window length (samples) for the signal after the P pick.

    Returns:
        float or None: ``10 * log10(signal_energy / noise_energy)``.
        ``None`` when either window does not fit inside ``data`` or when the
        noise energy is zero; ``-inf`` when the signal energy is zero.
    """
    data = np.asanyarray(data)

    # A window that ends exactly on the last sample is still valid, because the
    # slice end is exclusive -- hence ``>`` rather than ``>=``.
    if p_pick_index - pre_window < 0 or p_pick_index + post_window > len(data):
        return None  # Avoid out-of-bounds calculations

    # Cast to float64 before squaring: integer sample arrays (e.g. int32) would
    # otherwise wrap around silently and corrupt the energy sums.
    noise = data[p_pick_index - pre_window:p_pick_index].astype(np.float64)
    signal = data[p_pick_index:p_pick_index + post_window].astype(np.float64)

    noise_energy = np.sum(noise ** 2)
    signal_energy = np.sum(signal ** 2)

    if noise_energy == 0:
        return None  # Avoid division by zero

    if signal_energy == 0:
        # log10(0) is -inf; return it directly instead of triggering a NumPy
        # divide-by-zero RuntimeWarning.
        return float("-inf")

    snr = 10 * np.log10(signal_energy / noise_energy)
    return snr

# Parameters for SNR
pre_window = 2  # Noise window before P-pick (in seconds)
post_window = 5  # Signal window after P-pick (in seconds)


def _build_pick_lookup(pick_table):
    """Map each sanitized trace name to the position of its first row in ``pick_table``."""
    lookup = {}
    for position, raw_name in enumerate(pick_table['trace_name_original_1']):
        # Missing names come back from pandas as NaN (a float); they can never
        # match a file name, so skip them instead of failing on ``.replace``.
        if isinstance(raw_name, str):
            # setdefault keeps the first occurrence, matching a top-down scan
            # that stops at the first hit.
            lookup.setdefault(sanitize_filename(raw_name), position)
    return lookup


def _save_snr_plot(trace, p_pick, s_pick, source_sensor_distance, snr, output_file):
    """Plot one trace with its P and S picks marked and save the figure to ``output_file``."""
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Plot the waveform against matplotlib date numbers
    times = trace.times("matplotlib")
    ax1.plot(times, trace.data, label=f"Waveform: {trace.id}", color="blue", alpha=0.6)
    ax1.set_xlabel("Time (UTC)")
    ax1.set_ylabel("Amplitude")
    ax1.set_title(f"Waveform and SNR: {trace.id}\nDistance: {source_sensor_distance} km, SNR: {snr:.2f} dB")
    ax1.grid()

    # Show P and S picks on the waveform
    ax1.axvline(p_pick.matplotlib_date, color="red", linestyle="--", label="P-wave Pick")
    ax1.axvline(s_pick.matplotlib_date, color="black", linestyle="--", label="S-wave Pick")

    # Render the x axis as dates/times rather than raw matplotlib date numbers
    ax1.xaxis_date()
    fig.autofmt_xdate()

    # One legend, built after all labelled artists have been added
    ax1.legend()

    # Save and close this specific figure so figures do not pile up across the loop
    fig.savefig(output_file)
    plt.close(fig)


# Sanitize the metadata names once, instead of rescanning the table for every file
pick_lookup = _build_pick_lookup(pick_data)

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    row_position = pick_lookup.get(mseed_name)
    if row_position is None:
        print(f"No match found for file: {file_path.name}")
        continue

    matched_row = pick_data.iloc[row_position]
    print(f"Match found: {file_path.name}")

    # Extract P and S pick times and sensor distance
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        s_pick = UTCDateTime(matched_row['trace_s_pick_time'])
        source_sensor_distance = matched_row['source_sensor_distance']
    except Exception as e:
        print(f"Error parsing data for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Convert the pick time and the window lengths from seconds to samples.
        # The pick offset is rounded (not truncated) so floating-point error in
        # the time difference cannot shift the pick one sample early.
        sampling_rate = trace.stats.sampling_rate
        p_pick_index = int(round((p_pick - trace.stats.starttime) * sampling_rate))
        pre_window_samples = int(pre_window * sampling_rate)
        post_window_samples = int(post_window * sampling_rate)

        # Calculate SNR
        snr = calculate_snr(trace.data, p_pick_index, pre_window_samples, post_window_samples)
        if snr is None:
            print(f"Skipping SNR calculation for trace {trace.id} due to windowing issues.")
            continue

        print(f"Calculated SNR for trace {trace.id}: {snr:.2f} dB")

        # Plot the waveform with picks and save it
        output_file = Path(output_dir) / f"{file_path.stem}_{trace.id}_snr.png"
        _save_snr_plot(trace, p_pick, s_pick, source_sensor_distance, snr, output_file)
        print(f"SNR plot saved to: {output_file}")

Match found: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED
Processing trace: WS.POZA.S5.DN1
Calculated SNR for trace WS.POZA.S5.DN1: 39.79 dB
SNR plot saved to: output_snr_plots\34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1_WS.POZA.S5.DN1_snr.png
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED
Processing trace: WS.POZA.S3.DN1
Calculated SNR for trace WS.POZA.S3.DN1: 41.21 dB
SNR plot saved to: output_snr_plots\34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1_WS.POZA.S3.DN1_snr.png
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED
Processing trace: WS.POZA.S2.DN1
Calculated SNR for trace WS.POZA.S