In [None]:
from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"
output_dir = "output_sta_lta_snr_plots"
os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

# Define STA/LTA parameters
sta_window = 1  # Short-Term Average window (seconds)
lta_window = 10  # Long-Term Average window (seconds)

# Sliding SNR Calculation Function
def sliding_snr(trace, window_length=5, overlap=0.5):
    """
    Calculate SNR as a sliding window ratio of signal to noise.
    """
    sampling_rate = trace.stats.sampling_rate
    window_samples = int(window_length * sampling_rate)
    step_samples = int(window_samples * (1 - overlap))

    snr_values = []
    times = []

    for start_idx in range(0, len(trace.data) - window_samples, step_samples):
        # Extract the window
        noise_window = trace.data[start_idx:start_idx + window_samples // 2]
        signal_window = trace.data[start_idx + window_samples // 2:start_idx + window_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2))
        signal_rms = np.sqrt(np.mean(signal_window**2))
        snr = signal_rms / noise_rms if noise_rms > 0 else 0

        snr_values.append(snr)
        times.append(trace.stats.starttime + (start_idx / sampling_rate))

    return times, snr_values

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    print(f"Match found: {file_path.name}")

    # Extract P and S pick times and sensor distance
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        s_pick = UTCDateTime(matched_row['trace_s_pick_time'])
        source_sensor_distance = matched_row['source_sensor_distance']
    except Exception as e:
        print(f"Error parsing data for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Get the sampling rate of the trace
        sampling_rate = trace.stats.sampling_rate

        # Calculate STA/LTA
        try:
            cft = classic_sta_lta(trace.data, int(sta_window * sampling_rate), int(lta_window * sampling_rate))
        except Exception as e:
            print(f"Error computing STA/LTA for trace {trace.id}: {e}")
            continue

        # Calculate SNR as a graph
        snr_times, snr_values = sliding_snr(trace, window_length=5, overlap=0.5)

        # Plot the waveform, STA/LTA ratio, and SNR
        fig, ax1 = plt.subplots(figsize=(12, 6))

        # Plot the waveform
        times = trace.times("matplotlib")
        ax1.plot(times, trace.data, label=f"Waveform: {trace.id}", color="blue", alpha=0.6)
        ax1.set_xlabel("Time (UTC)")
        ax1.set_ylabel("Amplitude")
        ax1.set_title(f"Waveform, STA/LTA, and SNR: {trace.id}\nDistance: {source_sensor_distance} km")
        ax1.legend(loc="upper left")
        ax1.grid()

        # Plot the STA/LTA ratio
        ax2 = ax1.twinx()
        ax2.plot(times[:len(cft)], cft, label="STA/LTA Ratio", color="orange", alpha=0.8)
        ax2.set_ylabel("STA/LTA Ratio")
        ax2.legend(loc="upper right")

        # Plot the SNR as a graph
        snr_times_matplotlib = [t.matplotlib_date for t in snr_times]
        ax2.plot(snr_times_matplotlib, snr_values, label="SNR", color="green", alpha=0.8)

        # Show P and S picks on the waveform
        ax1.axvline(p_pick.matplotlib_date, color="red", linestyle="--", label="P-wave Pick")
        ax1.axvline(s_pick.matplotlib_date, color="black", linestyle="--", label="S-wave Pick")

        # Save the plot
        output_file = Path(output_dir) / f"{file_path.stem}_{trace.id}_sta_lta_snr_graph.png"
        plt.savefig(output_file)
        plt.close()
        print(f"STA/LTA and SNR graph plot saved to: {output_file}")


In [None]:
# this is the code for the multiple snr windows like 2,5 seconds 5,5 seconds


from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"
output_dir = "output_sta_lta_snr(multiple)_plots"
os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

# Define STA/LTA parameters
sta_window = 1  # Short-Term Average window (seconds)
lta_window = 10  # Long-Term Average window (seconds)

# SNR Calculation Function for specific windows
def calculate_snr(trace, pre_window, post_window):
    sampling_rate = trace.stats.sampling_rate
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    snr_values = []
    times = []

    for i in range(pre_samples, len(trace.data) - post_samples):
        noise_window = trace.data[i - pre_samples:i]
        signal_window = trace.data[i:i + post_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2))
        signal_rms = np.sqrt(np.mean(signal_window**2))
        snr = signal_rms / noise_rms if noise_rms > 0 else 0

        snr_values.append(snr)
        times.append(trace.stats.starttime + i / sampling_rate)

    return times, snr_values

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    print(f"Match found: {file_path.name}")

    # Extract P and S pick times and sensor distance
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        s_pick = UTCDateTime(matched_row['trace_s_pick_time'])
        source_sensor_distance = matched_row['source_sensor_distance']
    except Exception as e:
        print(f"Error parsing data for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Get the sampling rate of the trace
        sampling_rate = trace.stats.sampling_rate

        # Calculate STA/LTA
        try:
            cft = classic_sta_lta(trace.data, int(sta_window * sampling_rate), int(lta_window * sampling_rate))
        except Exception as e:
            print(f"Error computing STA/LTA for trace {trace.id}: {e}")
            continue

        # Calculate SNR for two different windows
        snr_times_2_5, snr_values_2_5 = calculate_snr(trace, pre_window=2, post_window=2)
        snr_times_5_5, snr_values_5_5 = calculate_snr(trace, pre_window=5, post_window=5)

        # Plot the waveform, STA/LTA ratio, and SNR
        fig, ax1 = plt.subplots(figsize=(12, 6))

        # Plot the waveform
        times = trace.times("matplotlib")
        ax1.plot(times, trace.data, label=f"Waveform: {trace.id}", color="blue", alpha=0.6)
        ax1.set_xlabel("Time (UTC)")
        ax1.set_ylabel("Amplitude")
        ax1.set_title(f"Waveform, STA/LTA, and SNR: {trace.id}\nDistance: {source_sensor_distance} km")
        ax1.legend(loc="upper left")
        ax1.grid()

        # Plot the STA/LTA ratio
        ax2 = ax1.twinx()
        ax2.plot(times[:len(cft)], cft, label="STA/LTA Ratio", color="orange", alpha=0.8)
        ax2.set_ylabel("STA/LTA Ratio")
        ax2.legend(loc="upper right")

        # Plot SNR for (2, 5) window
        snr_times_2_5_matplotlib = [t.matplotlib_date for t in snr_times_2_5]
        ax2.plot(snr_times_2_5_matplotlib, snr_values_2_5, label="SNR (2s, 5s)", color="green", alpha=0.8)

        # Plot SNR for (5, 5) window
        snr_times_5_5_matplotlib = [t.matplotlib_date for t in snr_times_5_5]
        ax2.plot(snr_times_5_5_matplotlib, snr_values_5_5, label="SNR (5s, 5s)", color="purple", alpha=0.8)

        # Show P and S picks on the waveform
        ax1.axvline(p_pick.matplotlib_date, color="red", linestyle="--", label="P-wave Pick")
        ax1.axvline(s_pick.matplotlib_date, color="black", linestyle="--", label="S-wave Pick")

        # Save the plot
        output_file = Path(output_dir) / f"{file_path.stem}_{trace.id}_sta_lta_snr(multiple)_graph.png"
        plt.savefig(output_file)
        plt.close()
        print(f"STA/LTA and SNR graph plot saved to: {output_file}")


Match found: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED
Processing trace: WS.POZA.S5.DN1
STA/LTA and SNR graph plot saved to: output_sta_lta_snr(multiple)_plots\34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1_WS.POZA.S5.DN1_sta_lta_snr(multiple)_graph.png
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED
Processing trace: WS.POZA.S3.DN1
STA/LTA and SNR graph plot saved to: output_sta_lta_snr(multiple)_plots\34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1_WS.POZA.S3.DN1_sta_lta_snr(multiple)_graph.png
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED
Processing trace: WS.POZA.S2.DN1
STA/LTA and SN

KeyboardInterrupt: 

In [6]:
# this is thee code for thge 2,2 seconds window and 5,5 seconds window


from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_Z', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"
output_dir = "output_sta_lta_snr_plots(multiple)"
os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

# Define STA/LTA parameters
sta_window = 1  # Short-Term Average window (seconds)
lta_window = 10  # Long-Term Average window (seconds)

# SNR Calculation Function
def calculate_snr(trace, pre_window, post_window):
    """
    Calculate SNR for each point using pre-window (noise) and post-window (signal).
    """
    sampling_rate = trace.stats.sampling_rate
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    snr_values = []
    times = []

    for i in range(pre_samples, len(trace.data) - post_samples):
        noise_window = trace.data[i - pre_samples:i]
        signal_window = trace.data[i:i + post_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2)) if len(noise_window) > 0 else 0
        signal_rms = np.sqrt(np.mean(signal_window**2)) if len(signal_window) > 0 else 0

        # Calculate SNR
        snr = signal_rms / noise_rms if noise_rms > 0 else 0
        snr_values.append(snr)
        times.append(trace.stats.starttime + (i / sampling_rate))

    return times, snr_values

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_Z']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    print(f"Match found: {file_path.name}")

    # Extract P and S pick times and sensor distance
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        s_pick = UTCDateTime(matched_row['trace_s_pick_time'])
        source_sensor_distance = matched_row['source_sensor_distance']
    except Exception as e:
        print(f"Error parsing data for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Get the sampling rate of the trace
        sampling_rate = trace.stats.sampling_rate

        # Calculate STA/LTA
        try:
            cft = classic_sta_lta(trace.data, int(sta_window * sampling_rate), int(lta_window * sampling_rate))
        except Exception as e:
            print(f"Error computing STA/LTA for trace {trace.id}: {e}")
            continue

        # Calculate SNR for both window configurations
        snr_times_2_2, snr_values_2_2 = calculate_snr(trace, pre_window=2, post_window=2)
        snr_times_5_5, snr_values_5_5 = calculate_snr(trace, pre_window=5, post_window=5)

        # Plot the waveform, STA/LTA ratio, and SNR
        fig, ax1 = plt.subplots(figsize=(12, 6))

        # Plot the waveform
        times = trace.times("matplotlib")
        ax1.plot(times, trace.data, label=f"Waveform: {trace.id}", color="blue", alpha=0.6)
        ax1.set_xlabel("Time (UTC)")
        ax1.set_ylabel("Amplitude")
        ax1.set_title(f"Waveform, STA/LTA, and SNR: {trace.id}\nDistance: {source_sensor_distance} km")
        ax1.legend(loc="upper left")
        ax1.grid()

        # Plot the STA/LTA ratio
        ax2 = ax1.twinx()
        ax2.plot(times[:len(cft)], cft, label="STA/LTA Ratio", color="orange", alpha=0.8)
        ax2.set_ylabel("STA/LTA Ratio")
        ax2.legend(loc="upper right")

        # Plot the SNR as a graph for both configurations
        snr_times_2_2_matplotlib = [t.matplotlib_date for t in snr_times_2_2]
        snr_times_5_5_matplotlib = [t.matplotlib_date for t in snr_times_5_5]

        ax2.plot(snr_times_2_2_matplotlib, snr_values_2_2, label="SNR (2s, 2s)", color="green", alpha=0.8)
        ax2.plot(snr_times_5_5_matplotlib, snr_values_5_5, label="SNR (5s, 5s)", color="purple", alpha=0.8)

        # Show P and S picks on the waveform
        ax1.axvline(p_pick.matplotlib_date, color="red", linestyle="--", label="P-wave Pick")
        ax1.axvline(s_pick.matplotlib_date, color="black", linestyle="--", label="S-wave Pick")

        # Save the plot
        output_file = Path(output_dir) / f"{file_path.stem}_{trace.id}_sta_lta_snr_graph(multiple).png"
        plt.savefig(output_file)
        plt.close()
        print(f"STA/LTA and SNR graph plot saved to: {output_file}")


No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
Match found: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Processing trace: WS.POZA.S5.DNZ
STA/LTA and SNR graph plot saved to: output_sta_lta_snr_plots(multiple)\34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ_WS.POZA.S5.DNZ_sta_lta_snr_graph(multiple).png
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
Match found: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Processing trace: WS.POZA.S3.DNZ
STA/LTA and SNR graph plot saved to: output_sta_lta_snr_plots(multiple)\34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ_WS.POZA.S3.DNZ_sta_lta_snr_graph(multiple).png
No match found for file: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED
No match found for file: 34161341_2

In [2]:
from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"
output_dir = "output_mer_plots"
os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

# Define STA/LTA parameters
sta_window = 1  # Short-Term Average window (seconds)
lta_window = 10  # Long-Term Average window (seconds)

# Modified Energy Ratio Calculation Function
def calculate_mer(trace, pick_time, pre_window, post_window):
    """
    Calculate the Modified Energy Ratio (MER) for a given trace and pick time.

    Parameters:
        trace (obspy.Trace): The trace containing the waveform data.
        pick_time (UTCDateTime): The pick time to center the calculation around.
        pre_window (float): Duration of the pre-noise window in seconds.
        post_window (float): Duration of the post-signal window in seconds.

    Returns:
        float: The Modified Energy Ratio (MER).
    """
    sampling_rate = trace.stats.sampling_rate
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    # Calculate start and end indices for noise and signal windows
    pick_idx = int((pick_time - trace.stats.starttime) * sampling_rate)
    noise_window = trace.data[max(0, pick_idx - pre_samples):pick_idx]
    signal_window = trace.data[pick_idx:min(len(trace.data), pick_idx + post_samples)]

    # Calculate energy in the windows
    noise_energy = np.sum(noise_window**2)
    signal_energy = np.sum(signal_window**2)

    return signal_energy / noise_energy if noise_energy > 0 else 0

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    print(f"Match found: {file_path.name}")

    # Extract P and S pick times and sensor distance
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        s_pick = UTCDateTime(matched_row['trace_s_pick_time'])
        source_sensor_distance = matched_row['source_sensor_distance']
    except Exception as e:
        print(f"Error parsing data for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Get the sampling rate of the trace
        sampling_rate = trace.stats.sampling_rate

        # Calculate STA/LTA
        try:
            cft = classic_sta_lta(trace.data, int(sta_window * sampling_rate), int(lta_window * sampling_rate))
        except Exception as e:
            print(f"Error computing STA/LTA for trace {trace.id}: {e}")
            continue

        # Calculate MER for different window configurations
        mer_2_5 = calculate_mer(trace, p_pick, pre_window=2, post_window=5)
        mer_5_5 = calculate_mer(trace, p_pick, pre_window=5, post_window=5)

        # Plot the waveform, STA/LTA, and MER
        fig, ax1 = plt.subplots(figsize=(12, 6))

        # Plot the waveform
        times = trace.times("matplotlib")
        ax1.plot(times, trace.data, label=f"Waveform: {trace.id}", color="blue", alpha=0.6)
        ax1.set_xlabel("Time (UTC)")
        ax1.set_ylabel("Amplitude")
        ax1.set_title(f"Waveform, STA/LTA, and MER: {trace.id}\nDistance: {source_sensor_distance} km")
        ax1.legend(loc="upper left")
        ax1.grid()

        # Plot the STA/LTA ratio
        ax2 = ax1.twinx()
        ax2.plot(times[:len(cft)], cft, label="STA/LTA Ratio", color="orange", alpha=0.8)
        ax2.set_ylabel("STA/LTA Ratio")
        ax2.legend(loc="upper right")

        # Add MER values as annotations
        ax1.axhline(mer_2_5, color="green", linestyle="--", label="MER (2s Pre, 5s Post)", alpha=0.8)
        ax1.axhline(mer_5_5, color="red", linestyle="--", label="MER (5s Pre, 5s Post)", alpha=0.8)

        # Show P and S picks on the waveform
        ax1.axvline(p_pick.matplotlib_date, color="red", linestyle="--", label="P-wave Pick")
        ax1.axvline(s_pick.matplotlib_date, color="black", linestyle="--", label="S-wave Pick")

        # Save the plot
        output_file = Path(output_dir) / f"{file_path.stem}_{trace.id}_mer_plot.png"
        plt.savefig(output_file)
        plt.close()
        print(f"MER plot saved to: {output_file}")


Match found: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED
Processing trace: WS.POZA.S5.DN1
MER plot saved to: output_mer_plots\34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1_WS.POZA.S5.DN1_mer_plot.png
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED
Processing trace: WS.POZA.S3.DN1
MER plot saved to: output_mer_plots\34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1_WS.POZA.S3.DN1_mer_plot.png
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Match found: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED
Processing trace: WS.POZA.S2.DN1
MER plot saved to: output_mer_plots\34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1_WS.POZA.S2.DN1_mer_plot.png
No match

In [8]:
#code for the checking of wheather the plot was matc hing or not 




from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"

# Define SNR Calculation Function
def calculate_snr(trace, pre_window, post_window):
    """
    Calculate SNR for each point using pre-window (noise) and post-window (signal).
    """
    sampling_rate = trace.stats.sampling_rate
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    snr_values = []
    times = []

    for i in range(pre_samples, len(trace.data) - post_samples):
        noise_window = trace.data[i - pre_samples:i]
        signal_window = trace.data[i:i + post_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2)) if len(noise_window) > 0 else 0
        signal_rms = np.sqrt(np.mean(signal_window**2)) if len(signal_window) > 0 else 0

        # Calculate SNR
        snr = signal_rms / noise_rms if noise_rms > 0 else 0
        snr_values.append(snr)
        times.append(trace.stats.starttime + (i / sampling_rate))

    return times, snr_values

# Parameters
pre_window = 2  # Pre-window for SNR calculation (seconds)
post_window = 2  # Post-window for SNR calculation (seconds)
snr_threshold = 2  # Minimum SNR value to consider
time_tolerance = 1.0  # Tolerance in seconds for "near match" to P-pick time

# Initialize counters
total_files = 0.0
matching_files = 0

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    total_files += 1

    # Extract P-pick time
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
    except Exception as e:
        print(f"Error parsing P-pick time for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Calculate SNR for 2s,2s duration window
        try:
            snr_times, snr_values = calculate_snr(trace, pre_window, post_window)
        except Exception as e:
            print(f"Error calculating SNR for trace {trace.id}: {e}")
            continue

        # Find the peak SNR and its corresponding time
        peak_snr = max(snr_values)
        peak_time = snr_times[np.argmax(snr_values)]

        # Check if the peak SNR exceeds the threshold and is near the P-pick time
        if peak_snr >= snr_threshold and abs((peak_time - p_pick).total_seconds()) <= time_tolerance:
            matching_files += 1
            print(f"Match found for file: {file_path.name}, Trace: {trace.id}")
            break  # Only count the file once if one trace matches

print(f"\nTotal files processed: {total_files}")
print(f"Files with SNR peak near P-pick: {matching_files}")


Processing trace: WS.POZA.S5.DN1


AttributeError: 'float' object has no attribute 'total_seconds'

In [9]:
from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"

# Define SNR Calculation Function
def calculate_snr(trace, pre_window, post_window):
    """
    Calculate SNR for each point using pre-window (noise) and post-window (signal).
    """
    sampling_rate = trace.stats.sampling_rate
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    snr_values = []
    times = []

    for i in range(pre_samples, len(trace.data) - post_samples):
        noise_window = trace.data[i - pre_samples:i]
        signal_window = trace.data[i:i + post_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2)) if len(noise_window) > 0 else 0
        signal_rms = np.sqrt(np.mean(signal_window**2)) if len(signal_window) > 0 else 0

        # Calculate SNR
        snr = signal_rms / noise_rms if noise_rms > 0 else 0
        snr_values.append(snr)
        times.append(trace.stats.starttime + (i / sampling_rate))

    return times, snr_values

# Parameters
pre_window = 2  # Pre-window for SNR calculation (seconds)
post_window = 2  # Post-window for SNR calculation (seconds)
snr_threshold = 2  # Minimum SNR value to consider
time_tolerance = 1.0  # Tolerance in seconds for "near match" to P-pick time

# Initialize counters
total_files = 0
matching_files = 0

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    total_files += 1

    # Extract P-pick time
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
    except Exception as e:
        print(f"Error parsing P-pick time for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Calculate SNR for 2s,2s duration window
        try:
            snr_times, snr_values = calculate_snr(trace, pre_window, post_window)
        except Exception as e:
            print(f"Error calculating SNR for trace {trace.id}: {e}")
            continue

        # Find the peak SNR and its corresponding time
        peak_snr = max(snr_values)
        peak_time = snr_times[np.argmax(snr_values)]

        # Check if the peak SNR exceeds the threshold and is near the P-pick time
        if peak_snr >= snr_threshold:
            # Convert both times to UTCDateTime if not already
            if not isinstance(peak_time, UTCDateTime):

SyntaxError: incomplete input (3296768272.py, line 104)

In [13]:
#claude ai version code


from obspy import read
from obspy import UTCDateTime
import pandas as pd
from pathlib import Path
import numpy as np
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

def sanitize_filename(name):
    """Sanitize file names for consistent matching"""
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

def calculate_snr(trace, time_point, pre_window=2, post_window=2):
    """
    Calculate SNR for a specific time point using pre and post windows
    
    Args:
        trace: ObsPy trace object
        time_point: UTCDateTime of the point to calculate SNR around
        pre_window: seconds before time_point for noise window
        post_window: seconds after time_point for signal window
    
    Returns:
        float: SNR value
    """
    sampling_rate = trace.stats.sampling_rate
    
    # Convert time point to sample index
    point_index = int((time_point - trace.stats.starttime) * sampling_rate)
    
    # Calculate window indices
    noise_start = max(0, point_index - int(pre_window * sampling_rate))
    noise_end = point_index
    signal_start = point_index
    signal_end = min(len(trace.data), point_index + int(post_window * sampling_rate))
    
    # Extract windows
    noise_window = trace.data[noise_start:noise_end]
    signal_window = trace.data[signal_start:signal_end]
    
    # Calculate RMS values
    noise_rms = np.sqrt(np.mean(noise_window**2)) if len(noise_window) > 0 else 1e-10
    signal_rms = np.sqrt(np.mean(signal_window**2)) if len(signal_window) > 0 else 0
    
    return signal_rms / noise_rms

def calculate_snr_series(trace, pre_window=2, post_window=2):
    """
    Calculate SNR for entire trace using sliding windows
    
    Returns:
        times: array of timestamps
        snr_values: array of SNR values
    """
    sampling_rate = trace.stats.sampling_rate
    times = []
    snr_values = []
    
    for i in range(len(trace.data)):
        current_time = trace.stats.starttime + i/sampling_rate
        snr = calculate_snr(trace, current_time, pre_window, post_window)
        times.append(current_time)
        snr_values.append(snr)
    
    return np.array(times), np.array(snr_values)

def find_snr_peaks(times, snr_values, min_snr=2.0, min_distance_samples=100):
    """Find peaks in SNR values above threshold"""
    peaks, _ = find_peaks(snr_values, height=min_snr, distance=min_distance_samples)
    return times[peaks], snr_values[peaks]

def validate_p_picks(trace, p_pick_time, time_tolerance=1.0, snr_threshold=2.0):
    """
    Validate P-picks by comparing with SNR peaks
    
    Returns:
        bool: Whether P-pick is valid
        float: SNR at P-pick time
        UTCDateTime: Nearest SNR peak time
        float: SNR at peak
    """
    # Calculate SNR series
    times, snr_values = calculate_snr_series(trace)
    
    # Find SNR peaks
    peak_times, peak_snrs = find_snr_peaks(times, snr_values, snr_threshold)
    
    # Calculate SNR at P-pick time
    p_pick_snr = calculate_snr(trace, p_pick_time)
    
    # Find nearest peak to P-pick
    if len(peak_times) > 0:
        time_diffs = np.abs([t.timestamp - p_pick_time.timestamp for t in peak_times])
        nearest_peak_idx = np.argmin(time_diffs)
        nearest_peak_time = peak_times[nearest_peak_idx]
        nearest_peak_snr = peak_snrs[nearest_peak_idx]
        
        # Check if within tolerance
        is_valid = time_diffs[nearest_peak_idx] <= time_tolerance
    else:
        is_valid = False
        nearest_peak_time = None
        nearest_peak_snr = None
    
    return is_valid, p_pick_snr, nearest_peak_time, nearest_peak_snr

def plot_validation_results(trace, p_pick_time, times, snr_values, peak_times, peak_snrs):
    """Plot trace data, SNR values, and validation results"""
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
    
    # Plot trace data
    trace_times = np.arange(len(trace.data)) / trace.stats.sampling_rate
    ax1.plot(trace_times, trace.data, 'b-', label='Trace Data')
    ax1.axvline(p_pick_time - trace.stats.starttime, color='r', linestyle='--', label='P-Pick')
    ax1.set_title('Seismic Trace')
    ax1.legend()
    
    # Plot SNR values
    ax2.plot(times - trace.stats.starttime, snr_values, 'g-', label='SNR')
    ax2.scatter(peak_times - trace.stats.starttime, peak_snrs, color='r', label='SNR Peaks')
    ax2.axvline(p_pick_time - trace.stats.starttime, color='r', linestyle='--', label='P-Pick')
    ax2.axhline(2.0, color='k', linestyle=':', label='SNR Threshold')
    ax2.set_title('Signal-to-Noise Ratio')
    ax2.legend()
    
    plt.tight_layout()
    return fig

# Main processing loop
mseed_dir = "miniSEED_files"
results = []

for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name)
    
    # Find matching metadata
    matched_row = pick_data[pick_data['trace_name_original_1'].apply(sanitize_filename) == mseed_name].iloc[0]
    
    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue
    
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
        stream = read(file_path)
        
        for trace in stream:
            print(f"Processing trace: {trace.id}")
            
            # Validate P-pick
            is_valid, p_pick_snr, nearest_peak_time, nearest_peak_snr = validate_p_picks(trace, p_pick)
            
            # Calculate SNR series for plotting
            times, snr_values = calculate_snr_series(trace)
            peak_times, peak_snrs = find_snr_peaks(times, snr_values)
            
            # Plot results
            fig = plot_validation_results(trace, p_pick, times, snr_values, peak_times, peak_snrs)
            plt.savefig(f"validation_{trace.id}.png")
            plt.close()
            
            # Store results
            results.append({
                'trace_id': trace.id,
                'is_valid': is_valid,
                'p_pick_snr': p_pick_snr,
                'nearest_peak_time': nearest_peak_time,
                'nearest_peak_snr': nearest_peak_snr,
                'time_difference': None if nearest_peak_time is None else abs(nearest_peak_time - p_pick)
            })
            
    except Exception as e:
        print(f"Error processing {file_path.name}: {e}")

# Create summary DataFrame
results_df = pd.DataFrame(results)
print("\nValidation Results Summary:")
print(f"Total traces processed: {len(results_df)}")
print(f"Valid P-picks: {results_df['is_valid'].sum()}")
print(f"Average P-pick SNR: {results_df['p_pick_snr'].mean():.2f}")

Processing trace: WS.POZA.S5.DN1


IndexError: single positional indexer is out-of-bounds

In [12]:
#gemini version of the code 
from obspy import read
from obspy import UTCDateTime
from obspy.signal.trigger import classic_sta_lta
import pandas as pd
from pathlib import Path
import os
import numpy as np

# Load the pick data
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv",
    usecols=['trace_name_original_1', 'trace_p_pick_time', 'trace_s_pick_time', 'source_sensor_distance']
)

# Function to sanitize file names
def sanitize_filename(name):
    return name.replace(".", "").replace(":", "").replace("-", "").replace("_", "")

# Directory containing miniSEED files
mseed_dir = "miniSEED_files"

# Define SNR Calculation Function
def calculate_snr(trace, pre_window, post_window, p_pick_time, time_tolerance):
    """
    Calculate SNR for each point considering P-pick time and tolerance.
    """
    sampling_rate = trace.stats.sampling_rate

    # Convert pre-window and post-window to samples
    pre_samples = int(pre_window * sampling_rate)
    post_samples = int(post_window * sampling_rate)

    snr_values = []
    times = []

    # Adjust window based on P-pick time and tolerance
    start_time = p_pick_time - time_tolerance
    end_time = p_pick_time + time_tolerance

    # Ensure window stays within trace bounds
    start_index = max(0, int((start_time - trace.stats.starttime) * sampling_rate))
    end_index = min(len(trace.data), int((end_time - trace.stats.starttime) * sampling_rate))

    # Calculate SNR within the adjusted window
    for i in range(start_index, end_index - post_samples):
        noise_window = trace.data[i: i + pre_samples]
        signal_window = trace.data[i + pre_samples: i + pre_samples + post_samples]

        # Calculate RMS values
        noise_rms = np.sqrt(np.mean(noise_window**2)) if len(noise_window) > 0 else 0
        signal_rms = np.sqrt(np.mean(signal_window**2)) if len(signal_window) > 0 else 0

        # Calculate SNR
        snr = signal_rms / noise_rms if noise_rms > 0 else 0
        snr_values.append(snr)
        times.append(trace.stats.starttime + (i / sampling_rate))

    return times, snr_values

# Parameters
pre_window = 2  # Pre-window for SNR calculation (seconds)
post_window = 2  # Post-window for SNR calculation (seconds)
snr_threshold = 1.3  # Minimum SNR value to consider
time_tolerance = 1.0  # Tolerance in seconds for "near match" to P-pick time

# Initialize counters
total_files = 0
matching_files = 0

# Loop through each miniSEED file in the directory
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())  # Sanitize the miniSEED file name

    # Check if the miniSEED file matches an entry in the CSV
    matched_row = None
    for _, row in pick_data.iterrows():
        if sanitize_filename(row['trace_name_original_1']) == mseed_name:
            matched_row = row
            break

    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    total_files += 1

    # Extract P-pick time
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
    except Exception as e:
        print(f"Error parsing P-pick time for {file_path.name}: {e}")
        continue

    # Read the miniSEED file
    stream = read(file_path)

    # Process each trace in the miniSEED file
    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Calculate SNR with adjusted window
        times, snr_values = calculate_snr(trace, pre_window, post_window, p_pick, time_tolerance)

        # Further processing or analysis with SNR values and times
        # ... (e.g., plot SNR, identify peaks, etc.)

        # Check if SNR exceeds threshold
        if any(snr > snr_threshold for snr in snr_values):
            matching_files += 1
            print(f"Trace {trace.id} meets SNR threshold.")

print(f"Total files processed: {total_files}")
print(f"Files meeting SNR threshold: {matching_files}")

Processing trace: WS.POZA.S5.DN1
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Processing trace: WS.POZA.S3.DN1
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Processing trace: WS.POZA.S2.DN1
No match found for file: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DNZ.MSEED
Processing trace: WS.POZA.S4.DN1
No match found for file: 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DNZ.MSEED
Processing trace: WS.POZA.S6.DN1
No match found for file: 34161341_2023-02-21T00.07.00.493150Z_WS.POZA.S6.DN2.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.493150Z_WS.POZA.S6.DNZ.MSEED
Error