In [1]:
from obspy import read, UTCDateTime
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.stats import mode, skew, kurtosis, zscore

# Load pick data: only the trace-name column (used below to match miniSEED
# file names) and the P-pick time column are read from the metadata CSV.
pick_data = pd.read_csv(
    "dataset_earthquakes/metadata.csv", 
    usecols=['trace_name_original_Z', 'trace_p_pick_time']
)

# Function to sanitize file names
def sanitize_filename(name):
    """Return *name* with every '.', ':', '-' and '_' character removed.

    Used to compare file names and metadata trace names that differ only
    in these separator characters.
    """
    # A translation table with only a "delete" set strips all four
    # separators in a single pass over the string.
    strip_separators = str.maketrans("", "", ".:-_")
    return name.translate(strip_separators)

# Function to convert ADX355 counts to acceleration
def convert_adx355_counts_to_acceleration(counts, scale_factor=256000):
    """Convert raw sensor counts to acceleration.

    Args:
        counts: Raw count value(s); a scalar or anything supporting
            division and multiplication by a number (e.g. a NumPy array).
        scale_factor: Number of counts corresponding to 1 g.

    Returns:
        A ``(acceleration_g, acceleration_m_s2)`` tuple: the acceleration
        in g and the same value in m/s² (using 1 g = 9.81 m/s²).
    """
    in_g = counts / scale_factor
    in_m_s2 = in_g * 9.81
    return in_g, in_m_s2

# Directories
mseed_dir = "miniSEED_files"  # folder scanned for *.MSEED files
output_csv = "acceleration_stats_250_samples.csv"  # destination of the statistics table

# Number of samples per window
num_samples = 250  # Adjusted to 250 samples per window

# Store results
results = []  # one list per processed trace; turned into a DataFrame at the end

# Index the metadata once: sanitized trace name -> first row carrying it.
# This replaces a full scan of the metadata table for every miniSEED file.
pick_lookup = {}
for _, row in pick_data.iterrows():
    trace_name = row['trace_name_original_Z']
    # Missing names come back from pandas as NaN (a float), which cannot be
    # sanitized; such rows can never match a file, so skip them.
    if isinstance(trace_name, str):
        # setdefault keeps the first occurrence, as the original scan did
        pick_lookup.setdefault(sanitize_filename(trace_name), row)

# Loop through miniSEED files
for file_path in Path(mseed_dir).glob("*.MSEED"):
    mseed_name = sanitize_filename(file_path.name.strip())

    # Find matching metadata row
    matched_row = pick_lookup.get(mseed_name)
    if matched_row is None:
        print(f"No match found for file: {file_path.name}")
        continue

    print(f"Processing: {file_path.name}")

    # Extract P-pick time
    try:
        p_pick = UTCDateTime(matched_row['trace_p_pick_time'])
    except Exception as e:
        print(f"Error parsing P-pick time for {file_path.name}: {e}")
        continue

    # Read miniSEED file. A single unreadable file must not abort the run:
    # results are only written to disk after the whole loop has finished.
    try:
        stream = read(str(file_path))
    except Exception as e:
        print(f"Error reading {file_path.name}: {e}")
        continue

    for trace in stream:
        print(f"Processing trace: {trace.id}")

        # Not used in this cell; kept because later notebook cells may read it.
        sampling_rate = trace.stats.sampling_rate
        total_samples = len(trace.data)

        # np.argmin below raises on an empty array, so skip empty traces.
        if total_samples == 0:
            print(f"Skipping empty trace: {trace.id}")
            continue

        try:
            # Convert counts to acceleration
            _, accel_m_s2 = convert_adx355_counts_to_acceleration(trace.data)
        except Exception as e:
            print(f"Error converting counts for {trace.id}: {e}")
            continue

        # Get time array (one evenly spaced timestamp per sample)
        times = np.linspace(trace.stats.starttime.timestamp, trace.stats.endtime.timestamp, total_samples)

        # Find P-pick index: the sample whose timestamp is closest to the pick.
        # NOTE(review): if the pick lies outside the trace this silently
        # clamps to the first/last sample - confirm that is intended.
        closest_idx = np.argmin(np.abs(times - p_pick.timestamp))

        # Extract statistics for three consecutive windows of num_samples:
        # [pick - n, pick), [pick, pick + n), [pick + n, pick + 2n)
        stats_per_file = [file_path.name, trace.id, str(p_pick)]  # First columns

        for offset in [-num_samples, 0, num_samples]:
            start_idx = closest_idx + offset
            end_idx = start_idx + num_samples

            # Ensure indices are within bounds
            if start_idx < 0 or end_idx > total_samples:
                print(f"Skipping window for {trace.id} due to insufficient data.")
                # Keep the row aligned with the header: 10 placeholders per window
                stats_per_file.extend(["N/A"] * 10)
                continue

            segment = accel_m_s2[start_idx:end_idx]

            # Compute statistics
            mean_val = np.mean(segment)
            median_val = np.median(segment)
            mode_val = mode(segment, keepdims=True).mode[0]
            std_val = np.std(segment)
            skewness_val = skew(segment)
            kurtosis_val = kurtosis(segment)
            variance_val = np.var(segment)
            max_val = np.max(segment)
            min_val = np.min(segment)
            # NOTE(review): the mean of z-scores is ~0 by construction (NaN for
            # a constant segment); kept unchanged so the CSV layout is stable.
            z_scores = np.mean(zscore(segment))

            # Append stats to the row (order must match stats_headers)
            stats_per_file.extend([mean_val, median_val, mode_val, std_val,
                                   skewness_val, kurtosis_val, variance_val,
                                   max_val, min_val, z_scores])

        results.append(stats_per_file)

# Column layout: three identifying columns, then the same ten statistics
# repeated for each of the three windows (S1, S2, S3).
stats_headers = ["Mean", "Median", "Mode", "Std Dev", "Skewness", "Kurtosis", "Variance", "Max", "Min", "Z-Score"]
columns = ["File", "Trace ID", "P-Pick Time"]
for sample in ("S1", "S2", "S3"):
    columns += [f"{sample} {stat}" for stat in stats_headers]

# Write one CSV row per processed trace, without the DataFrame index
df_results = pd.DataFrame(results, columns=columns)
df_results.to_csv(output_csv, index=False)
print(f"Acceleration statistics saved to {output_csv}")


No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DN2.MSEED
Processing: 34161341_2023-02-21T00.07.00.489723Z_WS.POZA.S5.DNZ.MSEED
Processing trace: WS.POZA.S5.DNZ
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DN2.MSEED
Processing: 34161341_2023-02-21T00.07.00.490024Z_WS.POZA.S3.DNZ.MSEED
Processing trace: WS.POZA.S3.DNZ
No match found for file: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DN2.MSEED
Processing: 34161341_2023-02-21T00.07.00.490032Z_WS.POZA.S2.DNZ.MSEED
Processing trace: WS.POZA.S2.DNZ
No match found for file: 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DN1.MSEED
No match found for file: 34161341_2023-02-21T00.07.00.490708Z_WS.POZA.S4.DN2.MSEED
Processing: 34161341_2023-0