In [1]:
import random
from obspy import read
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.stats import mode, skew, kurtosis, zscore

# Directories
mseed_noise_dir = "miniSEED_files_noise"
output_noise_csv = "noise_acceleration_stats_250_samples.csv"

# Number of samples per window
num_samples = 250  # 250 samples for each random window

# Number of random windows drawn from every trace; the S1..Sn column headers
# further down are generated from this value so the two can never drift apart.
num_windows = 3

# Dedicated, seeded generator: the randomly chosen windows are identical on
# every run, and the global `random` state is left untouched.
random_seed = 42
window_rng = random.Random(random_seed)

# Store results (one row per successfully processed trace)
noise_results = []

# NOTE: convert_adx355_counts_to_acceleration is not defined in this cell. It
# must already exist in the kernel (it is defined in a later cell of this
# notebook); otherwise the `except` branch below skips every trace.

# Loop through noise miniSEED files. Sorted so that the row order and, together
# with the seed, the selected windows do not depend on directory listing order.
for file_path in sorted(Path(mseed_noise_dir).glob("*.MSEED")):
    mseed_name = file_path.stem  # not used in this cell; kept in case a later cell reads it

    print(f"Processing noise file: {file_path.name}")

    # Read miniSEED file
    stream = read(file_path)

    for trace in stream:
        print(f"Processing trace: {trace.id}")

        sampling_rate = trace.stats.sampling_rate  # not used in this cell; kept in case a later cell reads it
        total_samples = len(trace.data)

        # random.randint(0, total_samples - num_samples) raises ValueError when
        # the trace is shorter than one window, so skip such traces explicitly
        # instead of aborting the whole run.
        if total_samples < num_samples:
            print(f"Skipping {trace.id}: only {total_samples} samples, need at least {num_samples}")
            continue

        try:
            # Convert counts to acceleration
            _, accel_m_s2 = convert_adx355_counts_to_acceleration(trace.data)
        except Exception as e:
            print(f"Error converting counts for {trace.id}: {e}")
            continue

        # Extract statistics for the random windows
        stats_per_file = [file_path.name, trace.id]  # First columns

        for _ in range(num_windows):
            # Select random start index for the window (both bounds inclusive,
            # so the last possible window ends exactly at the end of the trace)
            start_idx = window_rng.randint(0, total_samples - num_samples)
            end_idx = start_idx + num_samples

            segment = accel_m_s2[start_idx:end_idx]

            # Compute statistics
            mean_val = np.mean(segment)
            median_val = np.median(segment)
            mode_val = mode(segment, keepdims=True).mode[0]
            std_val = np.std(segment)
            skewness_val = skew(segment)
            kurtosis_val = kurtosis(segment)
            variance_val = np.var(segment)
            max_val = np.max(segment)
            min_val = np.min(segment)
            # The mean of the z-scores of a sample is ~0 by construction (NaN for
            # a zero-variance window); kept so the CSV schema stays unchanged.
            z_scores = np.mean(zscore(segment))

            # Append stats to the row
            stats_per_file.extend([mean_val, median_val, mode_val, std_val,
                                   skewness_val, kurtosis_val, variance_val,
                                   max_val, min_val, z_scores])

        noise_results.append(stats_per_file)

# Define column headers
columns = ["File", "Trace ID"]
stats_headers = ["Mean", "Median", "Mode", "Std Dev", "Skewness", "Kurtosis", "Variance", "Max", "Min", "Z-Score"]

# Repeat headers for S1, S2, ... (one group per random window)
for sample in [f"S{window_no}" for window_no in range(1, num_windows + 1)]:
    columns.extend([f"{sample} {stat}" for stat in stats_headers])

# Make an empty result visible instead of silently writing a header-only file
if not noise_results:
    print("Warning: no traces were processed; the CSV will contain only the header row")

# Save results to CSV
df_noise_results = pd.DataFrame(noise_results, columns=columns)
df_noise_results.to_csv(output_noise_csv, index=False)
print(f"Noise acceleration statistics saved to {output_noise_csv}")


Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DN1.MSEED
Processing trace: WS.POZA.S5.DN1
Error converting counts for WS.POZA.S5.DN1: name 'convert_adx355_counts_to_acceleration' is not defined
Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DN2.MSEED
Processing trace: WS.POZA.S5.DN2
Error converting counts for WS.POZA.S5.DN2: name 'convert_adx355_counts_to_acceleration' is not defined
Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DNZ.MSEED
Processing trace: WS.POZA.S5.DNZ
Error converting counts for WS.POZA.S5.DNZ: name 'convert_adx355_counts_to_acceleration' is not defined
Processing noise file: noise_2023-02-21T01_07_00.490149Z_WS.POZA.S6.DN1.MSEED
Processing trace: WS.POZA.S6.DN1
Error converting counts for WS.POZA.S6.DN1: name 'convert_adx355_counts_to_acceleration' is not defined
Processing noise file: noise_2023-02-21T01_07_00.490149Z_WS.POZA.S6.DN2.MSEED
Processing trace: WS.POZA.S6.DN2
Error converting counts for W

In [2]:
import random
from obspy import read
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.stats import mode, skew, kurtosis, zscore

# Strip separator characters from a file name
def sanitize_filename(name):
    """Return ``name`` with every '.', ':', '-' and '_' character removed."""
    # A single translate pass deletes all four characters at once.
    removal_table = str.maketrans("", "", ".:-_")
    return name.translate(removal_table)

# Scale raw ADX355 counts into acceleration values
def convert_adx355_counts_to_acceleration(counts, scale_factor=256000):
    """Scale raw counts into acceleration in g and in m/s².

    Parameters:
        counts: value(s) to convert; anything supporting ``/`` and ``*`` with a
            number (the notebook passes ``trace.data``).
        scale_factor: divisor applied to ``counts`` to obtain g
            (presumably the sensor sensitivity in counts per g; verify against
            the device configuration).

    Returns:
        Tuple ``(acceleration_g, acceleration_m_s2)`` where the second element
        is the first multiplied by 9.81.
    """
    gravity_m_s2 = 9.81  # factor used to go from g to m/s²
    in_g = counts / scale_factor
    return in_g, in_g * gravity_m_s2

# Directories
mseed_noise_dir = "miniSEED_files_noise"
output_noise_csv = "noise_acceleration_stats_250_samples.csv"

# Number of samples per window
num_samples = 250  # 250 samples for each random window

# Number of random windows drawn from every trace; the S1..Sn column headers
# further down are generated from this value so the two can never drift apart.
num_windows = 3

# Dedicated, seeded generator: the randomly chosen windows are identical on
# every run, and the global `random` state is left untouched.
random_seed = 42
window_rng = random.Random(random_seed)

# Store results (one row per successfully processed trace)
noise_results = []

# Loop through noise miniSEED files. Sorted so that the row order and, together
# with the seed, the selected windows do not depend on directory listing order.
for file_path in sorted(Path(mseed_noise_dir).glob("*.MSEED")):
    mseed_name = file_path.stem  # not used in this cell; kept in case a later cell reads it

    print(f"Processing noise file: {file_path.name}")

    # Read miniSEED file
    stream = read(file_path)

    for trace in stream:
        print(f"Processing trace: {trace.id}")

        sampling_rate = trace.stats.sampling_rate  # not used in this cell; kept in case a later cell reads it
        total_samples = len(trace.data)

        # random.randint(0, total_samples - num_samples) raises ValueError when
        # the trace is shorter than one window, so skip such traces explicitly
        # instead of aborting the whole run.
        if total_samples < num_samples:
            print(f"Skipping {trace.id}: only {total_samples} samples, need at least {num_samples}")
            continue

        try:
            # Convert counts to acceleration
            _, accel_m_s2 = convert_adx355_counts_to_acceleration(trace.data)
        except Exception as e:
            print(f"Error converting counts for {trace.id}: {e}")
            continue

        # Extract statistics for the random windows
        stats_per_file = [file_path.name, trace.id]  # First columns

        for _ in range(num_windows):
            # Select random start index for the window (both bounds inclusive,
            # so the last possible window ends exactly at the end of the trace)
            start_idx = window_rng.randint(0, total_samples - num_samples)
            end_idx = start_idx + num_samples

            segment = accel_m_s2[start_idx:end_idx]

            # Compute statistics
            mean_val = np.mean(segment)
            median_val = np.median(segment)
            mode_val = mode(segment, keepdims=True).mode[0]
            std_val = np.std(segment)
            skewness_val = skew(segment)
            kurtosis_val = kurtosis(segment)
            variance_val = np.var(segment)
            max_val = np.max(segment)
            min_val = np.min(segment)
            # The mean of the z-scores of a sample is ~0 by construction (NaN for
            # a zero-variance window); kept so the CSV schema stays unchanged.
            z_scores = np.mean(zscore(segment))

            # Append stats to the row
            stats_per_file.extend([mean_val, median_val, mode_val, std_val,
                                   skewness_val, kurtosis_val, variance_val,
                                   max_val, min_val, z_scores])

        noise_results.append(stats_per_file)

# Define column headers
columns = ["File", "Trace ID"]
stats_headers = ["Mean", "Median", "Mode", "Std Dev", "Skewness", "Kurtosis", "Variance", "Max", "Min", "Z-Score"]

# Repeat headers for S1, S2, ... (one group per random window)
for sample in [f"S{window_no}" for window_no in range(1, num_windows + 1)]:
    columns.extend([f"{sample} {stat}" for stat in stats_headers])

# Make an empty result visible instead of silently writing a header-only file
if not noise_results:
    print("Warning: no traces were processed; the CSV will contain only the header row")

# Save results to CSV
df_noise_results = pd.DataFrame(noise_results, columns=columns)
df_noise_results.to_csv(output_noise_csv, index=False)
print(f"Noise acceleration statistics saved to {output_noise_csv}")


Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DN1.MSEED
Processing trace: WS.POZA.S5.DN1
Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DN2.MSEED
Processing trace: WS.POZA.S5.DN2
Processing noise file: noise_2023-02-21T01_07_00.489562Z_WS.POZA.S5.DNZ.MSEED
Processing trace: WS.POZA.S5.DNZ
Processing noise file: noise_2023-02-21T01_07_00.490149Z_WS.POZA.S6.DN1.MSEED
Processing trace: WS.POZA.S6.DN1
Processing noise file: noise_2023-02-21T01_07_00.490149Z_WS.POZA.S6.DN2.MSEED
Processing trace: WS.POZA.S6.DN2
Processing noise file: noise_2023-02-21T01_07_00.490149Z_WS.POZA.S6.DNZ.MSEED
Processing trace: WS.POZA.S6.DNZ
Processing noise file: noise_2023-02-21T01_07_00.491238Z_WS.POZA.S3.DN1.MSEED
Processing trace: WS.POZA.S3.DN1
Processing noise file: noise_2023-02-21T01_07_00.491238Z_WS.POZA.S3.DN2.MSEED
Processing trace: WS.POZA.S3.DN2
Processing noise file: noise_2023-02-21T01_07_00.491238Z_WS.POZA.S3.DNZ.MSEED
Processing trace: WS.POZA.S3.DNZ
P