CALCULATING THE STATS FOR THE JSON FILES DATED 11/02/2025


In [8]:
import os
import json
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, mode
from datetime import datetime, timedelta
from obspy.signal.trigger import classic_sta_lta

# Define input and output directories
input_root = "AWS EARTHQUAKE DATASET"
output_root = "output plot save"
csv_output_path = "sta_lta_stats.csv"

# Prepare CSV storage
columns = [
    "File", "Before Mean", "Before Median", "Before Mode", "Before Std Dev", "Before Skewness", "Before Kurtosis", "Before Variance", "Before Max", "Before Min", "Before Mean STA/LTA", "Before Max STA/LTA",
    "After Mean", "After Median", "After Mode", "After Std Dev", "After Skewness", "After Kurtosis", "After Variance", "After Max", "After Min", "After Mean STA/LTA", "After Max STA/LTA"
]
data_records = []

# Traverse all subdirectories
for date_folder in os.listdir(input_root):
    date_path = os.path.join(input_root, date_folder)
    if not os.path.isdir(date_path):  
        continue  
    
    for device_folder in os.listdir(date_path):
        device_path = os.path.join(date_path, device_folder)
        if not os.path.isdir(device_path):
            continue  
        
        for json_file in os.listdir(device_path):
            if not json_file.endswith(".jsonl"):
                continue  
            
            file_path = os.path.join(device_path, json_file)
            
            x_data, time_data = [], []
            
            with open(file_path, "r") as file:
                for line in file:
                    record = json.loads(line)
                    device_time = datetime.utcfromtimestamp(record["device_t"])
                    sr = record["sr"]
                    
                    duration = len(record["x"]) / sr
                    time_values = [device_time - timedelta(seconds=(duration - i / sr)) for i in range(len(record["x"]))]

                    x_data.extend(record["x"])
                    time_data.extend(time_values)

            x_data = np.array(x_data)
            time_data = np.array(time_data)
            
            # Compute STA/LTA
            sta_window = int(1 * sr)
            lta_window = int(10 * sr)
            sta_lta_x = classic_sta_lta(x_data, sta_window, lta_window)
            
            # Find indices where STA/LTA exceeds threshold
            threshold_indices = np.where(sta_lta_x >= 3)[0]
            
            if len(threshold_indices) == 0:
                continue  # Skip if no exceedance
            
            for idx in threshold_indices:
                before_start = max(0, idx - int(1 * sr))
                after_end = min(len(x_data), idx + int(3 * sr))
                
                before_data = x_data[before_start:idx]
                after_data = x_data[idx:after_end]
                before_sta_lta = sta_lta_x[before_start:idx]
                after_sta_lta = sta_lta_x[idx:after_end]
                
                def compute_stats(data, sta_lta):
                    if len(data) == 0:  # Handle empty data case
                        return [np.nan] * 11

                    mode_value = mode(data, keepdims=True)[0]  # Ensure mode is always an array
                    return [
                            np.mean(data), np.median(data), mode_value[0] if mode_value.size > 0 else np.nan, np.std(data),
                            skew(data), kurtosis(data), np.var(data), np.max(data), np.min(data),
                            np.mean(sta_lta), np.max(sta_lta)
                    ]

                
                before_stats = compute_stats(before_data, before_sta_lta)
                after_stats = compute_stats(after_data, after_sta_lta)
                
                data_records.append([json_file] + before_stats + after_stats)

# Save to CSV
df = pd.DataFrame(data_records, columns=columns)
df.to_csv(csv_output_path, index=False)

print("STA/LTA statistical analysis completed and saved to CSV!")


  device_time = datetime.utcfromtimestamp(record["device_t"])


STA/LTA statistical analysis completed and saved to CSV!
