# In [1]:
import json
import os
from datetime import datetime, timedelta, timezone

import numpy as np
import pandas as pd
from obspy.signal.trigger import classic_sta_lta
from scipy.stats import kurtosis, mode, skew, zscore

def compute_mer(signal, window_size=50):
    """Return the trailing-window energy of ``signal``, one value per sample.

    For every index ``i >= window_size - 1`` the result holds the sum of the
    squared samples in ``signal[i - window_size + 1 : i + 1]``. The first
    ``window_size - 1`` positions, where a full window is not yet available,
    are left at zero.

    NOTE(review): despite the name, no ratio is formed here; the output is a
    windowed energy sum only.

    Args:
        signal: 1-D sequence of real-valued samples (list or NumPy array).
        window_size: Number of samples per energy window; must be >= 1.

    Returns:
        A float NumPy array with the same length as ``signal``. If the signal
        is shorter than ``window_size`` (including empty), it is all zeros.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Work in float so list input is accepted and integer squares cannot overflow.
    samples = np.asarray(signal, dtype=float)
    mer = np.zeros(len(samples))

    # With fewer samples than the window, 'valid' convolution would still
    # return values (NumPy swaps the operands), which cannot be placed in the
    # output; no full window exists, so the result stays all zeros.
    if len(samples) < window_size:
        return mer

    energy = np.convolve(samples**2, np.ones(window_size), mode='valid')
    # 'valid' yields len(samples) - window_size + 1 values; align each one with
    # the last sample of the window it summarises.
    mer[window_size - 1:] = energy
    return mer

# Define input and output locations.
# The traversal below expects <input_root>/<date>/<device>/*.jsonl.
input_root = "AWS EARTHQUAKE DATASET"
csv_output_path = "sliding_window_stats.csv"

# Per-half-window statistics, in exactly the order compute_stats() returns them.
# The CSV header and the empty-window fallback are both derived from this list
# so the row width can never disagree with the header.
stat_names = [
    "Mean", "Median", "Mode", "Std Dev", "Skewness",
    "Kurtosis", "Variance", "Max", "Min", "Z-Score",
    "Mean STA/LTA", "Max STA/LTA", "Mean MER", "Max MER",
]

# Prepare CSV storage
columns = (
    ["Date", "Device ID", "File", "Window Index", "Window Start Time"]
    + [f"Before {name}" for name in stat_names]
    + [f"After {name}" for name in stat_names]
)
data_records = []


def compute_stats(data, sta_lta, mer):
    """Summarise one half-window as a list of ``len(stat_names)`` values.

    Args:
        data: Raw samples of the half-window.
        sta_lta: STA/LTA values covering the same sample range.
        mer: MER values covering the same sample range.

    Returns:
        A list ordered like ``stat_names``; all NaN when ``data`` is empty.
    """
    if len(data) == 0:
        return [np.nan] * len(stat_names)

    mode_value = mode(data, keepdims=True)[0]
    return [
        np.mean(data),
        np.median(data),
        mode_value[0] if mode_value.size > 0 else np.nan,
        np.std(data),
        skew(data),
        kurtosis(data),
        np.var(data),
        np.max(data),
        np.min(data),
        # NOTE(review): the mean of z-scores is ~0 by construction (NaN for a
        # constant window); kept only to preserve the existing CSV schema.
        np.mean(zscore(data)),
        np.mean(sta_lta),
        np.max(sta_lta),
        np.mean(mer),
        np.max(mer),
    ]


def load_samples(file_path):
    """Read one JSON-lines file into a flat sample array with timestamps.

    Each non-blank line is parsed as a JSON object providing ``device_t``
    (epoch seconds of the record's first sample), ``sr`` (sampling rate) and
    ``x`` (list of samples).

    Returns:
        ``(samples, sample_times, sr)`` where ``samples`` is a NumPy array of
        all ``x`` values in file order, ``sample_times`` is a list with one
        UTC ``datetime`` per sample, and ``sr`` is the sampling rate of the
        last record read (``None`` if the file held no records).
    """
    samples, sample_times = [], []
    sr = None

    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            if not line.strip():
                continue  # tolerate blank / trailing lines

            record = json.loads(line)
            sr = record["sr"]
            device_time = datetime.fromtimestamp(record["device_t"], tz=timezone.utc)
            values = record["x"]

            samples.extend(values)
            # Keep datetimes rather than strings: only window starts are ever
            # formatted, so per-sample strftime would be wasted work.
            sample_times.extend(
                device_time + timedelta(seconds=i / sr) for i in range(len(values))
            )

    return np.array(samples), sample_times, sr


# Traverse dataset (sorted so the CSV row order is reproducible across runs)
for date_folder in sorted(os.listdir(input_root)):
    date_path = os.path.join(input_root, date_folder)
    if not os.path.isdir(date_path):
        continue

    for device_folder in sorted(os.listdir(date_path)):
        device_path = os.path.join(date_path, device_folder)
        if not os.path.isdir(device_path):
            continue

        for json_file in sorted(os.listdir(device_path)):
            if not json_file.endswith(".jsonl"):
                continue

            file_path = os.path.join(device_path, json_file)
            x_data, time_data, sr = load_samples(file_path)

            if sr is None:
                # No records at all: there is no sampling rate to work with.
                print(f"Skipping {file_path}: file contains no records")
                continue

            # Sliding window parameters
            before_duration = int(1 * sr)  # 1 sec
            after_duration = int(0.5 * sr)  # 0.5 sec
            step_size = int(0.5 * sr)  # Move by 0.5 sec each step
            window_size = before_duration + after_duration

            if step_size < 1:
                # A zero step would never advance the window (endless loop).
                print(f"Skipping {file_path}: sampling rate {sr} is too low for a 0.5 s step")
                continue

            if len(x_data) < window_size:
                # Not even one full window fits, so no rows would be produced.
                continue

            # Compute STA/LTA (1 s short-term window, 10 s long-term window)
            sta_window = int(1 * sr)
            lta_window = int(10 * sr)
            sta_lta_x = classic_sta_lta(x_data, sta_window, lta_window)

            # Compute MER
            mer_x = compute_mer(x_data, window_size=50)

            last_start = len(x_data) - window_size
            window_starts = range(0, last_start + 1, step_size)
            for window_index, start_idx in enumerate(window_starts, start=1):
                split_idx = start_idx + before_duration
                end_idx = start_idx + window_size

                # Millisecond-resolution time of day of the window's first sample.
                window_start_time = time_data[start_idx].strftime('%H:%M:%S.%f')[:-3]

                before_stats = compute_stats(
                    x_data[start_idx:split_idx],
                    sta_lta_x[start_idx:split_idx],
                    mer_x[start_idx:split_idx],
                )
                after_stats = compute_stats(
                    x_data[split_idx:end_idx],
                    sta_lta_x[split_idx:end_idx],
                    mer_x[split_idx:end_idx],
                )

                data_records.append(
                    [date_folder, device_folder, json_file, f"Window {window_index}", window_start_time]
                    + before_stats
                    + after_stats
                )

# Save to CSV
df = pd.DataFrame(data_records, columns=columns)
df.to_csv(csv_output_path, index=False)

print("Sliding window statistical analysis completed and saved to CSV!")


# Notebook cell output:
#   device_time = datetime.utcfromtimestamp(record["device_t"])
#
# Sliding window statistical analysis completed and saved to CSV!
