In [2]:
import os
import json
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, mode, zscore
from datetime import datetime, timedelta
from obspy.signal.trigger import classic_sta_lta

def compute_mer(signal, window_size=50):
    energy = np.convolve(signal**2, np.ones(window_size), mode='valid')
    mer = np.zeros(len(signal))
    mer[window_size - 1: window_size - 1 + len(energy)] = energy
    return mer

# Load the trained model and scaler
model_path = "sgd_classifier_model_updated.pkl"
scaler_path = "scaler_updated.pkl"
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)
with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Define JSON file path
json_file_path = "AWS EARTHQUAKE DATASET\date=2018-01-19time=16-17-45/001/15.jsonl"

def process_json(json_file):
    with open(json_file, "r") as file:
        for line in file:
            record = json.loads(line)
            device_time = datetime.utcfromtimestamp(record["device_t"])
            sr = record["sr"]
            
            duration = len(record["x"]) / sr
            time_values = [
                (device_time + timedelta(seconds=(i / sr))).strftime('%H:%M:%S.%f')[:-3] 
                for i in range(len(record["x"]))
            ]
            
            x_data = np.array(record["x"])
            time_data = np.array(time_values)
    
    # Compute STA/LTA and MER
    sta_window = int(1 * sr)
    lta_window = int(10 * sr)
    sta_lta_x = classic_sta_lta(x_data, sta_window, lta_window)
    mer_x = compute_mer(x_data, window_size=50)
    
    before_duration = int(1 * sr)
    after_duration = int(0.5 * sr)
    step_size = int(0.5 * sr)
    window_size = before_duration + after_duration
    
    data_records = []
    window_index = 1
    start_idx = 0
    
    predictions = []
    
    while start_idx + window_size <= len(x_data):
        window_start_time = time_data[start_idx]
        
        before_data = x_data[start_idx:start_idx + before_duration]
        after_data = x_data[start_idx + before_duration:start_idx + window_size]
        before_sta_lta = sta_lta_x[start_idx:start_idx + before_duration]
        after_sta_lta = sta_lta_x[start_idx + before_duration:start_idx + window_size]
        before_mer = mer_x[start_idx:start_idx + before_duration]
        after_mer = mer_x[start_idx + before_duration:start_idx + window_size]
        
        def compute_stats(data, sta_lta, mer):
            if len(data) == 0:
                return [np.nan] * 13
            mode_value = mode(data, keepdims=True)[0]
            return [
                np.mean(data), np.median(data), mode_value[0] if mode_value.size > 0 else np.nan, np.std(data),
                skew(data), kurtosis(data), np.var(data), np.max(data), np.min(data),
                np.mean(zscore(data)), np.mean(sta_lta), np.max(sta_lta),
                np.mean(mer), np.max(mer)
            ]
        
        before_stats = compute_stats(before_data, before_sta_lta, before_mer)
        after_stats = compute_stats(after_data, after_sta_lta, after_mer)
        
        feature_vector = np.array(before_stats + after_stats).reshape(1, -1)
        scaled_features = scaler.transform(feature_vector)
        predicted_label = model.predict(scaled_features)[0]
        
        predictions.append((window_index, start_idx, predicted_label, window_start_time))
        data_records.append([window_index, window_start_time] + before_stats + after_stats + [predicted_label])
        
        window_index += 1
        start_idx += step_size
    
    df = pd.DataFrame(data_records, columns=[
        "Window Index", "Window Start Time", "Before Mean", "Before Median", "Before Mode", "Before Std Dev", 
        "Before Skewness", "Before Kurtosis", "Before Variance", "Before Max", "Before Min", "Before Z-Score", 
        "Before Mean STA/LTA", "Before Max STA/LTA", "Before Mean MER", "Before Max MER", 
        "After Mean", "After Median", "After Mode", "After Std Dev", "After Skewness", 
        "After Kurtosis", "After Variance", "After Max", "After Min", "After Z-Score", 
        "After Mean STA/LTA", "After Max STA/LTA", "After Mean MER", "After Max MER", "Predicted Label"
    ])
    
    # Plot results
    plt.figure(figsize=(12, 6))
    plt.plot(time_data, x_data, label='Waveform', color='gray')
    
    for win_idx, start_idx, label, start_time in predictions:
        if label == 1:
            plt.axvspan(time_data[start_idx], time_data[start_idx + window_size], color='red', alpha=0.3, label=f'Window {win_idx}')
            plt.text(time_data[start_idx], np.max(x_data), f'W{win_idx}\n{start_time}', color='black', fontsize=8, verticalalignment='top')
    
    plt.xlabel('Time')
    plt.ylabel('Amplitude')
    plt.title('Seismic Waveform with Predicted Event Windows')
    plt.legend()
    plt.show()
    
    return df

# Run the function
result_df = process_json(json_file_path)
result_df.to_csv("single_json_prediction.csv", index=False)
print("Processing complete, results saved!")


  json_file_path = "AWS EARTHQUAKE DATASET\date=2018-01-19time=16-17-45/001/15.jsonl"
  json_file_path = "AWS EARTHQUAKE DATASET\date=2018-01-19time=16-17-45/001/15.jsonl"


FileNotFoundError: [Errno 2] No such file or directory: 'AWS EARTHQUAKE DATASET\\date=2018-01-19time=16-17-45/001/15.jsonl'