In [11]:
import numpy as np
import pandas as pd
from obspy import read
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.signal import spectrogram
from scipy.ndimage import label
from scipy.ndimage import gaussian_filter1d
import os
import keras
from keras import layers


In [16]:
class SeismicDataProcessor:
    def __init__(self, csv_file, mseed_file):
        """Initializes the processor with the provided CSV and MiniSEED files."""
        self.csv_file = csv_file
        self.mseed_file = mseed_file
        self.sxx = None
        self.frequencies = None
        self.load_data()

    def load_data(self):
        """Loads CSV and MiniSEED data."""
        try:
            # Load CSV data for time and velocity
            self.csv_data = pd.read_csv(self.csv_file)
            self.time = self.csv_data['rel_time(sec)'].values
            self.velocity = self.csv_data['velocity(c/s)'].values

            print("CSV data loaded:")
            print(f"Time data length: {len(self.time)}")
            print(f"Velocity data length: {len(self.velocity)}")

            # Load MiniSEED data for spectrogram
            self.mseed_data = read(self.mseed_file)
            trace = self.mseed_data[0]  # Assuming single trace
            self.sampling_rate = trace.stats.sampling_rate
            self.calculate_spectrogram(trace.data)

        except Exception as e:
            print(f"Error loading data: {e}")

    def calculate_spectrogram(self, velocity):
        """Calculates the spectrogram from the velocity data."""
        print("Calculating spectrogram...")
        if velocity is None or len(velocity) == 0:
            print("Velocity data is empty. Cannot calculate spectrogram.")
            return

        self.frequencies, self.time_spec, self.sxx = spectrogram(velocity, fs=self.sampling_rate)

        if self.sxx is None:
            print("Spectrogram calculation failed. sxx is None.")
        else:
            print(f"Spectrogram calculated with shape: {self.sxx.shape}")

    def calculate_power(self):
        """Calculate the power of the velocity signal from the spectrogram data."""
        if self.sxx is None:
            print("Cannot calculate power because sxx is None.")
            return None
        power = np.abs(self.sxx) ** 2
        print("Power calculated.")
        return np.mean(power, axis=0)  # Average across frequencies

    def detect_power_clusters(self, power_scaled, percentile=90):
        """Detects clusters in the power data based on a threshold."""
        if power_scaled is None:
            print("Cannot detect clusters because power_scaled is None.")
            return None, None

        power_threshold = np.percentile(power_scaled, percentile)
        power_above_threshold = power_scaled > power_threshold
        labeled_clusters, num_clusters = label(power_above_threshold)

        # Identify the cluster with the maximum total power
        max_power_cluster = None
        max_cluster_sum = 0

        for cluster_id in range(1, num_clusters + 1):
            cluster_indices = np.where(labeled_clusters == cluster_id)[0]
            cluster_sum = np.sum(power_scaled[cluster_indices])

            if cluster_sum > max_cluster_sum:
                max_cluster_sum = cluster_sum
                max_power_cluster = cluster_indices

        arrival_time = self.time_spec[max_power_cluster[0]] if max_power_cluster is not None else None
        print(f"Detected power clusters: {max_power_cluster}, Arrival time: {arrival_time}")
        return max_power_cluster, arrival_time

    def smooth_power(self, power, window_len=10):
        """Smooths the power data using a simple moving average."""
        smoothed_power = np.convolve(power, np.ones(window_len) / window_len, mode='same')
        print("Power smoothed.")
        return smoothed_power

    def plot_results(self, power_scaled, max_power_cluster, arrival_time):
        """Plots the time series, spectrogram, and power clusters."""
        plt.figure(figsize=(10, 10))
        self.plot_velocity_time_series(arrival_time)
        self.plot_spectrogram(arrival_time)
        self.plot_power_and_clusters(power_scaled, max_power_cluster, arrival_time)

    def plot_velocity_time_series(self, arrival_time):
        """Plots the velocity time series with the detected trigger time."""
        plt.subplot(3, 1, 1)
        plt.plot(self.time, self.velocity, label="Velocity (c/s)")
        plt.xlabel('Time (sec)')
        plt.ylabel('Velocity (c/s)')
        plt.title('Velocity Time Series')
        if arrival_time:
            plt.axvline(arrival_time, color='r', linestyle='--', label='Trigger Time')
        plt.legend()

    def plot_spectrogram(self, arrival_time):
        """Plots the spectrogram of the velocity data."""
        plt.subplot(3, 1, 2)
        plt.pcolormesh(self.time_spec, self.frequencies, 10 * np.log10(self.sxx), shading='gouraud')
        plt.xlabel('Time (sec)')
        plt.ylabel('Frequency (Hz)')
        plt.title('Spectrogram (dB)')
        if arrival_time:
            plt.axvline(arrival_time, color='r', linestyle='--', label='Trigger Time')
        plt.colorbar()

    def plot_power_and_clusters(self, power_scaled, max_power_cluster, arrival_time):
        """Plots the scaled power and detected clusters."""
        plt.subplot(3, 1, 3)
        plt.plot(self.time_spec, power_scaled, label='Smoothed Power', color='b')
        if max_power_cluster is not None:
            plt.scatter(self.time_spec[max_power_cluster], power_scaled[max_power_cluster], color='r', label='Detected Cluster')
        plt.xlabel('Time (sec)')
        plt.ylabel('Power')
        plt.title('Power Over Time')
        if arrival_time:
            plt.axvline(arrival_time, color='r', linestyle='--', label='Trigger Time')
        plt.legend()

    def process_data(self):
        """Main processing pipeline to load data, compute spectrogram, detect clusters, and plot results."""
        print("Processing data...")
        power_scaled = self.calculate_power()
        if power_scaled is None:
            return None

        smoothed_power = self.smooth_power(power_scaled)

        # Detect power clusters and arrival time
        max_power_cluster, arrival_time = self.detect_power_clusters(smoothed_power)

        # Plot the results
        self.plot_results(smoothed_power, max_power_cluster, arrival_time)

        return arrival_time

In [17]:
if __name__ == "__main__":
    # Paths to the specific CSV and MiniSEED files
    csv_file = 'data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1974-07-17HR00_evid00153.csv'  # Replace with your CSV file path
    mseed_file = 'data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1974-07-17HR00_evid00153.mseed'  # Replace with your MiniSEED file path

    # Initialize the processor with the provided files
    processor = SeismicDataProcessor(csv_file, mseed_file)

    # Process the data
    arrival_time = processor.process_data()

    # Find the timestamp corresponding to the detected arrival time
    if arrival_time is not None:
        closest_index = (np.abs(processor.time - arrival_time)).argmin()
        corresponding_timestamp = processor.csv_data['time_abs'].iloc[closest_index]  # Replace with actual timestamp column name

        print(f"Detected trigger time (relative): {arrival_time}")
        print(f"Corresponding timestamp: {corresponding_timestamp}")

        # Save the result to a CSV file
        result_data = pd.DataFrame({
            'filename': [os.path.basename(csv_file)],
            'detected_trigger_time': [arrival_time],
            'corresponding_timestamp': [corresponding_timestamp]
        })
        result_data.to_csv('trigger_times.csv', mode='a', header=not os.path.exists('trigger_times.csv'), index=False)

        print("Arrival time and timestamp saved to 'trigger_times.csv'.")
    else:
        print("No arrival time detected.")

Error loading data: 'rel_time(sec)'
Processing data...
Cannot calculate power because sxx is None.
No arrival time detected.
