In [29]:
# Required Libraries
import os
import numpy as np
import pandas as pd
from obspy import read
from obspy.signal.trigger import classic_sta_lta, trigger_onset
from scipy import signal
from datetime import timedelta
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [30]:
# Function to convert relative time to absolute time
def convert_rel_to_abs_time(start_time, time_rel):
    """
    Turn an offset from the trace start into an absolute timestamp string.

    Parameters:
    - start_time: Start of the trace; must support adding a timedelta and
      expose strftime (e.g. a datetime.datetime).
    - time_rel: Offset from start_time in seconds; coerced with float().

    Returns:
    - Timestamp string formatted as '%Y-%m-%dT%H:%M:%S.%f'.
    """
    offset = timedelta(seconds=float(time_rel))
    absolute = start_time + offset
    return absolute.strftime('%Y-%m-%dT%H:%M:%S.%f')

# Function to apply a bandpass filter to a seismic trace
def apply_bandpass_filter(trace, lowcut=0.01, highcut=3.0, sampling_rate=6.625, order=4):
    """
    Band-limit a seismic trace with a Butterworth bandpass filter.

    Parameters:
    - trace: Array-like of samples to filter.
    - lowcut / highcut: Lower and upper pass-band edges, expressed in the
      same units as sampling_rate.
    - sampling_rate: Sampling frequency handed to the filter design as `fs`.
    - order: Order argument passed to scipy.signal.butter.

    Returns:
    - The filtered samples as returned by scipy.signal.sosfilt.
    """
    # The filter is designed as second-order sections and applied in a
    # single forward pass (sosfilt), not forward-backward.
    passband = [lowcut, highcut]
    sections = signal.butter(order, passband, btype='bandpass', fs=sampling_rate, output='sos')
    return signal.sosfilt(sections, trace)

# STA/LTA Feature Extraction
def extract_sta_lta_features(trace, sampling_rate, sta_window=1.0, lta_window=5.0):
    """
    Compute the classic STA/LTA characteristic function of a trace.

    Parameters:
    - trace: Samples to analyse.
    - sampling_rate: Samples per second, used to convert the windows to
      sample counts.
    - sta_window: Short-term-average window length in seconds.
    - lta_window: Long-term-average window length in seconds.

    Returns:
    - The characteristic function produced by obspy's classic_sta_lta.
    """
    # Windows are given in seconds; int() truncates the product toward zero
    # when turning them into whole sample counts.
    nsta, nlta = (int(seconds * sampling_rate) for seconds in (sta_window, lta_window))
    return classic_sta_lta(trace, nsta, nlta)

# Normalization/Standardization
def normalize_trace(trace, method='standard'):
    """
    Rescale a seismic trace with a scikit-learn scaler.

    Supported methods:
    - 'standard': StandardScaler (zero mean, unit variance)
    - 'minmax': MinMaxScaler (values mapped onto [0, 1])

    Parameters:
    - trace: Array exposing .reshape (e.g. a NumPy array) holding the samples.
    - method: Name of the scaling scheme, one of the two listed above.

    Returns:
    - Flattened 1-D array of rescaled samples.

    Raises:
    - ValueError: If method is neither 'standard' nor 'minmax'.
    """
    # Reject unknown schemes up front so the happy path reads straight through.
    if method not in ('standard', 'minmax'):
        raise ValueError("Invalid normalization method. Choose 'standard' or 'minmax'.")

    scaler = StandardScaler() if method == 'standard' else MinMaxScaler()

    # The scalers expect a 2-D (n_samples, n_features) input, so present the
    # trace as a single column and flatten the result again afterwards.
    as_column = trace.reshape(-1, 1)
    return scaler.fit_transform(as_column).flatten()

In [31]:
# Function to segment seismic events
def segment_seismic_events(trace, segment_length=3600, sampling_rate=6.625, keep_remainder=False):
    """
    Split a seismic trace into consecutive, non-overlapping segments.

    Parameters:
    - trace: Sliceable sequence of samples (list, NumPy array, ...).
    - segment_length: Length of each segment in seconds.
    - sampling_rate: Sampling rate of the data (samples per second).
    - keep_remainder: When True, the samples left over after the last full
      segment are returned as a final, shorter segment. The default (False)
      discards them, so every returned segment has the same length.

    Returns:
    - List of segments, in order. Empty when the trace is shorter than one
      segment and keep_remainder is False.

    Raises:
    - ValueError: If segment_length * sampling_rate truncates to fewer than
      one sample, which would otherwise cause a division by zero or a
      silently empty result.
    """
    samples_per_segment = int(segment_length * sampling_rate)
    if samples_per_segment <= 0:
        raise ValueError(
            "segment_length * sampling_rate must amount to at least one sample "
            f"(got segment_length={segment_length}, sampling_rate={sampling_rate})."
        )

    total_samples = len(trace)
    # Index just past the last complete segment.
    full_end = (total_samples // samples_per_segment) * samples_per_segment

    segmented_traces = [
        trace[start:start + samples_per_segment]
        for start in range(0, full_end, samples_per_segment)
    ]

    if keep_remainder and full_end < total_samples:
        segmented_traces.append(trace[full_end:])

    return segmented_traces

In [32]:
# Preprocess seismic data and extract features
def preprocess_seismic_data(filepath, filetype, sampling_rate=6.625):
    """
    Load one seismic record and run the preprocessing pipeline on it.

    Steps, in order:
    - Apply bandpass filter
    - Normalize/Standardize the whole filtered trace
    - Segment the trace into 3600-second pieces
    - Extract the STA/LTA characteristic function of every segment

    Parameters:
    - filepath: Path to the .csv or .mseed file.
    - filetype: 'csv' or 'mseed'.
    - sampling_rate: Samples per second, used for the filter design, the
      segment size and the STA/LTA windows.

    Returns:
    - segmented_traces: List of standardized, filtered segments.
    - sta_lta_features: List of STA/LTA characteristic functions, one per
      segment.
    - time_abs: For 'mseed', a list of timestamp strings, one per sample of
      the full (unsegmented) trace. For 'csv', the 'rel_time(sec)' column
      values returned unchanged.

    Raises:
    - ValueError: If filetype is neither 'csv' nor 'mseed'.
    """
    import warnings  # local import: only needed for the sampling-rate check

    if filetype == 'csv':
        seismic_data = pd.read_csv(filepath)
        trace = seismic_data['velocity(c/s)'].values
        time_rel = seismic_data['rel_time(sec)'].values
    elif filetype == 'mseed':
        st = read(filepath)
        tr = st[0]  # only the first trace of the stream is processed
        trace = tr.data
        time_rel = tr.times()

        # The filter, segment size and STA/LTA windows all depend on the
        # sampling rate. The header value is not substituted automatically
        # so existing callers keep their results; a mismatch is reported
        # instead of being processed silently.
        header_rate = float(tr.stats.sampling_rate)
        if not np.isclose(header_rate, sampling_rate):
            warnings.warn(
                f"{filepath}: header sampling rate is {header_rate} Hz but "
                f"sampling_rate={sampling_rate} is being used for preprocessing.",
                stacklevel=2,
            )
    else:
        # Previously an unknown filetype fell through and failed later with
        # an unbound-variable error on `trace`.
        raise ValueError(f"Unsupported filetype {filetype!r}. Choose 'csv' or 'mseed'.")

    # Apply bandpass filter
    filtered_trace = apply_bandpass_filter(trace, sampling_rate=sampling_rate)

    # Normalize/Standardize the trace (statistics are computed over the
    # whole trace, before it is cut into segments)
    normalized_trace = normalize_trace(filtered_trace, method='standard')

    # Segment the trace into events
    segmented_traces = segment_seismic_events(normalized_trace, segment_length=3600, sampling_rate=sampling_rate)

    # Extract STA/LTA features
    sta_lta_features = [extract_sta_lta_features(segment, sampling_rate) for segment in segmented_traces]

    # Convert relative times to absolute times (for labeled data)
    if filetype == 'mseed':
        starttime = tr.stats.starttime.datetime
        time_abs = [convert_rel_to_abs_time(starttime, rel_time) for rel_time in time_rel]
    else:
        time_abs = time_rel  # For CSV, time_rel is already present.

    return segmented_traces, sta_lta_features, time_abs

In [33]:








# Function to load lunar seismic data
def load_lunar_data(catalog_path, data_dir):
    """
    Load and preprocess the lunar .mseed files listed in a catalog.

    Each catalog row names a file via its 'filename' column ('.mseed' is
    appended) and supplies a label via its 'mq_type' column. Files that are
    not present in data_dir are reported on stdout and skipped.

    Parameters:
    - catalog_path: Path to the lunar catalog CSV file.
    - data_dir: Directory containing the lunar .mseed files.

    Returns:
    - lunar_data: List of preprocessed segments from all files found.
    - lunar_labels: List with the row's 'mq_type' repeated once per segment.
    - lunar_time_abs: Concatenation of the absolute times returned by
      preprocess_seismic_data for each file.
    """
    lunar_data, lunar_labels, lunar_time_abs = [], [], []

    for _, entry in pd.read_csv(catalog_path).iterrows():
        filename = entry['filename'] + '.mseed'
        file_path = os.path.join(data_dir, filename)

        # Skip catalog entries whose waveform file is absent.
        if not os.path.exists(file_path):
            print(f"File {filename} not found.")
            continue

        print(f"Loading MSEED file: {file_path}")
        segments, _features, abs_times = preprocess_seismic_data(file_path, filetype='mseed')

        lunar_data.extend(segments)
        # Every segment cut from this file inherits the file's catalog label.
        lunar_labels.extend([entry['mq_type']] * len(segments))
        lunar_time_abs.extend(abs_times)

    return lunar_data, lunar_labels, lunar_time_abs

In [34]:
#Function to load martian seismic data
def load_martian_data(data_dir):
    """
    Load and preprocess every .mseed file found under a directory (unlabeled).

    The directory tree is walked recursively with os.walk; files whose name
    does not end in '.mseed' are ignored.

    Parameters:
    - data_dir: Directory containing the martian .mseed files.

    Returns:
    - martian_data: List of preprocessed segments from all files.
    - martian_time_abs: Concatenation of the absolute times returned by
      preprocess_seismic_data for each file.
    """
    # Lazily yield matching paths in the order os.walk visits them.
    mseed_paths = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(data_dir)
        for name in names
        if name.endswith('.mseed')
    )

    martian_data, martian_time_abs = [], []
    for file_path in mseed_paths:
        print(f"Loading MSEED file: {file_path}")
        segments, _features, abs_times = preprocess_seismic_data(file_path, filetype='mseed')
        martian_data.extend(segments)
        martian_time_abs.extend(abs_times)

    return martian_data, martian_time_abs

In [35]:
# Main function
def main():
    """
    Load and preprocess the lunar and martian training sets, reporting
    progress and the number of segments obtained on stdout.
    """
    # Input locations, relative to the notebook's working directory.
    catalog_csv = '../../data/lunar_data/training/catalogs/apollo12_catalog_GradeA_final.csv'
    lunar_dir = '../../data/lunar_data/training/data/S12_GradeA/'
    martian_dir = '../../data/marsquake_data/training/data/'

    # Lunar set: labeled through the catalog.
    print("Loading and preprocessing Lunar MSEED data...")
    lunar_data, _lunar_labels, _lunar_times = load_lunar_data(catalog_csv, lunar_dir)
    print(f"Lunar Data Loaded: {len(lunar_data)} traces.")

    # Martian set: unlabeled, every .mseed file under the directory.
    print("Loading and preprocessing Martian MSEED data...")
    martian_data, _martian_times = load_martian_data(martian_dir)
    print(f"Martian Data Loaded: {len(martian_data)} traces.")

    # Hook for follow-up work such as inspecting the loaded data.
    print("Data loading complete.")


# Run the pipeline when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

Loading and preprocessing Lunar MSEED data...
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-01-19HR00_evid00002.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-03-25HR00_evid00003.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-03-26HR00_evid00004.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-04-25HR00_evid00006.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-04-26HR00_evid00007.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-06-15HR00_evid00008.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-06-26HR00_evid00009.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S12_GradeA/xa.s12.00.mhz.1970-07-20HR00_evid00010.mseed
Loading MSEED file: ../../data/lunar_data/training/data/S1