In [6]:
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.ensemble import IsolationForest
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import os
from obspy import read
from obspy.signal.trigger import trigger_onset
import joblib

# Load the catalog
def load_catalog(catalog_path):
    """Read the event catalog CSV and attach a parsed ``time_abs`` column.

    Args:
        catalog_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        The catalog DataFrame with an extra ``time_abs`` datetime column
        parsed from the ``time_abs(%Y-%m-%dT%H:%M:%S.%f)`` column.
    """
    raw_time_column = 'time_abs(%Y-%m-%dT%H:%M:%S.%f)'
    events = pd.read_csv(catalog_path)
    return events.assign(time_abs=pd.to_datetime(events[raw_time_column]))

# Load a single MSEED file
def load_mseed_file(file_path):
    """Load the first trace of a MiniSEED file into a DataFrame.

    Args:
        file_path: Path of the file passed to ``obspy.read``.

    Returns:
        A ``(data, sampling_rate)`` tuple. ``data`` has the columns
        ``time_rel(sec)``, ``velocity`` and ``time_abs``. If anything goes
        wrong while reading or converting, the error is printed and
        ``(None, None)`` is returned instead of raising.
    """
    try:
        first_trace = read(file_path)[0]

        frame = pd.DataFrame({
            'time_rel(sec)': first_trace.times(),
            'velocity': first_trace.data,
        })

        # Absolute timestamps = trace start time + per-sample relative offset.
        start_time = pd.to_datetime(first_trace.stats.starttime.datetime)
        offsets = pd.to_timedelta(frame['time_rel(sec)'], unit='s')
        frame['time_abs'] = start_time + offsets

        return frame, first_trace.stats.sampling_rate
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file.
        print(f"Error loading file {file_path}: {str(e)}")
        return None, None

# Feature engineering
def engineer_features(data, sampling_rate):
    """Add rolling, spectral and STA/LTA features to a waveform DataFrame.

    The frame is modified in place (new columns are added) and also returned.

    Args:
        data: DataFrame with at least ``velocity`` and ``time_rel(sec)``
            columns.
        sampling_rate: Samples per second; used to convert the window
            lengths below from seconds to sample counts.

    Returns:
        ``data`` with the added columns ``velocity_rolling_mean``,
        ``velocity_rolling_std``, ``spectral_mean``, ``spectral_std``,
        ``sta``, ``lta``, ``sta_lta_ratio`` and ``trigger``.
    """
    velocity = data['velocity']

    window_size = int(sampling_rate * 15)  # 15 seconds window
    data['velocity_rolling_mean'] = velocity.rolling(window=window_size).mean()
    data['velocity_rolling_std'] = velocity.rolling(window=window_size).std()

    # Per-time-bin mean/std across frequencies, interpolated back onto the
    # sample times so every row gets a value.
    _, t, Sxx = signal.spectrogram(velocity, fs=sampling_rate)
    spectral_mean = np.mean(Sxx, axis=0)
    spectral_std = np.std(Sxx, axis=0)

    data['spectral_mean'] = np.interp(data['time_rel(sec)'], t, spectral_mean)
    data['spectral_std'] = np.interp(data['time_rel(sec)'], t, spectral_std)

    sta_window = int(sampling_rate * 120)  # 2 minutes
    lta_window = int(sampling_rate * 600)  # 10 minutes
    abs_velocity = velocity.abs()
    data['sta'] = abs_velocity.rolling(window=sta_window).mean()
    data['lta'] = abs_velocity.rolling(window=lta_window).mean()
    data['sta_lta_ratio'] = data['sta'] / data['lta']

    # Add trigger on/off feature.
    # Rows where the ratio is undefined (warm-up of the rolling windows) are
    # treated as 0 so they can never trigger.
    cft = data['sta_lta_ratio'].fillna(0).to_numpy()
    thr_on, thr_off = 4, 1.5
    on_off = trigger_onset(cft, thr_on, thr_off)

    # trigger_onset reports positional sample indices, so mark the rows by
    # position instead of by index label; this stays correct even when the
    # frame does not carry a default 0..n-1 index.
    trigger = np.zeros(len(data), dtype=np.int64)
    for start, end in on_off:
        trigger[start:end + 1] = 1  # end sample is included
    data['trigger'] = trigger

    return data

# Train Isolation Forest model
def train_isolation_forest(data):
    """Fit a scaler and an Isolation Forest on the engineered features.

    Rows with a missing value in any feature column are dropped before
    fitting.

    Args:
        data: DataFrame containing the engineered feature columns.

    Returns:
        ``(clf, scaler)``: the fitted ``IsolationForest`` and the fitted
        ``StandardScaler`` that must be applied to data before predicting.
    """
    feature_columns = [
        'velocity',
        'velocity_rolling_mean',
        'velocity_rolling_std',
        'spectral_mean',
        'spectral_std',
        'sta_lta_ratio',
        'trigger',
    ]
    training_rows = data[feature_columns].dropna()

    scaler = StandardScaler()
    scaled_rows = scaler.fit_transform(training_rows)

    clf = IsolationForest(contamination=0.01, random_state=42)
    clf.fit(scaled_rows)
    return clf, scaler

# Train ARIMA model
def train_arima(data):
    """Fit an ARIMA(5, 1, 0) model on the ``velocity`` column.

    Args:
        data: DataFrame with a ``velocity`` column.

    Returns:
        The fitted results object returned by ``ARIMA(...).fit()``.
    """
    arima_order = (5, 1, 0)
    return ARIMA(data['velocity'], order=arima_order).fit()

# Main training function
def train_models(catalog_path, data_directory):
    """Load every catalogued MiniSEED file, build features and train models.

    Args:
        catalog_path: Path of the catalog CSV; its ``filename`` column lists
            the file stems (without the ``.mseed`` extension).
        data_directory: Directory that holds the ``.mseed`` files.

    Returns:
        ``(isolation_forest, scaler, arima_model, all_data)``. When no file
        could be loaded, a message is printed and four ``None`` values are
        returned.
    """
    catalog = load_catalog(catalog_path)

    # Collect the per-file frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies all accumulated rows per file.
    frames = []
    for file_name in catalog['filename']:
        file_path = os.path.join(data_directory, file_name + '.mseed').replace("\\", "/")

        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        data, sampling_rate = load_mseed_file(file_path)
        if data is not None and sampling_rate is not None:
            frames.append(engineer_features(data, sampling_rate))

    all_data = pd.concat(frames) if frames else pd.DataFrame()

    if all_data.empty:
        print("No data was successfully loaded. Please check your file paths and data.")
        return None, None, None, None

    isolation_forest, scaler = train_isolation_forest(all_data)
    arima_model = train_arima(all_data)

    return isolation_forest, scaler, arima_model, all_data

# Plot training data
def plot_training_data(training_data):
    """Save a four-panel overview of the training data.

    Each panel plots one column against ``time_rel(sec)``. The figure is
    written to ``training_data_plot.png`` and then closed.

    Args:
        training_data: DataFrame with ``time_rel(sec)``, ``velocity``,
            ``sta_lta_ratio``, ``spectral_mean`` and ``trigger`` columns.
    """
    panels = (
        ('velocity', 'Velocity'),
        ('sta_lta_ratio', 'STA/LTA Ratio'),
        ('spectral_mean', 'Spectral Mean'),
        ('trigger', 'Trigger On/Off'),
    )

    plt.figure(figsize=(15, 15))
    for position, (column, heading) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, position)
        plt.plot(training_data['time_rel(sec)'], training_data[column])
        plt.title(heading)
    plt.tight_layout()
    plt.savefig('training_data_plot.png')
    plt.close()

# Example usage
# Relative locations of the catalog CSV and of the directory with .mseed files.
catalog_path = './data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'
data_directory = './data/lunar/training/data'

# Print the resolved absolute paths so a wrong working directory is easy to spot.
print(f"Catalog path: {os.path.abspath(catalog_path)}")
print(f"Data directory: {os.path.abspath(data_directory)}")

# train_models returns four None values when no file could be loaded.
isolation_forest, scaler, arima_model, training_data = train_models(catalog_path, data_directory)

if training_data is not None:
    plot_training_data(training_data)
    print("Training completed. Models are ready for use.")

    # Save models for later use
    # (written to the current working directory).
    joblib.dump(isolation_forest, 'isolation_forest_model.joblib')
    joblib.dump(scaler, 'scaler.joblib')
    joblib.dump(arima_model, 'arima_model.joblib')
else:
    # Nothing is saved in this branch.
    print("Training failed. Please check your data and file paths.")

Catalog path: C:\Users\dipes\Siesmic_model_NASA\data\lunar\training\catalogs\apollo12_catalog_GradeA_final.csv
Data directory: C:\Users\dipes\Siesmic_model_NASA\data\lunar\training\data
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-01-19HR00_evid00002.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-03-25HR00_evid00003.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-03-26HR00_evid00004.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-04-25HR00_evid00006.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-04-26HR00_evid00007.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-06-15HR00_evid00008.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-06-26HR00_evid00009.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-07-20HR00_evid00010.mseed
File not found: ./data/lunar/training/data/xa.s12.00.mhz.1970-07-20HR00_evid00011.mseed
File not found: ./data

In [4]:
import pandas as pd
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt

# Load and preprocess test data
def load_test_data(file_path):
    """Read a test CSV and add normalized ``time_abs`` and ``velocity`` columns.

    Args:
        file_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        The DataFrame with ``time_abs`` parsed from
        ``time_abs(%Y-%m-%dT%H:%M:%S.%f)`` and ``velocity`` copied from
        ``velocity(m/s)``.
    """
    frame = pd.read_csv(file_path)
    parsed_times = pd.to_datetime(frame['time_abs(%Y-%m-%dT%H:%M:%S.%f)'])
    return frame.assign(time_abs=parsed_times, velocity=frame['velocity(m/s)'])

# Apply feature engineering to test data
def engineer_test_features(data):
    """Build the same features for test data as were built for training.

    The previous implementation used fixed window lengths in samples
    (100 / 120 / 600) while training uses 15 s / 120 s / 600 s scaled by the
    sampling rate, and it never produced the ``trigger`` column that the
    training feature set contains. Delegating to ``engineer_features`` keeps
    both pipelines identical.

    Args:
        data: DataFrame with ``velocity`` and ``time_rel(sec)`` columns.

    Returns:
        ``data`` with the feature columns added by ``engineer_features``.
    """
    # Estimate the sampling rate from the mean spacing of the relative times.
    sampling_rate = 1 / data['time_rel(sec)'].diff().mean()
    # engineer_features is defined in the training cell of this notebook.
    return engineer_features(data, sampling_rate)

# Detect anomalies using Isolation Forest
def detect_anomalies(data, isolation_forest, scaler):
    """Flag anomalous rows with a fitted Isolation Forest.

    Args:
        data: DataFrame holding the engineered feature columns.
        isolation_forest: Fitted model exposing ``predict``; it is expected
            to return ``-1`` for anomalies and ``1`` otherwise.
        scaler: Fitted scaler exposing ``transform``. If it exposes
            ``feature_names_in_`` (set by scikit-learn when fitted on a
            DataFrame), those column names are used so the columns match
            what the scaler was fitted on.

    Returns:
        ``data`` with an ``is_anomaly`` column: ``1`` for anomalies, ``0``
        for normal rows, and NaN for rows that were skipped because one of
        their features was missing.
    """
    default_features = ['velocity', 'velocity_rolling_mean', 'velocity_rolling_std', 'spectral_mean', 'spectral_std', 'sta_lta_ratio']
    features = list(getattr(scaler, 'feature_names_in_', default_features))
    X = data[features].dropna()
    X_scaled = scaler.transform(X)

    anomalies = isolation_forest.predict(X_scaled)
    # Keep X's index: dropna() removed rows, so a fresh 0..n-1 index would
    # attach every prediction to the wrong row of `data`.
    data['is_anomaly'] = pd.Series(anomalies, index=X.index).replace({1: 0, -1: 1})
    return data

# Forecast using ARIMA
def forecast_arima(data, arima_model):
    """Attach an ARIMA forecast with one value per row of ``data``.

    Args:
        data: DataFrame to receive the ``forecast`` column.
        arima_model: Fitted model exposing ``forecast(steps=...)``.

    Returns:
        ``data`` with a ``forecast`` column.
    """
    forecast = arima_model.forecast(steps=len(data))
    # Assign by position: the forecast may come back as a Series indexed
    # after the end of the training sample, and label alignment against
    # `data`'s own index would leave the column filled with NaN.
    data['forecast'] = np.asarray(forecast)
    return data

# Identify potential seismic events
def identify_events(data, anomaly_threshold=0.8, forecast_threshold=2):
    """Select rows that satisfy all three event criteria.

    A row is kept when it is flagged as an anomaly, its STA/LTA ratio
    exceeds ``anomaly_threshold``, and its velocity differs from the
    forecast by more than ``forecast_threshold`` rolling standard
    deviations.

    Args:
        data: DataFrame with ``is_anomaly``, ``sta_lta_ratio``, ``velocity``,
            ``forecast`` and ``velocity_rolling_std`` columns.
        anomaly_threshold: Lower bound (exclusive) for ``sta_lta_ratio``.
        forecast_threshold: Multiplier applied to ``velocity_rolling_std``.

    Returns:
        The subset of ``data`` matching every criterion.
    """
    flagged = data['is_anomaly'] == 1
    energetic = data['sta_lta_ratio'] > anomaly_threshold

    residual = np.abs(data['velocity'] - data['forecast'])
    allowed_deviation = forecast_threshold * data['velocity_rolling_std']
    off_forecast = residual > allowed_deviation

    return data[flagged & energetic & off_forecast]

# Main testing function
def test_models(file_path, isolation_forest, scaler, arima_model):
    """Run the full detection pipeline on one test CSV.

    Args:
        file_path: Path of the test CSV.
        isolation_forest: Fitted anomaly model passed to ``detect_anomalies``.
        scaler: Fitted scaler passed to ``detect_anomalies``.
        arima_model: Fitted model passed to ``forecast_arima``.

    Returns:
        ``(test_data, potential_events)``: the fully annotated DataFrame and
        the rows selected by ``identify_events``.
    """
    featured = engineer_test_features(load_test_data(file_path))
    scored = detect_anomalies(featured, isolation_forest, scaler)
    annotated = forecast_arima(scored, arima_model)
    return annotated, identify_events(annotated)

# Example usage
# Placeholder path: replace with a real test CSV before running.
test_file_path = 'path/to/your/test_data.csv'
# isolation_forest, scaler and arima_model are not defined in this cell; they
# must already exist in the session.
test_data, potential_events = test_models(test_file_path, isolation_forest, scaler, arima_model)

# Plot results
# Velocity trace, the ARIMA forecast on top of it, and the selected events in red.
plt.figure(figsize=(15, 10))
plt.plot(test_data['time_rel(sec)'], test_data['velocity'], label='Velocity')
plt.plot(test_data['time_rel(sec)'], test_data['forecast'], label='ARIMA Forecast', alpha=0.7)
plt.scatter(potential_events['time_rel(sec)'], potential_events['velocity'], color='red', label='Potential Events')
plt.legend()
plt.title('Test Data with Potential Seismic Events')
plt.xlabel('Time (seconds)')
plt.ylabel('Velocity (m/s)')
plt.show()

# Print potential event times
print("Potential seismic event times:")
print(potential_events['time_abs'])

FileNotFoundError: [Errno 2] No such file or directory: 'isolation_forest_model.joblib'

In [None]:
import pandas as pd
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt

# Load and preprocess test data
def load_test_data(file_path):
    """Read a test CSV and add normalized ``time_abs`` and ``velocity`` columns.

    Args:
        file_path: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        The DataFrame with ``time_abs`` parsed from
        ``time_abs(%Y-%m-%dT%H:%M:%S.%f)`` and ``velocity`` copied from
        ``velocity(m/s)``.
    """
    frame = pd.read_csv(file_path)
    parsed_times = pd.to_datetime(frame['time_abs(%Y-%m-%dT%H:%M:%S.%f)'])
    return frame.assign(time_abs=parsed_times, velocity=frame['velocity(m/s)'])

# Apply feature engineering to test data
def engineer_test_features(data):
    """Build the same features for test data as were built for training.

    The previous implementation used fixed window lengths in samples
    (100 / 120 / 600) while training uses 15 s / 120 s / 600 s scaled by the
    sampling rate, and it never produced the ``trigger`` column that the
    training feature set contains. Delegating to ``engineer_features`` keeps
    both pipelines identical.

    Args:
        data: DataFrame with ``velocity`` and ``time_rel(sec)`` columns.

    Returns:
        ``data`` with the feature columns added by ``engineer_features``.
    """
    # Estimate the sampling rate from the mean spacing of the relative times.
    sampling_rate = 1 / data['time_rel(sec)'].diff().mean()
    # engineer_features is defined in the training cell of this notebook.
    return engineer_features(data, sampling_rate)

# Detect anomalies using Isolation Forest
def detect_anomalies(data, isolation_forest, scaler):
    """Flag anomalous rows with a fitted Isolation Forest.

    Args:
        data: DataFrame holding the engineered feature columns.
        isolation_forest: Fitted model exposing ``predict``; it is expected
            to return ``-1`` for anomalies and ``1`` otherwise.
        scaler: Fitted scaler exposing ``transform``. If it exposes
            ``feature_names_in_`` (set by scikit-learn when fitted on a
            DataFrame), those column names are used so the columns match
            what the scaler was fitted on.

    Returns:
        ``data`` with an ``is_anomaly`` column: ``1`` for anomalies, ``0``
        for normal rows, and NaN for rows that were skipped because one of
        their features was missing.
    """
    default_features = ['velocity', 'velocity_rolling_mean', 'velocity_rolling_std', 'spectral_mean', 'spectral_std', 'sta_lta_ratio']
    features = list(getattr(scaler, 'feature_names_in_', default_features))
    X = data[features].dropna()
    X_scaled = scaler.transform(X)

    anomalies = isolation_forest.predict(X_scaled)
    # Keep X's index: dropna() removed rows, so a fresh 0..n-1 index would
    # attach every prediction to the wrong row of `data`.
    data['is_anomaly'] = pd.Series(anomalies, index=X.index).replace({1: 0, -1: 1})
    return data

# Forecast using ARIMA
def forecast_arima(data, arima_model):
    """Attach an ARIMA forecast with one value per row of ``data``.

    Args:
        data: DataFrame to receive the ``forecast`` column.
        arima_model: Fitted model exposing ``forecast(steps=...)``.

    Returns:
        ``data`` with a ``forecast`` column.
    """
    forecast = arima_model.forecast(steps=len(data))
    # Assign by position: the forecast may come back as a Series indexed
    # after the end of the training sample, and label alignment against
    # `data`'s own index would leave the column filled with NaN.
    data['forecast'] = np.asarray(forecast)
    return data

# Identify potential seismic events
def identify_events(data, anomaly_threshold=0.8, forecast_threshold=2):
    """Select rows that satisfy all three event criteria.

    A row is kept when it is flagged as an anomaly, its STA/LTA ratio
    exceeds ``anomaly_threshold``, and its velocity differs from the
    forecast by more than ``forecast_threshold`` rolling standard
    deviations.

    Args:
        data: DataFrame with ``is_anomaly``, ``sta_lta_ratio``, ``velocity``,
            ``forecast`` and ``velocity_rolling_std`` columns.
        anomaly_threshold: Lower bound (exclusive) for ``sta_lta_ratio``.
        forecast_threshold: Multiplier applied to ``velocity_rolling_std``.

    Returns:
        The subset of ``data`` matching every criterion.
    """
    flagged = data['is_anomaly'] == 1
    energetic = data['sta_lta_ratio'] > anomaly_threshold

    residual = np.abs(data['velocity'] - data['forecast'])
    allowed_deviation = forecast_threshold * data['velocity_rolling_std']
    off_forecast = residual > allowed_deviation

    return data[flagged & energetic & off_forecast]

# Main testing function
def test_models(file_path, isolation_forest, scaler, arima_model):
    """Run the full detection pipeline on one test CSV.

    Args:
        file_path: Path of the test CSV.
        isolation_forest: Fitted anomaly model passed to ``detect_anomalies``.
        scaler: Fitted scaler passed to ``detect_anomalies``.
        arima_model: Fitted model passed to ``forecast_arima``.

    Returns:
        ``(test_data, potential_events)``: the fully annotated DataFrame and
        the rows selected by ``identify_events``.
    """
    featured = engineer_test_features(load_test_data(file_path))
    scored = detect_anomalies(featured, isolation_forest, scaler)
    annotated = forecast_arima(scored, arima_model)
    return annotated, identify_events(annotated)

# Example usage
# Placeholder path: replace with a real test CSV before running.
test_file_path = 'path/to/your/test_data.csv'
# isolation_forest, scaler and arima_model are not defined in this cell; they
# must already exist in the session.
test_data, potential_events = test_models(test_file_path, isolation_forest, scaler, arima_model)

# Plot results
# Velocity trace, the ARIMA forecast on top of it, and the selected events in red.
plt.figure(figsize=(15, 10))
plt.plot(test_data['time_rel(sec)'], test_data['velocity'], label='Velocity')
plt.plot(test_data['time_rel(sec)'], test_data['forecast'], label='ARIMA Forecast', alpha=0.7)
plt.scatter(potential_events['time_rel(sec)'], potential_events['velocity'], color='red', label='Potential Events')
plt.legend()
plt.title('Test Data with Potential Seismic Events')
plt.xlabel('Time (seconds)')
plt.ylabel('Velocity (m/s)')
plt.show()

# Print potential event times
print("Potential seismic event times:")
print(potential_events['time_abs'])