In [None]:
import numpy as np
import pandas as pd
from os import listdir
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import load_model
import tensorflow as tf

In [None]:
# Directory that holds the training CSV files.
mypath = "/kaggle/input/seismic-data/data/train/moon"

# os.listdir() yields entries in arbitrary order. The training loop further
# down pairs each file with a catalog row by its position in this list, so
# the list is sorted to make that pairing reproducible from run to run.
# NOTE(review): sorting only makes the order deterministic; confirm that the
# sorted file names really line up with the row order of the catalog.
csv_files = sorted(
    join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))
)

In [None]:
# Event catalog (apollo12_catalog_GradeA_final.csv); its 'time_rel(sec)'
# column is read later when the proximity feature is built.
catalog_path = "/kaggle/input/seismic-detection-across-the-solar-system/data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv"
catalog_df = pd.read_csv(catalog_path)

In [None]:
# Number of consecutive rows in each sliding window. It is also the first
# dimension of the model's input_shape, so both must be changed together.
time_steps = 500  # Set your preferred time window length

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense, BatchNormalization

# 1-D convolutional classifier over windows of shape (time_steps, 2);
# the 2 features are the 'proximity' and 'velocity' columns.
model = Sequential([
    # First convolutional stage
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, 2)),
    BatchNormalization(),  # Helps with convergence
    MaxPooling1D(pool_size=2),
    Dropout(0.2),

    # Second convolutional stage
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),

    # Classification head: flatten, one hidden dense layer, then a single
    # sigmoid unit that outputs the binary-class probability
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# Optional: resume from a previously saved model instead of a fresh one.
# model = load_model("/kaggle/working/quake_detector_model(Velocity_only)-CNN.h5")

# Binary cross-entropy with label smoothing of 0.1, Adam optimizer,
# accuracy reported during training.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.1),
    metrics=['accuracy'],
)

In [None]:
def proximity_list(length, certain_index):
    """Build a triangular "closeness" profile that peaks at ``certain_index``.

    Element ``i`` of the result is ``1 - |i - certain_index| / max_distance``,
    where ``max_distance`` is the larger of ``certain_index`` and
    ``length - 1 - certain_index``. When ``certain_index`` lies inside
    ``[0, length - 1]`` the value is 1 at the peak and falls linearly to 0 at
    the end of the list that is farther from the peak.

    Args:
        length: Number of values to generate.
        certain_index: Position of the peak. It does not have to be an
            integer, nor does it have to lie inside the list.

    Returns:
        A list of ``length`` floats.
    """
    max_distance = max(certain_index, length - 1 - certain_index)

    if max_distance == 0:
        # Only reachable when there is at most one element and the peak sits
        # on it (length 0 or 1, certain_index 0). Every distance is then 0 and
        # the plain formula would evaluate 0 / 0, so return the peak value.
        return [1.0] * length

    return [1 - abs(i - certain_index) / max_distance for i in range(length)]

In [None]:
def process_file_in_batch(csv_file, model, index):
    """Fit ``model`` on sliding windows cut from one CSV file, then save it.

    The file is read, a ``proximity`` column is added with ``proximity_list``
    using row ``index`` of the module-level ``catalog_df``, and the
    ``['proximity', 'velocity']`` columns are cut into overlapping windows of
    ``time_steps`` rows (module-level constant). Each window is labelled with
    the ``mq`` value of its last row. ``model.fit`` is called once on all
    windows and the model is written to
    ``'quake_detector_model(proximity)-CNN.h5'``.

    Args:
        csv_file: Path of a CSV file with ``velocity`` and ``mq`` columns.
        model: Object exposing ``fit(X, y)`` and ``save(path)``.
        index: Positional row of ``catalog_df`` to use for this file.
            Presumably the catalog row describing this file's event; verify
            against the caller.

    Raises:
        ValueError: If the file has fewer than ``time_steps`` rows, so that
            not even one window can be formed.
    """
    df = pd.read_csv(csv_file)

    # NOTE(review): the catalog value comes from a column named
    # 'time_rel(sec)' but is used here as a row position. That is only
    # equivalent if the CSV has one row per second starting at 0 - confirm.
    event_position = catalog_df['time_rel(sec)'].iloc[index]
    df['proximity'] = proximity_list(len(df.index), event_position)

    X = df[['proximity', 'velocity']].values
    y = df['mq'].values

    if len(X) < time_steps:
        raise ValueError(
            f"{csv_file} has {len(X)} rows; at least {time_steps} are needed "
            "to form one window"
        )

    # All windows at once: sliding_window_view yields
    # (n_windows, features, time_steps); the model expects
    # (n_windows, time_steps, features). This includes the final window that
    # ends on the last row of the file.
    windows = np.lib.stride_tricks.sliding_window_view(X, time_steps, axis=0)
    X_seq = np.ascontiguousarray(windows.transpose(0, 2, 1))

    # Label of window i is the label of its last row, i + time_steps - 1.
    y_seq = y[time_steps - 1:]

    # One fit call per file; saving afterwards keeps the file on disk in step
    # with the most recently processed CSV.
    model.fit(X_seq, y_seq)
    model.save('quake_detector_model(proximity)-CNN.h5')

In [None]:
# Train on the selected files in list order; each file's position in the
# slice is printed and also passed on as its catalog row index.
for file_idx, csv_file in enumerate(csv_files[0:1]):
    print(file_idx)
    process_file_in_batch(csv_file, model, file_idx)

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

# Rebind `model` to the network stored in the saved .h5 file below; from here
# on the notebook uses this loaded model rather than the one built above.
trained_model_path = '/kaggle/input/scismic-model-cnn/tensorflow2/default/1/quake_detector_model(proximity)-CNN.h5'
model = load_model(trained_model_path)

In [None]:
! pip install obspy

In [None]:
import obspy

# Read the .mseed waveform file for this test event with ObsPy
file_path = '/kaggle/input/seismic-detection-across-the-solar-system/data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-02-18HR00_evid00016.mseed'
stream = obspy.read(file_path)

# Plot the loaded data for a visual check
stream.plot()

In [None]:
# Load the test recording (CSV with 'time_rel' and 'velocity' columns)
test_file = "/kaggle/input/seismic-data/data/test/moon/12_b_00016.csv"
test_df = pd.read_csv(test_file)

# No catalog entry is used at inference time, so the proximity feature is
# filled with zeros.
# NOTE(review): during training this column is a catalog-derived ramp, so the
# model sees a different distribution here - confirm this is intended.
test_df['proximity'] = [0] * len(test_df.index)

# Same feature order as in training
test_X = test_df[['proximity', 'velocity']].values

# Window length; must equal the time dimension the model was built with
time_steps = 500
if len(test_X) < time_steps:
    raise ValueError(
        f"{test_file} has {len(test_X)} rows; at least {time_steps} are "
        "needed to form one window"
    )

# Cut every window at once. sliding_window_view yields
# (n_windows, features, time_steps); the model expects
# (n_windows, time_steps, features).
X_test = np.ascontiguousarray(
    np.lib.stride_tricks.sliding_window_view(test_X, time_steps, axis=0)
    .transpose(0, 2, 1)
)

# One probability per window, flattened to 1-D for masking and plotting
predictions = model.predict(X_test)
probabilities = predictions.ravel()

# Window i covers rows i .. i + time_steps - 1. Training labels a window with
# its LAST row, so each prediction is attributed to the time of that row.
window_times = test_df['time_rel'].iloc[time_steps - 1:]

# Thresholds for confident predictions
high_threshold = 0.9
low_threshold = 0.1

high_indices = np.flatnonzero(probabilities > high_threshold)
low_indices = np.flatnonzero(probabilities < low_threshold)

high_predictions = probabilities[high_indices]
low_predictions = probabilities[low_indices]

# Positional lookup of the time at the end of each selected window
high_times = test_df['time_rel'].iloc[high_indices + time_steps - 1]
low_times = test_df['time_rel'].iloc[low_indices + time_steps - 1]

# Plot the full probability trace with the confident predictions highlighted
fig, ax = plt.subplots(figsize=(15, 5))

ax.plot(window_times, probabilities, label='All Predictions', alpha=0.3)
ax.scatter(high_times, high_predictions,
           color='green', label='High Predictions (> 0.9)', zorder=5)
ax.scatter(low_times, low_predictions,
           color='blue', label='Low Predictions (< 0.1)', zorder=5)

ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Prediction Probability')
ax.set_title('Seismic Event Predictions (High > 0.9, Low < 0.1)')
ax.legend()
ax.grid(True)

plt.show()

# Report the times of the confident predictions
print("Times of high predictions (probability > 0.9):")
print(high_times)

print("\nTimes of low predictions (probability < 0.1):")
print(low_times)