## Imports

In [1]:
import csv
import obspy
from obspy import signal
import obspy.signal.filter
from tqdm import tqdm
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import random
import optuna
from joblib import dump, load

## Reading Data

In [2]:
def get_event_list(path):
    events = []
    with open(path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            events.append(row)
    
    return events

In [3]:
def make_utc_list(events):
    utc_times = []
    for _, _, date, oTime in events[1:]:
            utc_times.append(obspy.UTCDateTime(date+oTime))
    return utc_times

In [4]:
def get_clean_data(path, event_list):
    clean_data = []

    for year, day, date, oTime in tqdm(event_list[1:]):
        HHE = path+f"/HHE.D/HHE.D/MN.WDD..HHE.D.{year}.{day.zfill(3)}"
        HHN = path+f"/HHN.D/HHN.D/MN.WDD..HHN.D.{year}.{day.zfill(3)}"
        HHZ = path+f"/HHZ.D/HHZ.D/MN.WDD..HHZ.D.{year}.{day.zfill(3)}"

        st = obspy.read(HHE)
        st += obspy.read(HHN)
        st += obspy.read(HHZ)

        quakeTime = obspy.UTCDateTime(date+oTime)

        st = st.slice(starttime=quakeTime-30, endtime=quakeTime+30)

        filtered_data = obspy.signal.filter.highpass(st, freq=2.5, df=5.0001)

        st.data = filtered_data

        clean_data.append(st)
    
    return clean_data

In [6]:
def create_neg_examples(path ,utc_times):
    # Initialize an empty list to store the negative examples
    negative_examples = []

    # Loop to generate random negative examples
    while len(negative_examples) < 200:

        #add year if necessary
        ran_day = random.randint(1, 365)
        ran_hr = random.randint(0, 23)
        ran_min =random.randint(0, 59)
        ran_sec =random.randint(0, 59)
        year = 2010

        if ran_day == 97:
            continue
            
        # Generate a random UTC time on the selected day
        random_utc_time = obspy.UTCDateTime(year=2010, julday=ran_day, hour=ran_hr, minute=ran_min, second=ran_sec)

        flag = False

        for time in utc_times:
            #if the window being created is in an earthquake window
            if random_utc_time < time + 60 and random_utc_time > time - 60:
                flag = True

        if not flag:
            HHE = path+f"/HHE.D/HHE.D/MN.WDD..HHE.D.{year}.{str(ran_day).zfill(3)}"
            HHN = path+f"/HHN.D/HHN.D/MN.WDD..HHN.D.{year}.{str(ran_day).zfill(3)}"
            HHZ = path+f"/HHZ.D/HHZ.D/MN.WDD..HHZ.D.{year}.{str(ran_day).zfill(3)}"

            try:
                st = obspy.read(HHE)
                st += obspy.read(HHN)
                st += obspy.read(HHZ)

                st = st.slice(starttime=random_utc_time-30, endtime=random_utc_time+30)

                filtered_data = obspy.signal.filter.highpass(st, freq=2.5, df=5.0001)

                st.data = filtered_data

                negative_examples.append(st)
                
            except Exception as e:
                continue
            
    return negative_examples

In [7]:
events = get_event_list('../Data/DUMP/2010-WDD-events.csv')
earthquake_data = np.array(get_clean_data('../Data/DUMP', events))
earthquake_labels = np.asarray([1] * len(earthquake_data))

utc_times = make_utc_list(events)
neg_examples = create_neg_examples('../Data/DUMP', utc_times)
negative_labels = np.asarray([0] * len(neg_examples))

combined_data = np.concatenate((earthquake_data, neg_examples), axis=0)
combined_labels = np.concatenate((earthquake_labels, negative_labels), axis=0)

100%|██████████| 133/133 [01:42<00:00,  1.29it/s]
  x = np.asarray(x)


Dump Data

In [9]:
# Save the combined_data and combined_labels
dump(combined_data, 'combined_data.joblib')
dump(combined_labels, 'combined_labels.joblib')

['combined_labels.joblib']

Load Data

In [None]:
# Load the combined_data and combined_labels
combined_data = load('combined_data.joblib')
combined_labels = load('combined_labels.joblib')

In [20]:
X_train, X_test, y_train, y_test = train_test_split(combined_data, combined_labels, test_size=0.33)

## Model

### Original Attempt

In [64]:
# Create a sequential model
model = tf.keras.models.Sequential()

# Add an LSTM layer to the model
model.add(tf.keras.layers.LSTM(128, input_shape=(combined_data.shape[1], combined_data.shape[2])))

# Add a dense layer to the model
model.add(tf.keras.layers.Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [65]:
# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=50)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# Print the test loss and test accuracy
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 1.0082437992095947
Test accuracy: 0.581818163394928


### Tuning Hyperparameters

In [25]:
def objective(trial):
    # Define hyperparameters to optimize
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    lstm_units = trial.suggest_int('lstm_units', 64, 256)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    batch_size = trial.suggest_int('batch_size', 32, 128)

    # Build the model with suggested hyperparameters
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(units=lstm_units, input_shape=(combined_data.shape[1], combined_data.shape[2])))
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Dense(1))

    # Compile the model with the given learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model using the suggested batch size
    model.fit(X_train, y_train, batch_size=batch_size, epochs=10, verbose=0)

    # Evaluate the model
    score = model.evaluate(X_test, y_test)[0]
    
    return score  # Return the value to be minimized (e.g., loss)


In [26]:
study_name = "Test_Study"  # Set your desired study name here
study = optuna.create_study(direction='minimize', study_name=study_name)

study.optimize(objective, n_trials=10)

best_params = study.best_params
print("Best hyperparameters:", best_params)

[I 2023-11-16 01:16:20,474] A new study created in memory with name: Test_Study




[I 2023-11-16 01:16:32,698] Trial 0 finished with value: 5.62830924987793 and parameters: {'learning_rate': 1.4475850153527969e-05, 'lstm_units': 248, 'dropout_rate': 0.0773090416045254, 'batch_size': 66}. Best is trial 0 with value: 5.62830924987793.




[I 2023-11-16 01:16:45,889] Trial 1 finished with value: 1.3834307193756104 and parameters: {'learning_rate': 0.0026507787244878403, 'lstm_units': 169, 'dropout_rate': 0.46633017851636793, 'batch_size': 54}. Best is trial 1 with value: 1.3834307193756104.




[I 2023-11-16 01:16:57,047] Trial 2 finished with value: 5.507222652435303 and parameters: {'learning_rate': 0.000845049998835646, 'lstm_units': 206, 'dropout_rate': 0.39114338702341295, 'batch_size': 57}. Best is trial 1 with value: 1.3834307193756104.




[I 2023-11-16 01:17:06,654] Trial 3 finished with value: 0.8068524599075317 and parameters: {'learning_rate': 0.0034371624103921765, 'lstm_units': 67, 'dropout_rate': 0.09177375894111356, 'batch_size': 55}. Best is trial 3 with value: 0.8068524599075317.




[I 2023-11-16 01:17:15,556] Trial 4 finished with value: 5.360490322113037 and parameters: {'learning_rate': 0.0003374241022286559, 'lstm_units': 69, 'dropout_rate': 0.08738872007541898, 'batch_size': 65}. Best is trial 3 with value: 0.8068524599075317.




[I 2023-11-16 01:17:25,557] Trial 5 finished with value: 5.383359432220459 and parameters: {'learning_rate': 1.2545908781063253e-05, 'lstm_units': 234, 'dropout_rate': 0.48635744867725267, 'batch_size': 63}. Best is trial 3 with value: 0.8068524599075317.




[I 2023-11-16 01:17:34,413] Trial 6 finished with value: 2.192901134490967 and parameters: {'learning_rate': 1.70766074071885e-05, 'lstm_units': 82, 'dropout_rate': 0.4280681716337243, 'batch_size': 90}. Best is trial 3 with value: 0.8068524599075317.




[I 2023-11-16 01:17:44,172] Trial 7 finished with value: 0.6842015981674194 and parameters: {'learning_rate': 0.003278898629822333, 'lstm_units': 238, 'dropout_rate': 0.154814584858593, 'batch_size': 77}. Best is trial 7 with value: 0.6842015981674194.




[I 2023-11-16 01:17:57,239] Trial 8 finished with value: 0.8837992548942566 and parameters: {'learning_rate': 0.0035145938941457382, 'lstm_units': 245, 'dropout_rate': 0.31634550760498764, 'batch_size': 106}. Best is trial 7 with value: 0.6842015981674194.




[I 2023-11-16 01:18:07,251] Trial 9 finished with value: 5.626465797424316 and parameters: {'learning_rate': 0.004379174811786395, 'lstm_units': 127, 'dropout_rate': 0.4437439804858493, 'batch_size': 66}. Best is trial 7 with value: 0.6842015981674194.


Best hyperparameters: {'learning_rate': 0.003278898629822333, 'lstm_units': 238, 'dropout_rate': 0.154814584858593, 'batch_size': 77}


In [None]:
best_learning_rate = study.best_params['learning_rate']
best_lstm_units = study.best_params['lstm_units']
best_dropout_rate = study.best_params['dropout_rate']
batch = study.best_params['batch_size']


model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(units=best_lstm_units, input_shape=(combined_data.shape[1], combined_data.shape[2])))
model.add(tf.keras.layers.Dropout(best_dropout_rate))
model.add(tf.keras.layers.Dense(1))

optimizer = tf.keras.optimizers.Adam(learning_rate=best_learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Now train your model using these hyperparameters
model.fit(X_train, y_train, batch_size=batch, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2068051bf40>

In [None]:
events = get_event_list('../Data/2009/2009-WDD-events.csv')
print("Events")
earthquake_data = np.array(get_clean_data('../Data/2009', events))
print("Data")
earthquake_labels = np.asarray([1] * len(earthquake_data))

utc_times = make_utc_list(events)
neg_examples = create_neg_examples('../Data/2009', utc_times)
print("Negative")
negative_labels = np.asarray([0] * len(neg_examples))

combined_data = np.concatenate((earthquake_data, neg_examples), axis=0)
combined_labels = np.concatenate((earthquake_labels, negative_labels), axis=0)

Events


100%|██████████| 157/157 [03:16<00:00,  1.25s/it]


Data


In [None]:
# Save the combined_data and combined_labels
dump(combined_data, 'combined_data2009.joblib')
dump(combined_labels, 'combined_labels2009.joblib')

In [None]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(combined_data, combined_labels, test_size=0.33)

Testing