In [3]:
import numpy as np
import obspy
import scipy
from obspy import read
import matplotlib.pyplot as plt 
import time
import pandas as pd

from __future__ import print_function
import tensorflow as tf
import keras
from keras.layers import add, Reshape, Dense,Input, TimeDistributed, Dropout, Activation, LSTM, Conv2D, Bidirectional, BatchNormalization 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping,ReduceLROnPlateau
from keras.regularizers import l1
from keras import backend as K
from keras.models import Model
from sklearn.model_selection import train_test_split
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from scipy import signal
import os
np.seterr(divide='ignore', invalid='ignore')
import h5py
from obspy.signal.trigger import trigger_onset
# Silence all Python warnings for the rest of the session.
# `np.warnings` was only an alias of the standard-library module and is not
# available in recent NumPy releases, so the stdlib module is used directly.
import warnings
warnings.filterwarnings('ignore')

## Grillo Data

In [4]:
s1 = read("data/signal/1512114894_015_P.mseed")
print(len(s1))

3


In [5]:
s1

3 Trace(s) in Stream:
.015..BHX | 2017-12-01T07:54:54.001000Z - 2017-12-01T07:55:57.969000Z | 31.2 Hz, 2000 samples
.015..BHY | 2017-12-01T07:54:54.001000Z - 2017-12-01T07:55:57.969000Z | 31.2 Hz, 2000 samples
.015..BHZ | 2017-12-01T07:54:54.001000Z - 2017-12-01T07:55:57.969000Z | 31.2 Hz, 2000 samples

In [6]:
s1.plot(outfile="signal1.png")

In [7]:
tr_s1 = s1[1]
trace_data = tr_s1.data
print(type(trace_data))
print(trace_data)

<class 'numpy.ndarray'>
[ 0.          0.01803802 -0.04592396 ...,  0.02292396  0.01096198
  0.        ]


In [8]:
len(trace_data)

2000

### Detrend

In [9]:
s = s1.detrend("demean")

In [10]:
s[1].data

array([ 0.00228925,  0.02032727, -0.04363471, ...,  0.02521321,
        0.01325123,  0.00228925])

In [11]:
len(s[1].data)

2000

### Bandpass filter between 1 and 45 Hz

In [12]:
# Band-pass the stream with corner frequencies of 1 Hz and 45 Hz.
# NOTE(review): the traces printed above are sampled at 31.2 Hz, so 45 Hz lies
# above their Nyquist frequency (15.6 Hz). Confirm what ObsPy actually applies
# in that case; any warning it emits is hidden because warnings are silenced
# in the import cell.
s = s.filter("bandpass", freqmin=1, freqmax=45)

In [13]:
s[1].data

array([ 0.00175945,  0.0146993 , -0.0424117 , ...,  0.00657167,
       -0.00272553, -0.0068781 ])

In [14]:
len(s[1].data)

2000

### Resample to 100 Hz

In [15]:
s = s.resample(100)

In [16]:
s[1].data

array([ 0.00283503,  0.0039782 ,  0.00282766, ..., -0.003435  ,
       -0.00170013,  0.00060302])

In [17]:
len(s[1].data)

6400

### Normalize

In [18]:
s = s.normalize()

In [19]:
s[1].data

array([ 0.00117299,  0.00164597,  0.00116994, ..., -0.00142122,
       -0.00070343,  0.0002495 ])

In [20]:
len(s[1].data)

6400

In [21]:
s.plot(outfile="tf.png")

### STFT

In [22]:
# Short-time Fourier transform of the preprocessed stream at fs = 100 Hz with
# 80-sample segments. The stream object is passed directly; the printed shape
# below shows one (frequency, time) spectrogram per trace.
f, t, Zxx = signal.stft(s, fs=100,nperseg=80)
print(Zxx.shape)

(3, 41, 161)


In [23]:
#Zxx = np.abs(Zxx)
#Zxx = Zxx.T
Zxx.shape

(3, 41, 161)

In [24]:
max(f),min(f)

(50.0, 0.0)

In [25]:
max(t)-min(t)

64.0

In [26]:
# Plot the STFT power of the first channel and write it to disk.
fig, ax = plt.subplots()

# Sort the frequency axis (and the matching rows of Zxx) in ascending order.
ind = np.argsort(f)
f = f[ind]
Zxx = Zxx[:, ind, :]

# |Z|^2 of channel 0, expressed as Z * conj(Z) like the original cell.
power = np.abs(Zxx[0, :, :] * np.conj(Zxx[0, :, :]))
ax.pcolormesh(t, f, power, shading='gouraud')
ax.set_xlabel("Time [s]")
ax.set_ylabel("Frequency [Hz]")
ax.set_title("STFT power, first channel")

# `plt.plot(outfile=...)` neither draws nor saves anything; savefig is what
# actually writes the image file.
fig.savefig("stft.png")

[]

## Code

In [162]:
def _read_streams(index_csv, data_dir):
    """Read every waveform file listed in the first column of a header-less CSV.

    Parameters
    ----------
    index_csv : str
        Path of the CSV file whose column 0 holds the file names.
    data_dir : str
        Directory prefix (including the trailing slash) prepended to each name.

    Returns
    -------
    tuple
        ``(file_index, streams)`` where ``file_index`` is the DataFrame read
        from ``index_csv`` and ``streams`` is the list of objects returned by
        ``obspy.read`` in file-index order.
    """
    file_index = pd.read_csv(index_csv, header = None)
    streams = [obspy.read(data_dir + name) for name in file_index[0]]
    return file_index, streams


# Same inputs and resulting globals as before, without the duplicated loop.
noise_filename, noise_stream = _read_streams('noise.csv', "data/noise/")
signal_filename, signal_stream = _read_streams('signal.csv', "data/signal/")

In [163]:
def slice_stream(st):
    """Return the middle half of a stream.

    The time span is taken from the first trace (``st[0].stats``); a quarter
    of that span is cut from each end and the result of ``st.slice`` over the
    remaining interval is returned.
    """
    first_stats = st[0].stats
    window_start = first_stats.starttime
    window_end = first_stats.endtime
    quarter = (window_end - window_start) / 4
    return st.slice(window_start + quarter, window_end - quarter)

In [164]:
# Keep only the middle half of every noise and signal stream.
noise_stream = [slice_stream(st) for st in noise_stream]
signal_stream = [slice_stream(st) for st in signal_stream]

In [165]:
# Preprocess each noise stream: band-pass (1-45 Hz corners), resample to
# 100 Hz, remove the mean, then normalize. Each call's return value feeds the
# next step, exactly as in the step-by-step version.
# NOTE(review): the sample file inspected earlier is sampled at 31.2 Hz, so the
# 45 Hz corner is above Nyquist before resampling - confirm how ObsPy handles it.
noise_stream = [
    n.filter("bandpass", freqmin=1, freqmax=45)
     .resample(100)
     .detrend("demean")
     .normalize()
    for n in noise_stream
]

In [166]:
# Preprocess each signal stream with the same chain used for the noise
# streams: band-pass (1-45 Hz corners), resample to 100 Hz, remove the mean,
# then normalize.
# NOTE(review): the sample file inspected earlier is sampled at 31.2 Hz, so the
# 45 Hz corner is above Nyquist before resampling - confirm how ObsPy handles it.
signal_stream = [
    n.filter("bandpass", freqmin=1, freqmax=45)
     .resample(100)
     .detrend("demean")
     .normalize()
    for n in signal_stream
]

In [167]:
len(noise_stream), len(signal_stream)

(17850, 1223)

In [168]:
signal_stream[0].plot(outfile="signal11.png")

In [169]:

# Per-time-step labels, 62 steps per window (each window covers about 32 s).
#   * noise windows  : all 62 steps are 0
#   * signal windows : steps 0-30 are 0, steps 31-61 are 1
# NOTE(review): presumably the P-wave arrival sits at the window centre, so the
# whole second half is marked as "event" - confirm. A narrower labelling (ones
# for only 6 steps starting at step 31, roughly 3 s) was tried previously and
# is not used.
noise_length = len(noise_stream)
signal_length = len(signal_stream)

noise_y = np.zeros(shape = (noise_length, 62, 1))

# Label template shared by every signal window, shape (62, 1).
signal_y_truth = np.concatenate(
    [np.zeros(shape=(31, 1)), np.ones(shape=(31, 1))], axis=0
)
# Repeat the template once per signal window -> (signal_length, 62, 1).
signal_y = np.tile(signal_y_truth, (signal_length, 1, 1))
    

In [170]:
def _stft_magnitudes(streams, frame_shape=(248, 13, 3), fs=100, nperseg=25):
    """Compute STFT magnitude features for a list of streams.

    Parameters
    ----------
    streams : sequence
        Streams whose traces all have the same number of samples.
    frame_shape : tuple
        Expected per-stream feature shape; the transposed magnitude array of
        every stream is assigned into a slot of this shape.
    fs : float
        Sampling frequency passed to ``scipy.signal.stft``.
    nperseg : int
        Segment length passed to ``scipy.signal.stft``.

    Returns
    -------
    tuple
        ``(freqs, times, features)``: two lists holding the frequency and time
        vectors of each stream, and an array of shape
        ``(len(streams),) + frame_shape`` with ``np.abs(Zxx).T`` per stream.
    """
    freqs, times = [], []
    features = np.zeros(shape=(len(streams),) + tuple(frame_shape))
    for idx, stream in enumerate(streams):
        # Stack the raw sample arrays explicitly instead of letting NumPy walk
        # the stream element by element, which is far slower.
        samples = np.stack([trace.data for trace in stream])
        freq_axis, time_axis, spectrum = signal.stft(samples, fs=fs, nperseg=nperseg)
        freqs.append(freq_axis)
        times.append(time_axis)
        features[idx] = np.abs(spectrum).T
    return freqs, times, features


f_signal, t_signal, Zxx_signal = _stft_magnitudes(signal_stream)
f_noise, t_noise, Zxx_noise = _stft_magnitudes(noise_stream)

In [171]:
# Stack noise samples first, then signal samples, along the sample axis.
y = np.concatenate([noise_y, signal_y], axis=0)
X_Zxx = np.concatenate([Zxx_noise, Zxx_signal], axis=0)

In [172]:
print(y.shape)
print(X_Zxx.shape)

(19073, 62, 1)
(19073, 248, 13, 3)


## Model CRED

In [173]:
def lr_schedule(epoch):
    """Return the learning rate for the given epoch and print it.

    The base rate is 1e-2. It is scaled by 1e-1 after epoch 10, by 1e-2 after
    epoch 20, by 1e-3 after epoch 40 and by 0.5e-3 after epoch 60 (each
    threshold is exclusive: the factor applies when ``epoch`` is greater).
    """
    base_lr = 1e-2
    # (threshold, factor) pairs checked from the latest stage to the earliest.
    stages = ((60, 0.5e-3), (40, 1e-3), (20, 1e-2), (10, 1e-1))
    for threshold, factor in stages:
        if epoch > threshold:
            lr = base_lr * factor
            break
    else:
        lr = base_lr
    print('Learning rate: ', lr)
    return lr

In [174]:
def block_CNN(filters, ker, inpC):
    """Build the convolutional branch of a residual block.

    Applies BatchNormalization -> ReLU -> Conv2D twice in a row. Both
    convolutions use ``filters`` output channels, 'same' padding and a kernel
    of size ``(ker - 2, ker - 2)``. The skip connection is not added here; the
    caller is expected to add the block input to the returned tensor.
    """
    kernel_size = (ker - 2, ker - 2)
    branch = inpC
    for _ in range(2):
        branch = BatchNormalization()(branch)
        branch = Activation('relu')(branch)
        branch = Conv2D(filters, kernel_size, padding = 'same')(branch)
    return branch

In [175]:
def block_BiLSTM(inpR, filters, rnn_depth):
    """Stack ``rnn_depth`` bidirectional LSTM layers with residual additions.

    Every layer is a ``Bidirectional(LSTM(filters, return_sequences=True))``
    followed by ``Dropout(0.7)``. The first layer's output replaces the input;
    each later layer's output is added to the running tensor. With
    ``rnn_depth == 0`` the input is returned unchanged.
    """
    stacked = inpR
    for depth in range(rnn_depth):
        recurrent = Bidirectional(LSTM(filters, return_sequences=True))(stacked)
        recurrent = Dropout(0.7)(recurrent)
        # No skip connection on the first layer; presumably because the input
        # width need not match the BiLSTM output width.
        stacked = recurrent if depth == 0 else add([stacked, recurrent])
    return stacked
     

In [176]:
def model_cred(shape, filters):
    """Assemble the CNN + recurrent detection model.

    Parameters
    ----------
    shape : tuple
        Shape of a single input sample (without the batch axis).
    filters : sequence of int
        Layer widths. Only ``filters[0]``, ``filters[1]`` and ``filters[3]``
        are read by this function.

    Returns
    -------
    keras Model
        Maps the input to one sigmoid output per remaining time step.
    """
    inp = Input(shape=shape, name='input')

    # Two strided convolution stages, each followed by a residual CNN block.
    stage1 = Conv2D(filters[0], (9,9), strides = (2,2), padding = 'same', activation = 'relu')(inp)
    stage1_res = keras.layers.add([block_CNN(filters[0], 9, stage1), stage1])

    stage2 = Conv2D(filters[1], (5,5), strides = (2,2), padding = 'same', activation = 'relu')(stage1_res)
    stage2_res = keras.layers.add([block_CNN(filters[1], 5, stage2), stage2])

    # Merge the last two axes so each step along axis 1 becomes one
    # feature vector for the recurrent layers.
    feat_shape = K.int_shape(stage2_res)
    sequence = Reshape((feat_shape[1], feat_shape[2]*feat_shape[3]))(stage2_res)

    recurrent = block_BiLSTM(sequence, filters = filters[3], rnn_depth = 2)

    uni = LSTM(filters[3], return_sequences=True)(recurrent)
    uni = Dropout(0.8)(uni)
    uni = BatchNormalization()(uni)

    # Per-step dense head with L1-regularised kernels.
    hidden = TimeDistributed(Dense(filters[3], kernel_regularizer=l1(0.01), activation='relu'))(uni)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.8)(hidden)

    per_step_prob = TimeDistributed(Dense(1, kernel_regularizer=l1(0.01), activation='sigmoid'))(hidden)

    return Model(inputs=inp, outputs=per_step_prob)

In [177]:
# Stop training after 5 epochs without improvement of the default monitored quantity.
early_stopping_monitor = EarlyStopping(patience=5)

# Epoch-based learning-rate schedule defined by lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Plateau-based learning-rate reduction.
# NOTE(review): lr_schedule ignores the current rate, so the scheduler probably
# overwrites any reduction made here at the start of the next epoch - confirm.
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    cooldown=0,
    patience=3,
    min_lr=0.5e-6,
)

In [178]:
# Seed TensorFlow and NumPy before the model is created so the initial weights
# are reproducible across kernel restarts (same seed value the notebook uses
# when it seeds the data split).
tf.random.set_seed(15)
np.random.seed(15)

# Input samples are (248, 13, 3) STFT magnitude tensors.
model = model_cred((248, 13, 3), filters = [8, 16, 32, 64, 128, 256])

# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases. lr_schedule(0) yields the initial rate
# (and prints it).
model.compile(loss='binary_crossentropy',
              optimizer=tf.optimizers.Adam(learning_rate=lr_schedule(0)),
              metrics=['binary_accuracy'])

model.summary()

Learning rate:  0.01
Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 248, 13, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_36 (Conv2D)             (None, 124, 7, 8)    1952        ['input[0][0]']                  
                                                                                                  
 batch_normalization_36 (BatchN  (None, 124, 7, 8)   32          ['conv2d_36[0][0]']              
 ormalization)                                                                                    
                                                                       

In [179]:
# Fix the TensorFlow and NumPy random seeds for the steps that follow.
# NOTE(review): the model is built in an earlier cell, so this seeding happens
# after its weights were created.
seed_value = 15
tf.random.set_seed(seed_value)
np.random.seed(seed_value)

In [180]:
# Hold out 10% of the shuffled samples as a test set (fixed random_state).
# NOTE(review): this rebinds `y` to the training labels, so re-running the cell
# without re-running the cell that builds `y` would pair X_Zxx with an
# already-split label array.
X, X_test, y, y_test = train_test_split(X_Zxx, y, test_size=0.1, random_state=15,shuffle=True)

In [181]:
# Keep only the weights of the epoch with the best validation loss.
checkpointer = ModelCheckpoint(
    filepath='model_best.hdf5',
    monitor='val_loss',
    verbose=0,
    mode='auto',
    save_best_only=True,
)

# Callback order is kept as in the original call.
training_callbacks = [checkpointer, lr_reducer, lr_scheduler, early_stopping_monitor]

# Wall-clock timestamps around the fit call.
start_time = time.time()
history = model.fit(
    X, y,
    epochs=200,
    batch_size=500,
    verbose=0,
    validation_split = 0.2,
    max_queue_size=5,
    callbacks = training_callbacks,
)
end_time = time.time()

Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.01
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  0.0001
Learning rate:  1e-05
Learning rate:  1e-05
Learning rate:  1e-05
Learning rate:  1e-05
L

In [182]:
history.history['val_binary_accuracy']

[0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9791962504386902,
 0.9907961487770081,
 0.9711622595787048,
 0.9711622595787048,
 0.9711622595787048,
 0.9943339228630066,
 0.9949212074279785,
 0.995611846446991,
 0.9952829480171204,
 0.9963870644569397,
 0.9970307350158691,
 0.9966501593589783,
 0.9971246719360352,
 0.9969978332519531,
 0.9973078966140747,
 0.9975287318229675,
 0.9979609847068787,
 0.9980878233909607,
 0.9981113076210022,
 0.9978764057159424,
 0.9980784058570862,
 0.9980972409248352,
 0.9979609847068787,
 0.9979703426361084,
 0.9978482127189636,
 0.9981018900871277,
 0.998139500617981,
 0.9977777600288391,
 0.9981489181518555,
 0.998172402381897,
 0.998299241065979,
 0.9981771111488342,
 0.9980408549308777,
 0.9980032444000244,
 0.9980361461639404,
 0.9980831146240234,
 0.9980502128601074,
 0.998092532157898,
 0.998106598854064

In [185]:
# Restore the weights saved to 'model_best.hdf5' during training, then score
# the model on the held-out test split. With the compile settings used in this
# notebook the result is [loss, binary_accuracy].
model.load_weights('model_best.hdf5')
model.evaluate(X_test, y_test)



[0.04807717353105545, 0.9987996220588684]

In [186]:
y_pred = model.predict(X_test)

In [135]:
len(y_test[200])

62

In [188]:
y_pred[201],y_test[201]

(array([[ 0.00442594],
        [ 0.00448269],
        [ 0.00449219],
        [ 0.00449893],
        [ 0.00450167],
        [ 0.00450271],
        [ 0.00450337],
        [ 0.004504  ],
        [ 0.00450242],
        [ 0.00449938],
        [ 0.00449967],
        [ 0.00450116],
        [ 0.00450382],
        [ 0.00450253],
        [ 0.00450242],
        [ 0.00450268],
        [ 0.00450295],
        [ 0.00450346],
        [ 0.00450394],
        [ 0.0045042 ],
        [ 0.00450438],
        [ 0.00450379],
        [ 0.0045048 ],
        [ 0.00450471],
        [ 0.00450444],
        [ 0.00450358],
        [ 0.00450453],
        [ 0.00450486],
        [ 0.00450471],
        [ 0.00450459],
        [ 0.00450468],
        [ 0.00450483],
        [ 0.00450492],
        [ 0.00450402],
        [ 0.004504  ],
        [ 0.00450426],
        [ 0.00450468],
        [ 0.0045054 ],
        [ 0.00450516],
        [ 0.00450468],
        [ 0.00450414],
        [ 0.00450346],
        [ 0.0045037 ],
        [ 0