In [1]:
import os
import sys
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import GlobalMaxPooling2D, MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn import preprocessing
import warnings
warnings.filterwarnings('ignore')

In [11]:
# Path of the trained model that will be evaluated.
model_name = "Models/LSTM/Binary/model1_Multicapa_win60_squaredhinge_callback200_monitor-val_accuracy_train2007-2013_validacion2014_testing2015.h5"
# False -> binary labels (-1 / 1); True -> three-class labels (0 / 1 / 2).
is_multiclass = False

# Look-back window of the model input: each sample is a matrix with shape (60, 12).
window = 60


In [3]:
def calculate_dist(X1,X2,Y1,Y2):
  """Return the Euclidean distance between the points (X1, Y1) and (X2, Y2)."""
  delta_x = X1 - X2
  delta_y = Y1 - Y2
  squared_length = delta_x**2 + delta_y**2
  return squared_length**(1/2)

def create_tensor(dataset,look_back):
  """Build sliding-window samples from a pandas DataFrame of events.

  Rows are used in their existing order. For every row at position
  ``look_back`` or later, one sample is produced that pairs that row (the
  target event) with each of the ``look_back`` rows that precede it.

  Each of the ``look_back`` lines of a sample holds 12 values:
  target lat, long, z, magn1; previous lat, long, z, magn1, Label;
  distance between both events, time difference, magnitude difference.

  Returns:
    (X, y): ``X`` is the array of samples and ``y`` the ``Label`` of each
    target event.
  """
  samples, targets = [], []
  n_events = len(dataset)
  for target_pos in range(look_back, n_events):
    target = dataset.iloc[target_pos]
    # The look_back events immediately before the target, oldest first.
    history = (dataset.iloc[pos] for pos in range(target_pos - look_back, target_pos))
    window_rows = [
      [
        target["lat"], target["long"], target["z"], target["magn1"],
        past["lat"], past["long"], past["z"], past["magn1"], past["Label"],
        calculate_dist(target["long"], past["long"], target["lat"], past["lat"]),  # Euclidean distance
        target["time"] - past["time"],    # time difference
        target["magn1"] - past["magn1"],  # magnitude difference
      ]
      for past in history
    ]
    samples.append(window_rows)
    targets.append(target["Label"])
  return np.array(samples), np.array(targets)

def labeler(df,multiclass=False):
    """Add a ``Label`` column that classifies every event inside its cluster.

    For each cluster, the event with the largest ``magn1`` is taken as the
    mainshock and the remaining events are labelled by comparing their
    ``time`` with the mainshock time. Cluster ``-1`` is treated as noise:
    all of its events receive the mainshock label (a mainshock without
    aftershocks).

    Labels:
        binary (``multiclass`` false): -1 = noise / foreshock / mainshock,
            1 = aftershock.
        multiclass: 0 = foreshock, 1 = mainshock or noise, 2 = aftershock.

    Args:
        df: DataFrame containing at least the columns lat, long, z, magn1,
            time, date_time and cluster. It is not modified.
        multiclass: use the three-class labelling instead of the binary one.

    Returns:
        A new DataFrame sorted by ``time`` with the columns lat, long, z,
        magn1, date_time, time, cluster and Label. Index values of ``df``
        are preserved.
    """
    out_columns=["lat","long","z","magn1","date_time","time","cluster","Label"]
    data=df[["lat","long","z","magn1","time","date_time","cluster"]]

    if multiclass:
        foreshock_label,mainshock_label,aftershock_label=0,1,2
    else:
        # The binary model only separates aftershocks (1) from everything else (-1).
        foreshock_label,mainshock_label,aftershock_label=-1,-1,1

    labelled_clusters=[]
    for cluster in data["cluster"].unique():
        # Explicit copy: the Label column is added to this frame only, never
        # to a view of the caller's data.
        events=data[data["cluster"]==cluster].copy()
        events["Label"]=mainshock_label
        if cluster!=-1:
            # Time of the seismic event with the max magnitude in the cluster.
            mainshock_time=events.sort_values(by="magn1",ascending=False)["time"].iloc[0]
            events.loc[events["time"]<mainshock_time,"Label"]=foreshock_label
            events.loc[events["time"]>mainshock_time,"Label"]=aftershock_label
        labelled_clusters.append(events)

    if not labelled_clusters:
        # Empty input: keep the output schema without calling concat on nothing.
        return pd.DataFrame(columns=out_columns)

    # Concatenate and sort once, after the loop, instead of on every iteration.
    return pd.concat(labelled_clusters).sort_values(by="time")[out_columns]

In [14]:
def from_dataframe_to_tensor(df_train,df_test,df_validation,window):
    """Build the train / test / validation tensors and standardize their features.

    Each DataFrame is converted with ``create_tensor`` using ``window`` as the
    look-back. A ``StandardScaler`` is fitted on the training samples only and
    the same transformation is then applied to the three sets.

    Args:
        df_train, df_test, df_validation: DataFrames accepted by ``create_tensor``.
        window: look-back passed to ``create_tensor``.

    Returns:
        ``trainX, trainY, testX, testY, valX, valY``. The ``X`` tensors keep the
        shape produced by ``create_tensor``; the ``Y`` arrays are returned as is.

    Raises:
        ValueError: if a set yields no samples (``create_tensor`` only emits
            samples for rows at position ``window`` or later).
    """
    #Create Tensor
    trainX,trainY=create_tensor(df_train,window)
    testX,testY=create_tensor(df_test,window)
    valX,valY=create_tensor(df_validation,window)

    for split_name,tensor in (("train",trainX),("test",testX),("validation",valX)):
        if tensor.ndim!=3:
            raise ValueError(
                f"The {split_name} set produced no samples: it needs more than window={window} rows"
            )

    # Feature width is read from the data instead of being hard-coded.
    n_features=trainX.shape[-1]

    #Fit the normalization on the training data only
    scaler = preprocessing.StandardScaler().fit(trainX.reshape(-1,n_features))

    def _scale(tensor):
        # Flatten to 2D for the scaler, then restore the original 3D shape.
        return scaler.transform(tensor.reshape(-1,n_features)).reshape(tensor.shape)

    #Apply the same normalization rules to the train, val and test data
    trainX=_scale(trainX)
    testX=_scale(testX)
    valX=_scale(valX)

    print("Normalization process successfully completed")
    return trainX,trainY,testX,testY,valX,valY

In [5]:

#Preprocessing of the dataframes

def _load_labeled_catalog(csv_path):
    """Read one clustered catalog CSV, parse ``date_time`` and add the ``Label`` column."""
    catalog = pd.read_csv(csv_path,sep=",")
    catalog['date_time'] = pd.to_datetime(catalog['date_time'], format='%Y-%m-%d')
    return labeler(catalog,is_multiclass)


def _select_period(catalog,start=None,end=None):
    """Return the events with ``start <= date_time < end``, sorted by ``time``.

    A bound left as None is not applied. The ``date_time`` column is only
    needed for this selection and is dropped from the result.
    """
    selected = catalog
    if start is not None:
        selected = selected[selected["date_time"]>=pd.Timestamp(start)]
    if end is not None:
        selected = selected[selected["date_time"]<pd.Timestamp(end)]
    return selected.sort_values(by="time").drop(columns=["date_time"])


df1 = _load_labeled_catalog("Test_2007_2017_Max_Epsilon_10.0_4_15.0_with_DBSCAN_and_K-Means.csv_with_DBSCAN_and_K-Means.csv")#Best for Constitucion 2010
df2 = _load_labeled_catalog("Test_2007_2017_Max_Epsilon_10.0_4_25.0_with_DBSCAN_and_K-Means.csv_with_DBSCAN_and_K-Means.csv")#Best for Iquique 2014
df3 = _load_labeled_catalog("Test_STDBSCAN_Manual_10.0_10.0_4.0_with_DBSCAN_and_K-Means.csv_with_DBSCAN_and_K-Means.csv")#Best for Coquimbo 2015


df_train=_select_period(df1,end="2014-01-01")#Up to 2013-12-31 (no lower bound is applied)
print(df_train)


df_validation=_select_period(df2,start="2014-01-01",end="2015-01-01")#From 2014-01-01 to 2014-12-31
print(df_validation)

df_test=_select_period(df3,start="2015-01-01",end="2016-01-01")#From 2015-01-01 to 2015-12-31
print(df_test)



          lat    long      z  magn1         time cluster Label
0     -31.974 -71.157   55.0    2.2  2557.093631      -1    -1
1     -30.682 -71.466   29.8    2.6  2557.106361      -1    -1
2     -30.314 -71.481   44.2    3.2  2557.133398      -1    -1
3     -28.811 -71.286   75.3    4.1  2557.379895      -1    -1
4     -27.144 -71.425   46.8    3.4  2557.613781      -1    -1
...       ...     ...    ...    ...          ...     ...   ...
35825 -21.160 -68.907  115.0    2.7  5113.430706     519     1
35826 -19.738 -69.223   99.4    3.7  5113.449931      -1    -1
35827 -21.044 -68.419  178.7    3.8  5113.473206      -1    -1
35828 -23.940 -67.447  226.5    3.4  5113.817685     564     1
35829 -22.313 -68.658  128.1    3.5  5113.973356      -1    -1

[35830 rows x 7 columns]
          lat    long      z  magn1         time cluster Label
35830 -31.385 -69.557  128.9    3.3  5114.216528      -1    -1
35831 -28.617 -71.199   69.8    3.8  5114.242280      -1    -1
35832 -30.412 -71.270   37.6 

In [15]:
# Build the windowed tensors for the three splits and normalize them.
print("Processing this could take a few minutes...")
(trainX, trainY,
 testX, testY,
 valX, valY) = from_dataframe_to_tensor(df_train, df_test, df_validation, window)

Processing this could take a few minutes...


In [16]:
# Load the trained model and score it on the test tensor.
model = keras.models.load_model(model_name)
print(f'Model Loaded window of: {window}!')
yhat = model.predict(testX, verbose=0)

# The first `window` rows of the test set are only used as history by
# create_tensor, so predictions start at row `window`.
y_true = list(df_test.iloc[window:]["Label"].values)

if not is_multiclass:
    # Scores greater than 0.5 become 1 (aftershock), everything else -1.
    # NOTE(review): the model file name suggests squared-hinge training on
    # -1/1 targets, for which 0 would be the usual cut-off - confirm that
    # 0.5 is the intended threshold.
    y_pred = np.where(yhat>0.5,1,-1).ravel()
    class_labels = [-1,1]
    target_names = ['No replica', 'Replica']
else:
    # Columns of yhat are (foreshock, mainshock, aftershock); pick the highest
    # score. On ties the first of the tied classes wins.
    y_pred = np.argmax(yhat, axis=1)
    class_labels = [0,1,2]
    target_names = ['Precursor',"Principal", 'Replica']

# Passing `labels` keeps target_names aligned even if a class is missing
# from y_true / y_pred.
print(classification_report(y_true, y_pred, labels=class_labels, target_names=target_names))
print("Confusion Matrix")
print(confusion_matrix(y_true, y_pred, labels=class_labels))

print("Finish ...")


Model Loaded window of: 60"!
              precision    recall  f1-score   support

  No replica       0.79      0.98      0.88      4433
     Replica       0.92      0.45      0.60      2088

    accuracy                           0.81      6521
   macro avg       0.85      0.72      0.74      6521
weighted avg       0.83      0.81      0.79      6521

Confusion Matrix
[[4347   86]
 [1148  940]]
Finish ...
