# Training of Classification Neural Network



We trained a classification neural network that is able to distinguish velocity structures with 2 - 7 layers.  
The network takes Love and Rayleigh dispersion curves together with their uncertainty vectors as input data.

In [None]:
#import packages
import matplotlib.pyplot as plt
import math
import numpy as np
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler
from pickle import dump

import tensorflow as tf
from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import optimizers
import keras
import tensorflow.keras.utils
from tensorflow.keras.layers import Activation
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras import backend as k
from tensorflow_probability import distributions as tfd
from keras.utils.vis_utils import plot_model

### Set-up of neural network

In [None]:
### Network dimensions
### Each network input (a dispersion curve or an uncertainty vector) is
### sampled logarithmically with 100 points from 1-20 Hz.
x_size = 100

### The network output has one class per layer count: structures with
### 2 - 7 layers give 7 - 2 + 1 = 6 classes.
y_size = 7 - 2 + 1

In [None]:
### Network configuration ###
# Four parallel input branches (Love / Rayleigh dispersion curve and the
# matching uncertainty vectors). Each branch is Dense -> Dense -> Dropout.
# The two Love branches and the two Rayleigh branches are merged pairwise,
# then both merged streams are joined before the softmax classifier.
# Layers are created in the same order as before, so auto-generated layer
# names are unchanged.

# Branch 1: Love dispersion curve
inp1 = Input((x_size,))
hidden_1 = Dense(418, activation=k.relu)(inp1)
hidden_1_2 = Dense(716, activation=k.relu)(hidden_1)
drop_1 = Dropout(0.05)(hidden_1_2)

# Branch 2: Love uncertainty vector
inp2 = Input((x_size,))
hidden_2 = Dense(369, activation=k.relu)(inp2)
hidden_2_2 = Dense(769, activation=k.relu)(hidden_2)
drop_2 = Dropout(0.48)(hidden_2_2)

# Branch 3: Rayleigh dispersion curve
inp3 = Input((x_size,))
hidden_3 = Dense(117, activation=k.relu)(inp3)
hidden_3_2 = Dense(396, activation=k.relu)(hidden_3)
drop_3 = Dropout(0.76)(hidden_3_2)

# Branch 4: Rayleigh uncertainty vector
inp4 = Input((x_size,))
hidden_4 = Dense(216, activation=k.relu)(inp4)
hidden_4_2 = Dense(828, activation=k.relu)(hidden_4)
drop_4 = Dropout(0.69)(hidden_4_2)

# Merge the two Love branches
merged1 = tf.keras.layers.concatenate([drop_1, drop_2])
hidden_5 = Dense(879, activation=k.relu)(merged1)
drop_5 = Dropout(0.78)(hidden_5)

# Merge the two Rayleigh branches
merged2 = tf.keras.layers.concatenate([drop_3, drop_4])
hidden_6 = Dense(832, activation=k.relu)(merged2)
drop_6 = Dropout(0.04)(hidden_6)

# Join the Love and Rayleigh streams
merged3 = tf.keras.layers.concatenate([drop_5, drop_6])
hidden_7 = Dense(332, activation=k.relu)(merged3)
hidden_7_2 = Dense(845, activation=k.relu)(hidden_7)
# NOTE(review): a rate of 0.99 drops almost every activation feeding the
# output layer during training - confirm this value is intentional.
drop_7 = Dropout(0.99)(hidden_7_2)

# Softmax over the y_size classes (one class per layer count)
output = Dense(y_size, activation='softmax')(drop_7)

model = Model(inputs=[inp1, inp2, inp3, inp4], outputs=output)

### Set optimizer ###
# The former `decay=0.0` argument is omitted: 0.0 was the default anyway, and
# newer Keras optimizers reject the `decay` keyword.
adam = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True)

# Kept for compatibility with code that may reference it; it is not passed
# to compile() below.
mse = tf.keras.losses.MeanSquaredError()

### Compile model ###
# Sparse categorical cross-entropy expects integer class labels (0 .. y_size-1).
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

In [None]:
# Draw the network graph (with tensor shapes and layer names) to model_plot.png
tf.keras.utils.plot_model(
    model,
    to_file='model_plot.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Callbacks used during training.
# TerminateOnNaN aborts the run as soon as the training loss becomes NaN.
NAN = tf.keras.callbacks.TerminateOnNaN()

### Load and preprocess training data

Training data should consist of a mix of dispersion curves forward modelled from structures with different layer numbers (here 2 - 7 layer structures).  
Dispersion curves and uncertainty vectors should be logarithmically resampled with 100 samples between 1 - 20 Hz corresponding to the frequency:
#### freq=np.logspace(0,1.3,100,base=10)   
The phase velocity and uncertainty should be in km/s !!     
The layer number is an integer value (here 2 - 7).

In [None]:
# Load the training set from .npy arrays; insert your own data files here.
input_disp_lov = np.load("love_disp.npy")   # Love dispersion curves
input_disp_ray = np.load("ray_disp.npy")    # Rayleigh dispersion curves
input_un_lov = np.load("love_un.npy")       # Love uncertainty vectors
input_un_ray = np.load("ray_un.npy")        # Rayleigh uncertainty vectors
output_num = np.load("layer_number.npy")    # target: number of layers per model

# Sanity check: every input array should have shape (number_training_models, 100)
for _loaded in (input_disp_lov, input_disp_ray, input_un_lov, input_un_ray, output_num):
    print(_loaded.shape)

In [None]:
# Shift the layer counts (2 to 7) to zero-based class labels (0 to 5), the
# label format expected by the sparse categorical cross-entropy loss.
# The result is kept as a NumPy array rather than a Python list of scalars so
# that Keras can slice it for validation_split and use it directly as labels.
new_output_num = np.asarray(output_num) - 2

print(np.shape(new_output_num))

In [None]:
# Class balance check: histogram of the zero-based labels, six bins
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(new_output_num, bins=6)
plt.show()

In [None]:
# Preprocessing: standardise each input array feature-wise (zero mean, unit
# variance per frequency sample). One scaler per input, so each can be saved
# and re-applied to new data later.
# NOTE(review): the scalers are fitted on ALL rows, including those later used
# as the test split - consider fitting on the training rows only.

scaler1 = StandardScaler().fit(input_disp_lov)  # Love dispersion
scaler2 = StandardScaler().fit(input_un_lov)    # Love uncertainty
scaler3 = StandardScaler().fit(input_disp_ray)  # Rayleigh dispersion
scaler4 = StandardScaler().fit(input_un_ray)    # Rayleigh uncertainty

input_disp_lov_scal = scaler1.transform(input_disp_lov)
input_un_lov_scal = scaler2.transform(input_un_lov)
input_disp_ray_scal = scaler3.transform(input_disp_ray)
input_un_ray_scal = scaler4.transform(input_un_ray)

# The output (class labels) does not need to be scaled.

In [None]:
# Save the fitted scalers so the identical scaling can be re-applied later.
# Each file is opened in a `with` block so the handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open('./scaler1.pkl', 'wb') as scaler_file:
    dump(scaler1, scaler_file)
with open('./scaler2.pkl', 'wb') as scaler_file:
    dump(scaler2, scaler_file)
with open('./scaler3.pkl', 'wb') as scaler_file:
    dump(scaler3, scaler_file)
with open('./scaler4.pkl', 'wb') as scaler_file:
    dump(scaler4, scaler_file)

In [None]:
# Split into training and test set.
# The first n_train rows are used for training, the remaining rows for testing.
# n_train is the single place to adapt the split to the size of your data set.
# NOTE(review): the split is positional (no shuffling) - confirm the rows are
# not ordered by layer number, otherwise the test set will be unbalanced.
n_train = 560000

training_disp_lov = input_disp_lov_scal[:n_train]
testing_disp_lov = input_disp_lov_scal[n_train:]

training_un_lov = input_un_lov_scal[:n_train]
testing_un_lov = input_un_lov_scal[n_train:]

training_disp_ray = input_disp_ray_scal[:n_train]
testing_disp_ray = input_disp_ray_scal[n_train:]

training_un_ray = input_un_ray_scal[:n_train]
testing_un_ray = input_un_ray_scal[n_train:]

training_num = new_output_num[:n_train]
testing_num = new_output_num[n_train:]

# Print the shape of every partition (training first, then testing, per array)
for _partition in (
    training_disp_lov, testing_disp_lov,
    training_un_lov, testing_un_lov,
    training_disp_ray, testing_disp_ray,
    training_un_ray, testing_un_ray,
    training_num, testing_num,
):
    print(np.shape(_partition))

### Train model  
Number of epochs and batch size can be adapted

In [None]:
# Train the model.
# Inputs are passed in the order of the model's Input layers:
# Love dispersion, Love uncertainty, Rayleigh dispersion, Rayleigh uncertainty.
# The labels are converted with np.asarray because validation_split can only
# slice NumPy arrays / tensors, not a Python list of scalars (this is a no-op
# if training_num is already an array).
# validation_split=0.1 holds out the last 10 % of the samples passed here
# (taken before shuffling) for validation.
results = model.fit(
    [training_disp_lov, training_un_lov, training_disp_ray, training_un_ray],
    np.asarray(training_num),
    batch_size=128,
    epochs=100,
    verbose=1,
    shuffle=True,
    validation_split=0.1,
    callbacks=[NAN],
)



In [None]:
# Write the trained model to disk as a single HDF5 file
model.save(filepath='./model.h5')


In [None]:
# Evaluate the model on the held-out test split.
# The labels are converted with np.asarray so Keras receives an array rather
# than a Python list of scalars (a no-op if testing_num is already an array).
loss_and_metrics = model.evaluate(
    [testing_disp_lov, testing_un_lov, testing_disp_ray, testing_un_ray],
    np.asarray(testing_num),
)


In [None]:
# Show the evaluation result: the test loss followed by the compiled metric
# (accuracy).
print(loss_and_metrics)

In [None]:
# Loss curves: training loss (red) against validation loss (blue) per epoch
fig, ax = plt.subplots(figsize=[8, 6])

ax.plot(results.history['loss'], 'r', linewidth=3.0)
ax.plot(results.history['val_loss'], 'b', linewidth=3.0)
ax.legend(['Training loss', 'Validation Loss'], fontsize=18)
ax.set_xlabel('Epochs ', fontsize=16)
ax.set_ylabel('Loss', fontsize=16)
ax.set_title('Loss Curves', fontsize=16)
plt.show()

### Evaluate performance  
Use previously unseen input data

In [None]:
# Load previously unseen data for testing the trained model; insert your own
# data files here.
unseen_disp_lov = np.load("love_disp_unseen.npy")  # Love dispersion curves
unseen_un_lov = np.load("love_un_unseen.npy")      # Love uncertainty vectors
unseen_disp_ray = np.load("ray_disp_unseen.npy")   # Rayleigh dispersion curves
unseen_un_ray = np.load("ray_un_unseen.npy")       # Rayleigh uncertainty vectors
unseen_num = np.load("layer_num_unseen.npy")       # true number of layers

print(unseen_disp_lov.shape)

In [None]:
# Apply the scalers fitted on the training data to the unseen inputs
# (transform only - the scalers are not re-fitted).
(
    unseen_disp_lov_scal,
    unseen_un_lov_scal,
    unseen_disp_ray_scal,
    unseen_un_ray_scal,
) = (
    fitted_scaler.transform(raw_input)
    for fitted_scaler, raw_input in (
        (scaler1, unseen_disp_lov),
        (scaler2, unseen_un_lov),
        (scaler3, unseen_disp_ray),
        (scaler4, unseen_un_ray),
    )
)

In [None]:
# Predict class probabilities for the unseen models: one softmax vector of
# length y_size per model. Inputs follow the order of the model's Input layers.
unseen_inputs = [
    unseen_disp_lov_scal,
    unseen_un_lov_scal,
    unseen_disp_ray_scal,
    unseen_un_ray_scal,
]
predictions = model.predict(unseen_inputs)

In [None]:
# Shape of the prediction array
print(predictions.shape)

In [None]:
# Plot the predicted class probabilities of the first unseen structure against
# the corresponding number of layers (2 - 7).
x = np.linspace(start=2, stop=7, num=6)  # range of layer numbers
single_fig, single_ax = plt.subplots()
single_ax.plot(x, predictions[0])
single_ax.set_xlabel("Number of Layers")
plt.show()

In [None]:
# Predicted layer number for one structure.
# argmax gives the class index (0-5); +2 converts it back to the original
# range of 2 - 7 layers.
np.argmax(predictions[0]) + 2

In [None]:
# Signed prediction error (predicted - true) in number of layers, one value
# per unseen model.
# argmax gives the class index (0-5); +2 converts it back to a layer count
# (2-7) so it is on the same scale as unseen_num. Without the offset every
# error would be biased by -2.
# assumes unseen_num holds the original layer counts (2-7), one per model - TODO confirm
predicted_layers = np.argmax(predictions, axis=1) + 2
num_error = predicted_layers - np.ravel(unseen_num)

In [None]:
# Histogram of the prediction errors over all unseen models
fig, ax = plt.subplots(figsize=(6, 6))

ax.hist(num_error, bins=100)
ax.set_ylabel("Number of models")
ax.set_xlabel("Number of layers")

plt.show()

In [None]:
# Visualise the prediction error: 2-D histogram of predicted vs. true number
# of layers, with the 1:1 line in red and the Pearson correlation in the title.

line = np.linspace(0, 8, 50)  # points of the 1:1 reference line

fig, ax = plt.subplots(figsize=(5, 5))

# Predicted layer count per model; +2 scales the class index back to the
# original range of 2 - 7 layers
x = [np.argmax(class_probs) + 2 for class_probs in predictions]

# True layer count per model
y = list(unseen_num)

corr = stats.pearsonr(x, y)
Z, xax, yax = np.histogram2d(x, y, bins=50, range=[[0, 9], [0, 9]])

# Z is indexed [x, y]; pcolormesh expects rows along y, hence the transpose
ax.pcolormesh(xax, yax, Z.T, cmap="Greys")
ax.plot(line, line, color="red")
ax.set_xlabel("Predicted num layers", fontsize=12)
ax.set_ylabel("True num layers", fontsize=12)
ax.set_title("corr {}".format("%0.3f" % (corr[0])))

plt.show()