# Train and evaluate CNN models



Author: Akash Kharita

Date: 02/28/2024


Import modules

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py
import obspy
from tqdm import tqdm
from glob import glob
import random
import sys
from datetime import datetime

from scipy import stats
from scipy import signal
from sklearn.preprocessing import StandardScaler

from obspy.geodetics.base import gps2dist_azimuth

from datetime import timedelta
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset

from scipy.signal import resample
# from zenodo_get import zenodo_get

## Importing all the models

In [None]:

from design_CNN_models import Archtime
# from design_CNN_models import Archtime_do
# from design_CNN_models import WaveDecompNet
# from design_CNN_models import WaveDecompNet_do
# from design_CNN_models import SeismicCNN_batch
# from design_CNN_models import SeismicCNN_batch_do
# from design_CNN_models import SeismicNet
# from design_CNN_models import SeismicNet_do


# from neural_network_processing_functions import extract_datasets
# from neural_network_processing_functions import train_model
# from neural_network_processing_functions import plot_train_val_loss
# from neural_network_processing_functions import plot_accuracy
# from neural_network_processing_functions import extract_datasets_for_test
# from neural_network_processing_functions import train_model_for_test
# from neural_network_processing_functions import test_model

In [None]:

    
# Arch-time from Deepquake paper. 
# it originally takes 2000 samples as input. 

# defining a monolithic and fat CNN

class Archtime(nn.Module):
    """1-D CNN waveform classifier (called "Arch-time" in this notebook).

    The network applies one strided convolution (``conv1``), then re-applies a
    single second convolution (``conv2``) seven times, flattens the result and
    feeds it through two fully connected layers.

    Parameters
    ----------
    num_classes : int
        Number of output classes, i.e. the width of the final linear layer.
    num_channels : int
        Number of input channels expected by ``conv1``.
    num_features : int
        Number of samples per channel in the input window. It is used to size
        the first fully connected layer, so inputs passed to ``forward`` must
        have this length.

    Notes
    -----
    ``forward`` returns raw class scores (logits). ``self.softmax`` is created
    but not applied, which is the form ``nn.CrossEntropyLoss`` expects.
    """

    # How many times the shared ``conv2`` layer is applied after ``conv1``.
    _N_SHARED_CONV_PASSES = 7

    def __init__(self, num_classes=4, num_channels = 1, num_features = 5000):
        super(Archtime, self).__init__()

        self.num_classes = num_classes
        self.num_features = num_features
        self.num_channels = num_channels

        # Convolutional feature extractor.
        self.conv1 = nn.Conv1d(in_channels=num_channels, out_channels=64,
                               kernel_size=10, stride=4, padding=0)
        # NOTE: this one layer is applied repeatedly in forward_conv, so every
        # pass shares the same weights.
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64,
                               kernel_size=10, stride=2)
        self.flatten = nn.Flatten()

        # Size of the flattened conv output, measured with a dummy forward pass.
        self.num_features_after_conv = self.calculate_num_features_after_conv()

        # Classifier head; the output width follows num_classes.
        self.fc1 = nn.Linear(self.num_features_after_conv, 32)
        self.fc2 = nn.Linear(32, num_classes)
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input ``x``."""
        x = self.forward_conv(x)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

    def forward_conv(self, x):
        """Run only the convolutional stack and return the flattened activations.

        Useful for inspecting the learned features and for sizing ``fc1``.
        """
        x = torch.relu(self.conv1(x))
        for _ in range(self._N_SHARED_CONV_PASSES):
            x = torch.relu(self.conv2(x))
        return self.flatten(x)

    def calculate_num_features_after_conv(self):
        """Return the flattened feature count produced by the conv stack.

        A dummy batch of shape ``(1, num_channels, num_features)`` is pushed
        through ``forward_conv`` without tracking gradients.
        """
        dummy_input = torch.randn(1, self.num_channels, self.num_features)
        with torch.no_grad():
            conv_output = self.forward_conv(dummy_input)
        return conv_output.view(1, -1).size(1)
    
    
    

In [None]:

from torch.utils.data import Dataset
class PNWDataSet(Dataset):
    """Map-style dataset that pairs each data item with its label.

    ``data`` and ``labels`` are indexable containers of equal length; item
    ``i`` is returned as ``(torch.Tensor(data[i]), labels[i])``.
    """

    def __init__(self, data,labels):
        # Keep references only; nothing is copied or converted up front.
        self.labels = labels
        self.data = data

    def __len__(self):
        # The dataset length is defined by the data container.
        n_items = len(self.data)
        return n_items

    def __getitem__(self, index):
        # Look both entries up first, then convert only the data to a tensor;
        # the label is handed back exactly as stored.
        item, target = self.data[index], self.labels[index]
        return torch.Tensor(item), target

In [None]:
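# num_samples defaults to 3000 waveforms per class, but it only applies when
# all_samples=False; with the default all_samples=True every catalogued
# waveform of a class is requested instead.
# There are 4 classes: noise, explosion, earthquake and surface event.
# The default split is 80-10-10 (train-validation-test).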

def _split_lengths(total, train_split, val_split, test_split):
    """Turn relative split weights into integer subset lengths summing to ``total``."""
    weight_sum = train_split + val_split + test_split
    if weight_sum <= 0:
        raise ValueError("train_split + val_split + test_split must be positive")
    n_val = int(total * val_split / weight_sum)
    n_test = int(total * test_split / weight_sum)
    # The training subset absorbs the rounding remainder so no sample is dropped.
    n_train = total - n_val - n_test
    return n_train, n_val, n_test


def prepare_datasets(data_noise="/data/whd01/yiyu_data/PNWML/noise_waveforms.hdf5", medata_noise="/data/whd01/yiyu_data/PNWML/noise_metadata.csv", 
                     data_comcat=  "/data/whd01/yiyu_data/PNWML/comcat_waveforms.hdf5", metadata_comcat="/data/whd01/yiyu_data/PNWML/comcat_metadata.csv", data_exotic="/data/whd01/yiyu_data/PNWML/exotic_waveforms.hdf5",
                     metadata_exotic="/data/whd01/yiyu_data/PNWML/exotic_metadata.csv",
                     before = 5000, after = 10000, num_samples = 3000, batch_size = 32, 
                     num_channels = 3, train_split = 80, val_split=10,test_split = 10, num_features = 5000, 
                     shifting = True, all_samples = True):
    """Build the training, validation and test DataLoaders for the 4-class problem.

    The three metadata CSV files are read, waveform windows are extracted for
    the noise, explosion, earthquake and surface-event classes with
    ``extract_waveforms``, and the stacked windows are tapered, band-pass
    filtered (``lowcut=1``, ``highcut=10``, ``fs=100``), normalised, paired
    with integer class labels and randomly split.

    Parameters
    ----------
    data_noise, data_comcat, data_exotic : str
        Paths of the HDF5 waveform files.
    medata_noise, metadata_comcat, metadata_exotic : str
        Paths of the matching metadata CSV files. ``medata_noise`` keeps its
        historical spelling so existing keyword callers continue to work.
    before, after, num_samples, shifting, all_samples
        Forwarded unchanged to ``extract_waveforms`` (``num_samples`` as its
        ``number_samples`` argument).
    batch_size : int
        Batch size of the three DataLoaders.
    num_channels : int
        Forwarded to ``extract_waveforms``. When it is 1 a channel axis is
        inserted so each window is ``(1, num_features)``.
    train_split, val_split, test_split : number
        Relative sizes of the three subsets (defaults 80/10/10). Only their
        ratios matter; rounding leftovers go to the training subset.
    num_features : int
        Window length in samples.

    Returns
    -------
    data_loader_train, data_loader_val, data_loader_test : DataLoader
        The training loader is shuffled each epoch; the validation and test
        loaders are not. Labels are integers assigned in sorted label order:
        earthquake=0, explosion=1, noise=2, surface=3.

    Raises
    ------
    ValueError
        If a class yields no usable waveform, or the split weights do not sum
        to a positive number.
    """
    # Imported locally because the notebook's import cell does not provide it.
    from torch.utils.data import random_split

    # NOTE(review): retain_nonzero_arrays, apply_cosine_taper, butterworth_filter
    # and normalize_arrays_by_max are not defined or imported in the visible
    # part of this notebook - confirm they are in scope before running.

    noise_metadata = pd.read_csv(medata_noise)
    # Noise traces have no catalogue event, so an id is built from the start time.
    noise_metadata['event_id'] = noise_metadata['trace_start_time'] + '_noise'

    comcat_metadata = pd.read_csv(metadata_comcat)
    exotic_metadata = pd.read_csv(metadata_exotic)

    cat_exp = comcat_metadata[comcat_metadata['source_type'] == 'explosion']
    cat_eq = comcat_metadata[comcat_metadata['source_type'] == 'earthquake']
    cat_su = exotic_metadata[exotic_metadata['source_type'] == 'surface event']

    # (label, catalogue, waveform file, reference sample passed as `start`)
    class_sources = [
        ('noise', noise_metadata, data_noise, 5000),
        ('explosion', cat_exp, data_comcat, 5000),
        ('earthquake', cat_eq, data_comcat, 5000),
        ('surface', cat_su, data_exotic, 7000),
    ]

    class_arrays = []
    labels = []
    for label, catalogue, waveform_file, start in class_sources:
        waveforms, _ = extract_waveforms(
            catalogue, waveform_file, num_features=num_features, start=start,
            before=before, after=after, number_samples=num_samples,
            num_channels=num_channels, shifting=shifting, all_samples=all_samples)
        if len(waveforms) == 0:
            raise ValueError(f"no usable waveforms were extracted for class '{label}'")

        stacked = np.stack(waveforms)
        if num_channels == 1:
            # Single-channel windows come back 1-D; add the channel axis.
            stacked = stacked[:, np.newaxis, :]
        stacked = retain_nonzero_arrays(stacked)

        class_arrays.append(stacked)
        # Labels are generated after filtering so they always match the data.
        labels.extend([label] * len(stacked))

    X = np.vstack(class_arrays)

    tapered = apply_cosine_taper(X)
    filtered = butterworth_filter(tapered, lowcut = 1, highcut = 10, fs = 100, num_corners = 4, filter_type='bandpass')
    data = normalize_arrays_by_max(filtered)

    # Integer class ids in sorted label order (what a label encoder would give).
    _, y_encoded = np.unique(labels, return_inverse=True)

    custom_dataset = PNWDataSet(data, y_encoded)

    n_train, n_val, n_test = _split_lengths(len(custom_dataset), train_split, val_split, test_split)
    train_dataset, val_dataset, test_dataset = random_split(custom_dataset, [n_train, n_val, n_test])

    # Only the training data is reshuffled every epoch.
    data_loader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    data_loader_val = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    data_loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return data_loader_train,data_loader_val,data_loader_test
    
#     return train_dataset, train_loader, y_train, test_dataset, test_loader, y_test, val_dataset, val_loader, y_val, event_ids
    
    


In [None]:


def extract_waveforms(cat, file_name, start=7000, num_features=5000, before=5000, after=10000, number_samples=1000, num_channels=1, all_samples=False, shifting=True):
    """Extract fixed-length waveform windows from an HDF5 file.

    Parameters
    ----------
    cat : pandas.DataFrame
        Catalogue with a ``trace_name`` column formatted as
        ``"<bucket>$<index>,..."`` and an ``event_id`` column.
    file_name : str
        Path of the HDF5 file; waveforms are read from ``/data/<bucket>``,
        indexed as ``[trace, channel, sample]``.
    start : int
        Reference sample index (origin or first-arrival sample) that the
        window is positioned against.
    num_features : int
        Window length in samples.
    before : int
        Number of samples kept before ``start``. Only used when ``shifting``
        is False.
    after : int
        Unused; kept for backward compatibility. The window always ends
        ``num_features`` samples after it begins.
    number_samples : int
        Number of traces to draw. Ignored when ``all_samples`` is True.
    num_channels : int
        1 reads only channel index 2 (presumably the vertical component -
        confirm against the file layout); any other value reads all channels.
    all_samples : bool
        If True, every catalogue trace is read exactly once.
    shifting : bool
        If True, the number of samples kept before ``start`` is drawn
        uniformly from 500..2000 (inclusive) for every trace.

    Returns
    -------
    st : list of numpy.ndarray
        Extracted windows; all-zero windows are dropped.
    event_ids : list
        ``event_id`` of each retained window, in the same order.

    Raises
    ------
    ValueError
        If a window would begin before sample 0.

    Notes
    -----
    Traces are drawn without replacement whenever the catalogue holds at least
    ``number_samples`` rows, so no trace is duplicated; replacement is only
    used when more samples are requested than exist.
    """
    trace_names = cat['trace_name'].values
    n_traces = len(trace_names)

    if all_samples:
        number_samples = n_traces

    # Nothing to draw from (or nothing requested): return without opening the file.
    if n_traces == 0 or number_samples <= 0:
        return [], []

    catalogue_ids = cat['event_id'].values

    # Samples kept before `start`, one value per extracted window.
    if shifting:
        pre_pick = np.random.randint(500, 2001, size=number_samples)
    else:
        pre_pick = np.full(number_samples, before)

    if start - int(pre_pick.max()) < 0:
        raise ValueError("window would begin before sample 0: 'start' must be >= the samples kept before it")

    # Random draw of catalogue rows; unique rows whenever that is possible.
    picks = np.random.choice(n_traces, size=number_samples, replace=number_samples > n_traces)

    channel_selector = 2 if num_channels == 1 else slice(None)

    st = []
    event_ids = []

    with h5py.File(file_name, 'r') as f:
        for k in tqdm(range(number_samples)):
            first = start - int(pre_pick[k])
            last = first + num_features
            ii = picks[k]

            # trace_name is "<bucket>$<index>,...": split out bucket and row index.
            trace_info = trace_names[ii].split('$')
            bucket = trace_info[0]
            ind = int(trace_info[1].split(',')[0])

            waveform = f['/data/' + bucket][ind, channel_selector, first:last]

            # Quality check: skip windows that contain only zeros.
            if np.any(waveform):
                event_ids.append(catalogue_ids[ii])
                st.append(waveform)

    return st, event_ids


In [None]:
def train_model(model, train_loader, val_dataset, val_loader, optimizer, n_epochs=100, batch_size=32, num_input=15000, num_channels=3,criterion=nn.CrossEntropyLoss(), num_features=None):
    """Train ``model`` and evaluate it on the validation data after every epoch.

    Parameters:
        model (torch.nn.Module): The neural network model.
        train_loader (torch.utils.data.DataLoader): DataLoader for training data.
        val_dataset (torch.utils.data.Dataset): Validation dataset; only its
            length is used, as the denominator of the accuracy.
        val_loader (torch.utils.data.DataLoader): DataLoader for validation data.
        optimizer (torch.optim.Optimizer): Optimizer for training the model.
        n_epochs (int): Number of training epochs.
        batch_size (int): Unused; kept for backward compatibility (the loaders
            already define the batch size).
        num_input (int): Unused; kept for backward compatibility.
        num_channels (int): Number of channels in the input data.
        criterion: Loss function called as ``criterion(logits, labels)``.
        num_features (int or None): Number of samples per channel. Each batch
            is reshaped to ``(batch, num_channels, num_features)`` before it is
            passed to the model. When None, the size of the batch's last axis
            is used.

    Returns:
        accuracy_list (list): Validation accuracy after each epoch.
        train_loss_list (list): Mean training loss per batch, for each epoch.
        val_loss_list (list): Mean validation loss per batch, for each epoch.
        y_pred (list): Predicted class indices, one array per validation batch.
        y_true (list): True class indices, one array per validation batch.

    Note:
        ``y_pred`` and ``y_true`` accumulate over ALL epochs. The last
        ``len(val_loader)`` entries belong to the final epoch.
    """
    # Use the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = model.to(device)
    n_val = len(val_dataset)

    accuracy_list = []
    train_loss_list = []
    val_loss_list = []
    y_pred = []
    y_true = []

    def _as_model_input(batch):
        # Reshape to (batch, channels, samples); a mismatch raises instead of
        # silently folding channels into the time axis.
        length = batch.shape[-1] if num_features is None else num_features
        return batch.view(batch.shape[0], num_channels, length)

    for epoch in tqdm(range(n_epochs)):
        # ---- training pass ----
        model.train()
        train_loss_data = 0.0
        for x, y in train_loader:
            x = x.to(device)
            # The loss expects integer class indices.
            y = y.to(device=device, dtype=torch.long)

            optimizer.zero_grad()
            z = model(_as_model_input(x))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()

            train_loss_data += loss.item()

        train_loss_list.append(train_loss_data / len(train_loader))

        # ---- validation pass (no gradients needed) ----
        model.eval()
        val_loss_data = 0.0
        correct = 0
        with torch.no_grad():
            for x_test, y_test in val_loader:
                x_test = x_test.to(device)
                y_test = y_test.to(device=device, dtype=torch.long)

                z = model(_as_model_input(x_test)).to(torch.float)

                val_loss = criterion(z, y_test)
                val_loss_data += val_loss.item()

                # Predicted class = index of the largest logit.
                _, yhat = torch.max(z, 1)
                correct += (yhat == y_test).sum().item()
                y_pred.append(yhat.cpu().numpy())
                y_true.append(y_test.cpu().numpy())

        val_loss_list.append(val_loss_data / len(val_loader))

        accuracy = correct / n_val
        accuracy_list.append(accuracy)

    return accuracy_list, train_loss_list, val_loss_list, y_pred, y_true



## Defining some common parameters for all models

In [None]:
# Settings shared by every training run below.

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Loss function: cross-entropy.
criterion = nn.CrossEntropyLoss()

# Number of passes over the training data.
number_epochs = 100

## Training and Testing all the models

## Archtime (Original)

In [None]:
# Archtime (original)
# One channel count and one window length drive the data, the model and the
# training loop so that the three cannot disagree.
num_channels = 3
number_features = 5000

data_loader_train, data_loader_val, data_loader_test = prepare_datasets(
    num_channels=num_channels, num_features=number_features)

model_archtime = Archtime(num_channels=num_channels, num_features=number_features)

optimizer = torch.optim.Adam(model_archtime.parameters(), lr=0.001)

# Train on the loaders built above; the validation dataset is the one wrapped
# by the validation loader.
accuracy_archtime, train_loss_archtime, val_loss_archtime, y_pred, y_true = train_model(
    model_archtime, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## Archtime (with dropout)

In [None]:
# Archtime with dropout
# NOTE(review): the Archtime_do import is commented out in the model-import
# cell; re-enable it before running this cell.
# The loaders from the Archtime cell are reused; prepare_datasets() builds
# 3-channel windows by default, so the model must use the same channel count.
num_channels = 3
number_features = 5000

model_archtime_do = Archtime_do(num_channels=num_channels, num_features=number_features)
optimizer = torch.optim.Adam(model_archtime_do.parameters(), lr=0.001)
accuracy_archtime_do, train_loss_archtime_do, val_loss_archtime_do, y_pred, y_true = train_model(
    model_archtime_do, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## Wavedecompnet (original)

In [None]:
# NOTE(review): the WaveDecompNet import is commented out in the model-import
# cell; re-enable it before running this cell.
# The model and the training call share `num_channels`, and training uses the
# loaders returned by prepare_datasets().
model_wavedecompnet = WaveDecompNet(num_channels=num_channels, num_features=5000)
optimizer = torch.optim.Adam(model_wavedecompnet.parameters(), lr=0.001)
accuracy_wavedecompnet, train_loss_wavedecompnet, val_loss_wavedecompnet, y_pred, y_true = train_model(
    model_wavedecompnet, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## WaveDecompNet (with dropout)

In [None]:
# NOTE(review): the WaveDecompNet_do import is commented out in the
# model-import cell; re-enable it before running this cell.
# The model and the training call share `num_channels`, and training uses the
# loaders returned by prepare_datasets().
model_wavedecompnet_do = WaveDecompNet_do(num_channels=num_channels, num_features=5000)
optimizer = torch.optim.Adam(model_wavedecompnet_do.parameters(), lr=0.001)
accuracy_wavedecompnet_do, train_loss_wavedecompnet_do, val_loss_wavedecompnet_do, y_pred, y_true = train_model(
    model_wavedecompnet_do, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## SeismicCNN_batch (original)

In [None]:
# NOTE(review): the SeismicCNN_batch import is commented out in the
# model-import cell; re-enable it before running this cell.
# The model and the training call share `num_channels`, and training uses the
# loaders returned by prepare_datasets().
model_seismiccnn_batch = SeismicCNN_batch(num_channels=num_channels)
optimizer = torch.optim.Adam(model_seismiccnn_batch.parameters(), lr=0.001)
accuracy_seismiccnn_batch, train_loss_seismiccnn_batch, val_loss_seismiccnn_batch, y_pred, y_true = train_model(
    model_seismiccnn_batch, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## SeismicCNN_batch (with dropout)

In [None]:
# NOTE(review): the SeismicCNN_batch_do import is commented out in the
# model-import cell; re-enable it before running this cell.
# The model and the training call share `num_channels`, and training uses the
# loaders returned by prepare_datasets().
model_seismiccnn_batch_do = SeismicCNN_batch_do(num_channels=num_channels)
optimizer = torch.optim.Adam(model_seismiccnn_batch_do.parameters(), lr=0.001)
accuracy_seismiccnn_batch_do, train_loss_seismiccnn_batch_do, val_loss_seismiccnn_batch_do, y_pred, y_true = train_model(
    model_seismiccnn_batch_do, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## SeismicNet (original)

In [None]:
# SeismicNet (original)
# NOTE(review): the SeismicNet import is commented out in the model-import
# cell; re-enable it before running this cell.
num_channels = 1
number_features = 15000

# SeismicNet uses a longer, unshifted single-channel window, so the loaders
# are rebuilt here with prepare_datasets (default 80/10/10 split) and replace
# the ones used by the previous models.
data_loader_train, data_loader_val, data_loader_test = prepare_datasets(
    before=5000, after=10000, num_samples=5000, batch_size=32,
    num_channels=num_channels, num_features=number_features,
    shifting=False, all_samples=False)

model_seismicnet = SeismicNet(num_channels=num_channels, num_features=number_features)
optimizer = torch.optim.Adam(model_seismicnet.parameters(), lr=0.001)
accuracy_seismicnet, train_loss_seismicnet, val_loss_seismicnet, y_pred, y_true = train_model(
    model_seismicnet, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## SeismicNet (with dropout)

In [None]:
# A dataset contains the features and the corresponding labels;
# its elements are accessed by index.
# A dataloader splits a dataset into batches of the requested size and can
# shuffle them randomly.

# NOTE(review): the SeismicNet_do import is commented out in the model-import
# cell; re-enable it before running this cell.
# Uses the channel count, window length and loaders set up in the SeismicNet
# (original) cell.
model_seismicnet_do = SeismicNet_do(num_channels=num_channels, num_features=number_features)
optimizer = torch.optim.Adam(model_seismicnet_do.parameters(), lr=0.001)
accuracy_seismicnet_do, train_loss_seismicnet_do, val_loss_seismicnet_do, y_pred, y_true = train_model(
    model_seismicnet_do, data_loader_train, data_loader_val.dataset, data_loader_val,
    optimizer, n_epochs=number_epochs, num_channels=num_channels)


## Plotting the performance of individual models

In [None]:
# Training and validation loss curves: one figure per model, with each
# original model followed by its dropout variant.
# NOTE(review): the plot_train_val_loss import is commented out in the
# import cell; re-enable it before running this cell.

## Archtime models
plot_train_val_loss(train_loss_archtime, val_loss_archtime, title = 'Archtime (original)')
plot_train_val_loss(train_loss_archtime_do, val_loss_archtime_do, title = 'Archtime with dropout')


# SeismicCNN_batch
plot_train_val_loss(train_loss_seismiccnn_batch, val_loss_seismiccnn_batch, title = 'SeismicCNN_batch (original)')
plot_train_val_loss(train_loss_seismiccnn_batch_do, val_loss_seismiccnn_batch_do, title = 'SeismicCNN_batch (with dropout)')


# WaveDecompNet
plot_train_val_loss(train_loss_wavedecompnet, val_loss_wavedecompnet, title = 'WaveDecompNet (original)')
plot_train_val_loss(train_loss_wavedecompnet_do, val_loss_wavedecompnet_do, title = 'WaveDecompNet (with dropout)')


# SeismicNet
plot_train_val_loss(train_loss_seismicnet, val_loss_seismicnet, title = 'SeismicNet (original)')
plot_train_val_loss(train_loss_seismicnet_do, val_loss_seismicnet_do, title = 'SeismicNet (with dropout)')

In [None]:
# Accuracy curves: each figure compares one model with its dropout variant.

# Archtime
plot_accuracy(
    accuracy_archtime,
    accuracy_archtime_do,
    label1='Archtime (original)',
    label2='Archtime (with dropout)',
)

# SeismicCNN_batch
plot_accuracy(
    accuracy_seismiccnn_batch,
    accuracy_seismiccnn_batch_do,
    label1='SeismicCNN (original)',
    label2='SeismicCNN (with dropout)',
)

# WaveDecompNet
plot_accuracy(
    accuracy_wavedecompnet,
    accuracy_wavedecompnet_do,
    label1='WaveDecompNet (original)',
    label2='WaveDecompNet (with dropout)',
)

# SeismicNet
plot_accuracy(
    accuracy_seismicnet,
    accuracy_seismicnet_do,
    label1='SeismicNet (original)',
    label2='SeismicNet (with dropout)',
)












