In [1]:
# Widen the notebook container so wide outputs use the full browser width.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [8]:
import pandas as pd
import h5py
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

import glob
import re
from multiprocessing import cpu_count
from multiprocessing import Pool

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import os
import sys
from functools import partial

from sklearn.metrics import confusion_matrix

In [9]:
# select the compute device: first CUDA GPU when one is visible, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# report how many CUDA devices torch can see
print (torch.cuda.device_count())

1


In [16]:
# ---- dataset location -------------------------------------------------------
dataDir = 'dataFiles'
datasetSize = '100K'
earthquakeSampleFraction = 0.25
noiseSampleFraction = 0.25
csvFileName = f'{dataDir}/filtered_earthquakeSampleFraction_{earthquakeSampleFraction}_noiseSampleFraction_{noiseSampleFraction}.csv'
hdf5FileName = f'{dataDir}/filtered_earthquakeSampleFraction_{earthquakeSampleFraction}_noiseSampleFraction_{noiseSampleFraction}.hdf5'

# ---- split configuration ----------------------------------------------------
trainSetFraction = 0.8
valSetFraction = 0.1
testSetFraction = 1 - (trainSetFraction + valSetFraction) 

# seed for the shuffle below, so that the train/val/test membership is the
# same on every "Restart & Run All" (the shuffle was previously unseeded)
splitSeed = 42

# read the csv file and use the trace name as index
df_csv = pd.read_csv(csvFileName).set_index(['trace_name'])

# keep only rows that have a regression target (s_arrival_sample is not NaN)
df_csv = df_csv.dropna(subset=['s_arrival_sample'])

# shuffle once, then cut into train / validation / test by position.
# Positional slicing is used instead of np.split(DataFrame, ...), which
# relies on the deprecated DataFrame.swapaxes.
df_shuffled = df_csv.sample(frac = 1, random_state = splitSeed)
trainEnd = int(trainSetFraction*len(df_csv))
valEnd = int((trainSetFraction + valSetFraction)*len(df_csv))
df_train = df_shuffled.iloc[:trainEnd]
df_val = df_shuffled.iloc[trainEnd:valEnd]
df_test = df_shuffled.iloc[valEnd:]

# the three splits must partition the filtered table
assert len(df_train) + len(df_val) + len(df_test) == len(df_csv)

# read the hdf5 file. The handle is deliberately left open: the Dataset class
# reads waveforms from the module-level `hdf5Data` on every __getitem__ call.
hdf5Data = h5py.File(hdf5FileName, 'r')

In [17]:
# report the number of rows in each split
for splitName, splitFrame in (('Train', df_train), ('Val', df_val), ('Test', df_test)):
    print (f'{splitName} set length =', len(splitFrame))

Train set length = 40000
Val set length = 5000
Test set length = 5000


In [18]:
# define other relevant dirNames

# dir to dump plots
plotImgDir = 'plotImages/cnn1dSArrivalRegression'

# directories to dump neuralNet params
netParamsDirName = 'netParams/cnn1dSArrivalRegression'

# Create both directories (including parents). os.makedirs replaces the
# shelled-out `mkdir -p`: it needs no shell, works on every platform, and
# raises on failure instead of returning an exit status that nobody checks.
for outputDir in (plotImgDir, netParamsDirName):
    os.makedirs(outputDir, exist_ok=True)

0

In [19]:
# data loader

class Dataset(torch.utils.data.Dataset):
    '''Map-style PyTorch dataset that serves (waveform, label) pairs.

    Waveforms are looked up under the key 'data/<ID>' in an HDF5-like store;
    labels are looked up by the same ID in a dict.

    Parameters
    ----------
    listIDs : sequence of str
        Sample IDs; position in this sequence is the dataset index.
    labels : mapping
        Maps each ID to its label.
    hdf5File : object with a .get(key) method, optional
        Store to read waveforms from. When omitted, the module-level
        `hdf5Data` is used (looked up at access time), which is the
        original behaviour.
    '''
    def __init__(self, listIDs, labels, hdf5File = None):
        'Initialization'
        self.labels = labels
        self.listIDs = listIDs
        self.hdf5File = hdf5File
        

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.listIDs)

    def __getitem__(self, index):
        '''Generates one sample of data.

        Returns the transposed waveform array and its label.
        Raises KeyError when no waveform is stored for the requested ID.
        '''
        # Select sample
        ID = self.listIDs[index]

        # Load data and get label
        source = hdf5Data if self.hdf5File is None else self.hdf5File
        X = source.get('data/' + ID)
        if X is None:
            # .get() returns None for a missing key; fail here with a clear
            # message instead of handing np.array(None) to the collate step
            raise KeyError(f"no waveform stored under 'data/{ID}'")

        # transpose the stored array
        # (presumably (samples, channels) -> (channels, samples) for Conv1d; TODO confirm)
        X = np.array(X).T
        y = self.labels[ID]

        return X, y

In [20]:
# define a 1D convolutional neural network

dropOutProb = 0.5

class Net(nn.Module):
    '''1D CNN regressor: three conv + max-pool stages followed by three dense layers.

    Input is a batch of shape (batch, 3, inputLength); output has shape
    (batch, 1). The convolutions preserve the sequence length (kernel 5,
    stride 1, padding 2); the pools shrink it by 2, 2 and 4.

    Parameters
    ----------
    dropOutProb : float
        Dropout probability applied to the flattened conv features.
    inputLength : int
        Number of samples per input trace. The default of 6000 reproduces
        the original fixed first dense layer of 375 * 8 inputs.

    Raises
    ------
    ValueError
        If inputLength is too short to survive the three pooling stages.
    '''
    def __init__(self, dropOutProb = 0.5, inputLength = 6000):
        
        # inherit base class
        super().__init__()
        
        # layer 1
        self.conv1 = nn.Conv1d(in_channels = 3, out_channels = 12, kernel_size=5, stride = 1, padding = 2)
                
        # layer 2
        self.conv2 = nn.Conv1d(in_channels = 12, out_channels = 12, kernel_size=5, stride = 1, padding = 2)
                
        # layer 3
        self.conv3 = nn.Conv1d(in_channels = 12, out_channels = 8, kernel_size=5, stride = 1, padding = 2)
        
        # use downsampling by 2 for 1st 2 layers, and then downsample by 4 for the 3rd
        self.poolDiv2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.poolDiv4 = nn.MaxPool1d(kernel_size=4, stride=4)
        
        # dropout layer
        self.dropOut = nn.Dropout(p = dropOutProb)
        
        # sequence length left after the three pools (each pool floors the length)
        pooledLength = inputLength // 2 // 2 // 4
        if pooledLength < 1:
            raise ValueError(f'inputLength = {inputLength} is too short: at least 16 samples are required')
        
        # fully connected layers
        self.fc1 = nn.Linear(in_features = pooledLength * self.conv3.out_channels, out_features = 16)
        self.fc2 = nn.Linear(in_features = 16, out_features = 16)
        self.fc3 = nn.Linear(in_features = 16, out_features = 1)

    def forward(self, x):
        'Map a (batch, 3, inputLength) tensor to a (batch, 1) prediction.'
        x = self.poolDiv2(F.relu(self.conv1(x)))
        x = self.poolDiv2(F.relu(self.conv2(x)))
        x = self.poolDiv4(F.relu(self.conv3(x)))
        # flatten (channels, length) into one feature axis for the dense layers
        x = x.view(-1, self.num_flat_features(x))
        x = self.dropOut(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):        
        'Number of features per sample: product of all dimensions except the batch one.'
        size = x.size()[1:]
        num_features = 1        
        for s in size:
            num_features *= s            
        return num_features


# instantiate the network once to inspect its architecture
net = Net(dropOutProb)
# print the module itself: `net.parameters` is a bound method, so printing it
# only shows the layer summary wrapped in a "<bound method ...>" repr
print(net)

<bound method Module.parameters of Net(
  (conv1): Conv1d(3, 12, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv2): Conv1d(12, 12, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv3): Conv1d(12, 8, kernel_size=(5,), stride=(1,), padding=(2,))
  (poolDiv2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (poolDiv4): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (dropOut): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=3000, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)>


In [21]:
# training objective: mean squared error, averaged over all elements of the batch
criterion = nn.MSELoss(reduction = 'mean')

In [22]:
# early stopping: number of most recent epochs whose validation-loss changes are summed
movingWindowSize = 20

# number of worker processes given to every DataLoader
numWorkers = 4

# ---- adapt the dataframes to the (listIDs, labels) format used by Dataset ----

def _idsAndLabels(frame):
    'Return (list of trace names, {trace name: s_arrival_sample}) for one split.'
    ids = frame.index.get_level_values('trace_name').tolist()
    labels = frame['s_arrival_sample'].to_dict()
    return ids, labels

# training set; its DataLoader is created inside the training loop
trainingListIDs, trainingLabels = _idsAndLabels(df_train)

# validation set
validationListIDs, validationLabels = _idsAndLabels(df_val)
validationSet = Dataset(validationListIDs, validationLabels)
validationGenerator = torch.utils.data.DataLoader(validationSet, batch_size = 256, num_workers = numWorkers)

# test set
testListIDs, testLabels = _idsAndLabels(df_test)
testSet = Dataset(testListIDs, testLabels)
testGenerator = torch.utils.data.DataLoader(testSet, batch_size = 256, num_workers = numWorkers)

# ---- hyperParameters ----
numEpochs = 200
batchSize = 64
# grid searched by the training loop; for a quick single-configuration run
# shrink these lists to e.g. [1e-3] and [0.5]
learningRateList = [1e-1, 1e-2, 1e-3, 1e-4]
dropOutProbList = [0.3, 0.5, 0.7]

# ---- per-configuration tracking arrays ----
# indexed [dropOut index, learningRate index, epoch]; NaN marks epochs that were never run
sweepShape = (len(dropOutProbList), len(learningRateList))
progressionShape = sweepShape + (numEpochs,)

lossProgression = np.full(progressionShape, np.nan)
trainLossProgression = np.full(progressionShape, np.nan)
valLossProgression = np.full(progressionShape, np.nan)

# best validation loss seen so far per configuration; starts at +inf so the first epoch always wins
bestValLoss = np.full(sweepShape, np.inf)


# ---- grid search: train one network per (dropOutProb, learningRate) pair ----
# Per epoch this records the MSE criterion averaged over the training set
# (lossProgression), the mean squared error of the *rounded* predictions on the
# training set (trainLossProgression) and on the validation set
# (valLossProgression). The weights of the epoch with the lowest validation
# loss are written to netParamsDirName, and a run stops early once the
# validation loss is no lower than it was movingWindowSize epochs earlier.

def _prepareBatch(data):
    'Unpack one DataLoader batch and move the inputs and float32 labels onto `device`.'
    inputBatch, groundTruthBatch = data
    # as_tensor converts dtype/device directly; torch.tensor(existingTensor)
    # copy-constructs and raises a UserWarning
    groundTruthBatch = torch.as_tensor(groundTruthBatch, dtype = torch.float32, device = device)
    return inputBatch.to(device), groundTruthBatch


def _roundedSqError(outputPredBatch, groundTruthBatch):
    'Sum over the batch of (round(prediction) - label)**2, detached from autograd.'
    # detach: this is a reporting metric only; accumulating it on the graph
    # would keep graph history of every batch alive until the epoch ends
    outputPredLabelBatch = torch.round(outputPredBatch.detach())
    # squeeze(1) drops only the output-feature axis, so a batch of one keeps its batch axis
    return torch.sum((outputPredLabelBatch.squeeze(1) - groundTruthBatch)**2)


# number of mini-batches per epoch (ceiling division); no longer needed by the
# loop below, kept because it is a notebook-level name other cells may read
numBatches = (len(df_train) + batchSize - 1) // batchSize

# invoke dataloader with appropriate batchSize (independent of the swept
# hyperparameters, so it is built once)
trainingSet = Dataset(trainingListIDs, trainingLabels)
trainingGenerator = torch.utils.data.DataLoader(trainingSet, batch_size = batchSize, shuffle=True, num_workers = numWorkers)

for d,dropOutProb in enumerate (dropOutProbList):
    
    for l,learningRate in enumerate(learningRateList):
        
        # create a neuralNet object
        net = Net(dropOutProb)        
        net.to(device)
        
        # optimizer type
        optimizer = optim.Adam(net.parameters(), lr=learningRate)
        
        # movingWindow of previous epochs to check for convergence
        deltaValLossHistory = []
        prevValLoss = np.inf        
        
        fileTag = f'batchSize_{batchSize}_learningRate_{learningRate}_dropOutProb_{dropOutProb}_numEpochs_{numEpochs}_dataFraction_{earthquakeSampleFraction}_{noiseSampleFraction}'
        
        # checkpoint file for the best epoch of this configuration
        path = f'{netParamsDirName}/cnn3Layer_12_12_8_fc2layer_16_16_{fileTag}.pth'
        
        for epoch in range(numEpochs):                        
            
            # set network in training mode
            net.train()
            
            runningLoss = 0.0
            sqError = torch.zeros((), device = device)
            for data in trainingGenerator:

                inputBatch, groundTruthBatch = _prepareBatch(data)
                                                
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward pass
                outputPredBatch = net(inputBatch)
                
                # compute loss
                loss = criterion(outputPredBatch,groundTruthBatch.unsqueeze(1))
                
                # backprop
                loss.backward()
                
                # gradient descent step
                optimizer.step()    
                
                # collect training loss statistics; the criterion is a batch
                # mean, so weight it by the actual batch size (the last batch
                # may be smaller than batchSize)
                runningLoss += loss.item() * inputBatch.size(0)
                sqError += _roundedSqError(outputPredBatch, groundTruthBatch)
            
            # track training loss Progression (plain floats, not device tensors)
            avgLoss = runningLoss/len(df_train)
            lossProgression[d, l, epoch] = avgLoss
            trainLoss = (sqError/len(df_train)).item()
            trainLossProgression[d,l, epoch] = trainLoss
                        
            # evaluate validation set loss
            
            # set network in eval mode
            net.eval()
            
            sqError = torch.zeros((), device = device)
            with torch.no_grad():
                
                for data in validationGenerator:

                    inputBatch, groundTruthBatch = _prepareBatch(data)
                    outputPredBatch = net(inputBatch)
                    sqError += _roundedSqError(outputPredBatch, groundTruthBatch)

            valLoss = (sqError/len(df_val)).item()
            valLossProgression[d,l,epoch] = valLoss
                        
            print('[batchSize = %d, dropOutProb = %.2g, learningRate = %.2g, epoch = %d] loss: %f, trainLoss: %f, valLoss: %f' %
                                                (batchSize, dropOutProb, learningRate, epoch + 1, avgLoss, trainLoss, valLoss))
            
            # store the network parameters if the validation loss in the current epoch is the best so far
            if valLoss < bestValLoss[d,l]:
                torch.save(net.state_dict(), path)
                bestValLoss[d,l] = valLoss
            
            # check for convergence and exit early if valLoss is not improving.
            # History is newest-first; the first entry is -inf (prevValLoss starts at +inf).
            deltaValLoss = valLoss - prevValLoss
            deltaValLossHistory.insert(0, deltaValLoss)            
            
            if epoch >= movingWindowSize:
                # drop the oldest delta so exactly movingWindowSize remain; their sum is
                # valLoss now minus valLoss movingWindowSize epochs ago
                deltaValLossHistory.pop()
                
                if sum(deltaValLossHistory) >= 0:
                    print ('Validation loss starting to increase. Exiting...')
                    break
                
            prevValLoss = valLoss                    
        
        print ('Finished training...')
        print ('\n')



[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 1] loss: 1412191410.648600, trainLoss: 1412192000.000000, valLoss: 1675687.625000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 2] loss: 1643607.539400, trainLoss: 1643606.750000, valLoss: 1647086.375000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 3] loss: 1605980.480800, trainLoss: 1605990.750000, valLoss: 1602389.625000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 4] loss: 1553772.883000, trainLoss: 1553774.500000, valLoss: 1542396.000000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 5] loss: 1487008.408300, trainLoss: 1486994.375000, valLoss: 1468197.375000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 6] loss: 1406988.613200, trainLoss: 1406996.000000, valLoss: 1381179.500000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.1, epoch = 7] loss: 1316709.478800, trainLoss: 1316708.000000, valLoss: 1287109.250000
[batchSize = 6

[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 6] loss: 442115.844212, trainLoss: 442115.312500, valLoss: 455133.531250
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 7] loss: 417010.234188, trainLoss: 417012.656250, valLoss: 527964.687500
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 8] loss: 478843.336731, trainLoss: 478843.750000, valLoss: 478570.375000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 9] loss: 445425.514325, trainLoss: 445423.000000, valLoss: 452419.218750
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 10] loss: 14461347822.849125, trainLoss: 14461301760.000000, valLoss: 530026.687500
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 11] loss: 192995876.638050, trainLoss: 192995920.000000, valLoss: 549080.125000
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.01, epoch = 12] loss: 497056.973450, trainLoss: 497065.250000, valLoss: 515197.843750
[batchSize = 64

[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 38] loss: 85251.967148, trainLoss: 85252.890625, valLoss: 85698.281250
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 39] loss: 87482.604106, trainLoss: 87483.734375, valLoss: 81128.179688
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 40] loss: 101936.690378, trainLoss: 101937.804688, valLoss: 125829.593750
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 41] loss: 89880.864937, trainLoss: 89881.226562, valLoss: 76914.257812
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 42] loss: 74636.673095, trainLoss: 74637.195312, valLoss: 75124.929688
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 43] loss: 90748.692072, trainLoss: 90748.640625, valLoss: 102550.507812
[batchSize = 64, dropOutProb = 0.3, learningRate = 0.001, epoch = 44] loss: 86595.114973, trainLoss: 86595.226562, valLoss: 82085.515625
[batchSize = 64, dropOutProb = 0.3, l

[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 22] loss: 587702.004900, trainLoss: 587704.375000, valLoss: 573755.812500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 23] loss: 526086.543150, trainLoss: 526084.562500, valLoss: 515432.187500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 24] loss: 470828.792625, trainLoss: 470829.593750, valLoss: 462726.375000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 25] loss: 421849.490300, trainLoss: 421843.968750, valLoss: 417313.562500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 26] loss: 379041.149400, trainLoss: 379049.156250, valLoss: 377102.812500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 27] loss: 342282.060500, trainLoss: 342285.031250, valLoss: 343368.812500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.1, epoch = 28] loss: 311368.443787, trainLoss: 311366.531250, valLoss: 315523.843750
[batchSize = 64, dropOutProb = 0.5

[batchSize = 64, dropOutProb = 0.5, learningRate = 0.01, epoch = 20] loss: 1473680.769700, trainLoss: 1473678.875000, valLoss: 1480139.125000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.01, epoch = 21] loss: 1451181.380600, trainLoss: 1451190.000000, valLoss: 1460232.500000
Validation loss starting to increase. Exiting...
Finished training...


[batchSize = 64, dropOutProb = 0.5, learningRate = 0.001, epoch = 1] loss: 1712687.007200, trainLoss: 1712709.750000, valLoss: 864703.687500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.001, epoch = 2] loss: 664361.931200, trainLoss: 664359.250000, valLoss: 600509.062500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.001, epoch = 3] loss: 539460.708000, trainLoss: 539460.312500, valLoss: 553330.937500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.001, epoch = 4] loss: 483330.832169, trainLoss: 483334.500000, valLoss: 679229.750000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.001, epoch = 5] loss: 488528.6020

[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 12] loss: 1356124.889800, trainLoss: 1356125.500000, valLoss: 1578738.500000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 13] loss: 1169875.935000, trainLoss: 1169880.500000, valLoss: 1905932.250000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 14] loss: 817684.616700, trainLoss: 817686.500000, valLoss: 1337043.375000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 15] loss: 588282.991600, trainLoss: 588282.125000, valLoss: 990169.187500
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 16] loss: 512863.651575, trainLoss: 512865.625000, valLoss: 1093715.625000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 17] loss: 561705.358250, trainLoss: 561710.125000, valLoss: 621197.125000
[batchSize = 64, dropOutProb = 0.5, learningRate = 0.0001, epoch = 18] loss: 524192.985450, trainLoss: 524192.000000, valLoss: 747805.500000
[batc

[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 34] loss: 224022.666813, trainLoss: 224023.859375, valLoss: 241146.187500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 35] loss: 224021.409237, trainLoss: 224023.468750, valLoss: 241148.640625
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 36] loss: 224022.351737, trainLoss: 224021.578125, valLoss: 241146.187500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 37] loss: 224022.312075, trainLoss: 224020.703125, valLoss: 241145.750000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 38] loss: 224022.999700, trainLoss: 224022.296875, valLoss: 241146.187500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 39] loss: 224023.595288, trainLoss: 224023.781250, valLoss: 241146.187500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.1, epoch = 40] loss: 224021.871700, trainLoss: 224020.890625, valLoss: 241148.640625
[batchSize = 64, dropOutProb = 0.7

[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 9] loss: 366139.820475, trainLoss: 366140.312500, valLoss: 879382.687500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 10] loss: 389372.473263, trainLoss: 389368.187500, valLoss: 375202.343750
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 11] loss: 329247.463938, trainLoss: 329248.031250, valLoss: 336119.531250
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 12] loss: 310520.099606, trainLoss: 310519.062500, valLoss: 296160.781250
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 13] loss: 360538.303600, trainLoss: 360535.375000, valLoss: 317575.218750
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 14] loss: 279283.099962, trainLoss: 279281.156250, valLoss: 327896.562500
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.001, epoch = 15] loss: 263297.486463, trainLoss: 263299.593750, valLoss: 224656.765625
[batchSize = 64, drop

[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 10] loss: 1595408.050200, trainLoss: 1595405.625000, valLoss: 1724435.250000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 11] loss: 1599003.792800, trainLoss: 1599007.250000, valLoss: 1714704.375000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 12] loss: 1584301.244400, trainLoss: 1584302.625000, valLoss: 1757393.625000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 13] loss: 1578612.304800, trainLoss: 1578614.500000, valLoss: 1727151.000000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 14] loss: 1561272.126600, trainLoss: 1561272.125000, valLoss: 1708104.000000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 15] loss: 1549921.467600, trainLoss: 1549919.750000, valLoss: 1798842.500000
[batchSize = 64, dropOutProb = 0.7, learningRate = 0.0001, epoch = 16] loss: 1536898.047600, trainLoss: 1536903.375000, valLoss: 1753738