In [None]:
!pip install torcheval
!pip install tabulate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torcheval
  Downloading torcheval-0.0.6-py3-none-any.whl (158 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.4/158.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchtnt>=0.0.5 (from torcheval)
  Downloading torchtnt-0.1.0-py3-none-any.whl (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.9/87.9 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
Collecting pyre-extensions (from torchtnt>=0.0.5->torcheval)
  Downloading pyre_extensions-0.0.30-py3-none-any.whl (12 kB)
Collecting typing-inspect (from pyre-extensions->torchtnt>=0.0.5->torcheval)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect->pyre-extensions->torchtnt>=0.0.5->torcheval)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensions, typing

In [None]:
# 6000 samples for each waveform recorded for 60 seconds of ground motion - sampling rate 100Hz
# 41365 waveforms - size of dataset for the chosen device PB;B082;33.598182;-116.596005;1374.8 HH & EH
# Magnitude Type - ml (local) - ~2.0 to ~6.5 magnitudes - 0 - 600 km distance range ;
# source latitude + longitude = epicenter + depth = hypocenter

# 4 plots, one per loss, each with the train and validation curves (done) + checkpoint
# on test, train and val - a table containing all the quantities + R2 on test

from google.colab import drive
import torch
from torch.utils.data import Dataset, DataLoader, default_collate
import h5py
import numpy as np
import librosa as lib
import librosa.display as libd
import pandas as pd
from tabulate import tabulate
import copy

# Mount Google Drive under /content/drive.
drive.mount('/content/drive')
# Drive folders that later cells interpolate into the paths used for saved
# figures (plots_dir) and for checkpoints / loss arrays (models_dir).
plots_dir = '/content/drive/My Drive/Plots/full'
models_dir = '/content/drive/My Drive/Models/full'

Mounted at /content/drive


In [None]:
class STEADDataset(Dataset):
  """Dataset pairing every trace listed in a CSV with dB spectrograms read from an HDF5 file.

  Items are produced on demand: the HDF5 file is opened, the waveform stored
  under ``data/<trace_name>`` is turned into one spectrogram per component and
  four source attributes are copied next to it.
  """

  # HDF5 attributes copied into every sample, in this order.
  LABEL_KEYS = ('source_magnitude', 'source_latitude', 'source_longitude', 'source_depth_km')

  def __init__(self,csv_file,hdf5_file,transform=None):
    """
      Args:
        csv_file (string): Path to the csv file with annotations.
        hdf5_file (string): File with all the waveforms.
        transform (callable, optional): Optional transform to be applied on a sample.
    """
    self.tags = pd.read_csv(csv_file)
    # Optional subset filter kept from the original notebook (disabled):
    # self.tags = self.tags [(self.tags.trace_category == 'earthquake_local') & (self.tags.source_distance_km<=20)&(self.tags.source_magnitude > 3)]
    self.hdf5_file = hdf5_file
    self.traces = self.tags['trace_name'].to_list()
    self.transform = transform

  def __len__(self):
    """Return the number of traces listed in the CSV."""
    return len(self.traces)

  def __getitem__(self,idx):
    """Build the sample for trace number ``idx``.

    Returns:
      A dict with key 'spectrograms' plus one entry per name in LABEL_KEYS,
      or whatever ``transform`` returns for that dict when a transform is set.

    Raises:
      KeyError: if the trace named in the CSV is not present in the HDF5 file.
    """
    if torch.is_tensor(idx):
      idx = idx.tolist()

    tracename = self.traces[idx]

    # The context manager closes the file even if reading or the attribute
    # lookup raises; previously the handle leaked on any exception.
    with h5py.File(self.hdf5_file,'r') as dataset:
      waveform = dataset.get('data/'+tracename)
      if waveform is None:
        raise KeyError("trace '{}' not found in {}".format(tracename, self.hdf5_file))
      data = np.array(waveform)
      # Copy the attribute values out while the file is still open.
      labels = {key: waveform.attrs[key] for key in self.LABEL_KEYS}

    sample = {'spectrograms': self.getSpectrogram(data)}
    sample.update(labels)

    if self.transform:
      sample = self.transform(sample)

    return sample

  def getSpectrogram(self, waveforms, n_fft=1024, hop_length=32):
    """Convert a waveform array into a stack of three dB-scaled STFT magnitudes.

    Args:
      waveforms: 2-D array; columns 0, 1 and 2 are processed separately
        (the original code names them EW, NS and Vert).
      n_fft: STFT window length in samples (the notebook previously tried 2048
        and noted 512 as an alternative).
      hop_length: number of samples between successive STFT frames.

    Returns:
      numpy array whose first axis has length 3, one spectrogram per column.
    """
    channels = []
    for column in range(3):
      spectrum = lib.stft(waveforms[:,column], n_fft=n_fft, hop_length=hop_length)
      # ref=np.max scales each channel relative to its own peak amplitude.
      channels.append(lib.amplitude_to_db(np.abs(spectrum), ref=np.max))

    return np.array(channels)


class ToTensor(object):
    """Transform that packs a raw sample dict into torch tensors.

    The four labels are stacked as float32 in the order magnitude, latitude,
    longitude, depth under the key 'results'. A sample whose depth label is
    the string "None" is dropped by returning None.
    """

    def __call__(self, sample):
        # Samples without a usable depth label are signalled with None.
        if sample['source_depth_km'] == "None":
            return None

        spec = sample['spectrograms']
        label_names = ('source_magnitude', 'source_latitude',
                       'source_longitude', 'source_depth_km')
        targets = np.array([sample[name] for name in label_names], dtype=np.float32)

        packed = dict()
        packed['spectrograms'] = torch.from_numpy(spec)
        packed['results'] = torch.from_numpy(targets)
        return packed


In [None]:
def custom_collate_fn(batch):
  """Collate a batch while ignoring samples that are None.

  Returns None when every sample in the batch is None (or the batch is empty);
  otherwise returns default_collate applied to the remaining samples.
  """
  usable = list(filter(lambda item: item is not None, batch))
  return default_collate(usable) if usable else None

In [None]:
STEAD_dataset = STEADDataset(csv_file="drive/My Drive/dataset.csv",
                             hdf5_file="drive/My Drive/dataset.hdf5",
                             transform=ToTensor())

# Smoke check: print the tensor shapes of the first four samples.
for i in range(min(4, len(STEAD_dataset))):
  sample = STEAD_dataset[i]
  # ToTensor returns None for samples whose depth label is "None";
  # indexing such a sample would raise a TypeError.
  if sample is None:
    print("sample {} skipped: no depth label".format(i))
    continue
  print(sample['results'].size())
  print(sample['spectrograms'].size())

print(len(STEAD_dataset))


  self.tags = pd.read_csv(csv_file)


torch.Size([4])
torch.Size([3, 513, 188])
torch.Size([4])
torch.Size([3, 513, 188])
torch.Size([4])
torch.Size([3, 513, 188])
torch.Size([4])
torch.Size([3, 513, 188])
41365


In [None]:
from torch.nn import Module # implement a class rather than using Sequential object
from torch.nn import Conv2d # convolutional layer
from torch.nn import BatchNorm2d #BatchNormalization
from torch.nn import Linear # Fully connected layers
from torch.nn import MaxPool2d # 2D max-pooling to reduce spatial dimensions
from torch.nn import ReLU # activation function
from torch.optim import Adam # Adam Optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau # Scheduler that reduces learning rate on plateau
from torch import flatten # Flattens the output of a multidimensional volume (CONV or POOl layer) -> Fully connected layer
from torch.utils.data import random_split # for splitting Dataset into Train, Evaluation and Test
from torcheval.metrics import R2Score # Evaluates model's accuracy
from torch import nn
import torch
import time # for timing train loop
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt


In [None]:
# defining residual block class
class block(Module):
  """Residual stage: conv+BN+ReLU, then conv+BN added to the first result, ReLU, 2x2 max pool.

  The skip connection is taken after the first convolution, so both operands
  of the addition already have ``out_channels`` channels.

  Args:
    in_channels: channels of the incoming feature map.
    out_channels: channels produced by both convolutions.
    kernel_size: kernel size shared by both convolutions.
    padding: padding shared by both convolutions.
  """
  def __init__(self, in_channels, out_channels, kernel_size=(5,5), padding=(2,2)):
    super().__init__()
    conv_options = dict(kernel_size=kernel_size, padding=padding)
    # Attribute names and creation order are kept stable: they define the
    # state_dict keys and the order in which parameters are initialised.
    self.conv1 = Conv2d(in_channels, out_channels, **conv_options)
    self.bn1 = BatchNorm2d(out_channels)
    self.conv2 = Conv2d(out_channels, out_channels, **conv_options)
    self.bn2 = BatchNorm2d(out_channels)
    self.relu = ReLU(inplace=True)
    self.maxpool = MaxPool2d(kernel_size=(2,2), stride=(2,2))

  def forward(self, x):
    # First conv stage; its activated output doubles as the skip branch.
    skip = self.relu(self.bn1(self.conv1(x)))

    # Second conv stage, merged with the skip branch before the activation.
    merged = self.bn2(self.conv2(skip))
    merged += skip

    return self.maxpool(self.relu(merged))


class ResNet(Module):
  """Convolutional regressor: two 7x7 conv stems, three residual stages, a conv head and an MLP.

  Args:
    block: class used for the three residual stages; it is called with the
      keywords in_channels, out_channels, kernel_size and padding.
    numChannels: number of channels of the input tensor.
    outputNodes: number of values produced by the final linear layer.
  """
  def __init__(self, block, numChannels, outputNodes):
    super().__init__()

    # Stem, part 1: CONV => BN => RELU => POOL (activation and pooling modules are shared below)
    self.conv1 = Conv2d(in_channels=numChannels, out_channels=16, kernel_size=(7,7), padding=(3,3))
    self.bn1 = BatchNorm2d(16)
    self.relu = ReLU(inplace=True)
    self.maxpool = MaxPool2d(kernel_size=(2,2), stride=(2,2))

    # Stem, part 2: CONV => BN => RELU => POOL
    self.conv2 = Conv2d(in_channels=16, out_channels=16, kernel_size=(7,7), padding=(3,3))
    self.bn2 = BatchNorm2d(16)

    # Residual stages: 16 -> 32 -> 64 -> 96 channels
    self.layer1 = block(in_channels=16, out_channels=32, kernel_size=(5,5), padding=(2,2))
    self.layer2 = block(in_channels=32, out_channels=64, kernel_size=(3,3), padding=(1,1))
    self.layer3 = block(in_channels=64, out_channels=96, kernel_size=(3,3), padding=(1,1))

    # Head: CONV => BN => RELU => CONV => BN => POOL
    self.conv3 = Conv2d(in_channels=96, out_channels=128, kernel_size=(3,3), padding=(1,1))
    self.bn3 = BatchNorm2d(128)
    self.conv4 = Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), padding=(1,1))
    self.bn4 = BatchNorm2d(128)

    # Fully connected layer; the flattened head output must have 2048 features
    # (128 channels times 16 remaining spatial positions).
    self.fc1 = Linear(in_features=2048, out_features=1024)

    # Linear regression layer producing the predictions
    self.fc2 = Linear(in_features=1024, out_features=outputNodes)

  def forward(self, x):
    # two stem stages, each ending in a 2x2 pooling
    stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))
    stem = self.maxpool(self.relu(self.bn2(self.conv2(stem))))

    # three residual stages
    features = self.layer3(self.layer2(self.layer1(stem)))

    # convolutional head; no activation between the last batch norm and the pooling
    head = self.relu(self.bn3(self.conv3(features)))
    head = self.maxpool(self.bn4(self.conv4(head)))

    # flatten everything but the batch axis, then FC => RELU => regression layer
    hidden = self.relu(self.fc1(flatten(head, 1)))
    return self.fc2(hidden)


# defining Early Stopping class
class EarlyStopping():
  """Tracks the best validation loss, checkpoints the model on improvement and signals when to stop.

  Note: earlyStop reads the notebook-level names ``model`` and ``models_dir``
  when it saves a checkpoint.

  Args:
    patience: number of non-improving checks after which earlyStop returns True.
    min_delta: tolerance above the best loss; a loss inside this band is
      neither an improvement nor counted against the patience.
  """
  def __init__(self, patience = 1, min_delta = 0):
    self.patience, self.min_delta = patience, min_delta
    self.counter = 0
    self.best_epoch = 0
    self.min_validation_loss = np.inf
    # Loss lists recorded at the best epoch (five entries each once set by the training loop).
    self.best_train = [None] * 5
    self.best_val = [None] * 5

  def earlyStop(self, validation_loss, epoch, TrainLoss, ValLoss):
    """Record one validation result; return True when training should stop."""
    if validation_loss <= self.min_validation_loss:
      print("[INFO] In EPOCH {} the loss value improved from {:.5f} to {:.5f}".format(epoch, self.min_validation_loss, validation_loss))
      self.min_validation_loss = validation_loss
      self.counter = 0
      self.best_epoch = epoch
      # Persist the weights of the best epoch seen so far.
      torch.save(model.state_dict(), f"{models_dir}/ResNet3_state_dict.pt")
      self.setBestLosses(TrainLoss, ValLoss)
      return False

    threshold = self.min_validation_loss + self.min_delta
    if not (validation_loss > threshold):
      # Inside the tolerance band: nothing to record.
      return False

    self.counter += 1
    print("[INFO] In EPOCH {} the loss value did not improve from {:.5f}. This is the {} EPOCH in a row.".format(epoch, self.min_validation_loss, self.counter))
    return self.counter >= self.patience

  # --- setters used to restore state when training is resumed ---

  def setCounter(self, counter_state):
    self.counter = counter_state

  def setMinValLoss(self, ValLoss):
    self.min_validation_loss = ValLoss

  def setBestLosses(self, TrainLoss, ValLoss):
    self.best_train, self.best_val = TrainLoss, ValLoss

  def setBestEpoch(self, bestEpoch):
    self.best_epoch = bestEpoch

  # --- getters ---

  def getBestTrainLosses(self):
    return self.best_train

  def getBestValLosses(self):
    return self.best_val

  def getBestEpoch(self):
    return self.best_epoch

  # --- persistence of the best-epoch losses as .npy files under models_dir ---

  def saveLossesLocally(self):
    for filename, losses in (('losses_train_3.npy', self.best_train),
                             ('losses_val_3.npy', self.best_val)):
      np.save(f'{models_dir}/{filename}', np.array(losses))

  def loadLossesLocally(self):
    self.best_train = np.load(f'{models_dir}/losses_train_3.npy')
    self.best_val = np.load(f'{models_dir}/losses_val_3.npy')


In [None]:
# define training hyperparameters
# training hyperparameters: initial learning rate, batch size, maximum number of epochs
INIT_LR, BATCH_SIZE, EPOCHS = 5*1e-4, 32, 50

# dataset fractions: training share, and the share given to validation and to test each
TRAIN_SPLIT, VAL_TEST_SPLIT = 0.70, 0.15

# train on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("[INFO] device used for training...{}".format(device))

[INFO] device used for training...cuda


In [None]:
# calculate the train/validation split
print("[INFO] generating the train/validation split...")

# Sizes of the three subsets; the test set receives whatever remains after
# truncating the train and validation sizes to whole samples.
datasetSize = len(STEAD_dataset)
numTrainSamples = int(datasetSize*TRAIN_SPLIT)
numValSamples = int(datasetSize*VAL_TEST_SPLIT)
numTestSamples = int(datasetSize-(numTrainSamples+numValSamples))

# A fixed seed makes the random partition identical on every run.
splitGenerator = torch.Generator().manual_seed(19)
(trainData, valData, testData) = random_split(
    STEAD_dataset,
    [numTrainSamples, numValSamples, numTestSamples],
    generator=splitGenerator)

[INFO] generating the train/validation split...


In [None]:
# initialize the train, validation and test dataloaders
# Data loaders for the three subsets; only the training data is reshuffled every epoch.
# custom_collate_fn drops samples that the transform returned as None.
trainDataLoader = DataLoader(trainData, shuffle=True, batch_size=BATCH_SIZE, collate_fn = custom_collate_fn)
valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE, collate_fn = custom_collate_fn)
testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE, collate_fn = custom_collate_fn)

# Number of batches per pass, used as the divisor when averaging per-batch losses.
# len(loader) also counts the final, smaller batch; the previous
# `len(dataset) // BATCH_SIZE` left it out of the divisor while its loss was
# still summed, which inflated every reported average.
# NOTE(review): averages logged by earlier runs used the old divisor, so they
# are slightly higher than values computed from here on.
trainSteps = len(trainDataLoader)
valSteps = len(valDataLoader)
testSteps = len(testDataLoader)

In [None]:
# load from drive previous results
# Load the checkpoint written at the end of every epoch by the training loop.
# map_location lets a checkpoint saved on the GPU be restored on a CPU-only
# runtime (and vice versa).
previous_state = torch.load(f"{models_dir}/train_state_dict_3.pt", map_location=device)

# initialize the ResNet model and restore its weights
print("[INFO] initializing the ResNet model...")
model = ResNet(block=block, numChannels=3, outputNodes=4).to(device)
model.load_state_dict(previous_state['model_state_dict'])

# initialize the optimizer and lr_scheduler, then restore their state
opt = Adam(model.parameters(), lr=INIT_LR)
opt.load_state_dict(previous_state['optimizer_state_dict'])

scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=2)
scheduler.load_state_dict(previous_state['scheduler_state_dict'])

# initialize loss function and accuracy measurement
lossFn = nn.MSELoss()
r2score_metric = R2Score(multioutput='raw_values', device=device)

# Per-epoch loss history restored from the checkpoint. It is a dict of lists
# with the keys "<target>_train_loss" and "<target>_val_loss" for the targets
# magnitude, latitude, longitude and depth (the keys the training loop appends to).
H = previous_state['train_loss_history']
# The checkpoint stores the index of the last finished epoch; resume with the next one.
last_epoch = previous_state['epoch']+1

# measure how long training is going to take
print("[INFO] training the network...")
startTime = time.time()

[INFO] initializing the ResNet model...
[INFO] training the network...


In [None]:
# loop over training epochs

# Early-stopping state carried over by hand from the previous session:
# best epoch 12, with its train / validation losses in the order
# (general, magnitude, latitude, longitude, depth).
early_stopper = EarlyStopping(patience = 4)
early_stopper.setBestLosses([0.63514, 0.03042 , 0.02362 , 0.05065, 2.43586],[1.00210, 0.02329 , 0.02095 , 0.07678, 3.88739])
early_stopper.setBestEpoch(12)
early_stopper.setCounter(0)
early_stopper.setMinValLoss(1.00210)

# Target names in the column order of the prediction / label tensors.
targetNames = ("magnitude", "latitude", "longitude", "depth")
separator = "==========================================================================================="

for e in range(last_epoch, EPOCHS):
  # put the model in training mode
  model.train()

  # Running loss sums for this epoch: index 0 is the loss over all targets,
  # indices 1-4 follow targetNames.
  trainTotals = [0] * 5
  valTotals = [0] * 5

  for sampled_batch in trainDataLoader:
    # batches in which every sample was filtered out are skipped
    if sampled_batch is None:
      continue

    # send the input to the device
    x = sampled_batch['spectrograms'].to(device)
    y = sampled_batch['results'].to(device)

    # forward pass and training loss
    pred = model(x)
    loss = lossFn(pred, y)

    # zero out the gradients, perform the backprop step and update the weights
    opt.zero_grad()
    loss.backward()
    opt.step()

    # accumulate the overall loss and the loss of every individual target
    trainTotals[0] += loss.cpu().detach().numpy()
    for col in range(4):
      trainTotals[col+1] += lossFn(pred[:,col],y[:,col]).cpu().detach().numpy()

  # switch off autograd for evaluation
  with torch.no_grad():
    # put the model in evaluation mode
    model.eval()

    # loop over the validation set
    for sampled_batch in valDataLoader:
      if sampled_batch is None:
        continue

      x = sampled_batch['spectrograms'].to(device)
      y = sampled_batch['results'].to(device)

      # make the predictions and accumulate the validation losses
      pred = model(x)
      valTotals[0] += lossFn(pred, y).cpu().detach().numpy()
      for col in range(4):
        valTotals[col+1] += lossFn(pred[:,col],y[:,col]).cpu().detach().numpy()

  # average the summed losses over the number of steps
  trainAverages = [total / trainSteps for total in trainTotals]
  valAverages = [total / valSteps for total in valTotals]
  (avgGeneralTrainLoss, avgMagnitudeTrainLoss, avgLatitudeTrainLoss,
   avgLongitudeTrainLoss, avgDepthTrainLoss) = trainAverages
  (avgGeneralValLoss, avgMagnitudeValLoss, avgLatitudeValLoss,
   avgLongitudeValLoss, avgDepthValLoss) = valAverages

  # let the scheduler see the overall validation loss of this epoch
  scheduler.step(avgGeneralValLoss)

  # update the per-target training and validation history
  for name, trainAvg, valAvg in zip(targetNames, trainAverages[1:], valAverages[1:]):
    H[name + "_train_loss"].append(trainAvg)
    H[name + "_val_loss"].append(valAvg)

  # checkpoint: state dicts of this epoch plus the loss history
  checkpoint = {
      'epoch': e,
      'model_state_dict': model.state_dict(),
      'optimizer_state_dict': opt.state_dict(),
      'scheduler_state_dict': scheduler.state_dict(),
      'train_loss_history': H
  }
  torch.save(checkpoint, f"{models_dir}/train_state_dict_3.pt")

  # print the model training and validation information
  print("[INFO] EPOCH: {}/{} ...".format(e+1, EPOCHS))
  print("Train loss (General, Magnitude, Latitude, Longitude, Depth): {:.5f}, {:.5f} , {:.5f} , {:.5f}, {:.5f}".format(
      *trainAverages))
  print("Val loss (General, Magnitude, Latitude, Longitude, Depth): {:.5f}, {:.5f} , {:.5f} , {:.5f}, {:.5f}".format(
      *valAverages))

  # ask the early stopper whether the patience has run out
  shouldStop = early_stopper.earlyStop(avgGeneralValLoss, (e+1), trainAverages, valAverages)
  if shouldStop:
    print("[INFO] Early Stopping the train process. The patience has been exceeded!")
    print(separator)
    break

  print(separator)

# finish measuring how long training took
endTime = time.time()
print("[INFO] Total time taken to train the model: {:.2f}s".format(endTime-startTime))
print("[INFO] The best loss value was found in EPOCH {} where the performance was {:.5f}. Model's parameters saved!".format(early_stopper.getBestEpoch(), early_stopper.getBestValLosses()[0]))
early_stopper.saveLossesLocally()

[INFO] EPOCH: 13/50 ...
Train loss (General, Magnitude, Latitude, Longitude, Depth): 0.58257, 0.02870 , 0.02337 , 0.05071, 2.22750
Val loss (General, Magnitude, Latitude, Longitude, Depth): 1.00940, 0.02323 , 0.02312 , 0.04329, 3.94798
[INFO] In EPOCH 13 the loss value did not improve from 1.00210. This is the 1 EPOCH in a row.
[INFO] EPOCH: 14/50 ...
Train loss (General, Magnitude, Latitude, Longitude, Depth): 0.52979, 0.02973 , 0.02365 , 0.05393, 2.01187
Val loss (General, Magnitude, Latitude, Longitude, Depth): 1.04500, 0.02454 , 0.02718 , 0.08215, 4.04611
[INFO] In EPOCH 14 the loss value did not improve from 1.00210. This is the 2 EPOCH in a row.
[INFO] EPOCH: 15/50 ...
Train loss (General, Magnitude, Latitude, Longitude, Depth): 0.47669, 0.02950 , 0.02252 , 0.05046, 1.80427
Val loss (General, Magnitude, Latitude, Longitude, Depth): 1.09792, 0.02427 , 0.04290 , 0.33768, 3.98683
[INFO] In EPOCH 15 the loss value did not improve from 1.00210. This is the 3 EPOCH in a row.
[INFO] EPO

In [None]:
# plot the training and val losses
plt.style.use("ggplot")

# One figure per regression target, each showing the train and validation loss
# history and saved under plots_dir.
for target in ("magnitude", "latitude", "longitude", "depth"):
  trainKey = f"{target}_train_loss"
  valKey = f"{target}_val_loss"

  # reuse (and wipe) the named figure so re-running the cell does not stack curves
  plt.figure(f"{target}_loss").clear()
  for key in (trainKey, valKey):
    plt.plot(H[key], label=key, linestyle="solid")
  plt.title(f"{target.capitalize()} Loss on Dataset")
  plt.xlabel("Epoch #")
  plt.ylabel("Loss")
  plt.legend(loc="upper right")
  plt.savefig(f"{plots_dir}/ResNet3_{target}_loss.png")

In [None]:
# evaluation of the network in the test set
print("[INFO] evaluating network...")

# Restore the weights that EarlyStopping.earlyStop saves whenever the validation
# loss improves. The cell used to read "ResNet_state_dict.pt", a file name that
# nothing in this notebook writes; map_location allows evaluation on a runtime
# whose device differs from the one used for training.
bestStatePath = f"{models_dir}/ResNet3_state_dict.pt"
model.load_state_dict(torch.load(bestStatePath, map_location=device))

# Clear the metric so that re-running this cell does not add the test batches
# on top of the ones accumulated by a previous run.
r2score_metric.reset()

# Per-batch lists of true and predicted values; the plotting cell flattens them.
test_results = {
    "true_values":[],
    "pred_values":[]
}

# Running loss sums: index 0 is the loss over all targets, indices 1-4 are
# magnitude, latitude, longitude and depth (the column order of the tensors).
testTotals = [0] * 5

# turn off autograd for testing evaluation
with torch.no_grad():
  # set the model in evaluation mode
  model.eval()

  # loop over the test set
  for sampled_batch in testDataLoader:
    # batches in which every sample was filtered out are skipped
    if sampled_batch is None:
      continue

    # send the input to the device
    x = sampled_batch['spectrograms'].to(device)
    y = sampled_batch['results'].to(device)
    test_results["true_values"].append(y.cpu().detach().numpy().tolist())

    # make the predictions and add them to the list
    pred = model(x)
    test_results["pred_values"].append(pred.cpu().detach().numpy().tolist())

    testTotals[0] += lossFn(pred, y).cpu().detach().numpy()
    for col in range(4):
      testTotals[col+1] += lossFn(pred[:,col], y[:,col]).cpu().detach().numpy()
    r2score_metric.update(pred, y)

# average test MSE per step and R2 score per target
(avgGeneralTestLoss, avgMagnitudeTestLoss, avgLatitudeTestLoss,
 avgLongitudeTestLoss, avgDepthTestLoss) = [total / testSteps for total in testTotals]

r2score_value = r2score_metric.compute()

print("[INFO] Loss/Accuracy values obtained on the test set")
print("[INFO] Test loss (General, Magnitude, Latitude, Longitude, Depth): {:.5f}, {:.5f} , {:.5f} , {:.5f}, {:.5f}".format(
    avgGeneralTestLoss, avgMagnitudeTestLoss, avgLatitudeTestLoss, avgLongitudeTestLoss, avgDepthTestLoss))
print("[INFO] R2 Score obtained on the test set: {}".format(r2score_value.cpu().detach().numpy()))

[INFO] evaluating network...
[INFO] Loss/Accuracy values obtained on the test set
[INFO] Test loss (General, Magnitude, Latitude, Longitude, Depth): 1.00401, 0.02468 , 0.02240 , 0.03391, 3.93505
[INFO] R2 Score obtained on the test set: [0.9100773  0.71742815 0.5779206  0.8045777 ]


In [None]:
# plotting and saving plots for Y_true - Y_pred
plt.style.use("ggplot")

# To re-plot from previously saved points instead of a fresh evaluation:
# puncte = torch.load(f"{models_dir}/true_pred_points.pt")

# Flatten the per-batch lists collected during evaluation into two
# (num_samples, 4) arrays: magnitude, latitude, longitude, depth.
test_true = np.array([row for batch in test_results["true_values"] for row in batch])
test_pred = np.array([row for batch in test_results["pred_values"] for row in batch])

# keep the points on disk so the figures can be regenerated without the model
torch.save({
            'true_points':test_true,
            'pred_points':test_pred
            }, f"{models_dir}/true_pred_points_resnet3.pt")

# One observed-vs-estimated scatter plot per target, with a least-squares line.
# Tuples are (column index, name used in figure / file names, plot title prefix).
scatterTargets = [
    (0, "magnitude", "Magnitudine"),
    (1, "latitude", "Latitudine"),
    (2, "longitude", "Longitudine"),
    (3, "depth", "Adancime"),
]

for col, target, titlePrefix in scatterTargets:
  observed = test_true[:,col]
  estimated = test_pred[:,col]

  plt.figure(f"{target}_true-pred").clear()
  plt.plot(observed, estimated, "ob")

  # A straight line is fully defined by two points. Drawing it through every
  # (unsorted) observation retraced the same segment thousands of times and
  # filled in the dashes.
  m, b = np.polyfit(observed, estimated, 1)
  lineX = np.array([observed.min(), observed.max()])
  plt.plot(lineX, m*lineX+b, "--r")

  plt.title(f"{titlePrefix} Y_estimat vs Y_observat")
  plt.xlabel("Y_observat")
  plt.ylabel("Y_estimat")
  plt.savefig(f"{plots_dir}/ResNet3_{target}_true-pred.png")


# table with the best values obtained on train and validation, plus the test values
early_stopper.loadLossesLocally()
bestTrain = early_stopper.getBestTrainLosses()
bestVal = early_stopper.getBestValLosses()
testLosses = [avgGeneralTestLoss, avgMagnitudeTestLoss, avgLatitudeTestLoss,
              avgLongitudeTestLoss, avgDepthTestLoss]
metricLabels = ["General MSE", "Magnitude MSE", "Latitude MSE", "Longitude MSE", "Depth MSE"]

# All three columns are rounded to 5 decimals (the Test column used to be
# printed unrounded, unlike Train and Validation).
models_performance = [
    [label, round(float(bestTrain[i]), 5), round(float(bestVal[i]), 5), round(float(testLosses[i]), 5)]
    for i, label in enumerate(metricLabels)
]

# R2 has one value per target, so it gets its own row layout and table.
r2_score_list = r2score_value.cpu().detach().numpy().tolist()
models_performance.append(["R2 Score", r2_score_list[0], r2_score_list[1], r2_score_list[2], r2_score_list[3]])

print("----------------------------------------------------------------------")
print("Loss obtained on Train, Valdation and Test sets")
print(tabulate(models_performance[0:5], headers=["Metric", "Train", "Validation", "Test"], tablefmt="github"))

print("----------------------------------------------------------------------")
print("Accuracy obtained on the Test set")
print(tabulate([models_performance[5]], headers=["Metric", "Magnitude", "Latitude", "Longitude", "Depth"], tablefmt="github"))

# serialize the model to disk
torch.save(model, f"{models_dir}/ResNet3_model.pt")

# TODO: Justify hop_length, batch size
# TODO: Covariate shift and vanishing gradients


# Why is the training loss so high in the first epoch, regardless of the learning rate?
# Regularization - weight decay?
# How do I plot the regression line for the point cloud - numpy polyfit?
# Should I use plotly to build the interactive map, or keep the Basemap version?

# TODO: BatchNorm (and ResNet) - from what I can see, what I have is a plain CNN

# Conclusions on the results obtained.
# How do I justify the choice of the neural network architecture?
# Was I expected to build something from scratch or to modify an existing architecture?
# I just do not understand how I could justify the choice I made.
# To do: learn more about ResNets

# How do I justify the choice of batch size, epochs and hop_length?
# To look up

# Given that the dataset has 41365 samples, should I use 41360 so that the
# 70% - 15% - 15% split is exact? Or does it not matter?
# No

# Is the final layer I added OK? (MLP (flatten)-2048-1024-3)
# Yes

# Should I use the whole dataset or a subset? If I use the whole dataset, would it
# be OK to also use a larger batch size?
# It does not speed up training. The dataset is fine as it is. Tests on 10000

NameError: ignored

In [None]:
import plotly.graph_objects as go
maps_dir = '/content/drive/My Drive/Map'

# Hover label shared by both traces; the fields are filled with the running
# index, latitude, longitude, magnitude and depth of an event.
hoverTemplate = 'Numar cutremur: {} <br>Latitudine: {}<br>Longitudine: {} <br>Magnitudine: {} <br>Adancime: {}'


def _hover_labels(latitudes, longitudes, magnitudes, depths):
  """Return one formatted hover string per event, numbered from 0."""
  rows = zip(latitudes, longitudes, magnitudes, depths)
  return [hoverTemplate.format(num, lat, lon, magn, depth)
          for num, (lat, lon, magn, depth) in enumerate(rows)]


def _marker_trace(latitudes, longitudes, color, name, labels):
  """Return a marker-only Scattermapbox trace that shows `labels` on hover."""
  return go.Scattermapbox(
      lat=latitudes,
      lon=longitudes,
      mode='markers',
      marker=dict(color=color),
      name=name,
      hovertext=labels,
      hoverinfo='text'
  )


# Columns of the test arrays: magnitude, latitude, longitude, depth.
gt_magnitudes, gt_latitudes, gt_longitudes, gt_depth = (test_true[:,col] for col in range(4))
pred_magnitudes, pred_latitudes, pred_longitudes, pred_depth = (test_pred[:,col] for col in range(4))

# Map layout: centre, zoom level and base-map style.
layout = go.Layout(
    mapbox=dict(
        center=dict(lat=33.5, lon=-116.8),
        zoom=8,
        style='stamen-terrain'
    ),
    title='Ground Truth and Predicted Epicenters')

# Hover text for the markers of each trace.
gt_hover_text = _hover_labels(gt_latitudes, gt_longitudes, gt_magnitudes, gt_depth)
pred_hover_text = _hover_labels(pred_latitudes, pred_longitudes, pred_magnitudes, pred_depth)

# Ground truth in blue, predictions in red.
gt_trace = _marker_trace(gt_latitudes, gt_longitudes, 'blue', 'Ground Truth', gt_hover_text)
pred_trace = _marker_trace(pred_latitudes, pred_longitudes, 'red', 'Predicted', pred_hover_text)

# Assemble the figure from both traces and the layout.
fig = go.Figure(data=[gt_trace, pred_trace], layout=layout)

# Write the interactive map to an HTML file under maps_dir.
fig.write_html(f"{maps_dir}/Test2_ResNet3_map.html")