In [0]:
#importing libraries
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from statistics import mean 
from sklearn.utils import resample
from sklearn.model_selection import train_test_split

In [0]:
#mounting drive
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
def data_preprocessing(X1, X2):
  INPUT_COUNT = X1.shape[1]
  #next two line of codes take mean of one input-feature set and subtracts it from all of the elements and the returns the absolute values for normalized phase diff
  X1 = X1 - np.transpose(X1.mean(axis=1).repeat(INPUT_COUNT).reshape(INPUT_COUNT, -1))
  X1 = np.absolute(X1)
  #next two line of codes take mean of one input-feature set and subtracts it from all of the elements and the returns the absolute values for normalized amplitudes
  X2 = X2 - np.transpose(X2.mean(axis=1).repeat(INPUT_COUNT).reshape(INPUT_COUNT, -1))
  X2 = np.absolute(X2)

  X = np.append(X1, X2, axis=1)

  del X1, X2
  return X

In [0]:
def data_processing(X, y, bagging=True, k=None, ratio=None, len_ensembled=None):


  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)#creating test train split

  del X, y

  X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2)#creating test validation split

  INPUT_COUNT = X_test.shape[1]
  train_samples = np.append(X_train, y_train.reshape(-1, 1), axis=1 )#appending input features and labels into one numpy array
  del X_train
  
  train_samples_tensor = torch.tensor(train_samples)
  X_val = torch.tensor(X_val)
  X_test = torch.tensor(X_test)
  y_val = torch.tensor(y_val)
  y_test = torch.tensor(y_test)




  if bagging == False:
    return INPUT_COUNT, train_samples_tensor, X_val, X_test, y_val, y_test



  nonPS = train_samples[y_train==0]#saving the example sets with label nonPS into one array
  PS = train_samples[y_train==1]#saving the example sets with label PS into other array

  del y_train

  training_datasets_list=[]#intializing list to append all the different data-sets that will be created

  for i in range(len_ensembled):

    nonPS_sample = resample(nonPS, n_samples=int(k*ratio*len(PS)))#nonPS example-sets in the current data-set chosen with replacement, (k*ratio*(number of PS pixels)) in size
    PS_sample = resample(PS, n_samples=int(k*len(PS)))#PS example-sets in the current data-set chosen with replacement, (k * (number of PS pixels)) in size

    train_sample = np.append(nonPS_sample, PS_sample, axis=0)#appending PS and nonPS examples

    np.random.shuffle(train_sample)#shuffling the appended array

    training_datasets_list.append(train_sample)#appending the shuffled data-set to the list training_datasets_list

  del nonPS, PS

  training_datasets_list = torch.tensor(training_datasets_list)

  

  return INPUT_COUNT, training_datasets_list, X_val, X_test, y_val, y_test


In [0]:
#class to instantize network object with "h_layers" hidden layers, "nodes_phl" nodes per hidden layer  and dropout layer with probability "p" 
class Net(nn.Module):
    def __init__(self, h_layers, nodes_phl, dropout_p):
        super().__init__()

        self.h_layers = h_layers
        
        self.input = nn.Sequential(
            nn.Linear(INPUT_COUNT, nodes_phl),
            nn.BatchNorm1d(nodes_phl),
            nn.ReLU()
        )
        
        self.hidden = nn.ModuleList()
        for k in range(h_layers-1):
            self.hidden.append(nn.Sequential(
            nn.Linear(nodes_phl, nodes_phl),
            nn.BatchNorm1d(nodes_phl),
            nn.ReLU()
        ))
        
        self.output = nn.Linear(nodes_phl, 1) if h_layers>0 else nn.Linear(INPUT_COUNT, 1)
        
        self.drop_layer = nn.Dropout(p=dropout_p)
    
    def forward(self, X):#forward propogation through network

        if self.h_layers:
          X = self.input(X)
          X = self.drop_layer(X)
        for layer in self.hidden:
          X = layer(X)
          X = self.drop_layer(X)
        X = self.output(X)
        return torch.sigmoid(X)

In [0]:
# function to calculate metrics for current batch
def calc_metrics(y_pred, y_true, beta):
  epsilon = 1e-7
  '''rounded each element to either 0 for less than 0.5 or 1 for greater than 0.5'''
  y_pred = torch.round(y_pred)

  
  tp = (y_true * y_pred).sum() # true positive
  fp = ((1-y_true) * y_pred).sum() # false positive
  fn = (y_true * (1-y_pred)).sum()# calculating false negative

  precision = tp / (tp + fp + epsilon)
  recall = tp / (tp + fn + epsilon )
  fbeta = (1 + beta*beta)*tp / ((1 + beta*beta)*tp + (beta*beta)*fp + fn + epsilon)

  return fbeta, precision, recall


In [0]:
#class to create F1_loss object instance
class Fbeta_Loss(nn.Module):
    
    def __init__(self, beta=1, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon
        self.beta = beta
        
    def forward(self, y_pred, y_true,):
        
        tp = (y_true * y_pred).sum()
        fp = ((1 - y_true) * y_pred).sum()
        fn = (y_true * (1 - y_pred)).sum()


        fbeta = (1 + self.beta*self.beta)*tp / ((1 + self.beta*self.beta)*tp + (self.beta*self.beta)*fp + fn + self.epsilon)
        return 1 - fbeta



In [0]:
#function to calculate loss and metrics
def calc_loss_n_metrics(outputs, batch_y, beta):
  outputs = outputs.view(-1)
  loss = loss_function(outputs.cpu(), batch_y.cpu())
  fbeta, precision, recall = calc_metrics(outputs.cpu(), batch_y.cpu(), beta)
  return loss, fbeta, precision, recall

In [0]:
#function to pass batch through network with some additional arguments
def fwd_pass(net, optimizer, batch_X, batch_y, train, beta):
    
    if train:
      net.train()
      net.zero_grad()
      outputs = net(batch_X)
      loss, fbeta, precision, recall = calc_loss_n_metrics(outputs, batch_y, beta)
      loss.backward()
      optimizer.step()
    
    else:
      net.eval()
      with torch.no_grad():
        outputs = net(batch_X)
      loss, fbeta, precision, recall = calc_loss_n_metrics(outputs, batch_y, beta)
    
    

    return loss, fbeta, precision, recall

In [0]:
# completing one epoch through the given dataset
def one_epoch(net, optimizer, X, y, BATCH_SIZE, beta, train=False):
  LOSS = []
  Fbeta = []
  PRECISION = []
  RECALL = []
  for i in range(0, len(X), BATCH_SIZE):
    batch_X = X[i:i+BATCH_SIZE].view(-1, INPUT_COUNT)
    batch_y = y[i:i+BATCH_SIZE].view(-1)
    
    batch_X, batch_y = batch_X.cuda(), batch_y.cuda()

    loss, fbeta, precision, recall = fwd_pass(net, optimizer, batch_X, batch_y, train, beta)
    LOSS.append(loss)
    Fbeta.append(fbeta)
    PRECISION.append(precision)
    RECALL.append(recall)
  LOSS = torch.mean(torch.stack(LOSS))
  Fbeta = torch.mean(torch.stack(Fbeta))
  PRECISION = torch.mean(torch.stack(PRECISION))
  RECALL = torch.mean(torch.stack(RECALL))

  return LOSS, Fbeta, PRECISION, RECALL


In [0]:
#function to train the network and save the list of best models
def train(training_datasets_list, metric_to_optimimze, number_of_training_datasets_to_train_on = 1, beta=1, threshold=200, h_layers=1, nodes_phl=1024, p=0.0):
    
    BATCH_SIZE = 32768
    net_list=[]

    for i in tqdm(range(number_of_training_datasets_to_train_on)):

      #instantizing network with 1 hidden layer, 1024 nodes per hidden layer and 0.0 dropout probability as obtained from hyper-parameter tuning
      net = Net(h_layers, nodes_phl, p).cuda()

      #intializing optimimzer instance
      optimizer = optim.Adam(net.parameters())

      best_val_fbeta=0
      best_val_precision=0
      best_val_recall=0

     
      best_net = Net(h_layers, nodes_phl, p).cuda()
      counter = 0


      #setting threshold epochs to terminate the training after which no improvement in the chosen validation metric is observed 
      THRESHOLD = threshold

      while(True):
        
        epoch_training_loss, epoch_training_fbeta, epoch_training_precision, epoch_training_recall = one_epoch(net, optimizer, training_datasets_list[i][:,0:-1], training_datasets_list[i][:,-1], BATCH_SIZE, beta, True)
        
        epoch_val_loss, epoch_val_fbeta, epoch_val_precision, epoch_val_recall = one_epoch(net, optimizer, X_val, y_val, BATCH_SIZE, beta, False)





        def update_counter(net, counter, epoch_val_metric, best_val_metric):

          is_best_metric = epoch_val_metric > best_val_metric
          best_val_metric = max(epoch_val_metric, best_val_metric)
          

          if is_best_metric:
            counter=0
            nonlocal best_net
            best_net.load_state_dict(net.state_dict())
             
          
          counter += 1

          return counter, best_val_metric

        
        if metric_to_optimimze=='fbeta':
          counter, best_val_fbeta = update_counter(net, counter, epoch_val_fbeta, best_val_fbeta)

        if metric_to_optimimze=='precision':
          counter, best_val_precision = update_counter(net, counter, epoch_val_precision, best_val_precision)

        if metric_to_optimimze=='recall':
          counter, best_val_recall = update_counter(net, counter, epoch_val_recall, best_val_recall)

    
        if(counter==THRESHOLD):
          net_list.append(best_net)
          break

    return net_list




In [0]:
#function to test our list of models on a test set which it has never seen before
def predict(combined_net, X_test, y_test, beta=1):
  combined_predictions_list=[]
  length = len(X_test)
  length_y = len(y_test)
  for net in combined_net:
    X_test=X_test.cuda()
    y_test=y_test.cuda()
    net.eval()
    with torch.no_grad():
      predictions = net(X_test)
    combined_predictions_list.append(predictions.cpu())
  for predictions in combined_predictions_list:
    torch.unsqueeze(predictions, 0)
  combined_predictions_tensor=torch.cat(combined_predictions_list)
  combined_predictions_tensor=combined_predictions_tensor.view(len(combined_predictions_list), length, 1)
  combined_predictions_tensor =  torch.mean(combined_predictions_tensor, 0)
  loss, fbeta, precision, recall = calc_loss_n_metrics(combined_predictions_tensor, y_test, beta)
  return combined_predictions_tensor, fbeta, precision, recall
  

In [0]:
#all hyperparameters obtained from hyperparameter tuning
ratio = 2#ratio of majority to minority class in the new data-set s created
k = 12.326192312106985#size of each data set would be k*(ratio+1)*total number of PS pixels in train data-set created
len_ensembled = 15#create 15 data-sets 15 being an arbitary number

In [0]:
#generating a list of data-set using bagging
GENERATE = False
if GENERATE:
  X1 =  np.load('/content/drive/My Drive/PSInSAR/normalized_phase_diff.npy')#loading normalized phase diff interferograms reshaped as (-1, 37) aka (-1, INPUT_CHANNELS)
  X2 = np.load('/content/drive/My Drive/PSInSAR/normalized_amplitude_reshaped.npy')#loading normalized amplitudes reshaped as (-1, 37) aka (-1, INPUT_CHANNELS)
  y =  np.load('/content/drive/My Drive/PSInSAR/labels.npy')
  X = data_preprocessing(X1, X2)
  del X1, X2
  INPUT_COUNT, training_datasets_list, X_val, X_test, y_val, y_test = data_processing(X, y, True, k, ratio, len_ensembled)
  del X, y
  torch.save({    
    'INPUT_COUNT': INPUT_COUNT,
    'training_datasets_list': training_datasets_list,
    'X_val': X_val,
    'X_test': X_test,
    'y_val':y_val,
    'y_test': y_test
            }, "/content/drive/My Drive/PSInSAR/data_splits.pt" )
else:
  data_splits = torch.load("/content/drive/My Drive/PSInSAR/data_splits.pt")
  INPUT_COUNT = data_splits['INPUT_COUNT']
  del data_splits['INPUT_COUNT']
  training_datasets_list = data_splits['training_datasets_list']
  del data_splits['training_datasets_list']
  X_val = data_splits['X_val']
  del data_splits['X_val']
  X_test = data_splits['X_test']
  del data_splits['X_test']
  y_val = data_splits['y_val']
  del data_splits['y_val']
  y_test = data_splits['y_test']
  del  data_splits['y_test']
  del data_splits

In [0]:
#instantizing loss_function 
f1_loss = Fbeta_Loss(1)
f2_loss = Fbeta_Loss(2)
f3_loss = Fbeta_Loss(3)

In [0]:
#instantizing loss_function 
fThreeFourthLoss = Fbeta_Loss(0.75)
fOneHalfLoss = Fbeta_Loss(0.5)
fOneThirdLoss = Fbeta_Loss(0.33)
fOneFourthLoss = Fbeta_Loss(0.25)

In [0]:
print(INPUT_COUNT)
print(training_datasets_list.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

74
torch.Size([15, 941880, 75])
torch.Size([954552, 74]) torch.Size([954552])
torch.Size([238639, 74]) torch.Size([238639])


In [0]:
#bagging fOneFourthscore fOneFourthloss threshold=200 data_size_len=1
loss_function = fOneFourthLoss
net_list_fOneFourth_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=0.25)
_, fOneFourth, precision, recall = predict(net_list_fOneFourth_1, X_test, y_test, beta = 0.25)
print(fOneFourth, precision, recall)

100%|██████████| 1/1 [22:23<00:00, 1343.74s/it]

tensor(0.5056) tensor(0.1051) tensor(0.6637)





In [0]:
#bagging fOneThirdscore fOneThirdloss threshold=200 data_size_len=1
loss_function = fOneThirdLoss
net_list_fOneThird_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=0.33)
_, fOneThird, precision, recall = predict(net_list_fOneThird_1, X_test, y_test, beta = 0.33)
print(fOneThird, precision, recall)

100%|██████████| 1/1 [16:24<00:00, 984.73s/it]

tensor(0.4641) tensor(0.1423) tensor(0.6158)





In [0]:
#bagging fOneHalfscore fOneHalfloss threshold=200 data_size_len=1
loss_function = fOneHalfLoss
net_list_fOneHalf_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=0.5)
_, fOneHalf, precision, recall = predict(net_list_fOneHalf_1, X_test, y_test, beta = 0.5)
print(fOneHalf, precision, recall)

100%|██████████| 1/1 [12:46<00:00, 766.22s/it]

tensor(0.4022) tensor(0.1884) tensor(0.5615)





In [0]:
#bagging fThreeFourthscore fThreeFourthloss threshold=200 data_size_len=1
loss_function = fThreeFourthLoss
net_list_fThreeFourth_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=0.75)
_, fThreeFourth, precision, recall = predict(net_list_fThreeFourth_1, X_test, y_test, beta = 0.75)
print(fThreeFourth, precision, recall)

100%|██████████| 1/1 [09:45<00:00, 585.87s/it]

tensor(0.3482) tensor(0.2228) tensor(0.5096)





In [0]:
#bagging f1score f1loss threshold=200 data_size_len=1
loss_function = f1_loss
net_list_f1_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=1)
_, f1, precision, recall = predict(net_list_f1_1, X_test, y_test, beta = 1)
print(f1, precision, recall)

100%|██████████| 1/1 [07:53<00:00, 473.94s/it]

tensor(0.3188) tensor(0.2497) tensor(0.4409)





In [0]:
#bagging f2score f1loss threshold=200 data_size_len=1
loss_function = f2_loss
net_list_f2_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=2)
_, f2, precision, recall = predict(net_list_f2_1, X_test, y_test, beta=2)
print(f2, precision, recall)

100%|██████████| 1/1 [10:31<00:00, 631.24s/it]

tensor(0.3063) tensor(0.2915) tensor(0.3842)





In [0]:
#bagging f3score f1loss threshold=200 data_size_len=1
loss_function = f3_loss
net_list_f3_1 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=1, beta=3)
_, f3, precision, recall = predict(net_list_f3_1, X_test, y_test, beta=3)
print(f3, precision, recall)

100%|██████████| 1/1 [06:47<00:00, 407.10s/it]

tensor(0.3208) tensor(0.3188) tensor(0.3395)





In [0]:
#bagging fOneFourthscore fONeFourthloss threshold=200 
loss_function = fOneFourthLoss
net_list_fbeta_15 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=15, beta=0.25)
state_dict_list=[]
for net in net_list_fbeta_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_fOneFourth_15.pt" )
_, fbeta, precision, recall = predict(net_list_fbeta_15, X_test, y_test, beta=0.25)
print(fbeta, precision, recall)

100%|██████████| 15/15 [4:34:25<00:00, 1051.32s/it]



tensor(0.5327) tensor(0.1227) tensor(0.6733)
tensor(0.5327) tensor(0.1227) tensor(0.6733)


In [0]:
#bagging fOneHalfscore fOneHalfLoss threshold=200 
loss_function = fOneHalfLoss
net_list_fbeta_15 = train(training_datasets_list, 'fbeta', number_of_training_datasets_to_train_on=15, beta=0.5)
state_dict_list=[]
for net in net_list_fbeta_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_fOneHalf_15.pt" )
_, fbeta, precision, recall = predict(net_list_fbeta_15, X_test, y_test, beta=0.5)
print(fbeta, precision, recall)

tensor(0.4235) tensor(0.2144) tensor(0.5599)


In [0]:
#bagging precision f1loss threshold=200 
net_list_precision_15 = train(training_datasets_list, 'precision', number_of_training_datasets_to_train_on=15)
state_dict_list=[]
for net in net_list_precision_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_precision_15.pt" )
_, f1, precision, recall = predict(net_list_precision_15, X_test, y_test)
print(f1, precision, recall)

In [0]:
#bagging f1score f1loss threshold=200 
net_list_f1_15 = train(training_datasets_list, 'f1', number_of_training_datasets_to_train_on=15)
state_dict_list=[]
for net in net_list_f1_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_f1_15.pt" )
_, f1, precision, recall = predict(net_list_f1_15, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 15/15 [2:38:41<00:00, 678.52s/it]


tensor(0.3514) tensor(0.2877) tensor(0.4513)


In [0]:
#bagging recall f1loss threshold=200 
net_list_recall_15 = train(training_datasets_list, 'recall', number_of_training_datasets_to_train_on=15)
for net in net_list_recall_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_recall_15.pt" )
_, f1, precision, recall = predict(net_list_recall_15, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 15/15 [35:29<00:00, 141.96s/it]


tensor(0.0104) tensor(0.0052) tensor(1.)


In [0]:
#Bagging precision f1loss threshold=500
loss_function = f1_loss
net_list_precision_15 = train(training_datasets_list, 'precision', number_of_training_datasets_to_train_on=15, threshold=500)
state_dict_list=[]
for net in net_list_precision_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_precision_15_thr_500.pt" )
_, f1, precision, recall = predict(net_list_precision_15, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 15/15 [3:49:38<00:00, 853.02s/it]


tensor(0.3479) tensor(0.2985) tensor(0.4169)


In [0]:
#Bagging f1score f1loss threshold=500
net_list_f1_15 = train(training_datasets_list, 'f1', number_of_training_datasets_to_train_on=15, threshold=500)
state_dict_list=[]
for net in net_list_f1_15:
  state_dict_list.append(net.state_dict())
torch.save(state_dict_list, "/content/drive/My Drive/PSInSAR/net_list_f1_15_thr_500.pt" )
_, f1, precision, recall = predict(net_list_f1_15, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 15/15 [4:57:55<00:00, 1244.46s/it]


tensor(0.3508) tensor(0.2869) tensor(0.4513)


In [0]:
#original dataset
X1 =  np.load('/content/drive/My Drive/PSInSAR/normalized_phase_diff.npy')#loading normalized phase diff interferograms reshaped as (-1, 37) aka (-1, INPUT_CHANNELS)
X2 = np.load('/content/drive/My Drive/PSInSAR/normalized_amplitude_reshaped.npy')#loading normalized amplitudes reshaped as (-1, 37) aka (-1, INPUT_CHANNELS)
y =  np.load('/content/drive/My Drive/PSInSAR/labels.npy')
X = data_preprocessing(X1, X2)
del X1, X2
INPUT_COUNT, training_datasets_list_no_baggaging, X_val, X_test, y_val, y_test = data_processing(X, y, False)
del X, y

In [0]:
#noBagging precision f1loss threshold=200
loss_function = f1_loss
net_list_precision_1_no_b = train([training_datasets_list_no_baggaging], 'precision')
state_dict_list=[]
for net in net_list_precision_1_no_b:
  state_dict_list.append(net.state_dict())
_, f1, precision, recall = predict(net_list_precision_1_no_b, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 1/1 [35:23<00:00, 2123.43s/it]

tensor(0.0500) tensor(0.5000) tensor(0.0263)





In [0]:
#noBagging f1score f1loss threshold=200
loss_function = f1_loss
net_list_f1_1_no_b = train([training_datasets_list_no_baggaging], 'fbeta')
state_dict_list=[]
for net in net_list_f1_1_no_b:
  state_dict_list.append(net.state_dict())
_, f1, precision, recall = predict(net_list_f1_1_no_b, X_test, y_test)
print(f1, precision, recall)

100%|██████████| 1/1 [14:43<00:00, 883.83s/it]

tensor(0.2563) tensor(0.2269) tensor(0.2945)



