In [1]:
# Deep Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio 

import os
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score,accuracy_score, precision_score, recall_score, classification_report, roc_auc_score
from scipy.special import softmax
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm import tqdm

## Define Model
A CNN with four convolutional blocks, followed by fully connected layers, is applied to the mel spectrogram. Each prediction is made for the central frame of a 3-second window: every frame is joined with roughly 46 frames (1.5 seconds) before and after it to add context.

In [2]:
# VAD model
class Conv_2d(nn.Module):
    """Convolutional building block: Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d.

    Args:
        input_channels: number of channels of the incoming feature map.
        output_channels: number of channels produced by the convolution.
        shape: convolution kernel size; the padding is ``shape // 2``.
        stride: convolution stride.
        pooling: kernel size handed to ``nn.MaxPool2d``.
    """

    def __init__(self, input_channels, output_channels, shape=3, stride=1, pooling=2):
        super().__init__()
        pad = shape // 2
        # Attribute names are part of the saved state_dict keys; keep them stable.
        self.conv = nn.Conv2d(
            input_channels,
            output_channels,
            shape,
            stride=stride,
            padding=pad,
        )
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(pooling)

    def forward(self, x):
        """Run ``x`` through convolution, batch norm, ReLU and max pooling."""
        activated = self.relu(self.bn(self.conv(x)))
        return self.mp(activated)

class VAD(nn.Module):
    """Frame-level voice activity detector operating on mel-spectrogram windows.

    Pipeline: amplitude-to-dB conversion, batch normalisation of the single
    input channel, four ``Conv_2d`` blocks (32, 64, 128, 256 channels), three
    ReLU-activated linear layers (512, 256, 128 units), dropout (p=0.3) and a
    single-unit output layer.

    The first linear layer expects 10240 flattened features, which is what the
    four conv blocks produce for a 1 x 128 x 92 window (256 channels x 8 x 5).
    """

    def __init__(self):
        super().__init__()
        # Input conditioning
        self.a_norming = nn.BatchNorm2d(1)
        self.to_db = torchaudio.transforms.AmplitudeToDB()

        # Convolutional feature extractor
        self.conv1 = Conv_2d(1, 32)
        self.conv2 = Conv_2d(32, 64)
        self.conv3 = Conv_2d(64, 128)
        self.conv4 = Conv_2d(128, 256)

        # Fully connected head
        self.a_fc1 = nn.Linear(10240, 512)
        self.a_fc2 = nn.Linear(512, 256)
        self.a_fc3 = nn.Linear(256, 128)

        self.drop = nn.Dropout(p=0.3)
        self.logits = nn.Linear(128, 1)

    def forward(self, audio_input):
        """Return ``(probabilities, logits)`` for a batch of spectrogram windows.

        ``probabilities`` is the sigmoid of ``logits``; ``logits`` has one
        value per batch element.
        """
        # NOTE (original author): the dB conversion was flagged as possibly
        # requiring a newer torch/torchaudio version.
        features = self.a_norming(self.to_db(audio_input))

        for conv_block in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = conv_block(features)

        # Flatten everything but the batch dimension.
        features = features.view(features.size(0), -1)
        for dense in (self.a_fc1, self.a_fc2, self.a_fc3):
            features = F.relu(dense(features))

        logits = self.logits(self.drop(features))
        return torch.sigmoid(logits), logits

In [3]:
# get VAD
def get_VAD(device):
    """Create a fresh VAD model on ``device`` together with its optimizer and loss.

    Args:
        device: torch device the model is moved to.

    Returns:
        ``(vad_model, optimizer, criterion)``: the model, an Adam optimizer
        (lr=0.001, weight_decay=1e-4) over its parameters, and a
        ``BCEWithLogitsLoss`` criterion (so it must be fed logits, not
        probabilities).
    """
    criterion = nn.BCEWithLogitsLoss()
    vad_model = VAD()
    optimizer = optim.Adam(
        vad_model.parameters(),
        lr=0.001,
        weight_decay=1e-4,
    )
    vad_model.to(device)
    return vad_model, optimizer, criterion

In [52]:
# Training loop
def train_vad(vad_model, train_loader, optimizer, criterion, num_epochs=None, window_frames=None):
    """Train the VAD model on per-frame context windows cut from each recording.

    Every item of ``train_loader`` is one recording: a mel spectrogram indexed
    as ``(batch, channel, mel_bin, frame)`` plus one label per frame. The
    spectrogram is zero-padded along time and cut into one window of
    ``window_frames`` columns per frame, the frame itself sitting
    ``window_frames // 2`` columns into its window. All windows of a recording
    form one mini-batch.

    Args:
        vad_model: module returning ``(probabilities, logits)``.
        train_loader: iterable of ``(mel, labels)`` pairs.
        optimizer: optimizer over ``vad_model``'s parameters.
        criterion: loss called as ``criterion(logits, labels)``.
        num_epochs: passes over ``train_loader``; defaults to the
            notebook-level ``NUM_EPOCHS``.
        window_frames: window length in frames; defaults to the
            notebook-level ``FRAMES_3SEC``.
    """
    if num_epochs is None:
        num_epochs = NUM_EPOCHS
    if window_frames is None:
        window_frames = FRAMES_3SEC
    half_window = window_frames // 2

    # Run on whatever device the model lives on instead of a notebook global.
    first_param = next(vad_model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    for _ in range(num_epochs):  # loop over the dataset multiple times
        vad_model.train()
        with tqdm(train_loader, unit="batch") as tepoch:
            for data in tepoch:
                mel_in = data[0].to(model_device)
                labels = data[1].to(model_device)
                n_batch, n_chan, n_mels, num_frames = mel_in.shape
                if num_frames == 0:
                    # Nothing to learn from an empty recording.
                    continue

                # Pad with silence so that every frame has a full context window.
                padded_mel = torch.zeros(
                    n_batch, n_chan, n_mels, num_frames + window_frames, device=model_device
                )
                padded_mel[..., half_window:half_window + num_frames] = mel_in

                # unfold yields num_frames + 1 sliding windows; the extra one is
                # centred past the end of the recording and is dropped. Every
                # real frame, including the last one, gets its own window.
                windows = padded_mel.unfold(3, window_frames, 1)[:, :, :, :num_frames]
                partitioned_mels_3secs = windows.permute(0, 3, 1, 2, 4).reshape(
                    -1, n_chan, n_mels, window_frames
                )
                label_centerframe = (
                    labels.reshape(-1)[:partitioned_mels_3secs.shape[0]].reshape(-1, 1).float()
                )

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize (each "batch" is one recording's windows)
                outputs, logits = vad_model(partitioned_mels_3secs)
                loss = criterion(logits, label_centerframe)
                loss.backward()
                optimizer.step()

                rounded_output = torch.round(outputs.detach())
                sample_accuracy = (
                    (rounded_output == label_centerframe).sum().item() / label_centerframe.shape[0]
                )
                tepoch.set_postfix(loss=loss.item(), sample_accuracy=sample_accuracy)

    print('Finished Training')

In [19]:
# Evaluation scripts [Not used eventually]
def evaluate_model(test_pred_prob, test_pred, test_classes):
    """Print and return the accuracy (in percent) and the ROC AUC.

    Args:
        test_pred_prob: predicted scores, passed to ``roc_auc_score``.
        test_pred: predicted class labels, passed to ``accuracy_score``.
        test_classes: ground-truth labels.

    Returns:
        ``(accuracy, auc_roc)`` where ``accuracy`` is scaled to 0-100.
    """
    accuracy = accuracy_score(test_classes, test_pred) * 100
    print(f"Exact match accuracy is: {accuracy}%")

    # Area Under the Receiver Operating Characteristic Curve (ROC AUC)
    auc_roc = roc_auc_score(test_classes, test_pred_prob)
    print(f"Macro Area Under the Curve (AUC) is: {auc_roc}")

    return accuracy, auc_roc

In [51]:
# Evaluation of the model
def test_VAD(vad_model, test_loader, results_path):
    # Initialize lists where we save all results and GT
    all_groundtruth = []
    all_predictions = []
    all_predictions_probs = []
    
    vad_model.eval()
    with torch.no_grad():
        for step, data in enumerate(test_loader):
            if(step%10 == 0):
                print("Sample number: " + str(step) + " out of: " + str(len(test_loader)))
            mel_in = data[0].to(device)
            labels = torch.squeeze(data[1]).to(device)
            
            # Choosing 3 seconds partitioning -> 92 frames
            half_window = int(FRAMES_3SEC/2)
            padded_mel = torch.zeros(1,1,128,mel_in.shape[3] + FRAMES_3SEC) #Padding input to have 3 seconds of silence
            padded_mel[:,:,:,half_window:mel_in.shape[3]+half_window] = mel_in

            #for batch in np.arange(0,num_batches):
            partitioned_mels_3secs = torch.zeros(mel_in.shape[3],1,128,FRAMES_3SEC)
            label_centerframe = torch.zeros(mel_in.shape[3], 1)

            # Process all the frames (which starts from half_window in the padded mel, and lasts for all frames)
            for idx, central_frame in enumerate(np.arange(half_window,mel_in.shape[3]+half_window-1,1)):
                partitioned_mels_3secs[idx,:,:,:] = padded_mel[:,:,:,central_frame-half_window:central_frame+half_window]
                label_centerframe[idx] = labels[idx] # I should just use labels, now they are identical

            partitioned_mels_3secs = partitioned_mels_3secs.to(device)
            label_centerframe = label_centerframe.to(device)
            
            outputs, logits = vad_model(partitioned_mels_3secs)
            
            rounded_output = torch.round(outputs.data)
            
            all_groundtruth.append(label_centerframe.cpu())
            all_predictions.append(rounded_output.cpu())
            all_predictions_probs.append(outputs.cpu())

    #accuracy_out, auc_roc = evaluate_model(all_predictions_probs,all_predictions, all_groundtruth)
    #results = create_analysis_report(test_pred_prob, test_labels, labels_list)
    
    #np.save(results_path + "VAD_test_gt.npy", test_labels)
    #np.save(results_path + "VAD_pred_prob.npy", test_pred_prob)
    #results.to_csv(results_path + "VAD_report.csv")
    return all_groundtruth, all_predictions, all_predictions_probs

## Make dataset pipeline

In [7]:
# Defining dataset pipeline 
class VAD_Dataset(torch.utils.data.Dataset):
    """Dataset of per-recording mel spectrograms and frame labels.

    Every file in ``data_directory`` is expected to be a NumPy ``.npz``
    archive holding a ``'mel'`` array and a ``'labels'`` array.

    Args:
        data_directory: directory containing the archives (with or without a
            trailing separator).
        device: stored on the instance but not used by the dataset itself.
    """

    def __init__(self, data_directory, device='cpu'):
        # Sort so that the item order does not depend on the file system.
        filenames = sorted(os.listdir(data_directory))
        self.df = pd.DataFrame(filenames)
        self.data_directory = data_directory
        self.device = device

    def __len__(self):
        """Number of files found in the data directory."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(spectrogram, label)`` tensors for the ``index``-th file.

        A 2-D spectrogram gets a leading channel dimension added.
        """
        file_id = self.df.iloc[index, 0]
        file_path = os.path.join(self.data_directory, str(file_id))
        # Close the archive after reading so file handles do not pile up.
        with np.load(file_path) as data:
            spectrogram = torch.from_numpy(data['mel'])
            label = torch.from_numpy(data['labels'])
        if spectrogram.dim() == 2:
            spectrogram = torch.unsqueeze(spectrogram, 0)
        return spectrogram, label

In [8]:
# initiating dataloader 
def initialize_dataloaders(trainDataDir, testDataDir):
    """Build the train and test data loaders.

    The batch size is 1 because every recording is later split into its own
    mini-batch of per-frame windows around a moving central frame.

    Args:
        trainDataDir: directory with the training files.
        testDataDir: directory with the test files.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    datasets = {
        "train": VAD_Dataset(trainDataDir),
        "test": VAD_Dataset(testDataDir),
    }
    make_loader = torch.utils.data.DataLoader

    train_loader = make_loader(datasets["train"], batch_size=1, shuffle=True)
    test_loader = make_loader(datasets["test"], batch_size=1, shuffle=False)
    # No validation split is built yet.
    return train_loader, test_loader

## Train on original recording without spatial processing

In [80]:
# Locations for the baseline experiment (original recordings, no spatial processing).
# The data directories must end with "/" because VAD_Dataset builds file paths by
# appending the file name directly to the directory string.
trainDataDir = "/srv/workspace/research/mounted/vad_train_set/original_mels_labels/"
testDataDir = "/srv/workspace/research/mounted/vad_test_set/original_mels_labels/" 
# Passed to test_VAD, which does not write anything there at the moment.
results_path = "/srv/workspace/research/mounted/results/"
# Prefix for the saved state_dict files; the experiment name is appended to it.
model_save_path = "/srv/workspace/research/mounted/saved_models/"

In [81]:
# Baseline run configuration. train_vad / test_VAD read these as globals.
NUM_EPOCHS = 10   # passes over the training set
FRAMES_3SEC = 92  # length of the context window, in frames
# There is no BATCH_SIZE: the per-frame windows of one recording form a batch.
# Early stopping is not applied yet.

device = torch.device("cuda:2") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Data
train_loader, test_loader = initialize_dataloaders(trainDataDir, testDataDir)

# Model: build, train, then checkpoint the weights
vad_model, optimizer, criterion = get_VAD(device)
train_vad(vad_model, train_loader, optimizer, criterion)

model_name = f"{model_save_path}noSpatialProcessing"
torch.save(vad_model.state_dict(), model_name)
torch.cuda.empty_cache()
print("================================================================")

  0%|          | 0/957 [00:00<?, ?batch/s]

Using device: cuda:2


100%|██████████| 957/957 [03:38<00:00,  4.31batch/s, loss=0.059, sample_accuracy=0.983] 
100%|██████████| 957/957 [03:39<00:00,  4.41batch/s, loss=0.0276, sample_accuracy=0.993]
100%|██████████| 957/957 [03:38<00:00,  4.30batch/s, loss=0.0469, sample_accuracy=0.987]
100%|██████████| 957/957 [03:39<00:00,  4.29batch/s, loss=0.0452, sample_accuracy=0.984]
100%|██████████| 957/957 [03:39<00:00,  4.78batch/s, loss=0.0425, sample_accuracy=0.986]
100%|██████████| 957/957 [03:39<00:00,  4.21batch/s, loss=0.0339, sample_accuracy=0.989]
100%|██████████| 957/957 [03:38<00:00,  4.83batch/s, loss=0.0155, sample_accuracy=0.996]
100%|██████████| 957/957 [03:38<00:00,  4.09batch/s, loss=0.0465, sample_accuracy=0.983]
100%|██████████| 957/957 [03:38<00:00,  4.59batch/s, loss=0.0351, sample_accuracy=0.99]  
100%|██████████| 957/957 [03:38<00:00,  4.22batch/s, loss=0.0783, sample_accuracy=0.975] 

Finished Training





In [82]:
# Run the trained baseline model on the test set. Each returned list holds one
# tensor per test recording: frame labels, rounded predictions and probabilities.
all_groundtruth, all_predictions, all_predictions_probs = test_VAD(vad_model, test_loader, results_path)

Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96


In [83]:
# Properly format the output for evaluation: concatenate the per-recording
# (num_frames, 1) tensors and keep the single column, giving one 1-D array
# with an entry per frame. torch.cat avoids a Python-level loop over frames.
flat_gt = torch.cat(all_groundtruth).numpy()[:, 0]
flat_predictions_probs = torch.cat(all_predictions_probs).numpy()[:, 0]
flat_predictions = torch.cat(all_predictions).numpy()[:, 0]

In [84]:
# Evaluating the model on the flattened per-frame outputs
is_correct = flat_predictions == flat_gt

accuracy = is_correct.sum() / len(flat_gt)
print("Accuracy = %0.2f%%" % (accuracy * 100))

auc_roc = roc_auc_score(flat_gt, flat_predictions_probs)
print("AUC = %0.3f" % auc_roc)

# Speech hit rate: correctly classified speech frames over all speech frames
true_positives_ratio_perclass = (is_correct * (flat_gt == 1)).sum() / flat_gt.sum()
print("SHR =  %0.3f" % true_positives_ratio_perclass)

# Non-speech hit rate: correctly classified non-speech frames over all non-speech frames
true_negative_ratio_perclass = (is_correct * (flat_gt == 0)).sum() / (len(flat_gt) - flat_gt.sum())
print("NHR =  %0.3f" % true_negative_ratio_perclass)

precision = precision_score(flat_gt, flat_predictions)
print("Precision = %0.3f" % precision)

recall = recall_score(flat_gt, flat_predictions)
print("Recall (same as SHR) =  %0.3f" % recall)

f1 = f1_score(flat_gt, flat_predictions)
print("F1-score = %0.3f" % f1)

Accuracy = 92.64%
AUC = 0.985
SHR =  0.904
NHR =  0.966
Precision = 0.979
Recall (same as SHR) =  0.904
F1-score = 0.940


## Apply model on all possible spatial modules

In [86]:
# Delay-and-sum based spatial modules. Each name selects the
# "<name>_mels_labels/" train and test directories used by the experiment loop.
# The mvdr variants (left commented out below) were not run together with these
# because their data was not ready yet.
spatial_modules = ["das", "das_spectral", "das_wiener", "das_spectral_filtered", "das_wiener_filtered"]
#                  ,"mvdr", "mvdr_spectral", "mvdr_wiener", "mvdr_spectral_filtered", "mvdr_wiener_filtered"]

In [87]:
# The main training and testing loop: one freshly initialised VAD model is
# trained, saved and evaluated per spatial module.
NUM_EPOCHS = 5
FRAMES_3SEC = 92
results_path = "/srv/workspace/research/mounted/results/"
model_save_path = "/srv/workspace/research/mounted/saved_models/"
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device))

# Iterate through all the spatial modules
for spatial_module in spatial_modules:
    print("================================================================")
    print("Running experiment: " + spatial_module)
    # Load the preprocessed datasets based on the module
    trainDataDir = "/srv/workspace/research/mounted/vad_train_set/" + spatial_module + "_mels_labels/"
    testDataDir = "/srv/workspace/research/mounted/vad_test_set/" + spatial_module + "_mels_labels/"
    train_loader, test_loader = initialize_dataloaders(trainDataDir, testDataDir)

    # Training the model
    vad_model, optimizer, criterion = get_VAD(device)
    train_vad(vad_model, train_loader, optimizer, criterion)
    model_name = model_save_path + spatial_module
    torch.save(vad_model.state_dict(), model_name)

    # Testing the model
    all_groundtruth, all_predictions, all_predictions_probs = test_VAD(vad_model, test_loader, results_path)
    # Concatenate the per-recording (num_frames, 1) tensors into 1-D arrays
    # without looping over individual frames in Python.
    flat_gt = torch.cat(all_groundtruth).numpy()[:, 0]
    flat_predictions_probs = torch.cat(all_predictions_probs).numpy()[:, 0]
    flat_predictions = torch.cat(all_predictions).numpy()[:, 0]

    is_correct = flat_predictions == flat_gt
    accuracy = is_correct.sum() / len(flat_gt)
    print("Accuracy = %0.2f%%" % (accuracy * 100))
    auc_roc = roc_auc_score(flat_gt, flat_predictions_probs)
    print("AUC = %0.3f" % auc_roc)
    # Speech hit rate: correct speech frames over all speech frames
    true_positives_ratio_perclass = (is_correct * (flat_gt == 1)).sum() / flat_gt.sum()
    print("SHR (Recall) =  %0.3f" % true_positives_ratio_perclass)
    # Non-speech hit rate: correct non-speech frames over all non-speech frames
    true_negative_ratio_perclass = (is_correct * (flat_gt == 0)).sum() / (len(flat_gt) - flat_gt.sum())
    print("NHR =  %0.3f" % true_negative_ratio_perclass)
    precision = precision_score(flat_gt, flat_predictions)
    print("Precision = %0.3f" % precision)
    f1 = f1_score(flat_gt, flat_predictions)
    print("F1-score = %0.3f" % f1)

    torch.cuda.empty_cache()
    print("================================================================\n\n")

  0%|          | 0/957 [00:00<?, ?batch/s]

Using device: cuda:2
Running experiment: das


100%|██████████| 957/957 [03:41<00:00,  4.56batch/s, loss=0.0632, sample_accuracy=0.978]
100%|██████████| 957/957 [03:45<00:00,  4.11batch/s, loss=0.0656, sample_accuracy=0.977]
100%|██████████| 957/957 [03:43<00:00,  3.74batch/s, loss=0.0684, sample_accuracy=0.971]
100%|██████████| 957/957 [03:45<00:00,  4.07batch/s, loss=0.064, sample_accuracy=0.979] 
100%|██████████| 957/957 [03:45<00:00,  3.97batch/s, loss=0.0449, sample_accuracy=0.983]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.72%
AUC = 0.991


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.936
NHR =  0.966
Precision = 0.980
F1-score = 0.958


Running experiment: das_spectral


100%|██████████| 957/957 [03:43<00:00,  4.11batch/s, loss=0.0641, sample_accuracy=0.978]
100%|██████████| 957/957 [03:44<00:00,  3.97batch/s, loss=0.067, sample_accuracy=0.983] 
100%|██████████| 957/957 [03:45<00:00,  3.68batch/s, loss=0.0676, sample_accuracy=0.978]
100%|██████████| 957/957 [03:45<00:00,  5.43batch/s, loss=0.0256, sample_accuracy=0.988] 
100%|██████████| 957/957 [03:45<00:00,  3.86batch/s, loss=0.0868, sample_accuracy=0.974] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 96.41%
AUC = 0.993


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.976
NHR =  0.943
Precision = 0.968
F1-score = 0.972


Running experiment: das_wiener


100%|██████████| 957/957 [03:44<00:00,  3.89batch/s, loss=0.0938, sample_accuracy=0.972]
100%|██████████| 957/957 [03:45<00:00,  5.09batch/s, loss=0.0309, sample_accuracy=0.99] 
100%|██████████| 957/957 [03:43<00:00,  6.02batch/s, loss=0.0839, sample_accuracy=0.964]
100%|██████████| 957/957 [03:43<00:00,  4.48batch/s, loss=0.0847, sample_accuracy=0.965]
100%|██████████| 957/957 [03:44<00:00,  3.94batch/s, loss=0.0753, sample_accuracy=0.966]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.13%
AUC = 0.984


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.958
NHR =  0.912
Precision = 0.951
F1-score = 0.954


Running experiment: das_spectral_filtered


100%|██████████| 957/957 [03:45<00:00,  4.05batch/s, loss=0.256, sample_accuracy=0.955] 
100%|██████████| 957/957 [03:44<00:00,  4.42batch/s, loss=0.0481, sample_accuracy=0.983]
100%|██████████| 957/957 [03:44<00:00,  4.78batch/s, loss=0.0433, sample_accuracy=0.98] 
100%|██████████| 957/957 [03:43<00:00,  4.20batch/s, loss=0.0406, sample_accuracy=0.984]
100%|██████████| 957/957 [03:44<00:00,  4.06batch/s, loss=0.0664, sample_accuracy=0.979]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 93.89%
AUC = 0.991
SHR (Recall) =  0.916
NHR =  0.980
Precision = 0.988
F1-score = 0.950


Running experiment: das_wiener_filtered


100%|██████████| 957/957 [03:43<00:00,  4.16batch/s, loss=0.0631, sample_accuracy=0.975]
100%|██████████| 957/957 [03:45<00:00,  4.27batch/s, loss=0.0363, sample_accuracy=0.991]
100%|██████████| 957/957 [03:44<00:00,  4.80batch/s, loss=0.0352, sample_accuracy=0.99] 
100%|██████████| 957/957 [03:44<00:00,  4.14batch/s, loss=0.0888, sample_accuracy=0.969]
100%|██████████| 957/957 [03:44<00:00,  4.42batch/s, loss=0.114, sample_accuracy=0.975] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.44%
AUC = 0.987
SHR (Recall) =  0.958
NHR =  0.920
Precision = 0.955
F1-score = 0.956




In [88]:
# mvdr based spatial modules (the experiments were rerun once the data was ready).
# Each name selects the "<name>_mels_labels/" train and test directories used by
# the experiment loop; this assignment replaces the earlier delay-and-sum list.
spatial_modules = ["mvdr", "mvdr_spectral", "mvdr_wiener", "mvdr_spectral_filtered", "mvdr_wiener_filtered"]

In [90]:
# Same experiment protocol as for the delay-and-sum modules, applied to the
# current contents of `spatial_modules`.
NUM_EPOCHS = 5
FRAMES_3SEC = 92
results_path = "/srv/workspace/research/mounted/results/"
model_save_path = "/srv/workspace/research/mounted/saved_models/"
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device))

for spatial_module in spatial_modules:
    print("================================================================")
    print("Running experiment: " + spatial_module)
    trainDataDir = "/srv/workspace/research/mounted/vad_train_set/" + spatial_module + "_mels_labels/"
    testDataDir = "/srv/workspace/research/mounted/vad_test_set/" + spatial_module + "_mels_labels/"

    train_loader, test_loader = initialize_dataloaders(trainDataDir, testDataDir)

    # Training the model
    vad_model, optimizer, criterion = get_VAD(device)
    train_vad(vad_model, train_loader, optimizer, criterion)
    model_name = model_save_path + spatial_module
    torch.save(vad_model.state_dict(), model_name)

    # Testing the model
    all_groundtruth, all_predictions, all_predictions_probs = test_VAD(vad_model, test_loader, results_path)
    # Concatenate the per-recording (num_frames, 1) tensors into 1-D arrays
    # without looping over individual frames in Python.
    flat_gt = torch.cat(all_groundtruth).numpy()[:, 0]
    flat_predictions_probs = torch.cat(all_predictions_probs).numpy()[:, 0]
    flat_predictions = torch.cat(all_predictions).numpy()[:, 0]

    is_correct = flat_predictions == flat_gt
    accuracy = is_correct.sum() / len(flat_gt)
    print("Accuracy = %0.2f%%" % (accuracy * 100))
    auc_roc = roc_auc_score(flat_gt, flat_predictions_probs)
    print("AUC = %0.3f" % auc_roc)
    # Speech hit rate: correct speech frames over all speech frames
    true_positives_ratio_perclass = (is_correct * (flat_gt == 1)).sum() / flat_gt.sum()
    print("SHR (Recall) =  %0.3f" % true_positives_ratio_perclass)
    # Non-speech hit rate: correct non-speech frames over all non-speech frames
    true_negative_ratio_perclass = (is_correct * (flat_gt == 0)).sum() / (len(flat_gt) - flat_gt.sum())
    print("NHR =  %0.3f" % true_negative_ratio_perclass)
    precision = precision_score(flat_gt, flat_predictions)
    print("Precision = %0.3f" % precision)
    f1 = f1_score(flat_gt, flat_predictions)
    print("F1-score = %0.3f" % f1)

    torch.cuda.empty_cache()
    print("================================================================\n\n")

  0%|          | 0/957 [00:00<?, ?batch/s]

Using device: cuda:2
Running experiment: mvdr


100%|██████████| 957/957 [03:40<00:00,  4.05batch/s, loss=0.0575, sample_accuracy=0.98] 
100%|██████████| 957/957 [03:40<00:00,  4.07batch/s, loss=0.0813, sample_accuracy=0.97] 
100%|██████████| 957/957 [03:40<00:00,  3.93batch/s, loss=0.0443, sample_accuracy=0.981] 
100%|██████████| 957/957 [03:40<00:00,  4.12batch/s, loss=0.0687, sample_accuracy=0.972]
100%|██████████| 957/957 [03:40<00:00,  3.72batch/s, loss=0.056, sample_accuracy=0.98]  


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.75%
AUC = 0.992


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.935
NHR =  0.969
Precision = 0.982
F1-score = 0.958


Running experiment: mvdr_spectral


100%|██████████| 957/957 [03:41<00:00,  4.09batch/s, loss=0.236, sample_accuracy=0.905] 
100%|██████████| 957/957 [03:41<00:00,  4.60batch/s, loss=0.0535, sample_accuracy=0.979]
100%|██████████| 957/957 [03:40<00:00,  5.27batch/s, loss=0.0329, sample_accuracy=0.978] 
100%|██████████| 957/957 [03:41<00:00,  4.26batch/s, loss=0.165, sample_accuracy=0.939] 
100%|██████████| 957/957 [03:40<00:00,  3.93batch/s, loss=0.0752, sample_accuracy=0.979]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.67%
AUC = 0.990


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.985
NHR =  0.880
Precision = 0.935
F1-score = 0.959


Running experiment: mvdr_wiener


100%|██████████| 957/957 [03:40<00:00,  4.88batch/s, loss=0.297, sample_accuracy=0.846] 
100%|██████████| 957/957 [03:40<00:00,  4.66batch/s, loss=0.0517, sample_accuracy=0.979]
100%|██████████| 957/957 [03:40<00:00,  4.13batch/s, loss=0.0805, sample_accuracy=0.975]
100%|██████████| 957/957 [03:40<00:00,  4.22batch/s, loss=0.104, sample_accuracy=0.937] 
100%|██████████| 957/957 [03:40<00:00,  4.20batch/s, loss=0.0721, sample_accuracy=0.973]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 92.65%
AUC = 0.979


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.936
NHR =  0.911
Precision = 0.948
F1-score = 0.942


Running experiment: mvdr_spectral_filtered


100%|██████████| 957/957 [03:40<00:00,  4.52batch/s, loss=0.0394, sample_accuracy=0.979]
100%|██████████| 957/957 [03:40<00:00,  4.87batch/s, loss=0.0593, sample_accuracy=0.978]
100%|██████████| 957/957 [03:41<00:00,  4.46batch/s, loss=0.053, sample_accuracy=0.974] 
100%|██████████| 957/957 [03:40<00:00,  5.05batch/s, loss=0.0701, sample_accuracy=0.98] 
100%|██████████| 957/957 [03:40<00:00,  4.45batch/s, loss=0.12, sample_accuracy=0.945]   


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 93.84%
AUC = 0.982


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.957
NHR =  0.907
Precision = 0.947
F1-score = 0.952


Running experiment: mvdr_wiener_filtered


100%|██████████| 957/957 [03:41<00:00,  4.00batch/s, loss=0.362, sample_accuracy=0.861] 
100%|██████████| 957/957 [03:40<00:00,  4.04batch/s, loss=0.0654, sample_accuracy=0.979]
100%|██████████| 957/957 [03:40<00:00,  4.85batch/s, loss=0.13, sample_accuracy=0.961]  
100%|██████████| 957/957 [03:40<00:00,  3.89batch/s, loss=0.196, sample_accuracy=0.903] 
100%|██████████| 957/957 [03:40<00:00,  4.03batch/s, loss=0.0757, sample_accuracy=0.97] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 93.33%
AUC = 0.982
SHR (Recall) =  0.963
NHR =  0.881
Precision = 0.934
F1-score = 0.948




## Apply post-processing
Post-processing of the frame-level predictions was planned but has not been implemented in this notebook.