In [52]:
# Deep Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio 

import os
import pandas as pd
import numpy as np
import time
from sklearn.metrics import f1_score,accuracy_score, precision_score, recall_score, classification_report, roc_auc_score
from scipy.special import softmax
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm import tqdm

## Define Model
    This model is identical to the CNN model, except that the last two dense layers are replaced by a two-layer LSTM (the first dense layer is kept).

In [39]:
# VAD model
class Conv_2d(nn.Module):
    """Convolutional building block: Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d.

    Args:
        input_channels: number of channels of the incoming feature map.
        output_channels: number of feature maps produced by the convolution.
        shape: side length of the square convolution kernel; the convolution
            is padded with ``shape // 2`` on every side.
        stride: stride of the convolution.
        pooling: kernel size handed to the max-pooling layer.
    """

    def __init__(self, input_channels, output_channels, shape=3, stride=1, pooling=2):
        super().__init__()
        half_kernel = shape // 2
        self.conv = nn.Conv2d(
            input_channels,
            output_channels,
            kernel_size=shape,
            stride=stride,
            padding=half_kernel,
        )
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(pooling)

    def forward(self, x):
        """Apply convolution, batch norm, ReLU and max pooling, in that order."""
        features = self.conv(x)
        features = self.bn(features)
        features = self.relu(features)
        return self.mp(features)

class VAD(nn.Module):
    """Convolutional-recurrent voice activity detector.

    The input is converted to a decibel scale with torchaudio's AmplitudeToDB,
    batch-normalised, passed through four ``Conv_2d`` blocks, flattened and
    projected to 512 features by a dense layer. The resulting vectors (one per
    input item) are then fed to a two-layer LSTM as ONE sequence, followed by
    dropout and a single-unit linear layer.
    """

    def __init__(self):
        super().__init__()
        # Input conditioning: normalisation layer and amplitude -> dB transform.
        self.a_norming = nn.BatchNorm2d(1)
        self.to_db = torchaudio.transforms.AmplitudeToDB()

        # Convolutional feature extractor (1 -> 32 -> 64 -> 128 -> 256 channels).
        self.conv1 = Conv_2d(1, 32)
        self.conv2 = Conv_2d(32, 64)
        self.conv3 = Conv_2d(64, 128)
        self.conv4 = Conv_2d(128, 256)

        # 10240 flattened features = 256 channels x 8 x 5, which is what a
        # 128 x 92 input becomes after four 2x2 poolings.
        self.a_fc1 = nn.Linear(10240, 512)

        # Recurrent part that takes the place of the CNN's later dense layers.
        self.lstm = nn.LSTM(input_size=512, hidden_size=128, num_layers=2, batch_first=True)

        self.drop = nn.Dropout(p=0.3)
        self.logits = nn.Linear(128, 1)

    def forward(self, audio_input):
        """Return ``(output, logits)`` for a stack of input windows.

        Args:
            audio_input: tensor whose first dimension indexes the windows; the
                remaining dimensions must flatten to 10240 features after the
                convolutional stack.

        Returns:
            output: sigmoid of ``logits``, one row per input window.
            logits: raw scores, one row per input window.
        """
        # Audio branch: dB conversion followed by batch normalisation.
        features = self.a_norming(self.to_db(audio_input))

        for conv_block in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = conv_block(features)

        num_windows = features.size(0)
        embeddings = F.relu(self.a_fc1(features.view(num_windows, -1)))

        # Add a leading batch axis of size 1, so the LSTM (batch_first=True)
        # treats the windows as the time steps of a single sequence.
        sequence = embeddings.unsqueeze(0)
        recurrent_out, _ = self.lstm(sequence)

        logits = self.logits(self.drop(recurrent_out))
        output = torch.sigmoid(logits)

        # Drop the artificial batch axis again: one row per input window.
        return output.squeeze(0), logits.squeeze(0)

In [40]:
# get VAD
def get_VAD(device):
    """Build a fresh VAD model together with its optimizer and loss.

    Args:
        device: device the model is moved to.

    Returns:
        Tuple ``(vad_model, optimizer, criterion)``: the model on ``device``,
        an Adam optimizer (lr 0.001, weight decay 1e-4) over its parameters,
        and a ``BCEWithLogitsLoss`` criterion.
    """
    vad_model = VAD()
    optimizer = optim.Adam(
        vad_model.parameters(),
        lr=0.001,
        weight_decay=1e-4,
    )
    # The loss works on raw logits, so it is fed the model's second output.
    criterion = nn.BCEWithLogitsLoss()
    vad_model.to(device)
    return vad_model, optimizer, criterion

In [41]:
# Training loop
def train_vad(vad_model, train_loader, optimizer, criterion):
    """Train ``vad_model`` in place, one optimisation step per recording.

    Every recording delivered by ``train_loader`` (batch size 1) is expanded
    into one window of ``FRAMES_3SEC`` frames per mel frame: the mel is
    zero-padded on both sides and a window is cut around every frame. All
    windows of a recording form a single model input, and the loss is computed
    against the per-frame labels.

    Reads the module-level globals ``NUM_EPOCHS``, ``FRAMES_3SEC`` and
    ``device``.

    Args:
        vad_model: model returning ``(probabilities, logits)``.
        train_loader: iterable yielding ``(mel, labels)`` with a mel of shape
            ``(1, 1, 128, num_frames)`` and one label per frame.
        optimizer: optimizer over the model parameters.
        criterion: loss applied to ``(logits, labels)``.

    Raises:
        ValueError: if the number of labels differs from the number of frames.
    """
    half_window = FRAMES_3SEC // 2

    for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times
        vad_model.train()
        with tqdm(train_loader, unit="batch") as tepoch:
            for data in tepoch:
                mel_in = data[0].to(device)
                num_frames = mel_in.shape[3]

                # One target per frame, shaped (num_frames, 1) like the logits.
                label_centerframe = data[1].to(device=device, dtype=torch.float32).reshape(-1, 1)
                if label_centerframe.shape[0] != num_frames:
                    raise ValueError(
                        "Expected one label per mel frame, got %d labels for %d frames"
                        % (label_centerframe.shape[0], num_frames)
                    )

                # Zero-pad directly on the target device so that every frame,
                # including the first and last, has a full window around it.
                padded_mel = torch.zeros(1, 1, 128, num_frames + FRAMES_3SEC, device=mel_in.device)
                padded_mel[:, :, :, half_window:half_window + num_frames] = mel_in

                # Sliding windows with stride 1: window i covers padded frames
                # [i, i + FRAMES_3SEC). Keeping the first num_frames windows
                # covers EVERY frame (the previous Python loop stopped one
                # frame early and left the last window as zeros).
                windows = padded_mel.unfold(3, FRAMES_3SEC, 1)[:, :, :, :num_frames]
                # (1, 1, 128, T, W) -> (T, 1, 128, W): one "batch item" per frame.
                partitioned_mels_3secs = windows[0].permute(2, 0, 1, 3).contiguous()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs, logits = vad_model(partitioned_mels_3secs)
                loss = criterion(logits, label_centerframe)
                loss.backward()
                optimizer.step()

                # Accuracy of the thresholded probabilities on this recording.
                rounded_output = torch.round(outputs.detach())
                sample_accuracy = (rounded_output == label_centerframe).sum().item() / label_centerframe.shape[0]

                tepoch.set_postfix(loss=loss.item(), sample_accuracy=sample_accuracy)

    print('Finished Training')

In [42]:
# Evaluation scripts
def evaluate_model(test_pred_prob, test_pred, test_classes):
    """Print and return the accuracy (in percent) and the ROC AUC.

    Args:
        test_pred_prob: predicted scores, passed to ``roc_auc_score``.
        test_pred: hard predictions, passed to ``accuracy_score``.
        test_classes: ground-truth classes.

    Returns:
        Tuple ``(accuracy, auc_roc)`` with the accuracy scaled to 0-100.
    """
    # Accuracy, reported as a percentage.
    accuracy = 100 * accuracy_score(test_classes, test_pred)
    print(f"Exact match accuracy is: {accuracy!s}%")

    # Area under the receiver operating characteristic curve.
    auc_roc = roc_auc_score(test_classes, test_pred_prob)
    print(f"Macro Area Under the Curve (AUC) is: {auc_roc!s}")

    return accuracy, auc_roc

In [43]:
# Evaluation of the model
def test_VAD(vad_model, test_loader, results_path):
    """Run ``vad_model`` over every recording of ``test_loader``.

    Each recording is expanded exactly as during training: the mel is
    zero-padded and one window of ``FRAMES_3SEC`` frames is cut around every
    frame; all windows of a recording are passed to the model at once.

    Reads the module-level globals ``FRAMES_3SEC`` and ``device``.

    Args:
        vad_model: model returning ``(probabilities, logits)``.
        test_loader: iterable yielding ``(mel, labels)`` with a mel of shape
            ``(1, 1, 128, num_frames)`` and one label per frame.
        results_path: currently unused; kept for interface compatibility.

    Returns:
        Three lists with one CPU tensor of shape ``(num_frames, 1)`` per
        recording: ground-truth labels, rounded predictions and probabilities.

    Raises:
        ValueError: if the number of labels differs from the number of frames.
    """
    # Per-recording results and ground truth.
    all_groundtruth = []
    all_predictions = []
    all_predictions_probs = []

    half_window = FRAMES_3SEC // 2
    num_recordings = len(test_loader)

    vad_model.eval()
    with torch.no_grad():
        for step, data in enumerate(test_loader):
            if step % 10 == 0:
                print("Sample number: " + str(step) + " out of: " + str(num_recordings))

            mel_in = data[0].to(device)
            num_frames = mel_in.shape[3]

            # One target per frame, shaped (num_frames, 1) like the model output.
            label_centerframe = data[1].to(device=device, dtype=torch.float32).reshape(-1, 1)
            if label_centerframe.shape[0] != num_frames:
                raise ValueError(
                    "Expected one label per mel frame, got %d labels for %d frames"
                    % (label_centerframe.shape[0], num_frames)
                )

            # Zero-pad on the target device so every frame has a full window.
            padded_mel = torch.zeros(1, 1, 128, num_frames + FRAMES_3SEC, device=mel_in.device)
            padded_mel[:, :, :, half_window:half_window + num_frames] = mel_in

            # Sliding windows with stride 1; the first num_frames windows cover
            # EVERY frame (the previous Python loop stopped one frame early and
            # scored an all-zero window for the last frame).
            windows = padded_mel.unfold(3, FRAMES_3SEC, 1)[:, :, :, :num_frames]
            # (1, 1, 128, T, W) -> (T, 1, 128, W): one "batch item" per frame.
            partitioned_mels_3secs = windows[0].permute(2, 0, 1, 3).contiguous()

            outputs, _ = vad_model(partitioned_mels_3secs)
            rounded_output = torch.round(outputs)

            all_groundtruth.append(label_centerframe.cpu())
            all_predictions.append(rounded_output.cpu())
            all_predictions_probs.append(outputs.cpu())

    return all_groundtruth, all_predictions, all_predictions_probs

## Make dataset pipeline

In [44]:
# Defining dataset pipeline 
class VAD_Dataset(torch.utils.data.Dataset):
    """Dataset over a directory of ``.npz`` archives holding 'mel' and 'labels'.

    Every entry of ``data_directory`` is treated as one sample. Entries are
    sorted by name so that sample indices are reproducible between runs
    (``os.listdir`` gives no ordering guarantee).

    Args:
        data_directory: folder containing the archives; a trailing path
            separator is optional.
        device: stored on the instance; not used by the dataset itself.
    """

    def __init__(self, data_directory,  device = 'cpu'):
        filenames = sorted(os.listdir(data_directory))
        self.df = pd.DataFrame(filenames)
        self.data_directory = data_directory
        self.device = device

    def __len__(self):
        """Number of files found in the data directory."""
        return len(self.df)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(spectrogram, label)`` tensors.

        A two-dimensional mel is given a leading singleton dimension, so the
        spectrogram is returned with a channel-like first axis.
        """
        file_id = self.df.iloc[index, 0]
        file_path = os.path.join(self.data_directory, str(file_id))

        # The arrays are read while the archive is open; the context manager
        # then closes the underlying file handle instead of leaking it.
        with np.load(file_path) as data:
            spectrogram = torch.from_numpy(data['mel'])
            label = torch.from_numpy(data['labels'])

        if spectrogram.dim() == 2:
            spectrogram = torch.unsqueeze(spectrogram, 0)

        return spectrogram, label

In [45]:
# initiating dataloader 
def initialize_dataloaders(trainDataDir, testDataDir):
    """Create the training and test data loaders.

    Both loaders use a batch size of 1: a "batch" for the model is built later
    by cutting each recording into windows around a moving central frame.

    Args:
        trainDataDir: directory with the training samples.
        testDataDir: directory with the test samples.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader
        shuffles.
    """
    train_instance = VAD_Dataset(trainDataDir)
    test_instance = VAD_Dataset(testDataDir)

    make_loader = torch.utils.data.DataLoader
    train_loader = make_loader(train_instance, batch_size=1, shuffle=True)
    test_loader = make_loader(test_instance, batch_size=1, shuffle=False)

    return train_loader, test_loader

## Train on original recording without spatial processing 
(NOTE: I did not rerun this part for the CRNN; the results presented below are from the original CNN model.)

In [80]:
# Input folders: mel/label archives of the recordings without spatial processing.
trainDataDir, testDataDir = (
    "/srv/workspace/research/mounted/vad_train_set/original_mels_labels/",
    "/srv/workspace/research/mounted/vad_test_set/original_mels_labels/",
)
# Output folders: evaluation results and saved model weights.
results_path, model_save_path = (
    "/srv/workspace/research/mounted/results/",
    "/srv/workspace/research/mounted/saved_models/",
)

In [81]:
# Experiment configuration. NUM_EPOCHS, FRAMES_3SEC and device are module-level
# globals that train_vad() and test_VAD() read directly.
NUM_EPOCHS = 10
#BATCH_SIZE = 32 # (Replaced batches with partitioned frames)
FRAMES_3SEC = 92  # width, in mel frames, of the window cut around every frame
# Early stop parameters (not applied yet)
#min_val_loss = 10**5 #just initialize with random big number 
#epochs_no_improve = 0
#n_epochs_stop = 10

# Prefer GPU index 2, but only when that index actually exists; otherwise fall
# back to the default GPU, or to the CPU when CUDA is unavailable. Requesting
# "cuda:2" unconditionally fails on hosts with fewer than three GPUs.
if torch.cuda.device_count() > 2:
    device = torch.device("cuda:2")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device: " + str(device))

train_loader, test_loader = initialize_dataloaders(trainDataDir, testDataDir)

# Train a fresh model on the recordings without spatial processing.
vad_model, optimizer, criterion = get_VAD(device)
train_vad(vad_model, train_loader, optimizer, criterion)

# Persist the trained weights, then release cached GPU memory.
model_name = model_save_path + "noSpatialProcessing"
torch.save(vad_model.state_dict(),model_name)
torch.cuda.empty_cache()
print("================================================================")

  0%|          | 0/957 [00:00<?, ?batch/s]

Using device: cuda:2


100%|██████████| 957/957 [03:38<00:00,  4.31batch/s, loss=0.059, sample_accuracy=0.983] 
100%|██████████| 957/957 [03:39<00:00,  4.41batch/s, loss=0.0276, sample_accuracy=0.993]
100%|██████████| 957/957 [03:38<00:00,  4.30batch/s, loss=0.0469, sample_accuracy=0.987]
100%|██████████| 957/957 [03:39<00:00,  4.29batch/s, loss=0.0452, sample_accuracy=0.984]
100%|██████████| 957/957 [03:39<00:00,  4.78batch/s, loss=0.0425, sample_accuracy=0.986]
100%|██████████| 957/957 [03:39<00:00,  4.21batch/s, loss=0.0339, sample_accuracy=0.989]
100%|██████████| 957/957 [03:38<00:00,  4.83batch/s, loss=0.0155, sample_accuracy=0.996]
100%|██████████| 957/957 [03:38<00:00,  4.09batch/s, loss=0.0465, sample_accuracy=0.983]
100%|██████████| 957/957 [03:38<00:00,  4.59batch/s, loss=0.0351, sample_accuracy=0.99]  
100%|██████████| 957/957 [03:38<00:00,  4.22batch/s, loss=0.0783, sample_accuracy=0.975] 

Finished Training





In [82]:
# Testing the model: run the trained network over every test recording. Each of
# the three returned lists holds one tensor per recording (ground-truth labels,
# rounded predictions and predicted probabilities).
all_groundtruth, all_predictions, all_predictions_probs = test_VAD(vad_model, test_loader, results_path)

Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96


In [83]:
# Formatting the output: merge the per-recording tensors into flat 1-D arrays
# with one entry per frame. Each tensor is converted to NumPy once and its
# first column is read, instead of converting every single row separately.
flat_gt = np.asarray(
    [value for per_file in all_groundtruth for value in per_file.numpy()[:, 0]]
)
flat_predictions_probs = np.asarray(
    [value for per_file in all_predictions_probs for value in per_file.numpy()[:, 0]]
)
flat_predictions = np.asarray(
    [value for per_file in all_predictions for value in per_file.numpy()[:, 0]]
)

In [84]:
# Evaluation: frame-level metrics over all test recordings.
is_correct = flat_predictions == flat_gt   # frames predicted correctly
is_speech = flat_gt == 1                   # frames labelled 1
is_nonspeech = flat_gt == 0                # frames labelled 0
num_speech = np.sum(flat_gt)

accuracy = is_correct.sum() / len(flat_gt)
print ("Accuracy = %0.2f%%" % (accuracy*100))

auc_roc = roc_auc_score(flat_gt, flat_predictions_probs)
print ("AUC = %0.3f" % auc_roc)

# Hit rate on the frames labelled 1 (printed as SHR; identical to recall).
true_positives_ratio_perclass = np.sum(is_correct * is_speech) / num_speech
print ("SHR =  %0.3f" % true_positives_ratio_perclass)

# Hit rate on the frames labelled 0 (printed as NHR).
true_negative_ratio_perclass = np.sum(is_correct * is_nonspeech) / (len(flat_gt) - num_speech)
print ("NHR =  %0.3f" % true_negative_ratio_perclass)

precision = precision_score(flat_gt, flat_predictions)
print ("Precision = %0.3f" % precision)

recall = recall_score(flat_gt, flat_predictions)
print ("Recall (same as SHR) =  %0.3f" % recall)

f1 = f1_score(flat_gt, flat_predictions)
print ("F1-score = %0.3f" % f1)

Accuracy = 92.64%
AUC = 0.985
SHR =  0.904
NHR =  0.966
Precision = 0.979
Recall (same as SHR) =  0.904
F1-score = 0.940


## Apply model on all possible spatial modules

In [51]:
# Spatial front-ends to evaluate. The names select the data folders
# "<name>_mels_labels/". The first group is delay-and-sum ("das") based; the
# second group carries the "mvdr" prefix (presumably MVDR beamforming).
_das_modules = ["das", "das_spectral", "das_wiener", "das_spectral_filtered", "das_wiener_filtered"]
_mvdr_modules = ["mvdr", "mvdr_spectral", "mvdr_wiener", "mvdr_spectral_filtered", "mvdr_wiener_filtered"]
spatial_modules = _das_modules + _mvdr_modules

In [53]:
# Training and testing the model with each spatial module.
# NUM_EPOCHS, FRAMES_3SEC and device are module-level globals that train_vad()
# and test_VAD() read directly.
NUM_EPOCHS = 5
FRAMES_3SEC = 92
results_path = "/srv/workspace/research/mounted/results/"
# NOTE: used as a filename PREFIX below: weights are written to
# ".../saved_models/crnn<spatial_module>" (no separator is inserted).
model_save_path = "/srv/workspace/research/mounted/saved_models/crnn"

# Prefer GPU index 2, but only when that index actually exists; otherwise fall
# back to the default GPU, or to the CPU when CUDA is unavailable. Requesting
# "cuda:2" unconditionally fails on hosts with fewer than three GPUs.
if torch.cuda.device_count() > 2:
    device = torch.device("cuda:2")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device: " + str(device))

for spatial_module in spatial_modules:
    print("================================================================")
    print("Running experiment: " + spatial_module)
    trainDataDir = "/srv/workspace/research/mounted/vad_train_set/" + spatial_module + "_mels_labels/"
    testDataDir = "/srv/workspace/research/mounted/vad_test_set/" + spatial_module + "_mels_labels/"

    train_loader, test_loader = initialize_dataloaders(trainDataDir, testDataDir)

    # Training the model (a fresh model per spatial module)
    vad_model, optimizer, criterion = get_VAD(device)
    train_vad(vad_model, train_loader, optimizer, criterion)
    model_name = model_save_path + spatial_module
    torch.save(vad_model.state_dict(),model_name)

    # Testing the model
    all_groundtruth, all_predictions, all_predictions_probs = test_VAD(vad_model, test_loader, results_path)

    # Merge the per-recording tensors into flat per-frame arrays; each tensor is
    # converted to NumPy once and its first column is read.
    flat_gt = np.asarray(
        [value for per_file in all_groundtruth for value in per_file.numpy()[:, 0]]
    )
    flat_predictions_probs = np.asarray(
        [value for per_file in all_predictions_probs for value in per_file.numpy()[:, 0]]
    )
    flat_predictions = np.asarray(
        [value for per_file in all_predictions for value in per_file.numpy()[:, 0]]
    )

    # Frame-level metrics over all test recordings of this module.
    is_correct = flat_predictions == flat_gt
    num_speech = np.sum(flat_gt)

    accuracy = is_correct.sum() / len(flat_gt)
    print ("Accuracy = %0.2f%%" % (accuracy*100))
    auc_roc = roc_auc_score(flat_gt, flat_predictions_probs)
    print ("AUC = %0.3f" % auc_roc)
    # Hit rate on the frames labelled 1.
    true_positives_ratio_perclass = np.sum(is_correct * (flat_gt == 1)) / num_speech
    print ("SHR (Recall) =  %0.3f" % true_positives_ratio_perclass)
    # Hit rate on the frames labelled 0.
    true_negative_ratio_perclass = np.sum(is_correct * (flat_gt == 0)) / (len(flat_gt) - num_speech)
    print ("NHR =  %0.3f" % true_negative_ratio_perclass)
    precision = precision_score(flat_gt, flat_predictions)
    print ("Precision = %0.3f" % precision)
    f1 = f1_score(flat_gt, flat_predictions)
    print ("F1-score = %0.3f" % f1)

    torch.cuda.empty_cache()
    print("================================================================\n\n")

  0%|          | 0/957 [00:00<?, ?batch/s]

Using device: cuda:2
Running experiment: das


100%|██████████| 957/957 [04:00<00:00,  4.23batch/s, loss=0.0764, sample_accuracy=0.973]
100%|██████████| 957/957 [04:01<00:00,  3.70batch/s, loss=0.109, sample_accuracy=0.956] 
100%|██████████| 957/957 [04:01<00:00,  5.42batch/s, loss=0.035, sample_accuracy=0.979] 
100%|██████████| 957/957 [04:01<00:00,  4.03batch/s, loss=0.0294, sample_accuracy=0.991]
100%|██████████| 957/957 [04:02<00:00,  3.66batch/s, loss=0.0786, sample_accuracy=0.968]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 91.51%
AUC = 0.985


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.875
NHR =  0.985
Precision = 0.990
F1-score = 0.929


Running experiment: das_spectral


100%|██████████| 957/957 [04:01<00:00,  3.76batch/s, loss=0.198, sample_accuracy=0.922] 
100%|██████████| 957/957 [04:01<00:00,  4.51batch/s, loss=0.0911, sample_accuracy=0.967]
100%|██████████| 957/957 [04:01<00:00,  4.83batch/s, loss=0.108, sample_accuracy=0.949] 
100%|██████████| 957/957 [04:01<00:00,  3.94batch/s, loss=0.0497, sample_accuracy=0.982]
100%|██████████| 957/957 [04:01<00:00,  3.64batch/s, loss=0.0424, sample_accuracy=0.985]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 96.40%
AUC = 0.994


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.971
NHR =  0.951
Precision = 0.972
F1-score = 0.972


Running experiment: das_wiener


100%|██████████| 957/957 [04:01<00:00,  3.81batch/s, loss=0.0832, sample_accuracy=0.964]
 15%|█▌        | 147/957 [00:37<03:09,  4.28batch/s, loss=0.0973, sample_accuracy=0.97] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 957/957 [04:01<00:00,  4.41batch/s, loss=0.486, sample_accuracy=0.86]  


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 62.65%
AUC = 0.918


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.416
NHR =  0.997
Precision = 0.995
F1-score = 0.587


Running experiment: das_spectral_filtered


100%|██████████| 957/957 [04:01<00:00,  4.21batch/s, loss=0.0954, sample_accuracy=0.967]
  2%|▏         | 23/957 [00:05<04:03,  3.83batch/s, loss=0.27, sample_accuracy=0.914]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 957/957 [04:00<00:00,  3.67batch/s, loss=0.0565, sample_accuracy=0.982]
100%|██████████| 957/957 [04:00<00:00,  3.89batch/s, loss=0.147, sample_accuracy=0.941] 
100%|██████████| 957/957 [04:01<00:00,  3.83batch/s, loss=0.0453, sample_accuracy=0.984] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 96.21%
AUC = 0.993


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.965
NHR =  0.957
Precision = 0.975
F1-score = 0.970


Running experiment: das_wiener_filtered


100%|██████████| 957/957 [04:01<00:00,  3.79batch/s, loss=0.199, sample_accuracy=0.925] 
100%|██████████| 957/957 [04:01<00:00,  4.18batch/s, loss=0.1, sample_accuracy=0.96]    
100%|██████████| 957/957 [04:01<00:00,  3.58batch/s, loss=0.0869, sample_accuracy=0.965]
100%|██████████| 957/957 [04:01<00:00,  5.49batch/s, loss=0.151, sample_accuracy=0.938] 
100%|██████████| 957/957 [04:01<00:00,  3.59batch/s, loss=0.0762, sample_accuracy=0.979]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.21%
AUC = 0.986


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.968
NHR =  0.896
Precision = 0.943
F1-score = 0.955


Running experiment: mvdr


100%|██████████| 957/957 [04:05<00:00,  3.52batch/s, loss=0.105, sample_accuracy=0.948] 
100%|██████████| 957/957 [04:02<00:00,  3.69batch/s, loss=0.0516, sample_accuracy=0.986]
100%|██████████| 957/957 [04:02<00:00,  4.82batch/s, loss=0.0298, sample_accuracy=0.986]
100%|██████████| 957/957 [04:02<00:00,  3.52batch/s, loss=0.217, sample_accuracy=0.931] 
100%|██████████| 957/957 [04:02<00:00,  3.81batch/s, loss=0.032, sample_accuracy=0.986] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 95.87%
AUC = 0.992


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.956
NHR =  0.964
Precision = 0.979
F1-score = 0.967


Running experiment: mvdr_spectral


100%|██████████| 957/957 [04:04<00:00,  3.61batch/s, loss=0.118, sample_accuracy=0.968] 
100%|██████████| 957/957 [04:01<00:00,  3.99batch/s, loss=0.206, sample_accuracy=0.906] 
100%|██████████| 957/957 [04:02<00:00,  3.77batch/s, loss=0.0959, sample_accuracy=0.97] 
100%|██████████| 957/957 [04:01<00:00,  3.70batch/s, loss=0.174, sample_accuracy=0.924] 
100%|██████████| 957/957 [04:01<00:00,  3.82batch/s, loss=0.0416, sample_accuracy=0.982]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 95.01%
AUC = 0.988


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.971
NHR =  0.913
Precision = 0.952
F1-score = 0.961


Running experiment: mvdr_wiener


100%|██████████| 957/957 [04:04<00:00,  3.66batch/s, loss=0.0869, sample_accuracy=0.97] 
100%|██████████| 957/957 [04:01<00:00,  3.78batch/s, loss=0.234, sample_accuracy=0.913] 
100%|██████████| 957/957 [04:01<00:00,  4.13batch/s, loss=0.0782, sample_accuracy=0.976]
100%|██████████| 957/957 [04:01<00:00,  3.63batch/s, loss=0.0957, sample_accuracy=0.967]
100%|██████████| 957/957 [04:01<00:00,  3.89batch/s, loss=0.109, sample_accuracy=0.956] 


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 94.12%
AUC = 0.986


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.948
NHR =  0.928
Precision = 0.959
F1-score = 0.954


Running experiment: mvdr_spectral_filtered


100%|██████████| 957/957 [04:02<00:00,  4.49batch/s, loss=0.068, sample_accuracy=0.972] 
100%|██████████| 957/957 [04:01<00:00,  3.83batch/s, loss=0.173, sample_accuracy=0.932] 
100%|██████████| 957/957 [04:01<00:00,  3.74batch/s, loss=0.0478, sample_accuracy=0.981]
100%|██████████| 957/957 [04:01<00:00,  4.02batch/s, loss=0.19, sample_accuracy=0.947]  
100%|██████████| 957/957 [04:01<00:00,  4.18batch/s, loss=0.0447, sample_accuracy=0.989]


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 95.21%
AUC = 0.986


  0%|          | 0/957 [00:00<?, ?batch/s]

SHR (Recall) =  0.967
NHR =  0.927
Precision = 0.959
F1-score = 0.963


Running experiment: mvdr_wiener_filtered


100%|██████████| 957/957 [04:02<00:00,  4.57batch/s, loss=0.109, sample_accuracy=0.96]  
100%|██████████| 957/957 [04:01<00:00,  3.65batch/s, loss=0.104, sample_accuracy=0.972] 
100%|██████████| 957/957 [04:02<00:00,  3.82batch/s, loss=0.128, sample_accuracy=0.961] 
100%|██████████| 957/957 [04:01<00:00,  4.13batch/s, loss=0.111, sample_accuracy=0.958] 
100%|██████████| 957/957 [04:01<00:00,  4.05batch/s, loss=0.169, sample_accuracy=0.93]  


Finished Training
Sample number: 0 out of: 96
Sample number: 10 out of: 96
Sample number: 20 out of: 96
Sample number: 30 out of: 96
Sample number: 40 out of: 96
Sample number: 50 out of: 96
Sample number: 60 out of: 96
Sample number: 70 out of: 96
Sample number: 80 out of: 96
Sample number: 90 out of: 96
Accuracy = 92.70%
AUC = 0.977
SHR (Recall) =  0.970
NHR =  0.851
Precision = 0.920
F1-score = 0.944


