# Report: testes com modelo SPIRAConvV1 e SPIRAConvV2

In [None]:
from 

In [None]:
from test import *
from utils.generic_utils import load_config, save_config_file
from utils.generic_utils import set_init_dict
from utils.generic_utils import NoamLR, binary_acc
from utils.generic_utils import save_best_checkpoint
from utils.tensorboard import TensorboardWriter
from models.spiraconv import SpiraConvV1, SpiraConvV2, UTF_SPIRA_ConvLSTM_v1
from utils.audio_processor import AudioProcessor 

In [None]:
import itertools
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt
import os
import math
import torch
import torch.nn as nn
import traceback
import pandas as pd
import time
import numpy as np

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torch import stack
import numpy as np
import pandas as pd
import random
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import torchaudio

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import soundfile as sf
import plotly.offline as py
import IPython
import IPython.display as ipd  # To play sound in the notebook
import plotly.graph_objs as go
import plotly.offline as offline
import plotly.tools as tls
import wave
import librosa
import librosa.display
from scipy.fftpack import fft
from scipy import signal
import pyloudnorm as pyln
import seaborn as sns

In [None]:
def plot_confusion_matrix(val_y, val_preds, unique_labels, show=True, output=None,
                          title='Confusion matrix', cmap=plt.cm.Oranges):
    """Draw the confusion matrix of ``val_preds`` against ``val_y``.

    Args:
        val_y: true labels.
        val_preds: predicted labels.
        unique_labels: display names used as tick labels on both axes, in the
            order of the matrix rows/columns.
        show: call ``plt.show()`` when true.
        output: optional path; when given the figure is saved there.
        title: figure title.
        cmap: matplotlib colormap used for the cells.

    Returns:
        ``output`` unchanged (``None`` when nothing was saved).
    """
    cm = confusion_matrix(val_y, val_preds)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # Set tick positions and their labels in one step; the previous version
    # first wrote numeric labels and then immediately overwrote them.
    tick_marks = np.arange(cm.shape[1])
    plt.xticks(tick_marks, unique_labels, rotation=45)
    plt.yticks(tick_marks, unique_labels)

    # Raw counts are integers, so print them without a spurious ".0".
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.1f'
    thresh = cm.max() / 1.4
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are taken into account.
    plt.tight_layout()
    if output is not None:
        plt.savefig(output)
    if show:
        plt.show()
    plt.close()
    return output

In [None]:
class Dataset(Dataset):
    """
    Map-style dataset that loads the audio files listed in a CSV and returns
    model features for each of them.

    Each CSV row is read positionally: column 0 is the audio path relative to
    the data root and column 1 is the class label. When
    ``c.data_aumentation['insert_noise']`` is set, random noise excerpts are
    mixed into control and patient samples before feature extraction.
    """
    def __init__(self, c, ap, train=True, max_seq_len=None, test=False):
        """
        Args:
            c: configuration object; it is indexed as ``c['seed']`` and through
                the attributes ``c.dataset``, ``c.audio`` and ``c.data_aumentation``.
            ap: audio processor providing ``load_wav`` and ``get_feature_from_audio``.
            train: use the train CSV/root when true, the eval ones otherwise.
            max_seq_len: fallback padding length when the config does not set one.
            test: when true, the test CSV/root override the train/eval choice.
        """
        # set random seed
        random.seed(c['seed'])
        torch.manual_seed(c['seed'])
        torch.cuda.manual_seed(c['seed'])
        np.random.seed(c['seed'])
        self.c = c
        self.ap = ap
        self.train = train
        self.dataset_csv = c.dataset['train_csv'] if train else c.dataset['eval_csv']
        self.dataset_root = c.dataset['train_data_root_path'] if train else c.dataset['eval_data_root_path']
        if test:
            self.dataset_csv = c.dataset['test_csv']
            self.dataset_root = c.dataset['test_data_root_path']
        self.noise_csv = c.dataset['noise_csv']
        self.noise_root = c.dataset['noise_data_root_path']
        assert os.path.isfile(self.dataset_csv),"Test or Train CSV file don't exists! Fix it in config.json"
        assert os.path.isfile(self.noise_csv),"Noise CSV file don't exists! Fix it in config.json"

        # read csvs
        self.dataset_list = pd.read_csv(self.dataset_csv, sep=',').values
        self.noise_list = pd.read_csv(self.noise_csv, sep=',').values
        # noise config: highest valid index, because random.randint is inclusive
        self.num_noise_files = len(self.noise_list)-1
        self.control_class = c.dataset['control_class']
        self.patient_class = c.dataset['patient_class']

        # get max seq length for padding
        if self.c.dataset['padding_with_max_lenght'] and train and not self.c.dataset['max_seq_len']:
            self.max_seq_len = 0
            for row in self.dataset_list:
                wav = self.ap.load_wav(os.path.join(self.dataset_root, row[0]))
                # calculate time step dim using hop length
                seq_len = int((wav.shape[1]/c.audio['hop_length'])+1)
                self.max_seq_len = max(self.max_seq_len, seq_len)
            print("The Max Time dim Lenght is: ", self.max_seq_len)
        else: # for eval, max_seq_len comes from the config or from the caller
            if self.c.dataset['max_seq_len']:
                self.max_seq_len = self.c.dataset['max_seq_len']
            else:
                self.max_seq_len = max_seq_len

    def get_max_seq_lenght(self):
        """Return the padding length used by this dataset (may be ``None``)."""
        return self.max_seq_len

    def _insert_noise(self, wav, num_noises):
        """Mix ``num_noises`` randomly chosen, randomly scaled noise excerpts into ``wav``.

        Raises:
            ValueError: if a selected noise file is not longer than ``wav``.
        """
        aug = self.c.data_aumentation
        wav_len = wav.shape[1]
        for _ in range(num_noises):
            # choose a random noise file
            noise_row = self.noise_list[random.randint(0, self.num_noise_files)]
            noise_wav = self.ap.load_wav(os.path.join(self.noise_root, noise_row[0]))
            noise_wav_len = noise_wav.shape[1]
            if noise_wav_len <= wav_len:
                # random.randint would fail with an opaque "empty range" error
                raise ValueError(
                    "Noise file '%s' (%d samples) must be longer than the audio (%d samples)"
                    % (noise_row[0], noise_wav_len, wav_len))
            # cut a random excerpt with the same length as the audio
            noise_start_slice = random.randint(0, noise_wav_len-(wav_len+1))
            noise_wav = noise_wav[:, noise_start_slice:noise_start_slice+wav_len]
            # scale the excerpt so that its maximum equals a random target amplitude
            max_amp = random.uniform(aug['noise_min_amp'], aug['noise_max_amp'])
            reduct_factor = max_amp/float(noise_wav.max().numpy())
            wav = wav + noise_wav*reduct_factor
        return wav

    def __getitem__(self, idx):
        """Return ``(feature, target, path)`` for the sample at ``idx``.

        With ``padding_with_max_lenght`` the feature is zero-padded at the end
        (or cut) to ``max_seq_len`` time steps and the target holds a single
        value; otherwise the target repeats the class once per time step.
        """
        path = os.path.join(self.dataset_root, self.dataset_list[idx][0])
        wav = self.ap.load_wav(path)
        class_name = self.dataset_list[idx][1]

        # noise files are assumed to be longer than the audio files
        aug = self.c.data_aumentation
        if aug['insert_noise']:
            if self.control_class == class_name: # if sample is a control sample
                wav = self._insert_noise(wav, aug['num_noise_control'])
            elif self.patient_class == class_name: # if sample is a patient sample
                wav = self._insert_noise(wav, aug['num_noise_patient'])

        # feature shape (Batch_size, n_features, timestamp)
        feature = self.ap.get_feature_from_audio(wav)
        # transpose for (Batch_size, timestamp, n_features)
        feature = feature.transpose(1,2)
        # remove batch dim = (timestamp, n_features)
        feature = feature.reshape(feature.shape[1:])
        if not self.c.dataset['padding_with_max_lenght']:
            # one target value per time step
            target = torch.zeros(feature.shape[0],1)+class_name
        else:
            pad_len = self.max_seq_len - feature.size(0)
            if pad_len < 0:
                # longer than the configured maximum: keep the first
                # max_seq_len steps instead of failing on a negative pad size
                feature = feature[:self.max_seq_len]
            else:
                # append zeros after the features up to max_seq_len
                zeros = torch.zeros(pad_len, feature.size(1))
                feature = torch.cat([feature, zeros], 0)
            target = torch.FloatTensor([class_name])
        return feature, target, path

    def __len__(self):
        """Number of rows in the dataset CSV."""
        return len(self.dataset_list)

In [None]:
def own_collate_fn(batch):
    """Collate ``(feature, target, path)`` samples into one padded batch.

    Features and targets are zero-padded along their first dimension to the
    longest item of the batch and stacked batch-first; paths are returned as
    a plain list in batch order.
    """
    features = [feature for feature, _, _ in batch]
    targets = [target for _, target, _ in batch]
    paths = [path for _, _, path in batch]

    # pad the time dimension with zeros
    padded_features = pad_sequence(features, batch_first=True, padding_value=0)
    # targets are zero-padded as well, so a padded step carries label 0
    padded_targets = pad_sequence(targets, batch_first=True, padding_value=0)
    return padded_features, padded_targets, paths

In [None]:
def TestDataloader(c, ap, max_seq_len=None):
    """Build the unshuffled DataLoader over the test split.

    Batch size and worker count come from ``c.test_config``; batches are
    assembled by ``own_collate_fn``.
    """
    test_set = Dataset(c, ap, train=False, test=True, max_seq_len=max_seq_len)
    loader_options = c.test_config
    return DataLoader(
        dataset=test_set,
        collate_fn=own_collate_fn,
        batch_size=loader_options['batch_size'],
        shuffle=False,
        num_workers=loader_options['num_workers'],
    )

In [None]:
def test(criterion, ap, model, c, testloader, step,  cuda, confusion_matrix=False):
    padding_with_max_lenght = c.dataset['padding_with_max_lenght']
    losses = []
    accs = []
    model.zero_grad()
    model.eval()
    loss = 0 
    acc = 0
    preds = []
    targets = []
    paths = []
    with torch.no_grad():
        for feature, target, path in testloader:       
            #try:
            if cuda:
                feature = feature.cuda()
                target = target.cuda()

            output = model(feature).float()
            paths.append(path)
            # output = torch.round(output * 10**4) / (10**4)

            # Calculate loss
            if not padding_with_max_lenght:
                target = target[:, :output.shape[1],:target.shape[2]]
            loss += criterion(output, target).item()

            # calculate binnary accuracy
            y_pred_tag = torch.round(output)
            acc += (y_pred_tag == target).float().sum().item()
            preds += y_pred_tag.reshape(-1).int().cpu().numpy().tolist()
            targets += target.reshape(-1).int().cpu().numpy().tolist()
        if confusion_matrix:
            print("======== Confusion Matrix ==========")
            y_target = pd.Series(targets, name='Target')
            y_pred = pd.Series(preds, name='Predicted')
            df_confusion = pd.crosstab(y_target, y_pred, rownames=['Target'], colnames=['Predicted'], margins=True)
            print(df_confusion)
            
        mean_acc = acc / len(testloader.dataset)
        mean_loss = loss / len(testloader.dataset)
    print("Test\n Loss:", mean_loss, "Acurracy: ", mean_acc)
    return mean_acc, df_confusion, preds, targets, paths

In [None]:
def run_test(config_path, cuda=True, *args, **kwargs):
    """Evaluate a saved SpiraConv checkpoint on a test CSV.

    Recognised keyword options: ``test_csv``, ``test_root_dir``, ``batch_size``,
    ``num_workers``, ``no_insert_noise`` and ``checkpoint_path``. An option that
    is not passed is read from the module-level variable of the same name,
    which is where every option used to come from.

    Args:
        config_path: path of the JSON config given to ``load_config``.
        cuda: run the model on the GPU when true.
        *args: ignored.
        **kwargs: the options listed above.

    Returns:
        The tuple returned by ``test``:
        ``(mean_acc, df_confusion, preds, targets, paths)``.

    Raises:
        NameError: an option is neither passed nor defined at module level.
        ValueError: ``checkpoint_path`` is ``None``, names no known
            architecture, or cannot be loaded into the model.
    """
    def _option(name):
        # An explicit keyword wins; otherwise fall back to the notebook variable.
        if name in kwargs:
            return kwargs[name]
        try:
            return globals()[name]
        except KeyError:
            raise NameError("name '%s' is not defined" % name) from None

    checkpoint_path = _option('checkpoint_path')
    # Validate before the substring tests below, which fail on None.
    if checkpoint_path is None:
        raise ValueError("You need pass a checkpoint_path")
    test_csv = _option('test_csv')
    test_root_dir = _option('test_root_dir')
    batch_size = _option('batch_size')
    num_workers = _option('num_workers')
    no_insert_noise = _option('no_insert_noise')

    c = load_config(config_path)
    ap = AudioProcessor(**c.audio)

    c.data_aumentation['insert_noise'] = not no_insert_noise
    print("Insert noise ?", c.data_aumentation['insert_noise'])

    c.dataset['test_csv'] = test_csv
    c.dataset['test_data_root_path'] = test_root_dir
    c.test_config['batch_size'] = batch_size
    c.test_config['num_workers'] = num_workers
    max_seq_len = c.dataset['max_seq_len']

    testloader = TestDataloader(c, ap, max_seq_len=max_seq_len)

    # define loss function
    criterion = nn.BCELoss(reduction='sum')

    # The architecture is inferred from the checkpoint path.
    if 'v1' in checkpoint_path:
        model = SpiraConvV1(c)
    elif 'v2' in checkpoint_path:
        model = SpiraConvV2(c)
    else:
        raise ValueError("Cannot infer the model version ('v1' or 'v2') from checkpoint_path: %s" % checkpoint_path)

    # No optimizer is created: evaluation never updates the weights.
    print("Loading checkpoint: %s" % checkpoint_path)
    try:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    except Exception as e:
        # str(e): concatenating the exception object itself raised TypeError
        raise ValueError("You need pass a valid checkpoint, may be you need check your config.json because de the of this checkpoint cause the error: " + str(e)) from e
    print("Model Sucessful Load !")
    step = checkpoint['step']

    # move the model to the GPU
    if cuda:
        model = model.cuda()

    model.train(False)
    return test(criterion, ap, model, c, testloader, step, cuda=cuda, confusion_matrix=True)

In [None]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    """Compute the natural-log spectrogram of ``audio``.

    ``window_size`` and ``step_size`` are given in milliseconds and converted
    to sample counts; ``step_size`` is what gets passed to scipy as ``noverlap``.
    ``eps`` is added before the logarithm so that zeros stay finite.

    Returns:
        ``(freqs, times, log_spec)`` where ``log_spec`` is the transposed
        float32 spectrogram, i.e. indexed ``[time, frequency]``.
    """
    def _ms_to_samples(duration_ms):
        return int(round(duration_ms * sample_rate / 1e3))

    freqs, times, power = signal.spectrogram(
        audio,
        fs=sample_rate,
        window='hann',
        nperseg=_ms_to_samples(window_size),
        noverlap=_ms_to_samples(step_size),
        detrend=False,
    )
    log_power = np.log(power.T.astype(np.float32) + eps)
    return freqs, times, log_power

In [None]:
def mfcc_specgram(audio, sample_rate, nframes, n_mfcc=13):
    """Return the librosa MFCC matrix of ``audio``.

    ``nframes`` is accepted but not used.
    """
    # Padding to a fixed number of frames is currently disabled:
    #     pad_width = max_pad_len - mfccs.shape[1]
    #     mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

In [None]:
def get_loudness(sr, b):
    """Return the integrated loudness of signal ``b`` measured at rate ``sr``.

    The value was previously computed and then discarded (the function
    returned ``None``).
    """
    meter = pyln.Meter(sr) # create BS.1770 meter
    return meter.integrated_loudness(b) # measure loudness
    
def plot_waveform(name, b):
    """Plot the samples of ``b`` against their index, titled with ``name``."""
    fig = plt.figure(figsize=(10, 4))
    ax2 = fig.add_subplot(211)
    ax2.plot(b)
    ax2.set_title('Waveform of ' + name)
    ax2.set_ylabel('Amplitude')
    # plot(b) uses the sample index as x; no sample rate is available here,
    # so the axis cannot be expressed in seconds.
    ax2.set_xlabel('Samples')
    plt.show()
    plt.close()
    
def plot_spectrogram(name, sr, b):
    """Show the log spectrogram of signal ``b`` (sample rate ``sr``), titled with ``name``.

    Also prints the shapes of the spectrogram, frequency and time arrays.
    """
    freqs, times, spectrogram = log_specgram(b, sr)
    print(spectrogram.shape, freqs.shape, times.shape)

    axes = plt.figure(figsize=(10, 5)).add_subplot(211)
    image_extent = [times.min(), times.max(), freqs.min(), freqs.max()]
    # the spectrogram is [time, frequency]; transpose so frequency is vertical
    axes.imshow(spectrogram.T, aspect='auto', origin='lower', extent=image_extent)
    axes.set_yticks(freqs[::40])
    axes.set_xticks(times[::400])
    axes.set_title('Spectrogram of ' + name)
    axes.set_ylabel('Freqs in Hz')
    axes.set_xlabel('Seconds')
    plt.show()
    plt.close()
    
def plot_mfcc(name, sr, b, nframes):
    """Show the 40-coefficient MFCC matrix of signal ``b``.

    ``name`` is accepted but not used in the figure.
    """
    mfccs = mfcc_specgram(b, sr, nframes, n_mfcc=40)
    plt.figure(figsize=(8, 3))
    librosa.display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')
    plt.tight_layout()
    plt.show()
    plt.close()
    
def load_wav_file(path):
    """Read an audio file with soundfile and return ``(sample_rate, samples)``."""
    samples, sample_rate = sf.read(path)
    return sample_rate, samples

def show_sample(path, target, pred, show_mfcc=True, show_spec=True, only_if_incorrect=True):
    """Print the target and predicted class names of one sample.

    Label 0 is printed as CONTROLE, anything else as PACIENTE. Correctly
    classified samples are skipped unless ``only_if_incorrect`` is false.
    Audio playback and the waveform/spectrogram/MFCC plots are disabled, so
    ``path``, ``show_mfcc`` and ``show_spec`` currently have no effect.
    """
    is_correct = target == pred
    if is_correct and only_if_incorrect:
        return

    def _class_name(label):
        return "CONTROLE" if label==0 else "PACIENTE"

#     sr, b = load_wav_file(path)
    print("target:", _class_name(target))
    print("pred:", _class_name(pred))
#     print('Playing', path)
#     IPython.display.display(ipd.Audio(path))
#     plot_waveform(path, b)
#     if show_spec:
#         plot_spectrogram(path, sr, b)
#     if show_mfcc:
#         plot_mfcc(path, sr, b, len(b))  

In [None]:
# Shell escape: print the CONDA_DEFAULT_ENV environment variable.
!echo $CONDA_DEFAULT_ENV

In [None]:
# Shell escape: list the contents of every entry under checkpoints/.
!ls checkpoints/*

In [None]:
def get_faixa_etaria(idade, intervalo=10):
    """Return the age-bracket label ``'Entre A e B'`` that contains ``idade``.

    Brackets start at 0 and advance by ``intervalo`` (a positive integer) up
    to, but not including, 100; the step used to be fixed at 10 regardless of
    ``intervalo``. Both ends are inclusive, so an age on a boundary is reported
    in the lower bracket. Returns ``None`` when no bracket contains the age.
    """
    for inicio in range(0, 100, intervalo):
        fim = inicio + intervalo
        if inicio <= idade <= fim:
            return f'Entre {inicio} e {fim}'
    return None


def show_info_errors(csv, paths_errors):
    """Placeholder for the error-demographics report; currently does nothing.

    The disabled implementation is kept below as comments.
    """
    return None
#     data = pd.read_csv(csv)
#     data = data[~data['file_path'].isin(paths_errors)]
    
#     total = len(data['sexo'])
#     sizes = [len(data[data['sexo']=='F'])/total, len(data[data['sexo']=='M'])/total]
#     fig1, ax1 = plt.subplots()
#     ax1.pie(sizes, labels=["Feminino", "Masculino"], autopct='%1.1f%%',
#             shadow=True, startangle=90)
#     ax1.axis('equal')
#     plt.title("Gênero (erros)")
#     plt.show()
#     plt.close()
    
    
#     controle = data[data['class']==0]
#     total = len(controle['sexo'])
#     sizes = [len(controle[controle['sexo']=='F'])/total, len(controle[controle['sexo']=='M'])/total]
#     fig1, ax1 = plt.subplots()
#     ax1.pie(sizes, labels=["Feminino", "Masculino"], autopct='%1.1f%%',
#             shadow=True, startangle=90)
#     ax1.axis('equal')
#     plt.title("Gênero no controle (erros)")
#     plt.show()
#     plt.close()
    
    
#     pacientes = data[data['class']==1]
#     total = len(pacientes['sexo'])
#     sizes = [len(pacientes[pacientes['sexo']=='F'])/total, len(pacientes[pacientes['sexo']=='M'])/total]
#     fig1, ax1 = plt.subplots()
#     ax1.pie(sizes, labels=["Feminino", "Masculino"], autopct='%1.1f%%',
#             shadow=True, startangle=90)
#     ax1.axis('equal')
#     plt.title("Gênero em pacientes (erros)")
#     plt.show()
#     plt.close()
    
#     plt.figure(figsize=(8,3))
#     evals = data['faixa_etaria'] = data['idade'].apply(get_faixa_etaria).value_counts()
#     sns.barplot(evals.index, evals.values)
#     plt.xticks(rotation='vertical')
#     plt.xlabel('Idade')
#     plt.ylabel('Frequência')
#     plt.title("Faixa etária (erros)")
#     plt.show()
#     plt.close()

## SPIRAConvV1-MEL_SPEC

### Validation - Without noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v1_75, noise insertion off.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Validation - With noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v1_75, noise insertion on.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - Without noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v1_75, noise insertion off.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - With noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v1_75, noise insertion on.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

## SPIRAConvV1-MEL_MFCC

### Validation - Without noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v1_75, noise insertion off.
# NOTE(review): same checkpoint/config as the SPIRAConvV1-MEL_SPEC section — confirm this is the intended MEL_MFCC run.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Validation - With noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v1_75, noise insertion on.
# NOTE(review): same checkpoint/config as the SPIRAConvV1-MEL_SPEC section — confirm this is the intended MEL_MFCC run.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - Without noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v1_75, noise insertion off.
# NOTE(review): same checkpoint/config as the SPIRAConvV1-MEL_SPEC section — confirm this is the intended MEL_MFCC run.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - With noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v1_75, noise insertion on.
# NOTE(review): same checkpoint/config as the SPIRAConvV1-MEL_SPEC section — confirm this is the intended MEL_MFCC run.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_75/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_75/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=False, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

## SPIRAConvV2-MEL_MFCC

### Validation - Without noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v2_78, noise insertion off.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v2_78/best_checkpoint.pt"
config = "checkpoints/spiraconv_v2_78/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=True, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Validation - With noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v2_78, noise insertion on.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v2_78/best_checkpoint.pt"
config = "checkpoints/spiraconv_v2_78/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=True, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - Without noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v2_78, noise insertion off.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v2_78/best_checkpoint.pt"
config = "checkpoints/spiraconv_v2_78/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=True, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

### Test - With noise

In [None]:
# Run settings: metadata_test.csv, checkpoint spiraconv_v2_78, noise insertion on.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v2_78/best_checkpoint.pt"
config = "checkpoints/spiraconv_v2_78/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = False

In [None]:
# Evaluate with the settings above; unpack accuracy, confusion table, predictions, targets and per-batch paths.
mean_acc, df_confusion, preds, targets, paths = run_test(config,
                                                         test_csv=test_csv, 
                                                         test_root_dir=test_root_dir,
                                                         batch_size=batch_size,
                                                         num_workers=num_workers,
                                                         no_insert_noise=no_insert_noise)

In [None]:
# Confusion matrix of the targets and predictions returned by the run above.
plot_confusion_matrix(targets, preds, unique_labels=["CONTROLE", "PACIENTE"])

In [None]:
# Print every misclassified sample and collect its path for the error report.
paths_errors = []
# `paths` holds one list of paths per batch; flatten it so every path lines up
# with its own target/prediction for any batch size, and store plain strings.
for path, target, pred in zip((p for batch_paths in paths for p in batch_paths), targets, preds):
    show_sample(path, target, pred, show_mfcc=True, show_spec=True)
    if target != pred:
        paths_errors.append(path)
show_info_errors(test_csv, paths_errors)

## SPIRAConvV1-MFCC

### Validation - Without noise

In [None]:
# Run settings: metadata_eval.csv, checkpoint spiraconv_v1_73, noise insertion off.
# checkpoint_path is not passed to run_test(); it is read from this notebook-level variable.
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"
test_root_dir = "../SPIRA_Dataset_V1/"
checkpoint_path = "checkpoints/spiraconv_v1_73/best_checkpoint.pt"
config = "checkpoints/spiraconv_v1_73/config.json"
batch_size = 1
num_workers = 2
no_insert_noise = True

In [None]:
# Run the evaluation with the settings from the configuration cell above.
# The returned preds / targets / paths feed the cells that follow.
mean_acc, df_confusion, preds, targets, paths = run_test(
    config,
    test_csv=test_csv,
    test_root_dir=test_root_dir,
    batch_size=batch_size,
    num_workers=num_workers,
    no_insert_noise=no_insert_noise,
)

In [None]:
# Plot the confusion matrix for the current `targets` / `preds`.
plot_confusion_matrix(
    targets,
    preds,
    unique_labels=["CONTROLE", "PACIENTE"],
)

In [None]:
# Call show_sample on every evaluated item (MFCC view only; the spectrogram
# view is switched off) and collect the entries whose prediction differs from
# the target.
# NOTE(review): the sample is shown via path[0] but the whole `path` entry is
# stored -- presumably a one-element batch (batch_size = 1); confirm against
# show_info_errors.
paths_errors = []
for path, target, pred in zip(paths, targets, preds):
    show_sample(path[0], target, pred, show_mfcc=True, show_spec=False)
    if not (target != pred):
        continue  # correct prediction: nothing to record
    paths_errors.append(path)
# Hand the misclassified entries and the metadata CSV path to show_info_errors.
show_info_errors(test_csv, paths_errors)

### Validation - With noise

In [None]:
# Settings for the "Validation - With noise" run of the spiraconv_v1_73
# checkpoint (SPIRAConvV1-MFCC section).

# Loader settings.
num_workers = 2
batch_size = 1

# Dataset location and the metadata file of the eval split.
test_root_dir = "../SPIRA_Dataset_V1/"
test_csv = "../SPIRA_Dataset_V1/metadata_eval.csv"

# Model configuration and weights.
# NOTE(review): checkpoint_path is not among the arguments of the run_test
# call in the next cell -- presumably it is read elsewhere; confirm.
config = "checkpoints/spiraconv_v1_73/config.json"
checkpoint_path = "checkpoints/spiraconv_v1_73/best_checkpoint.pt"

# Forwarded to run_test; False for the "with noise" variant.
no_insert_noise = False

In [None]:
# Run the evaluation with the settings from the configuration cell above.
# The returned preds / targets / paths feed the cells that follow.
mean_acc, df_confusion, preds, targets, paths = run_test(
    config,
    test_csv=test_csv,
    test_root_dir=test_root_dir,
    batch_size=batch_size,
    num_workers=num_workers,
    no_insert_noise=no_insert_noise,
)

In [None]:
# Plot the confusion matrix for the current `targets` / `preds`.
plot_confusion_matrix(
    targets,
    preds,
    unique_labels=["CONTROLE", "PACIENTE"],
)

In [None]:
# Call show_sample on every evaluated item (MFCC view only; the spectrogram
# view is switched off) and collect the entries whose prediction differs from
# the target.
# NOTE(review): the sample is shown via path[0] but the whole `path` entry is
# stored -- presumably a one-element batch (batch_size = 1); confirm against
# show_info_errors.
paths_errors = []
for path, target, pred in zip(paths, targets, preds):
    show_sample(path[0], target, pred, show_mfcc=True, show_spec=False)
    if not (target != pred):
        continue  # correct prediction: nothing to record
    paths_errors.append(path)
# Hand the misclassified entries and the metadata CSV path to show_info_errors.
show_info_errors(test_csv, paths_errors)

### Test - Without noise

In [None]:
# Settings for the "Test - Without noise" run of the spiraconv_v1_73
# checkpoint (SPIRAConvV1-MFCC section).

# Loader settings.
num_workers = 2
batch_size = 1

# Dataset location and the metadata file of the test split.
test_root_dir = "../SPIRA_Dataset_V1/"
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"

# Model configuration and weights.
# NOTE(review): checkpoint_path is not among the arguments of the run_test
# call in the next cell -- presumably it is read elsewhere; confirm.
config = "checkpoints/spiraconv_v1_73/config.json"
checkpoint_path = "checkpoints/spiraconv_v1_73/best_checkpoint.pt"

# Forwarded to run_test; True for the "without noise" variant.
no_insert_noise = True

In [None]:
# Run the evaluation with the settings from the configuration cell above.
# The returned preds / targets / paths feed the cells that follow.
mean_acc, df_confusion, preds, targets, paths = run_test(
    config,
    test_csv=test_csv,
    test_root_dir=test_root_dir,
    batch_size=batch_size,
    num_workers=num_workers,
    no_insert_noise=no_insert_noise,
)

In [None]:
# Plot the confusion matrix for the current `targets` / `preds`.
plot_confusion_matrix(
    targets,
    preds,
    unique_labels=["CONTROLE", "PACIENTE"],
)

In [None]:
# Call show_sample on every evaluated item (MFCC view only; the spectrogram
# view is switched off) and collect the entries whose prediction differs from
# the target.
# NOTE(review): the sample is shown via path[0] but the whole `path` entry is
# stored -- presumably a one-element batch (batch_size = 1); confirm against
# show_info_errors.
paths_errors = []
for path, target, pred in zip(paths, targets, preds):
    show_sample(path[0], target, pred, show_mfcc=True, show_spec=False)
    if not (target != pred):
        continue  # correct prediction: nothing to record
    paths_errors.append(path)
# Hand the misclassified entries and the metadata CSV path to show_info_errors.
show_info_errors(test_csv, paths_errors)

### Test - With noise

In [None]:
# Settings for the "Test - With noise" run of the spiraconv_v1_73
# checkpoint (SPIRAConvV1-MFCC section).

# Loader settings.
num_workers = 2
batch_size = 1

# Dataset location and the metadata file of the test split.
test_root_dir = "../SPIRA_Dataset_V1/"
test_csv = "../SPIRA_Dataset_V1/metadata_test.csv"

# Model configuration and weights.
# NOTE(review): checkpoint_path is not among the arguments of the run_test
# call in the next cell -- presumably it is read elsewhere; confirm.
config = "checkpoints/spiraconv_v1_73/config.json"
checkpoint_path = "checkpoints/spiraconv_v1_73/best_checkpoint.pt"

# Forwarded to run_test; False for the "with noise" variant.
no_insert_noise = False

In [None]:
# Run the evaluation with the settings from the configuration cell above.
# The returned preds / targets / paths feed the cells that follow.
mean_acc, df_confusion, preds, targets, paths = run_test(
    config,
    test_csv=test_csv,
    test_root_dir=test_root_dir,
    batch_size=batch_size,
    num_workers=num_workers,
    no_insert_noise=no_insert_noise,
)

In [None]:
# Plot the confusion matrix for the current `targets` / `preds`.
plot_confusion_matrix(
    targets,
    preds,
    unique_labels=["CONTROLE", "PACIENTE"],
)

In [None]:
# Call show_sample on every evaluated item (MFCC view only; the spectrogram
# view is switched off) and collect the entries whose prediction differs from
# the target.
# NOTE(review): the sample is shown via path[0] but the whole `path` entry is
# stored -- presumably a one-element batch (batch_size = 1); confirm against
# show_info_errors.
paths_errors = []
for path, target, pred in zip(paths, targets, preds):
    show_sample(path[0], target, pred, show_mfcc=True, show_spec=False)
    if not (target != pred):
        continue  # correct prediction: nothing to record
    paths_errors.append(path)
# Hand the misclassified entries and the metadata CSV path to show_info_errors.
show_info_errors(test_csv, paths_errors)