In [1]:
import librosa
import numpy as np 
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
from tensorflow.python.keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Input, Dropout, BatchNormalization, Convolution2D, MaxPooling2D, GlobalMaxPool2D,Flatten
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping
from random import shuffle
from sklearn.metrics import confusion_matrix
import noisereduce as nr

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
  from tqdm.autonotebook import tqdm


In [2]:
######################
input_length = 29000*5
batch_size = 20
n_mels = 500
validation_steps = 10
######################

In [3]:
def preprocess_audio_mel_T(audio, sample_rate=29000, window_size=4, #log_specgram
                 step_size=10, eps=1e-10):

    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels= n_mels)
    mel_db = (librosa.power_to_db(mel_spec, ref=np.max))

    return (mel_db).T

def load_audio_file(file_path, input_length=input_length):
    data = librosa.core.load(file_path, sr=29000)[0] #, sr=16000
    if len(data)>input_length:
        
        
        max_offset = len(data)-input_length
        
        offset = np.random.randint(max_offset)
        
        data = data[offset:(input_length+offset)]
        
        
    else:
        if input_length > len(data):
            max_offset = input_length - len(data)

            offset = np.random.randint(max_offset)
        else:
            offset = 0
        
        
        data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
        
    noisy_part=data
    noise_reduced = nr.reduce_noise(audio_clip=data, noise_clip=noisy_part) 
    data = preprocess_audio_mel_T(noise_reduced)
    return data

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def train_generator(list_files, batch_size=batch_size):
    while True:
        shuffle(list_files)
        for batch_files in chunker(list_files, size=batch_size):
            batch_data = [load_audio_file(fpath) for fpath in batch_files]
            batch_data = np.array(batch_data)[:, :, :,np.newaxis]
            batch_labels = [file_to_int[fpath] for fpath in batch_files]
            batch_labels = np.array(batch_labels)
            
            yield batch_data, batch_labels
        

In [4]:
train_files = glob.glob('/home/camilo/Documents/covid-tos/dataset/test/train/*.wav')
train_labels = pd.read_csv('../../metadata/dataset_train_cnn_test.csv')
file_to_label = {'/home/camilo/Documents/covid-tos/dataset/test/train/' + k:v for k,v in zip(train_labels.filename.values, train_labels.category.values)}
list_labels = sorted(list(set(train_labels.category.values)))
label_to_int = {k:v for v,k in enumerate(list_labels)}
int_to_label = {v:k for k,v in label_to_int.items()}
file_to_int = {k:label_to_int[v] for k,v in file_to_label.items()}
nclass = len(list_labels)
tr_files, val_files = train_test_split(sorted(train_files), test_size=0.1, random_state=42)

In [5]:
(load_audio_file(tr_files[0])).shape

(284, 500)

In [6]:
def get_model_mel():
    nclass = len(list_labels)
    inp = Input(shape=(284, 500, 1))
    norm_inp = BatchNormalization()(inp)
    img_1 = MaxPooling2D(pool_size=(2, 2))(norm_inp)
    img_1 = Convolution2D(50, kernel_size=(5, 5), activation=activations.relu)(img_1)
    img_1 = MaxPooling2D(pool_size=(4, 4))(img_1)
    img_1 = Convolution2D(50, kernel_size=(5, 5), activation=activations.relu)(img_1)
    img_1 = MaxPooling2D(pool_size=(2, 2))(img_1)
    img_1 = Flatten()(img_1)

    dense_1 = Dense(128, activation=activations.relu)(img_1)
    dense_1= Dropout(rate=0.5)(dense_1)
    dense_1 = Dense(128, activation=activations.relu)(dense_1)
    dense_1= Dropout(rate=0.5)(dense_1)
    predictions = Dense(nclass, activation=activations.softmax)(dense_1)

    model = models.Model(inputs=inp, outputs=predictions)
    opt = optimizers.Adam()

    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model

In [7]:
model=get_model_mel()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 284, 500, 1)]     0         
_________________________________________________________________
batch_normalization (BatchNo (None, 284, 500, 1)       4         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 142, 250, 1)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 138, 246, 50)      1300      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 34, 61, 50)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 30, 57, 50)        62550     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 28, 50)       

In [8]:
model.fit_generator(train_generator(tr_files),
                    steps_per_epoch=len(tr_files)//batch_size, 
                    epochs=20,
                    validation_data=train_generator(val_files),
                    validation_steps=len(val_files)//batch_size
                   )

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/20


  log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f9748239f50>

In [9]:
test_label=pd.read_csv('../../metadata/dataset_test_cnn_test.csv')
test_files = glob.glob('../../dataset/test/test_data/*.wav')

In [10]:
batch_data = [load_audio_file(fpath) for fpath in test_files]
batch_data = np.array(batch_data)[:, :, :,np.newaxis]
preds = model.predict(batch_data).tolist()
predict_select=[ int_to_label[np.argmax(i) ]  for  i  in  preds ]

In [14]:
test_label

Unnamed: 0,filename,category
0,1-53663-A-24.wav,cough
1,48_Positivo_48_m_17-6-20_2.wav,cough
2,3-125418-A-24.wav,cough
3,31_Positivo_34_f_19-6-20_2.wav,cough
4,10_Positivo_25_f_17-6-20_1.wav,cough
5,2-108017-A-24.wav,cough
6,1-58792-A-24.wav,cough
7,19_Positivo_47_f_21-6-20_1.wav,cough
8,2-123896-A-24.wav,cough
9,1-63679-A-24.wav,cough


In [16]:
predict_select

['no_cough',
 'no_cough',
 'cough',
 'no_cough',
 'no_cough',
 'cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'no_cough',
 'cough',
 'no_cough']

In [11]:
cm=confusion_matrix(test_label.category,predict_select)
display(cm)
TN,FP,FN,TP = cm.ravel()
recall=TP/(TP+FN)
precision=TP/(TP +FP)

array([[ 3, 12],
       [ 1,  6]])

In [12]:
cm=confusion_matrix(test_label.category,predict_select)
display(cm)
TN,FP,FN,TP = cm.ravel()
recall=TP/(TP+FN)
precision=TP/(TP +FP)
accuracy=(TP+TN)/(TP+TN+FP+FN)
specifity=TN/(TN+FP)
FI_score=2 * (precision*recall / (precision + recall))
print('recall = ' + str(recall))
print('precision = ' +str(precision))
print('accuracy =' + str(accuracy))
print('specifity= ' +str(specifity))
print('FI_score =' + str(FI_score))

array([[ 3, 12],
       [ 1,  6]])

recall = 0.8571428571428571
precision = 0.3333333333333333
accuracy =0.4090909090909091
specifity= 0.2
FI_score =0.48
