In [1]:
import librosa
import numpy as np 
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
from tensorflow.python.keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Input, Dropout, BatchNormalization, Convolution2D, MaxPooling2D, GlobalMaxPool2D,Flatten
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping
from random import shuffle
from sklearn.metrics import confusion_matrix
import noisereduce as nr

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
  from tqdm.autonotebook import tqdm


In [22]:
######################
input_length = 29000*5
batch_size =  20
n_mels = 500
validation_steps = 50
######################

In [23]:
def preprocess_audio_mel_T(audio, sample_rate=29000, window_size=4, #log_specgram
                 step_size=10, eps=1e-10):

    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels= n_mels)
    mel_db = (librosa.power_to_db(mel_spec, ref=np.max))

    return (mel_db).T

def load_audio_file(file_path, input_length=input_length):
    data = librosa.core.load(file_path, sr=29000)[0] #, sr=16000
    if len(data)>input_length:
        
        
        max_offset = len(data)-input_length
        
        offset = np.random.randint(max_offset)
        
        data = data[offset:(input_length+offset)]
        
        
    else:
        if input_length > len(data):
            max_offset = input_length - len(data)

            offset = np.random.randint(max_offset)
        else:
            offset = 0
        
        
        data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
        

    data = preprocess_audio_mel_T(data)
    return data

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def train_generator(list_files, batch_size=batch_size):
    while True:
        shuffle(list_files)
        for batch_files in chunker(list_files, size=batch_size):
            batch_data = [load_audio_file(fpath) for fpath in batch_files]
            batch_data = np.array(batch_data)[:, :, :,np.newaxis]
            batch_labels = [file_to_int[fpath] for fpath in batch_files]
            batch_labels = np.array(batch_labels)
            
            yield batch_data, batch_labels

In [24]:
train_files = glob.glob('/home/camilo/Documents/covid-tos/dataset/test_covid/train/completa/*.wav')
train_labels = pd.read_csv('../../metadata/dataset_train_covid_test.csv')
file_to_label = {'/home/camilo/Documents/covid-tos/dataset/test_covid/train/completa/' + k:v for k,v in zip(train_labels.filename.values, train_labels.category.values)}
list_labels = sorted(list(set(train_labels.category.values)))
label_to_int = {k:v for v,k in enumerate(list_labels)}
int_to_label = {v:k for k,v in label_to_int.items()}
file_to_int = {k:label_to_int[v] for k,v in file_to_label.items()}
nclass = len(list_labels)
tr_files, val_files = train_test_split(sorted(train_files), test_size=0.1, random_state=42)

In [25]:
(load_audio_file(tr_files[0])).shape

(284, 500)

In [28]:
def get_model_mel():
    nclass = len(list_labels)
    inp = Input(shape=(284, 500, 1))
    norm_inp = BatchNormalization()(inp)
    img_1 = MaxPooling2D(pool_size=(2, 2))(norm_inp)
    img_1 = Convolution2D(20, kernel_size=(3, 2), activation=activations.relu)(img_1)
    img_1 = Convolution2D(20, kernel_size=(3, 2), activation=activations.relu)(img_1)
    img_1 = MaxPooling2D(pool_size=(2, 2))(img_1)
    img_1= Dropout(rate=0.15)(img_1)
    img_1 = Convolution2D(20, kernel_size=(3, 2), activation=activations.relu)(img_1)
    img_1 = Convolution2D(20, kernel_size=(3, 2), activation=activations.relu)(img_1)
    img_1 = MaxPooling2D(pool_size=(2, 2))(img_1)
    img_1= Dropout(rate=0.15)(img_1)

    img_1 = Flatten()(img_1)
    
    dense_1 = Dense(256, activation=activations.relu)(img_1)
    dense_1= Dropout(rate=0.15)(dense_1)
    
    predictions = Dense(nclass, activation=activations.softmax)(dense_1)

    model = models.Model(inputs=inp, outputs=predictions)
    opt = optimizers.Adam()

    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model

In [29]:
model=get_model_mel()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 284, 500, 1)]     0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 284, 500, 1)       4         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 142, 250, 1)       0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 140, 249, 20)      140       
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 138, 248, 20)      2420      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 69, 124, 20)       0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 69, 124, 20)      

In [30]:
model.fit_generator(train_generator(tr_files),
                    steps_per_epoch=len(tr_files)//batch_size, 
                    epochs=30,
                    validation_data=train_generator(val_files),
                    validation_steps=len(val_files)//batch_size
                   )

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f2a1c57b310>

In [31]:
test_label=pd.read_csv('../../metadata/dataset_test_covid_test.csv')
test_files = glob.glob('/home/camilo/Documents/covid-tos/dataset/test_covid/test/completa/*.wav')

In [32]:
batch_data = [load_audio_file(fpath) for fpath in test_files]
batch_data = np.array(batch_data)[:, :, :,np.newaxis]
preds = model.predict(batch_data).tolist()
predict_select=[ int_to_label[np.argmax(i) ]  for  i  in  preds ]

In [33]:
predict_select

['no_covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'covid',
 'covid',
 'no_covid',
 'no_covid',
 'covid',
 'no_covid',
 'covid',
 'no_covid',
 'no_covid',
 'no_covid',
 'no_covid']

In [34]:
test_label.category

0        covid
1        covid
2        covid
3        covid
4        covid
5        covid
6        covid
7        covid
8        covid
9        covid
10    no_covid
11    no_covid
12    no_covid
13    no_covid
14    no_covid
15    no_covid
16    no_covid
17    no_covid
18    no_covid
Name: category, dtype: object

In [35]:
cm=confusion_matrix(test_label.category,predict_select)
display(cm)
TN,FP,FN,TP = cm.ravel()
recall=TP/(TP+FN)
precision=TP/(TP +FP)

array([[2, 8],
       [2, 7]])