In [1]:
import librosa
import numpy as np 
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
from tensorflow.python.keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Input, Dropout, BatchNormalization, Convolution2D, MaxPooling2D, GlobalMaxPool2D,Flatten
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping
from random import shuffle
from sklearn.metrics import confusion_matrix

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
# ---- Global hyperparameters ----
# Fixed clip length in samples: 5 x 29000, i.e. 5 s at the 29 kHz rate used when loading audio.
input_length = 5 * 29000
# Number of files per generator batch; also the divisor for the per-epoch step counts.
batch_size = 8
# Number of mel bands in each spectrogram.
n_mels = 500
# NOTE(review): not referenced by the visible training call, which derives its own step count.
validation_steps = 10

In [3]:
def preprocess_audio_mel_T(audio, sample_rate=29000, window_size=4,
                           step_size=10, eps=1e-10):
    """Convert a waveform into a transposed, dB-scaled mel spectrogram.

    Parameters
    ----------
    audio : waveform samples, handed to librosa as ``y``.
    sample_rate : sampling rate handed to librosa as ``sr``.
    window_size, step_size, eps : accepted for interface compatibility but
        not used anywhere in the body.

    Returns
    -------
    The mel power spectrogram converted to dB relative to its own maximum
    and transposed, so the mel-band axis comes last (e.g. ``(284, 500)``
    when the module-level ``n_mels`` is 500).
    """
    # The number of mel bands is read from the module-level ``n_mels``.
    power_spec = librosa.feature.melspectrogram(
        y=audio, sr=sample_rate, n_mels=n_mels
    )
    return librosa.power_to_db(power_spec, ref=np.max).T

def load_audio_file(file_path, input_length=input_length, sample_rate=29000):
    """Load an audio file and return a fixed-length log-mel spectrogram.

    The waveform is loaded at ``sample_rate`` and then forced to exactly
    ``input_length`` samples: longer clips are cropped at a random position,
    shorter clips are zero-padded with a random split between leading and
    trailing padding. The fixed-length clip is then passed through
    ``preprocess_audio_mel_T``.

    Parameters
    ----------
    file_path : path of the audio file to read.
    input_length : target clip length in samples.
    sample_rate : rate used both for loading and for the mel transform
        (defaults to the previously hard-coded 29000).

    Returns
    -------
    The value returned by ``preprocess_audio_mel_T`` for the fixed-length clip.
    """
    data = librosa.core.load(file_path, sr=sample_rate)[0]
    n_samples = len(data)

    if n_samples > input_length:
        # randint's upper bound is exclusive, so +1 keeps the last valid
        # crop start (n_samples - input_length) reachable.
        offset = np.random.randint(n_samples - input_length + 1)
        data = data[offset:offset + input_length]
    else:
        pad_total = input_length - n_samples
        # +1 for the same reason; for an exact-length clip (pad_total == 0)
        # this always yields 0, i.e. no padding on either side.
        pad_left = np.random.randint(pad_total + 1)
        data = np.pad(data, (pad_left, pad_total - pad_left), "constant")

    return preprocess_audio_mel_T(data, sample_rate=sample_rate)

def chunker(seq, size):
    """Lazily split ``seq`` into consecutive slices of at most ``size`` items.

    The final slice is shorter when ``len(seq)`` is not a multiple of
    ``size``; an empty ``seq`` produces no slices.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)

def train_generator(list_files, batch_size=batch_size):
    """Endlessly yield ``(batch_data, batch_labels)`` pairs for Keras training.

    Each pass shuffles the file order, splits it into chunks of at most
    ``batch_size`` files (the last chunk of a pass may be shorter), loads
    every file with ``load_audio_file`` and looks up its integer label in the
    module-level ``file_to_int`` mapping.

    Parameters
    ----------
    list_files : sequence of audio file paths; it is copied, so the caller's
        list is never reordered.
    batch_size : maximum number of files per yielded batch.

    Yields
    ------
    tuple
        ``batch_data`` — the stacked spectrograms with a trailing channel
        axis added; ``batch_labels`` — array of integer class ids.

    Raises
    ------
    ValueError
        On first iteration if ``list_files`` is empty (otherwise the loop
        would spin forever without yielding anything).
    """
    # Work on a private copy so shuffling does not mutate the caller's list.
    files = list(list_files)
    if not files:
        raise ValueError("list_files is empty; no batches can be generated")

    while True:
        shuffle(files)
        for batch_files in chunker(files, size=batch_size):
            spectrograms = [load_audio_file(fpath) for fpath in batch_files]
            # Add the channel axis expected by the 2-D convolutional input.
            batch_data = np.array(spectrograms)[:, :, :, np.newaxis]
            batch_labels = np.array([file_to_int[fpath] for fpath in batch_files])

            yield batch_data, batch_labels
        

In [9]:
# Training clips on disk and their metadata table.
# NOTE(review): the audio directory is an absolute, machine-specific path,
# while the CSV is addressed relative to the notebook.
train_files = glob.glob('/home/camilo/Documents/covid-tos/dataset/test/train/*.wav')
train_labels = pd.read_csv('../../metadata/dataset_train_cnn_test.csv')

# Full file path -> category, keyed with the same directory prefix used in the glob above.
file_to_label = dict(
    ('/home/camilo/Documents/covid-tos/dataset/test/train/' + fname, category)
    for fname, category in zip(train_labels.filename.values,
                               train_labels.category.values)
)

# Category <-> integer id; ids follow the sorted order of the distinct categories.
list_labels = sorted(set(train_labels.category.values))
label_to_int = dict(zip(list_labels, range(len(list_labels))))
int_to_label = dict(enumerate(list_labels))
file_to_int = {path: label_to_int[category]
               for path, category in file_to_label.items()}
nclass = len(list_labels)

# Reproducible 90 % / 10 % train / validation split over the sorted file list.
tr_files, val_files = train_test_split(
    sorted(train_files),
    test_size=0.1,
    random_state=42,
)

In [10]:
# Sanity check: spectrogram shape produced for the first training clip.
load_audio_file(tr_files[0]).shape

(284, 500)

In [11]:
def get_model_mel(input_shape=(284, 500, 1)):
    """Build, compile and summarise the 2-D CNN spectrogram classifier.

    Architecture: batch normalisation and 2x2 max pooling on the input, two
    convolution blocks (18 filters of 5x5 with ReLU, followed by 4x4 and then
    2x2 max pooling), flattening, two 128-unit ReLU dense layers each followed
    by 50 % dropout, and a softmax output with one unit per class.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single sample without the batch axis. The default
        ``(284, 500, 1)`` is the previously hard-coded value.

    Returns
    -------
    The compiled Keras model (Adam optimiser, sparse categorical
    cross-entropy loss, accuracy metric). The layer summary is printed as a
    side effect.
    """
    # The number of output units follows the module-level label list.
    nclass = len(list_labels)

    inp = Input(shape=input_shape)
    features = BatchNormalization()(inp)
    features = MaxPooling2D(pool_size=(2, 2))(features)

    # Two conv blocks that differ only in the pooling window that follows.
    for pool_size in ((4, 4), (2, 2)):
        features = Convolution2D(18, kernel_size=(5, 5),
                                 activation=activations.relu)(features)
        features = MaxPooling2D(pool_size=pool_size)(features)
    features = Flatten()(features)

    # Two identical dense + dropout stages ahead of the classifier head.
    for _ in range(2):
        features = Dense(128, activation=activations.relu)(features)
        features = Dropout(rate=0.5)(features)
    predictions = Dense(nclass, activation=activations.softmax)(features)

    model = models.Model(inputs=inp, outputs=predictions)
    model.compile(optimizer=optimizers.Adam(),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=['acc'])
    model.summary()
    return model

In [12]:
# Instantiate the CNN; the call also prints the layer summary.
model = get_model_mel()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 284, 500, 1)]     0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 284, 500, 1)       4         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 142, 250, 1)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 138, 246, 18)      468       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 34, 61, 18)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 30, 57, 18)        8118      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 15, 28, 18)       

In [None]:
# Train for 20 epochs from the endless batch generators.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
# Step counts are clamped to at least 1 so that a split smaller than one
# batch still gets evaluated instead of producing zero steps.
# NOTE(review): validation reuses train_generator, so validation clips are
# also shuffled and randomly cropped/padded on every pass.
history = model.fit(
    train_generator(tr_files),
    steps_per_epoch=max(1, len(tr_files) // batch_size),
    epochs=20,
    validation_data=train_generator(val_files),
    validation_steps=max(1, len(val_files) // batch_size),
)

Epoch 1/20

In [None]:
# Ground-truth table for the held-out test clips.
test_label = pd.read_csv('../../metadata/dataset_test_cnn_test.csv')

# Predictions are later compared with `test_label.category` position by
# position, so the file list must follow the CSV row order. A bare glob
# returns files in arbitrary directory order and would silently misalign
# predictions and labels.
test_dir = '../../dataset/test/test_data/'
if 'filename' in test_label.columns:
    # NOTE(review): assumes the test CSV stores bare file names in a
    # `filename` column, as the training CSV does — confirm.
    test_files = [os.path.join(test_dir, fname) for fname in test_label['filename']]
else:
    # Fallback: deterministic order, but alignment with the CSV rows is NOT
    # guaranteed — verify before trusting the metrics.
    test_files = sorted(glob.glob(test_dir + '*.wav'))

In [None]:
# Turn every test clip into a spectrogram, stack them into one array and
# append the channel axis the network input expects.
batch_data = np.array([load_audio_file(path) for path in test_files])
batch_data = batch_data[:, :, :, np.newaxis]

# Class scores per clip, kept as plain nested lists.
preds = model.predict(batch_data).tolist()

# Map the highest-scoring class index of each clip back to its label.
predict_select = [int_to_label[np.argmax(scores)] for scores in preds]

In [None]:
# Confusion matrix of true vs. predicted categories. Passing `labels`
# pins the class order to the training label list and keeps the matrix
# 2x2 even if one class is missing from both the test labels and the
# predictions, which would otherwise break the four-way unpack below.
# NOTE(review): the unpack still requires a binary problem (two labels).
cm = confusion_matrix(test_label.category, predict_select, labels=list_labels)
display(cm)

# Row-major order of a binary confusion matrix; the second label in
# `list_labels` is treated as the positive class.
TN, FP, FN, TP = cm.ravel()
recall = TP / (TP + FN)
precision = TP / (TP + FP)

In [None]:
# Confusion matrix of true vs. predicted categories. Passing `labels`
# pins the class order to the training label list and keeps the matrix
# 2x2 even if one class is missing from both the test labels and the
# predictions, which would otherwise break the four-way unpack below.
# NOTE(review): the unpack still requires a binary problem (two labels).
cm = confusion_matrix(test_label.category, predict_select, labels=list_labels)
display(cm)

# Row-major order of a binary confusion matrix; the second label in
# `list_labels` is treated as the positive class.
TN, FP, FN, TP = cm.ravel()

# Standard binary classification metrics derived from the four counts.
recall = TP / (TP + FN)
precision = TP / (TP + FP)
accuracy = (TP + TN) / (TP + TN + FP + FN)
specifity = TN / (TN + FP)
FI_score = 2 * (precision * recall / (precision + recall))

print('recall = ' + str(recall))
print('precision = ' + str(precision))
print('accuracy =' + str(accuracy))
print('specifity= ' + str(specifity))
print('FI_score =' + str(FI_score))