In [1]:
import librosa
import librosa.display
import numpy as np 
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
from tensorflow.python.keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Input, Dropout, BatchNormalization, Convolution2D, MaxPooling2D, GlobalMaxPool2D,Flatten
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping
from random import shuffle
from sklearn.metrics import confusion_matrix

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
######################
input_length = 22050*6
batch_size = 20
######################

In [3]:
def preprocess_audio_mel_T(audio, sample_rate=22050, window_size=20, #log_specgram
                 step_size=10, eps=1e-10):

    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels= n_mels)
    mel_db = (librosa.power_to_db(mel_spec, ref=np.max))

    return (mel_db.T)

def load_audio_file(file_path, input_length=input_length):
    data = librosa.core.load(file_path, sr=22050)[0] #, sr=16000
    if len(data)>input_length:
        
        
        max_offset = len(data)-input_length
        
        offset = np.random.randint(max_offset)
        
        data = data[offset:(input_length+offset)]
        
        
    else:
        if input_length > len(data):
            max_offset = input_length - len(data)

            offset = np.random.randint(max_offset)
        else:
            offset = 0
        
        
        data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
        
        
    data = preprocess_audio_mel_T(data)
    return data

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def train_generator(list_files, batch_size=batch_size):
    while True:
        shuffle(list_files)
        for batch_files in chunker(list_files, size=batch_size):
            batch_data = [load_audio_file(fpath) for fpath in batch_files]
            batch_data = np.array(batch_data)[:, :, :,np.newaxis]
            batch_labels = [file_to_int[fpath] for fpath in batch_files]
            batch_labels = np.array(batch_labels)
            
            yield batch_data, batch_labels
        

In [4]:
#########
n_mels = 432
validation_steps = 500
##### 
###3 parametros cnn
filtrosConv1 = 32
filtrosConv2 = 32
tamano_filtro1 = (5, 5)
tamano_filtro2 = (5, 5)
tamano_pool_1 = (2, 2)
tamano_pool_2 = (4, 4)
lr = 0.0004

In [5]:
train_files = glob.glob('../dataset/dataset_train_CNN_sigmo/*.wav')
train_labels = pd.read_csv('../metadata/dataset_train_cnn.csv')
file_to_label = {'../dataset/dataset_train_CNN_sigmo/' + k:v for k,v in zip(train_labels.filename.values, train_labels.target.values)}
list_labels = sorted(list(set(train_labels.target.values)))
label_to_int = {k:v for v,k in enumerate(list_labels)}
int_to_label = {v:k for k,v in label_to_int.items()}
file_to_int = {k:label_to_int[v] for k,v in file_to_label.items()}
nclass = len(list_labels)
tr_files, val_files = train_test_split(sorted(train_files), test_size=0.1, random_state=42)

In [6]:
def get_model_mel():
    nclass = len(list_labels)
    inp = Input(shape=(259, 432, 1))
    norm_inp = BatchNormalization()(inp)
    img_1 = MaxPooling2D(pool_size=(2, 2))(norm_inp)
    img_1 = Convolution2D(16, kernel_size=(5, 5), activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPooling2D(pool_size=(4, 4))(img_1)
    img_1 = Convolution2D(16, kernel_size=(5, 5), activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPooling2D(pool_size=(2, 2))(img_1)
    img_1 = Flatten()(img_1)

    dense_1 = Dense(128, activation=activations.relu)(img_1)
    dense_1= Dropout(rate=0.5)(dense_1)
    dense_1 = Dense(128, activation=activations.relu)(dense_1)
    dense_1= Dropout(rate=0.5)(dense_1)
    predictions = Dense(nclass, activation=activations.softmax)(dense_1)

    model = models.Model(inputs=inp, outputs=predictions)
    opt = optimizers.Adam()

    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model

In [7]:
model=get_model_mel()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 259, 432, 1)]     0         
_________________________________________________________________
batch_normalization (BatchNo (None, 259, 432, 1)       4         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 129, 216, 1)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 125, 212, 16)      416       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 31, 53, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 27, 49, 16)        6416      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 24, 16)       

In [8]:
model.fit_generator(train_generator(tr_files),
                    steps_per_epoch=len(tr_files)//batch_size, 
                    epochs=30,
                    validation_data=train_generator(val_files),
                    validation_steps=len(val_files)//batch_size
                   )

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f9c707dd110>

In [9]:
test_label=pd.read_csv('../metadata/dataset_test_cnn.csv')
test_files = glob.glob('../dataset/dataset_test_CNN_sigmo/*.wav')

In [10]:
batch_data = [load_audio_file(fpath) for fpath in test_files]
batch_data = np.array(batch_data)[:, :, :,np.newaxis]
preds = model.predict(batch_data).tolist()
predict_select=[ int_to_label[np.argmax(i) ]  for  i  in  preds ]

In [11]:
cm=confusion_matrix(test_label.target,predict_select)
display(cm)
TN,FP,FN,TP = cm.ravel()
recall=TP/(TP+FN)
precision=TP/(TP +FP)

array([[ 5, 10],
       [10,  9]])

In [12]:
cm=confusion_matrix(test_label.target,predict_select)
display(cm)
TN,FP,FN,TP = cm.ravel()
recall=TP/(TP+FN)
precision=TP/(TP +FP)
accuracy=(TP+TN)/(TP+TN+FP+FN)
specifity=TN/(TN+FP)
FI_score=2 * (precision*recall / (precision + recall))
print('recall = ' + str(recall))
print('precision = ' +str(precision))
print('accuracy =' + str(accuracy))
print('specifity= ' +str(specifity))
print('FI_score =' + str(FI_score))

array([[ 5, 10],
       [10,  9]])

recall = 0.47368421052631576
precision = 0.47368421052631576
accuracy =0.4117647058823529
specifity= 0.3333333333333333
FI_score =0.47368421052631576
