In [1]:
import tensorflow as tf
import numpy as np
import librosa as li
import matplotlib.pyplot as plt
import IPython.display as ipd
import time
from concurrent.futures import ThreadPoolExecutor
from preprocessing import Audio

%matplotlib inline
# Ask TensorFlow which physical devices it can see, preferring a GPU when one exists.
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
# Name of the first GPU if any was found, otherwise the name of the first CPU.
DEVICE = GPU[0].name if GPU else CPU[0].name
# Bare expression: displayed as the cell output.
DEVICE

'/physical_device:GPU:0'

In [2]:
def to_len(y, sec=1.0, sr=44100):
    """Trim silence from a waveform and force it to a fixed number of samples.

    The signal is first passed through ``librosa.effects.trim`` and then
    either cut at the end or zero-padded at the end so that the result always
    has exactly ``int(sr * sec)`` samples.

    Args:
        y: Audio samples. Assumed to be a 1-D (mono) array -- TODO confirm
            against what ``Audio.augmented_source`` yields.
        sec: Target duration in seconds.
        sr: Sample rate in Hz used to convert ``sec`` into a sample count.

    Returns:
        A 1-D ``float32`` NumPy array of length ``int(sr * sec)``.
    """
    y, _ = li.effects.trim(y)
    target_len = int(sr * sec)
    # Plain NumPy replaces the legacy Keras ``pad_sequences`` helper: same
    # "keep the head, zero-fill the tail" result without wrapping the single
    # clip in a list or depending on a deprecated Keras module path.
    x = np.zeros(target_len, dtype='float32')
    head = np.asarray(y[:target_len], dtype='float32')
    x[:len(head)] = head
    return x

In [3]:
def get_audio(path):
    """Load one audio file and return its augmented clips with matching labels.

    An ``Audio`` object is built from ``path``; every item produced by its
    ``augmented_source()`` is normalised with ``to_len`` (default length), and
    the object's ``label`` is repeated once per augmented item.

    Args:
        path: Location of the audio file, passed straight to ``Audio``.

    Returns:
        A two-element list ``[clips, labels]`` where both members are lists of
        equal length.
    """
    source = Audio(path)
    variants = source.augmented_source()
    clips = [to_len(variant) for variant in variants]
    clip_labels = [source.label] * len(variants)
    return [clips, clip_labels]

In [4]:
def get_labeled_data(nums=1500, workers=8, seed=None):
    """Load, augment and stack the audio clips used to train the Mira model.

    Every file under the MIRA and NOISE directories is used, together with a
    random sample of at most ``nums`` files from the FALSE_DATA directory.
    The combined file list is shuffled, each file is expanded by
    ``get_audio`` on a thread pool, and the results are stacked into arrays.
    Clips that come from the same file stay adjacent in the output.

    Args:
        nums: Maximum number of FALSE_DATA files to include.
        workers: Number of threads used to run ``get_audio``.
        seed: Optional seed for the sampling and shuffling. ``None`` (the
            default) uses NumPy's global random state, as before.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays, each with a trailing axis
        of size 1 appended: ``data`` is ``(clips, samples, 1)`` and ``labels``
        is ``(clips, 1)``. Label values are whatever ``Audio.label`` provides.
    """
    FALSE_DATA = 'AudioData/0 Данные'
    NOISE = 'AudioData/0 Шум'
    MIRA = 'AudioData/5 Мира'
    # The other numbered directories under AudioData/ (1-4) are not used here.

    # With no seed, fall back to the module-level functions so the global
    # NumPy random state is consumed exactly as in the unseeded original.
    rng = np.random if seed is None else np.random.RandomState(seed)

    false_paths = li.util.find_files(FALSE_DATA)
    rng.shuffle(false_paths)
    paths = li.util.find_files(MIRA) + li.util.find_files(NOISE) + false_paths[:nums]

    rng.shuffle(paths)

    started = time.perf_counter()

    # pool.map keeps results in the same order as ``paths``.
    with ThreadPoolExecutor(workers) as pool:
        per_file = list(pool.map(get_audio, paths))

    data = list()
    labels = list()
    for clips, clip_labels in per_file:
        data += clips
        labels += clip_labels

    data = np.array(data)
    labels = np.array(labels)
    # Append a trailing axis of size 1 to both arrays.
    data = np.reshape(data, [*data.shape, 1])
    labels = np.reshape(labels, [*labels.shape, 1])
    print('Loading at {:.2f} seconds'.format(time.perf_counter() - started))
    return (data, labels)

In [5]:
# Build the full dataset: all MIRA and NOISE files plus up to 10000 randomly
# chosen FALSE_DATA files. The recorded run below took roughly 12 minutes.
data, labels = get_labeled_data(10000)

Loading at 705.56 seconds


In [6]:
# Sanity check: data is (clips, samples, 1) and labels is (clips, 1).
data.shape, labels.shape

((11411, 44100, 1), (11411, 1))

In [38]:
# Spot-check a single example: print its label and render an audio player for it.
i = 259
print(labels[i])
# Flatten with -1 instead of a hard-coded 44100 so the cell keeps working if
# the clip length produced by to_len (sec * sr) is ever changed.
ipd.Audio(data=np.reshape(data[i], -1), rate=44100)

[1]


In [82]:
# Split point: the first TRAIN examples are used for training, the rest for validation.
TRAIN = 10000
# Sum of the label values in each split (equals the number of positives if labels are 0/1).
tf.math.reduce_sum(labels[:TRAIN]), tf.math.reduce_sum(labels[TRAIN:])

(<tf.Tensor: shape=(), dtype=int32, numpy=1080>,
 <tf.Tensor: shape=(), dtype=int32, numpy=180>)

In [83]:
# Training split: the first TRAIN examples in their existing order, in batches of 64.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((data[:TRAIN], labels[:TRAIN]))
    .batch(64)
)
print(train_dataset)

# Validation split: everything after the first TRAIN examples, batched the same way.
validation_dataset = (
    tf.data.Dataset
    .from_tensor_slices((data[TRAIN:], labels[TRAIN:]))
    .batch(64)
)
print(validation_dataset)

<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>
<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>


In [84]:
# len() of a batched dataset is the number of batches, so these figures are
# batches * 64: an upper bound on the example count, since the final batch of
# each split may be only partly filled.
print(len(train_dataset) * 64)
print(len(validation_dataset) * 64)

10048
1472


In [47]:
# Load the saved base model that the next cell clones.
root_model = tf.keras.models.load_model('models/conv/RootModel-v1.1.h5')

In [48]:
# Build a new model with the same architecture as root_model.
# NOTE(review): per the Keras documentation, clone_model instantiates new layers
# with freshly initialised weights instead of copying root_model's weights --
# confirm that starting from untrained weights is intended here.
model = tf.keras.models.clone_model(root_model)
# Rename through the private _name attribute; model.name is later used to build the save path.
model._name = 'MiraConvV2.1'

model.summary()

Model: "MiraConvV2.1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 44100, 20)         200       
_________________________________________________________________
dropout_6 (Dropout)          (None, 44100, 20)         0         
_________________________________________________________________
re_lu (ReLU)                 (None, 44100, 20)         0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 44100, 20)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 100, 40)           352800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 100, 40)           0         
_________________________________________________________________
re_lu_1 (ReLU)               (None, 100, 40)          

In [49]:
# Configure training: Adam with default settings and binary cross-entropy loss.
# NOTE(review): BinaryCrossentropy() is used with its defaults, which presumably
# expect probabilities rather than logits -- the output layer is cut off in the
# model summary above, so confirm the model ends in a sigmoid.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    # evaluate() reports these after the loss, in this order.
    metrics=[
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.BinaryAccuracy(name='acc'),
    ]
)

In [88]:
# Train for 10 epochs on the training split only. No validation data is passed,
# so the log below shows training metrics only.
model.fit(
    train_dataset,
    epochs=10,
    verbose=2
)

Epoch 1/10
157/157 - 29s - loss: 0.0163 - recall: 0.9833 - precision: 0.9991 - auc: 0.9917 - acc: 0.9981
Epoch 2/10
157/157 - 28s - loss: 0.0159 - recall: 0.9833 - precision: 1.0000 - auc: 0.9916 - acc: 0.9982
Epoch 3/10
157/157 - 30s - loss: 0.0156 - recall: 0.9833 - precision: 1.0000 - auc: 0.9917 - acc: 0.9982
Epoch 4/10
157/157 - 30s - loss: 0.0160 - recall: 0.9815 - precision: 1.0000 - auc: 0.9916 - acc: 0.9980
Epoch 5/10
157/157 - 28s - loss: 0.0150 - recall: 0.9833 - precision: 1.0000 - auc: 0.9916 - acc: 0.9982
Epoch 6/10
157/157 - 29s - loss: 0.0146 - recall: 0.9833 - precision: 1.0000 - auc: 0.9916 - acc: 0.9982
Epoch 7/10
157/157 - 29s - loss: 0.0143 - recall: 0.9833 - precision: 1.0000 - auc: 0.9916 - acc: 0.9982
Epoch 8/10
157/157 - 29s - loss: 0.0140 - recall: 0.9833 - precision: 1.0000 - auc: 0.9916 - acc: 0.9982
Epoch 9/10
157/157 - 29s - loss: 0.0137 - recall: 0.9833 - precision: 1.0000 - auc: 0.9917 - acc: 0.9982
Epoch 10/10
157/157 - 30s - loss: 0.0137 - recall: 0.98

<tensorflow.python.keras.callbacks.History at 0x20b0082e508>

In [89]:
# Score the trained model on the validation split.
# Returned list order: loss, recall, precision, auc, acc.
model.evaluate(validation_dataset, verbose=2)

23/23 - 1s - loss: 0.0034 - recall: 1.0000 - precision: 0.9836 - auc: 1.0000 - acc: 0.9979


[0.0033952337689697742, 1.0, 0.9836065769195557, 1.0, 0.997873842716217]

In [90]:
# Save the trained model as an HDF5 file named after model.name.
model.save('models/conv/Mira/{}.h5'.format(model.name))

In [66]:
# NOTE(review): stale cell. Its execution count (66) is lower than the cell that
# builds the datasets (83), and the output below reports 54 batches where the
# current validation_dataset has 23, so this result came from a different
# validation set. Re-run it or remove it.
model.evaluate(validation_dataset, verbose=2)

54/54 - 4s - loss: 0.0992 - recall: 0.9524 - precision: 0.9809 - auc: 0.9760 - acc: 0.9927


[0.09918632358312607,
 0.9523809552192688,
 0.9809264540672302,
 0.9760447144508362,
 0.9926707744598389]

In [44]:
# Load a previously saved model (MiraConv-v1.4) so it can be inspected and evaluated below.
tested_model = tf.keras.models.load_model('models/conv/Mira/MiraConv-v1.4.h5')

In [45]:
# Print the layer-by-layer architecture of the loaded model.
tested_model.summary()

Model: "MiraConv-v1.4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 44100, 20)         200       
_________________________________________________________________
re_lu (ReLU)                 (None, 44100, 20)         0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 44100, 20)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 100, 40)           352800    
_________________________________________________________________
re_lu_1 (ReLU)               (None, 100, 40)           0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 20, 40)            0         
_________________________________________________________________
global_average_pooling1d (Gl (None, 20)              

In [43]:
# NOTE(review): execution count 43 precedes the load cell (44), so the result
# below belongs to whichever model was bound to tested_model before
# MiraConv-v1.4 was loaded. It is also measured on the training split, not on
# held-out data.
tested_model.evaluate(train_dataset)



[0.017806600779294968,
 1.0,
 0.9660459756851196,
 0.9992795586585999,
 0.9961249828338623]

In [46]:
# Evaluate the loaded model on the training split (not on held-out data).
# Returned list order: loss followed by the metrics the saved model was compiled with.
tested_model.evaluate(train_dataset)



[0.010499184019863605,
 1.0,
 0.9735099077224731,
 0.9999659061431885,
 0.996999979019165]