In [1]:
import tensorflow as tf
import numpy as np
import librosa as li
import matplotlib.pyplot as plt
import IPython.display as ipd
import time
from concurrent.futures import ThreadPoolExecutor
from preprocessing import Audio

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask TensorFlow which physical devices of each type it can see.
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
# Prefer the first listed GPU; fall back to the first CPU when the GPU list is empty.
# NOTE: DEVICE is only displayed here; no later visible cell passes it to TensorFlow.
DEVICE = GPU[0].name if GPU else CPU[0].name
# Bare expression so the notebook shows the selected device name.
DEVICE

'/physical_device:GPU:0'

In [2]:
def to_len(y, sec=1.0, sr=16000):
    """Trim a signal with librosa and force it to a fixed number of samples.

    Args:
        y: One-dimensional audio signal accepted by ``librosa.effects.trim``.
        sec: Target duration in seconds.
        sr: Sampling rate used to convert ``sec`` into a sample count.

    Returns:
        A float32 array of length ``int(sr * sec)``. Shorter signals are
        padded at the end, longer ones are cut at the end.
    """
    # trim() also returns the kept interval, which is not needed here.
    trimmed, _ = li.effects.trim(y)
    target_len = int(sr * sec)
    # pad_sequences works on a batch, so wrap the single signal in a list
    # and unwrap the only row afterwards.
    batch = tf.keras.preprocessing.sequence.pad_sequences(
        [trimmed],
        maxlen=target_len,
        dtype='float32',
        padding='post',
        truncating='post',
    )
    return batch[0]

In [5]:
def get_audio(path):
    """Load one audio file and pair each of its augmented variants with a label.

    Args:
        path: Location of the audio file, passed straight to ``Audio``.

    Returns:
        A two-element list ``[augmented, labels]``: ``augmented`` is whatever
        ``Audio.augmented()`` returns, and ``labels`` is a list holding
        ``Audio.label`` once per augmented item.
    """
    clip = Audio(path)
    variants = clip.augmented()
    # Every variant of a clip shares the clip's label.
    return [variants, [clip.label] * len(variants)]

In [6]:
def get_labeled_data(nums=1500, seed=None, max_workers=32):
    """Load, augment and label the audio clips used for training.

    Files are collected from three folders: every file under
    'AudioData/5 Мира' and 'AudioData/0 Шум', plus at most ``nums`` randomly
    chosen files from 'AudioData/0 Данные'. Each file is expanded by
    ``get_audio`` into its augmented variants and matching labels.

    Args:
        nums: Maximum number of files taken from 'AudioData/0 Данные'.
        seed: Optional seed for the two shuffles. With ``None`` (default)
            NumPy's global random state is used, as before. With a value,
            a private generator is used so the file selection and order,
            and therefore any later index-based train/validation split,
            are repeatable.
        max_workers: Number of threads used to load files concurrently.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays, each with one extra
        trailing axis of size 1 appended to the stacked results.
    """
    FALSE_DATA = 'AudioData/0 Данные'
    NOISE = 'AudioData/0 Шум'
    MIRA = 'AudioData/5 Мира'

    # Keep the global RNG when no seed is given so existing callers see the
    # same behaviour; otherwise isolate the randomness in a seeded generator.
    if seed is None:
        shuffle = np.random.shuffle
    else:
        shuffle = np.random.RandomState(seed).shuffle

    # Shuffle before slicing so the `nums` files kept are a random subset.
    false_paths = li.util.find_files(FALSE_DATA)
    shuffle(false_paths)
    paths = li.util.find_files(MIRA) + li.util.find_files(NOISE) + false_paths[:nums]

    # Mix the sources so index-based splits do not separate them.
    shuffle(paths)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    # Executor.map yields results in the order of `paths`, so the shuffled
    # order is preserved even though files are loaded concurrently.
    with ThreadPoolExecutor(max_workers) as pool:
        loaded = list(pool.map(get_audio, paths))

    data = []
    labels = []
    for clip_variants, clip_labels in loaded:
        data.extend(clip_variants)
        labels.extend(clip_labels)

    data = np.array(data)
    labels = np.array(labels)
    # Append a trailing axis of size 1 to both arrays.
    data = np.reshape(data, [*data.shape, 1])
    labels = np.reshape(labels, [*labels.shape, 1])
    print('Loading at {:.2f} seconds'.format(time.perf_counter() - start_time))
    return (data, labels)

In [7]:
# Build the full dataset, keeping at most 10000 files from the 'AudioData/0 Данные' folder.
data, labels = get_labeled_data(10000)

Loading at 292.34 seconds


In [9]:
# Display both shapes to check that samples and labels share the same leading dimension.
data.shape, labels.shape

((11411, 200, 1), (11411, 1))

In [39]:
# Split index: rows [0, TRAIN) are used for training, rows [TRAIN, end) for validation.
TRAIN = 9000
# Sum of the labels on each side of the split
# (equals the number of positive rows if labels are 0/1 — TODO confirm against Audio.label).
tf.math.reduce_sum(labels[:TRAIN]), tf.math.reduce_sum(labels[TRAIN:])

(<tf.Tensor: shape=(), dtype=int32, numpy=936>,
 <tf.Tensor: shape=(), dtype=int32, numpy=324>)

In [40]:
# Training pipeline: first TRAIN rows, grouped into batches of 64.
# No .shuffle() is applied, so batches keep the array order on every epoch.
train_dataset = tf.data.Dataset.from_tensor_slices((data[:TRAIN], labels[:TRAIN]))
train_dataset = train_dataset.batch(64)
print(train_dataset)

# Validation pipeline: the remaining rows, batched the same way.
validation_dataset = tf.data.Dataset.from_tensor_slices((data[TRAIN:], labels[TRAIN:]))
validation_dataset = validation_dataset.batch(64)
print(validation_dataset)

<BatchDataset shapes: ((None, 200, 1), (None, 1)), types: (tf.float64, tf.int32)>
<BatchDataset shapes: ((None, 200, 1), (None, 1)), types: (tf.float64, tf.int32)>


In [41]:
# len() of a batched dataset is the number of batches, so batches * 64 is an
# upper bound on the example count: the final batch may hold fewer than 64 rows.
print(len(train_dataset) * 64)
print(len(validation_dataset) * 64)

9024
2432


In [28]:
# Load a previously saved Keras model from an HDF5 file.
root_model = tf.keras.models.load_model('models/conv/RootModel-v3.0.h5')

In [29]:
# clone_model rebuilds the architecture with newly created layers; per the Keras
# documentation it does not copy root_model's weights.
model = tf.keras.models.clone_model(root_model)
# Rename the clone through the private `_name` attribute.
model._name = 'MiraConvV3.0'

# NOTE: `model` is rebound to a GRU-based Sequential model in a later cell.
model.summary()

Model: "MiraConvV3.0"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 16000, 1)          10        
_________________________________________________________________
dropout (Dropout)            (None, 16000, 1)          0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 16000, 20)         2000      
_________________________________________________________________
dropout_1 (Dropout)          (None, 16000, 20)         0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 40, 10)            80000     
_________________________________________________________________
dropout_2 (Dropout)          (None, 40, 10)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 400)              

In [145]:
# Binary classifier over inputs of shape (200, 1): normalization -> GRU -> sigmoid unit.
# This rebinds `model`, replacing whatever an earlier cell assigned to that name.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer((200, 1)),
    # NOTE(review): no adapt() call on this layer appears in the visible cells —
    # confirm its statistics are set elsewhere or are meant to stay at their defaults.
    tf.keras.layers.experimental.preprocessing.Normalization(),
    # 200 recurrent units; L2 penalty (factor 0.001) on the input kernel only.
    tf.keras.layers.GRU(200, kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    
    # Single sigmoid output, matching the BinaryCrossentropy loss used at compile time.
    tf.keras.layers.Dense(1, activation='sigmoid')
], name='MiraGRUV4.3')

model.summary()

Model: "MiraGRUV4.3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_2 (Normalizati (None, 200, 1)            3         
_________________________________________________________________
gru_22 (GRU)                 (None, 200)               121800    
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 201       
Total params: 122,004
Trainable params: 122,001
Non-trainable params: 3
_________________________________________________________________


In [166]:
# Configure training: Adam at learning rate 0.001, binary cross-entropy loss,
# and recall/precision reported under the short names 'rec' and 'prec'.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.Recall(name='rec'),
        tf.keras.metrics.Precision(name='prec'),
    ]
)

In [179]:
# Train for 5 epochs with one log line per epoch (verbose=2).
# No validation_data is passed, so validation metrics come only from the
# separate evaluate() cells.
model.fit(
    train_dataset,
    epochs=5,
    verbose=2,
)

Epoch 1/5
141/141 - 2s - loss: 0.0069 - rec: 1.0000 - prec: 0.9926
Epoch 2/5
141/141 - 2s - loss: 0.0110 - rec: 0.9936 - prec: 0.9820
Epoch 3/5
141/141 - 2s - loss: 0.0094 - rec: 0.9925 - prec: 0.9862
Epoch 4/5
141/141 - 2s - loss: 0.0201 - rec: 0.9861 - prec: 0.9585
Epoch 5/5
141/141 - 2s - loss: 0.0109 - rec: 0.9915 - prec: 0.9820


<tensorflow.python.keras.callbacks.History at 0x1d73b4790c8>

In [180]:
# Evaluate on the held-out rows; returns [loss, rec, prec] in the order given to compile().
model.evaluate(validation_dataset, verbose=2)

38/38 - 0s - loss: 0.0624 - rec: 0.8920 - prec: 0.9797


[0.062373436987400055, 0.8919752836227417, 0.9796609878540039]

In [171]:
# Save to 'models/v4/<model.name>.3.h5' in HDF5 format.
# NOTE(review): this cell's execution count (171) is lower than the fit cell's (179),
# so the file may not contain the weights from the training run shown — confirm.
model.save('models/v4/{}.3.h5'.format(model.name))

In [100]:
# Same validation evaluation as the earlier cell.
# NOTE(review): the execution count (100) suggests this output comes from an
# earlier state of `model` than the evaluation shown before it — confirm.
model.evaluate(validation_dataset, verbose=2)

38/38 - 0s - loss: 0.1175 - rec: 0.7623 - prec: 1.0000


[0.11749754846096039, 0.7623456716537476, 1.0]

In [201]:
# Load a saved model for testing.
# NOTE(review): no visible cell writes 'MiraGRUV4.3.2.h5'; the save cell formats
# its path as '<model.name>.3.h5' — confirm where this file came from.
tested_model = tf.keras.models.load_model('models/v4/MiraGRUV4.3.2.h5')

In [202]:
# Print the loaded model's layers and parameter counts.
tested_model.summary()

Model: "MiraGRUV4.3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_2 (Normalizati (None, 200, 1)            3         
_________________________________________________________________
gru_22 (GRU)                 (None, 200)               121800    
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 201       
Total params: 122,004
Trainable params: 122,001
Non-trainable params: 3
_________________________________________________________________


In [239]:
# Evaluate the loaded model on the training rows.
tested_model.evaluate(train_dataset)



[0.044500984251499176, 0.9572649598121643, 0.9323621392250061]

In [240]:
# Evaluate the loaded model on the validation rows.
tested_model.evaluate(validation_dataset)



[0.04298418387770653, 0.9629629850387573, 0.9483282566070557]

In [232]:
# Predict for the single example at index 10000, reshaped into a batch of shape (1, 200).
# NOTE(review): the model summary lists (None, 200, 1) shapes and data[0].shape is (200, 1);
# confirm whether a (1, 200, 1) input was intended here.
tested_model.predict(np.reshape(data[10000],(1, 200)))

array([[0.9827519]], dtype=float32)

In [210]:
# Compile the loaded model with the same optimizer, loss and metrics as the
# `model.compile` cell, so evaluate() reports [loss, rec, prec].
tested_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.Recall(name='rec'),
        tf.keras.metrics.Precision(name='prec'),
    ]
)

In [209]:
# Shape of a single example.
data[0].shape

(200, 1)

In [233]:
# Label of the example at index 10000, the same index passed to predict().
labels[10000]

array([1])

In [237]:
# Set a `reset_after` attribute on the first layer of tested_model.
# NOTE(review): the printed summary lists the Normalization layer first and the GRU
# second — confirm which layer was meant, and whether changing this attribute on an
# already-built layer has any effect on the saved model.
tested_model.layers[0].reset_after = False

In [238]:
# Save the modified model under a new file name, leaving the loaded file untouched.
tested_model.save('models/v4/MiraGRUV4.3.2-1.h5')