In [1]:
import tensorflow as tf
import numpy as np
import librosa as li
import matplotlib.pyplot as plt
import IPython.display as ipd
import time
from concurrent.futures import ThreadPoolExecutor
from preprocessing import Audio

%matplotlib inline
# Enumerate the physical devices TensorFlow can see and keep the name of the
# preferred one: the first GPU when any is present, otherwise the first CPU.
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
DEVICE = (GPU or CPU)[0].name
DEVICE

'/physical_device:GPU:0'

In [2]:
def to_len(y, sec=1.0, sr=44100):
    """Trim silent edges from a signal and force it to a fixed length.

    Leading/trailing silence is removed with ``librosa.effects.trim`` (library
    defaults). The result is then padded at the end, or cut at the end, so
    that it holds exactly ``int(sr * sec)`` samples.

    Args:
        y: audio samples accepted by ``librosa.effects.trim``.
        sec: target duration in seconds.
        sr: sampling rate used to convert ``sec`` into a sample count.

    Returns:
        The trimmed signal as ``float32``, ``int(sr * sec)`` samples long
        (for a 1-D input signal).
    """
    trimmed = li.effects.trim(y)[0]
    n_samples = int(sr * sec)
    # pad_sequences operates on a batch: wrap the single clip, then unwrap it.
    batch = tf.keras.preprocessing.sequence.pad_sequences(
        [trimmed],
        maxlen=n_samples,
        dtype='float32',
        padding='post',
        truncating='post',
    )
    return batch[0]

In [3]:
def get_audio(path):
    """Load one audio file and return its augmented clips with their labels.

    Args:
        path: location of the audio file, handed to ``Audio``.

    Returns:
        A two-element list ``[data, labels]``. ``data`` holds every item of
        ``Audio.augmented_source()`` after it has been passed through
        ``to_len``; ``labels`` repeats the file's ``label`` once per item.
    """
    source = Audio(path)
    variants = source.augmented_source()
    clips = [to_len(variant) for variant in variants]
    targets = [source.label] * len(variants)
    return [clips, targets]

In [19]:
def get_labeled_data(nums=1500, target_dir='AudioData/4 Лада', seed=None, workers=32):
    """Load, augment and stack the audio clips used for training.

    The file list is the union of every file under ``target_dir``, every file
    under the noise directory, and at most ``nums`` randomly chosen files from
    the generic negative-data directory. Each file is expanded by
    ``get_audio`` into several fixed-length clips.

    Args:
        nums: maximum number of files taken from ``'AudioData/0 Данные'``.
        target_dir: directory of the class being trained. Defaults to the
            Lada directory; the project also has ``'AudioData/1 Джарвис'``,
            ``'AudioData/2 Дио'``, ``'AudioData/3 Итан'`` and
            ``'AudioData/5 Мира'``.
        seed: optional integer. When given, both shuffles use a private
            ``RandomState`` so the file order (and therefore any index-based
            train/validation split) is reproducible. When ``None`` the global
            NumPy random state is used, as before.
        workers: number of threads used to load files concurrently.

    Returns:
        ``(data, labels)``: two NumPy arrays, each with a trailing axis of
        size 1 appended. Clips belonging to one source file are adjacent.
    """
    FALSE_DATA = 'AudioData/0 Данные'
    NOISE = 'AudioData/0 Шум'

    # Unseeded calls keep using NumPy's global state (original behaviour);
    # seeded calls get their own generator so they do not disturb it.
    rng = np.random if seed is None else np.random.RandomState(seed)

    negatives = li.util.find_files(FALSE_DATA)
    rng.shuffle(negatives)
    paths = li.util.find_files(target_dir) + li.util.find_files(NOISE) + negatives[:nums]
    rng.shuffle(paths)

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments.
    started = time.perf_counter()

    with ThreadPoolExecutor(max_workers=workers) as pool:
        # pool.map preserves input order, so results follow the shuffled paths.
        loaded = list(pool.map(get_audio, paths))

    data = []
    labels = []
    for clips, clip_labels in loaded:
        data.extend(clips)
        labels.extend(clip_labels)

    data = np.array(data)
    labels = np.array(labels)
    # Append a trailing axis of size 1 to both arrays.
    data = np.reshape(data, [*data.shape, 1])
    labels = np.reshape(labels, [*labels.shape, 1])
    print('Loading at {:.2f} seconds'.format(time.perf_counter() - started))
    return (data, labels)

In [20]:
# Build the full dataset, keeping at most 10000 files from the generic
# negative pool. Slow: the recorded run took about 203 seconds.
data, labels = get_labeled_data(10000)

Loading at 203.13 seconds


In [21]:
# Sanity check: both arrays should share the same first dimension.
data.shape, labels.shape

((11447, 44100, 1), (11447, 1))

In [23]:
# Split index: samples before TRAIN are used for training, the rest for validation.
TRAIN = 9000
# Label sums on each side of the split (the positive-clip counts if labels
# are 0/1 — TODO confirm against Audio.label).
tf.math.reduce_sum(labels[:TRAIN]), tf.math.reduce_sum(labels[TRAIN:])

(<tf.Tensor: shape=(), dtype=int32, numpy=1008>,
 <tf.Tensor: shape=(), dtype=int32, numpy=288>)

In [24]:
# Samples before index TRAIN feed training; the remainder is held out for
# validation. Both pipelines yield batches of 64 and are not shuffled.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (data[:TRAIN], labels[:TRAIN])
).batch(64)
print(train_dataset)

validation_dataset = tf.data.Dataset.from_tensor_slices(
    (data[TRAIN:], labels[TRAIN:])
).batch(64)
print(validation_dataset)

<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>
<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>


In [25]:
# Batch count times the batch size (64). This is an upper bound on the number
# of samples, because the final batch may be only partially filled.
for dataset in (train_dataset, validation_dataset):
    print(len(dataset) * 64)

9024
2496


In [26]:
# Load the saved base model that serves as the template for the clone below.
root_model = tf.keras.models.load_model('models/conv/RootModel-v2.0.h5')

In [27]:
# NOTE(review): per the Keras documentation, clone_model builds new layers
# with newly initialised weights — root_model's trained weights are NOT copied.
# If the intent is to fine-tune from the root weights, a
# model.set_weights(root_model.get_weights()) call is needed after cloning —
# confirm intent.
model = tf.keras.models.clone_model(root_model)
# The name is set through the private attribute; it is later used to build
# the file name the model is saved under.
model._name = 'LadaConvV2.2'

model.summary()

Model: "LadaConvV2.2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_10 (Reshape)         (None, 4410, 10)          0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 441, 10)           1000      
_________________________________________________________________
dropout_17 (Dropout)         (None, 441, 10)           0         
_________________________________________________________________
re_lu_17 (ReLU)              (None, 441, 10)           0         
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 21, 10)            2100      
_________________________________________________________________
dropout_18 (Dropout)         (None, 21, 10)            0         
_________________________________________________________________
re_lu_18 (ReLU)              (None, 21, 10)           

In [28]:
# Metrics reported after the loss, in this order, by fit() and evaluate().
tracked_metrics = [
    tf.keras.metrics.Recall(name='rec'),
    tf.keras.metrics.Precision(name='prec'),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.BinaryAccuracy(name='acc'),
]

# Binary cross-entropy with library-default settings and a default Adam optimizer.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=tracked_metrics,
)

In [35]:
# Train on the training batches only. No validation_data is passed, so
# held-out metrics come solely from the separate evaluate() call.
# NOTE(review): the recorded epoch-1 loss is already ~0.017, which suggests
# this cell was re-run on an already-trained model; a fresh kernel may not
# reproduce this log — confirm.
model.fit(
    train_dataset,
    epochs=30,
    verbose=2
)

Epoch 1/30
141/141 - 2s - loss: 0.0168 - rec: 0.9762 - prec: 0.9830 - auc: 0.9954 - acc: 0.9954
Epoch 2/30
141/141 - 2s - loss: 0.0164 - rec: 0.9792 - prec: 0.9890 - auc: 0.9967 - acc: 0.9964
Epoch 3/30
141/141 - 2s - loss: 0.0153 - rec: 0.9802 - prec: 0.9950 - auc: 0.9945 - acc: 0.9972
Epoch 4/30
141/141 - 2s - loss: 0.0143 - rec: 0.9802 - prec: 0.9910 - auc: 0.9965 - acc: 0.9968
Epoch 5/30
141/141 - 2s - loss: 0.0149 - rec: 0.9812 - prec: 0.9851 - auc: 0.9965 - acc: 0.9962
Epoch 6/30
141/141 - 2s - loss: 0.0137 - rec: 0.9802 - prec: 0.9940 - auc: 0.9956 - acc: 0.9971
Epoch 7/30
141/141 - 2s - loss: 0.0142 - rec: 0.9812 - prec: 0.9930 - auc: 0.9957 - acc: 0.9971
Epoch 8/30
141/141 - 2s - loss: 0.0135 - rec: 0.9782 - prec: 0.9950 - auc: 0.9961 - acc: 0.9970
Epoch 9/30
141/141 - 2s - loss: 0.0137 - rec: 0.9821 - prec: 0.9861 - auc: 0.9977 - acc: 0.9964
Epoch 10/30
141/141 - 2s - loss: 0.0132 - rec: 0.9821 - prec: 0.9910 - auc: 0.9976 - acc: 0.9970
Epoch 11/30
141/141 - 2s - loss: 0.0142

<tensorflow.python.keras.callbacks.History at 0x2926d991508>

In [36]:
# Held-out metrics, returned in compile order: loss, rec, prec, auc, acc.
model.evaluate(validation_dataset, verbose=2)

39/39 - 1s - loss: 0.0482 - rec: 0.8785 - prec: 0.9961 - auc: 0.9996 - acc: 0.9853


[0.04815615341067314,
 0.8784722089767456,
 0.9960629940032959,
 0.999626874923706,
 0.9852880835533142]

In [38]:
# Persist the trained model as HDF5, using the model's name as the file name.
save_path = 'models/conv/Lada/{}.h5'.format(model.name)
model.save(save_path)

In [None]:
# Unexecuted repeat of the validation evaluation already run earlier in the notebook.
model.evaluate(validation_dataset, verbose=2)

In [10]:
# Load the previously saved MiraConvV2.2 model for evaluation.
# NOTE(review): this cell and the ones after it carry lower execution counts
# (10-13, 24) than the cells above (up to 38), so their recorded outputs likely
# predate the current data. On a fresh top-to-bottom run, train_dataset and
# validation_dataset are built from the Lada directory — confirm that
# evaluating the Mira model on them is intended.
tested_model = tf.keras.models.load_model('models/conv/Mira/MiraConvV2.2.h5')

In [11]:
# Print the loaded model's layer-by-layer architecture.
tested_model.summary()

Model: "MiraConvV2.2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_10 (Reshape)         (None, 4410, 10)          0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 441, 10)           1000      
_________________________________________________________________
dropout_17 (Dropout)         (None, 441, 10)           0         
_________________________________________________________________
re_lu_17 (ReLU)              (None, 441, 10)           0         
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 21, 10)            2100      
_________________________________________________________________
dropout_18 (Dropout)         (None, 21, 10)            0         
_________________________________________________________________
re_lu_18 (ReLU)              (None, 21, 10)           

In [12]:
# Evaluate the loaded model on the training batches; the result lists the loss
# first, followed by the metrics the model was compiled with.
tested_model.evaluate(train_dataset)



[0.018816368654370308,
 1.0,
 0.971222996711731,
 0.9989398121833801,
 0.9968000054359436]

In [13]:
# Evaluate the loaded model on the validation batches; the result lists the
# loss first, followed by the metrics the model was compiled with.
tested_model.evaluate(validation_dataset)



[0.016146354377269745,
 1.0,
 0.9729729890823364,
 0.9995825886726379,
 0.9964563846588135]

In [24]:
# Input shape the loaded model expects; the recorded value matches the
# (None, 44100, 1) feature shape of the batched datasets.
tested_model.input_shape

(None, 44100, 1)