In [2]:
import tensorflow as tf
import numpy as np
import librosa as li
import matplotlib.pyplot as plt
import IPython.display as ipd
import time
from preprocessing import Audio

%matplotlib inline
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
DEVICE = GPU[0].name if GPU else CPU[0].name
DEVICE

'/physical_device:GPU:0'

In [2]:
def to_len(y, sec=1.0, sr=44100):
    y, _ = li.effects.trim(y)
    x = tf.keras.preprocessing.sequence.pad_sequences(
        [y],
        maxlen=int(sr * sec),
        padding='post',
        truncating='post',
        dtype='float32'
    )[0]
    return x

In [None]:
def get_audio(path):
    audio = Audio(path)
    augmented = audio.augmented_source()
    data = list(map(to_len, augmented))
    labels = [audio.label] * len(augmented)
    return [data, labels]

In [3]:
def get_labeled_data(nums=1500):
    FALSE_DATA = 'AudioData/0 Данные'
    NOISE = 'AudioData/0 Шум'
    JARVIS = 'AudioData/1 Джарвис'
    DIO = 'AudioData/2 Дио'
    ITAN = 'AudioData/3 Итан'
    LADA = 'AudioData/4 Лада'
    MIRA = 'AudioData/5 Мира'
    
    p = li.util.find_files(FALSE_DATA)
    np.random.shuffle(p)
    PATHS = li.util.find_files(JARVIS) + li.util.find_files(NOISE) + p[:nums]
    
    np.random.shuffle(PATHS)
    
    START_TIME = time.time()
    
    data = list()
    labels = list()
    
    with ThreadPoolExecutor(8) as pool:
        DATA = list(pool.map(get_audio, PATHS))
    
    for d, l in DATA:
        data += d
        labels += l

    data = np.array(data)
    labels = np.array(labels)
    data = np.reshape(data, [*data.shape, 1])
    labels = np.reshape(labels, [*labels.shape, 1])
    print('Loading at {:.2f} seconds'.format(time.time()-START_TIME))
    return (data, labels)

In [4]:
data, labels = get_labeled_data(5000)

Loading 100.0% at 617.91 seconds


In [5]:
data.shape, labels.shape

((6609, 44100, 1), (6609, 1))

In [6]:
TRAIN = 4000
tf.math.reduce_sum(labels[:TRAIN]), tf.math.reduce_sum(labels[TRAIN:])

(<tf.Tensor: shape=(), dtype=int32, numpy=954>,
 <tf.Tensor: shape=(), dtype=int32, numpy=504>)

In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((data[:TRAIN], labels[:TRAIN]))
train_dataset = train_dataset.batch(64)
print(train_dataset)

validation_dataset = tf.data.Dataset.from_tensor_slices((data[TRAIN:], labels[TRAIN:]))
validation_dataset = validation_dataset.batch(64)
print(validation_dataset)

<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>
<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>


In [8]:
print(len(train_dataset) * 64)
print(len(validation_dataset) * 64)

4032
2624


In [9]:
root_model = tf.keras.models.load_model('models/conv/RootModel-v1.1.h5')

In [10]:
model = tf.keras.models.clone_model(root_model)
model._name = 'JarvisConvV1.0'

model.summary()

Model: "JarvisConvV1.0"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 44100, 20)         200       
_________________________________________________________________
dropout_6 (Dropout)          (None, 44100, 20)         0         
_________________________________________________________________
re_lu (ReLU)                 (None, 44100, 20)         0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 44100, 20)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 100, 40)           352800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 100, 40)           0         
_________________________________________________________________
re_lu_1 (ReLU)               (None, 100, 40)        

In [11]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.BinaryAccuracy(name='acc'),
    ]
)

In [12]:
model.fit(
    train_dataset,
    epochs=50,
    verbose=2
)

Epoch 1/50
63/63 - 10s - loss: 0.6654 - recall: 0.0566 - precision: 0.3293 - auc: 0.4732 - acc: 0.7475
Epoch 2/50
63/63 - 10s - loss: 0.6048 - recall: 0.0000e+00 - precision: 0.0000e+00 - auc: 0.4775 - acc: 0.7615
Epoch 3/50
63/63 - 10s - loss: 0.5666 - recall: 0.0000e+00 - precision: 0.0000e+00 - auc: 0.4711 - acc: 0.7615
Epoch 4/50
63/63 - 11s - loss: 0.5541 - recall: 0.0000e+00 - precision: 0.0000e+00 - auc: 0.4491 - acc: 0.7615
Epoch 5/50
63/63 - 11s - loss: 0.5513 - recall: 0.0000e+00 - precision: 0.0000e+00 - auc: 0.4304 - acc: 0.7615
Epoch 6/50
63/63 - 11s - loss: 0.3099 - recall: 0.3595 - precision: 0.8052 - auc: 0.9409 - acc: 0.8265
Epoch 7/50
63/63 - 11s - loss: 0.1185 - recall: 0.8637 - precision: 0.8692 - auc: 0.9864 - acc: 0.9365
Epoch 8/50
63/63 - 11s - loss: 0.0776 - recall: 0.9528 - precision: 0.9439 - auc: 0.9950 - acc: 0.9753
Epoch 9/50
63/63 - 11s - loss: 0.0576 - recall: 0.9759 - precision: 0.9628 - auc: 0.9967 - acc: 0.9852
Epoch 10/50
63/63 - 11s - loss: 0.0469 - 

<tensorflow.python.keras.callbacks.History at 0x1f9bbcddc08>

In [13]:
model.evaluate(validation_dataset, verbose=2)

41/41 - 3s - loss: 0.0117 - recall: 0.9980 - precision: 0.9863 - auc: 0.9996 - acc: 0.9969


[0.01166579220443964,
 0.9980158805847168,
 0.9862744808197021,
 0.9996418356895447,
 0.9969336986541748]

In [15]:
model.save('models/conv/Jarvis/{}.h5'.format(model.name))

In [94]:
model.evaluate(validation_dataset, verbose=2)

38/38 - 3s - loss: 0.0499 - recall: 0.9167 - precision: 0.9875 - auc: 0.9990 - acc: 0.9829


[0.049875952303409576,
 0.9166666865348816,
 0.9875311851501465,
 0.9990178346633911,
 0.9828667044639587]

In [12]:
a = np.array([])

In [14]:
a = np.append(a, [1, 2, 3])
a

array([1., 2., 3., 1., 2., 3.])