In [1]:
import tensorflow as tf
import numpy as np
import librosa as li
import matplotlib.pyplot as plt
import IPython.display as ipd
import time
from concurrent.futures import ThreadPoolExecutor
from preprocessing import Audio

%matplotlib inline
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
DEVICE = GPU[0].name if GPU else CPU[0].name
DEVICE

'/physical_device:GPU:0'

In [2]:
def to_len(y, sec=1.0, sr=44100):
    y, _ = li.effects.trim(y)
    x = tf.keras.preprocessing.sequence.pad_sequences(
        [y],
        maxlen=int(sr * sec),
        padding='post',
        truncating='post',
        dtype='float32'
    )[0]
    return x

In [3]:
def get_audio(path):
    audio = Audio(path)
    #augmented = audio.augmented_source()
    augmented = [audio.data]
    data = list(map(to_len, augmented))
    labels = [audio.label] * len(augmented)
    return [data, labels]

In [8]:
def get_labeled_data(nums=1500):
    FALSE_DATA = 'AudioData/0 Данные'
    NOISE = 'AudioData/0 Шум'
    JARVIS = 'AudioData/1 Джарвис'
    DIO = 'AudioData/2 Дио'
    ITAN = 'AudioData/3 Итан'
    LADA = 'AudioData/4 Лада'
    MIRA = 'AudioData/5 Мира'
    
    p = li.util.find_files(FALSE_DATA)
    np.random.shuffle(p)
    PATHS = li.util.find_files(LADA) + li.util.find_files(NOISE) + p[:nums]
    
    np.random.shuffle(PATHS)
    
    START_TIME = time.time()
    
    data = list()
    labels = list()
    
    with ThreadPoolExecutor(1) as pool:
        DATA = list(pool.map(get_audio, PATHS))
    
    for d, l in DATA:
        data += d
        labels += l

    data = np.array(data)
    labels = np.array(labels)
    data = np.reshape(data, [*data.shape, 1])
    labels = np.reshape(labels, [*labels.shape, 1])
    print('Loading at {:.2f} seconds'.format(time.time()-START_TIME))
    return (data, labels)

In [9]:
data, labels = get_labeled_data(100)

Loading at 37.31 seconds


In [10]:
data.shape, labels.shape

((323, 44100, 1), (323, 1))

In [12]:
TRAIN = 200
tf.math.reduce_sum(labels[:TRAIN]), tf.math.reduce_sum(labels[TRAIN:])

(<tf.Tensor: shape=(), dtype=int32, numpy=50>,
 <tf.Tensor: shape=(), dtype=int32, numpy=22>)

In [13]:
train_dataset = tf.data.Dataset.from_tensor_slices((data[:TRAIN], labels[:TRAIN]))
train_dataset = train_dataset.batch(32)
print(train_dataset)

validation_dataset = tf.data.Dataset.from_tensor_slices((data[TRAIN:], labels[TRAIN:]))
validation_dataset = validation_dataset.batch(32)
print(validation_dataset)

<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>
<BatchDataset shapes: ((None, 44100, 1), (None, 1)), types: (tf.float32, tf.int32)>


In [14]:
print(len(train_dataset) * 64)
print(len(validation_dataset) * 64)

448
256


In [15]:
root_model = tf.keras.models.load_model('models/conv/RootModel-v3.0.h5')

In [16]:
model = tf.keras.models.clone_model(root_model)
model._name = 'LadaConvV3.0'

model.summary()

Model: "LadaConvV3.0"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_68 (Conv1D)           (None, 44100, 10)         1000      
_________________________________________________________________
dropout_68 (Dropout)         (None, 44100, 10)         0         
_________________________________________________________________
re_lu_68 (ReLU)              (None, 44100, 10)         0         
_________________________________________________________________
conv1d_69 (Conv1D)           (None, 44100, 1)          100       
_________________________________________________________________
dropout_69 (Dropout)         (None, 44100, 1)          0         
_________________________________________________________________
re_lu_69 (ReLU)              (None, 44100, 1)          0         
_________________________________________________________________
conv1d_70 (Conv1D)           (None, 100, 20)          

In [17]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.Recall(name='rec'),
        tf.keras.metrics.Precision(name='prec'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.BinaryAccuracy(name='acc'),
    ]
)

In [22]:
model.fit(
    train_dataset,
    epochs=50,
    verbose=2
)

Epoch 1/50
7/7 - 0s - loss: 0.1097 - rec: 0.9200 - prec: 0.9200 - auc: 0.9906 - acc: 0.9600
Epoch 2/50
7/7 - 0s - loss: 0.0752 - rec: 0.9400 - prec: 0.9592 - auc: 0.9946 - acc: 0.9750
Epoch 3/50
7/7 - 0s - loss: 0.1103 - rec: 0.9600 - prec: 0.8727 - auc: 0.9915 - acc: 0.9550
Epoch 4/50
7/7 - 0s - loss: 0.0674 - rec: 0.9400 - prec: 0.9792 - auc: 0.9924 - acc: 0.9800
Epoch 5/50
7/7 - 0s - loss: 0.1313 - rec: 0.9000 - prec: 0.9574 - auc: 0.9822 - acc: 0.9650
Epoch 6/50
7/7 - 0s - loss: 0.0657 - rec: 0.9800 - prec: 0.9423 - auc: 0.9957 - acc: 0.9800
Epoch 7/50
7/7 - 0s - loss: 0.0749 - rec: 0.9600 - prec: 0.9600 - auc: 0.9919 - acc: 0.9800
Epoch 8/50
7/7 - 0s - loss: 0.0706 - rec: 0.9600 - prec: 0.9796 - auc: 0.9865 - acc: 0.9850
Epoch 9/50
7/7 - 0s - loss: 0.0498 - rec: 0.9800 - prec: 0.9608 - auc: 0.9949 - acc: 0.9850
Epoch 10/50
7/7 - 0s - loss: 0.0606 - rec: 0.9600 - prec: 0.9796 - auc: 0.9952 - acc: 0.9850
Epoch 11/50
7/7 - 0s - loss: 0.0996 - rec: 0.9400 - prec: 0.9400 - auc: 0.9909 

<tensorflow.python.keras.callbacks.History at 0x209c40962c8>

In [23]:
model.evaluate(validation_dataset, verbose=2)

4/4 - 0s - loss: 0.0814 - rec: 1.0000 - prec: 0.7857 - auc: 1.0000 - acc: 0.9512


[0.08136299252510071,
 1.0,
 0.7857142686843872,
 0.9999999403953552,
 0.9512194991111755]

In [24]:
model.save('models/conv/Lada/{}.h5'.format(model.name))

In [None]:
model.evaluate(validation_dataset, verbose=2)

In [10]:
tested_model = tf.keras.models.load_model('models/conv/Mira/MiraConvV2.2.h5')

In [11]:
tested_model.summary()

Model: "MiraConvV2.2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_10 (Reshape)         (None, 4410, 10)          0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 441, 10)           1000      
_________________________________________________________________
dropout_17 (Dropout)         (None, 441, 10)           0         
_________________________________________________________________
re_lu_17 (ReLU)              (None, 441, 10)           0         
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 21, 10)            2100      
_________________________________________________________________
dropout_18 (Dropout)         (None, 21, 10)            0         
_________________________________________________________________
re_lu_18 (ReLU)              (None, 21, 10)           

In [12]:
tested_model.evaluate(train_dataset)



[0.018816368654370308,
 1.0,
 0.971222996711731,
 0.9989398121833801,
 0.9968000054359436]

In [13]:
tested_model.evaluate(validation_dataset)



[0.016146354377269745,
 1.0,
 0.9729729890823364,
 0.9995825886726379,
 0.9964563846588135]

In [24]:
tested_model.input_shape

(None, 44100, 1)