In [1]:
from preprocessing import Audio
import tensorflow as tf
import tf_train as tft
import numpy as np
import os
import matplotlib.pyplot as plt
import scipy as sp
import librosa as li
import librosa.display
import IPython.display as ipd

%matplotlib inline
#tf.debugging.set_log_device_placement(True)
GPU = tf.config.list_physical_devices('GPU')
CPU = tf.config.list_physical_devices('CPU')
DEVICE = GPU[0].name if GPU else CPU[0].name
DEVICE

'/physical_device:GPU:0'

<h1>Подготовка данных для обучения и тестирования</h1>

In [15]:
def get_data():
    def norm(x):
        std = np.std(x, ddof=1)
        return (x - np.mean(x)) / std

    PATH_0 = r'AudioData/0 Данные' # negative class
    PATH_1 = r'AudioData/0 Шум' # noise
    PATH_2 = r'AudioData/1 Дио' # positive class

    PATHS = li.util.find_files(PATH_0)[:1000] + li.util.find_files(PATH_1) + li.util.find_files(PATH_2)
    N = len(PATHS)

    np.random.shuffle(PATHS)

    DATA = []
    LABELS = []
    for i, path in enumerate(PATHS):
        sound = Audio(path)
        e_parts = sound.augmented()
        for e in e_parts: 
            DATA.append(norm(e))
            LABELS.append(sound.label)
        print('Loading {:.1f}%'.format(i / N * 100))
        ipd.clear_output(wait=True)

    return (np.array(DATA), np.array(LABELS))

In [16]:
data, labels = get_data()

Loading 99.9%


In [17]:
data.shape, labels.shape

((2429, 20), (2429,))

In [18]:
BATCH_SIZE = len(data)
test_data = np.reshape(data, [BATCH_SIZE, 1, 20])
test_labels = np.reshape(labels, [1, BATCH_SIZE])

dataset = tf.data.Dataset.from_tensor_slices((test_data, labels))
dataset = dataset.batch(BATCH_SIZE)
dataset

<BatchDataset shapes: ((None, 1, 20), (None,)), types: (tf.float64, tf.int32)>

<h1>Создание модели нейронной сети</h1>

In [6]:
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(1, 20,)),
    tf.keras.layers.GRU(20, kernel_regularizer=tf.keras.regularizers.l2(0.01), reset_after=False),
    tf.keras.layers.Dense(2, activation='softmax')
], name='Jarvis_GRU-v3')

model.summary()

Model: "Jarvis_GRU-v3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 20)                2460      
_________________________________________________________________
dense (Dense)                (None, 2)                 42        
Total params: 2,502
Trainable params: 2,502
Non-trainable params: 0
_________________________________________________________________


<h1>Обучение</h1>

In [7]:
tft.train(
    train_dataset=dataset,
    model=model,
    epochs=5000,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[
        tft.metrics.Recall(),
        tft.metrics.Precision(),
        tft.metrics.AUC(),
    ],
    frequency=100
)

Epoch 0 -- loss_value: 0.717118, Recall: 0.716049, Precision: 0.480000, AUC: 0.552514, time - 0.322s
Epoch 100 -- loss_value: 0.440858, Recall: 0.837768, Precision: 0.662034, AUC: 0.750447, time - 3.390s
Epoch 200 -- loss_value: 0.357950, Recall: 0.849579, Precision: 0.715751, AUC: 0.791909, time - 3.591s
Epoch 300 -- loss_value: 0.310656, Recall: 0.857168, Precision: 0.746872, AUC: 0.814170, time - 4.029s
Epoch 400 -- loss_value: 0.277048, Recall: 0.864629, Precision: 0.766421, AUC: 0.828533, time - 3.994s
Epoch 500 -- loss_value: 0.250520, Recall: 0.873483, Precision: 0.781556, AUC: 0.840590, time - 3.932s
Epoch 600 -- loss_value: 0.227603, Recall: 0.881939, Precision: 0.794275, AUC: 0.851004, time - 3.948s
Epoch 700 -- loss_value: 0.206738, Recall: 0.890739, Precision: 0.805365, AUC: 0.860588, time - 3.910s
Epoch 800 -- loss_value: 0.187917, Recall: 0.898662, Precision: 0.815384, AUC: 0.869196, time - 3.924s
Epoch 900 -- loss_value: 0.171810, Recall: 0.905800, Precision: 0.824357, A

In [8]:
model.save(r'models\v3\{}.h5'.format(model.name))

In [13]:
model = tf.keras.models.load_model(r'models/v3/Jarvis_GRU-v3.h5')
model.summary()

Model: "Jarvis_GRU-v3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 20)                2460      
_________________________________________________________________
dense (Dense)                (None, 2)                 42        
Total params: 2,502
Trainable params: 2,502
Non-trainable params: 0
_________________________________________________________________


In [26]:
tft.evaluate(
    dataset,
    model,
    loss=tft.losses.SparseCategoricalCrossentropy(),
    metrics=[
        tft.metrics.Recall(),
        tft.metrics.Precision(),
        tft.metrics.AUC(),
    ],
)

loss_value: 1.870327, Recall: 0.462441, Precision: 1.000000, AUC: 0.731221, 


In [22]:
n = 0

for i in range(len(labels)):
    n += int(labels[i] == predictions[i])

print(n, n / len(labels) * 100)

NameError: name 'predictions' is not defined

In [41]:
predictions[60:70], labels[60:70]

(<tf.Tensor: shape=(10,), dtype=int64, numpy=array([0, 0, 1, 1, 0, 0, 0, 0, 3, 0], dtype=int64)>,
 array([0, 0, 1, 1, 0, 0, 0, 0, 3, 0]))

In [40]:
predictions[:15], labels[:15]

(<tf.Tensor: shape=(15,), dtype=int64, numpy=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=int64)>,
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]))

In [30]:
labels[60:70]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [69]:
filename = 'test.tfrecord'
writer_data = tf.data.experimental.TFRecordWriter("datasets\Mira\data.tfrecord")
writer_labels = tf.data.experimental.TFRecordWriter("datasets\Mira\labels.tfrecord")
test_data = tf.data.Dataset.from_tensor_slices(data)
test_labels = tf.data.Dataset.from_tensor_slices(labels)
test_data = test_data.map(lambda x: str(x))
test_labels = test_labels.map(lambda x: str(x))
writer_data.write(test_data)
writer_labels.write(test_labels)

In [75]:
loaded_dataset = tf.data.TFRecordDataset(["datasets\Mira\labels.tfrecord"])

In [76]:
for d in loaded_dataset.take(1):
    print(d)

tf.Tensor(b'Tensor("args_0:0", shape=(), dtype=int32)', shape=(), dtype=string)


In [77]:
loaded_dataset

<TFRecordDatasetV2 shapes: (), types: tf.string>

In [83]:
for d in test_data.take(1):
    print(d.numpy())

b'Tensor("args_0:0", shape=(20,), dtype=float64)'


In [74]:
m = tf.keras.metrics.Recall(class_id=1)
m.update_state([1, 0, 1, 2], [1, 0, 1, 2])
m.result().numpy()

InvalidArgumentError: predictions must be <= 1
Condition x <= y did not hold.
First 3 elements of x:
[1. 0. 1.]
First 1 elements of y:
[1.]

In [82]:
y_pred = tf.nn.softmax(model(test_data[:1]))[0]
y_pred

<tf.Tensor: shape=(6,), dtype=float32, numpy=
array([0.98039216, 0.00272254, 0.00372326, 0.00742231, 0.00221274,
       0.00352691], dtype=float32)>

In [98]:
m = tf.keras.metrics.TruePositives()
m.update_state([1, 2, 3], [1, 2, 3])
m.result().numpy()

InvalidArgumentError: predictions must be <= 1
Condition x <= y did not hold.
First 3 elements of x:
[1. 2. 3.]
First 1 elements of y:
[1.]

In [94]:
predictions[:100], labels[:100]

(<tf.Tensor: shape=(100,), dtype=int64, numpy=
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3,
        0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)>,
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3,
        0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0]))