In [2]:
from preprocessing import Audio
import tensorflow as tf
import tf_train as tft
import numpy as np
import os
import matplotlib.pyplot as plt
import scipy as sp
import librosa as li
import librosa.display
import IPython.display as ipd

%matplotlib inline
#tf.debugging.set_log_device_placement(True)

In [3]:
GPU = tf.config.list_logical_devices('GPU')
CPU = tf.config.list_logical_devices('CPU')
DEVICE = GPU[0].name if GPU else CPU[0].name
DEVICE

'/device:GPU:0'

<h1>Подготовка данных для обучения и тестирования</h1>

In [4]:
def get_data():
    def norm(x):
        std = np.std(x ,ddof=1)
        return (x - np.mean(x)) / std

    PATH_0 = r'AudioData/0 Данные' # negative class
    PATH_1 = r'AudioData/1 Мира' # positive class

    PATHS = li.util.find_files(PATH_0)[:1000] + li.util.find_files(PATH_1)
    N = len(PATHS)

    np.random.shuffle(PATHS)

    SOUNDS = []
    for path in PATHS:
        SOUNDS.append(Audio(path))
        print('Loading {:.1f}%'.format(len(SOUNDS) / N * 100))
        ipd.clear_output(wait=True)
    DATA = []
    LABELS = []

    for sound in SOUNDS:
        DATA.append(norm(sound.e_parts))
        LABELS.append(sound.label)

    return (np.array(DATA), np.array(LABELS))

In [19]:
data, labels = get_data()

Loading 100.0%


In [6]:
data.shape, labels.shape

((1070, 20), (1070,))

In [7]:
dataset = tf.data.Dataset.from_tensor_slices((data, labels))
dataset = dataset.batch(107)
dataset

<BatchDataset shapes: ((None, 20), (None,)), types: (tf.float64, tf.int32)>

In [188]:
data = np.reshape(data, (10070, 20))

In [211]:
labels = np.reshape(labels, (10070,))

In [209]:
labels

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])

<h1>Создание модели нейронной сети</h1>

In [8]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(12, activation='tanh', kernel_regularizer=tf.keras.regularizers.l2(0.01),
                          input_shape=(20,)),
    tf.keras.layers.Dense(2, activation='softmax')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 12)                252       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 26        
Total params: 278
Trainable params: 278
Non-trainable params: 0
_________________________________________________________________


In [204]:
y_pred = model.predict(data)
print(y_pred[:10])
y_pred = np.argmax(y_pred, axis=-1)
print(y_pred[:10])
print(labels[:10])

[[9.9888867e-01 1.1113590e-03]
 [9.9939823e-01 6.0171861e-04]
 [9.7778863e-01 2.2211321e-02]
 [9.9991632e-01 8.3627907e-05]
 [9.5554715e-01 4.4452846e-02]
 [9.9310726e-01 6.8927575e-03]
 [9.9917573e-01 8.2422997e-04]
 [9.9672639e-01 3.2735907e-03]
 [9.9872667e-01 1.2733137e-03]
 [9.9947780e-01 5.2216806e-04]]
[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]


# Клонирование модели для transfer learning

In [9]:
model = tf.keras.models.load_model('Джарвис.h5')
model = tf.keras.models.clone_model(model)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 12)                252       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 26        
Total params: 278
Trainable params: 278
Non-trainable params: 0
_________________________________________________________________


<h1>Обучение</h1>

In [17]:
tft.train(
    train_dataset=dataset,
    model=model,
    epochs=200,
    loss=tf.keras.losses.SparseCategoricalCrossentropy()
)

Epoch 000 -- Loss(SparseCategoricalCrossentropy): 0.002, Recall: 0.8710607886314392, Precision: 0.9353457689285278
Epoch 010 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8718108534812927, Precision: 0.9357494711875916
Epoch 020 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8725522756576538, Precision: 0.9361481666564941
Epoch 030 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8732851147651672, Precision: 0.9365419149398804
Epoch 040 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8740096092224121, Precision: 0.9369308352470398
Epoch 050 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.874725878238678, Precision: 0.937315046787262
Epoch 060 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8754340410232544, Precision: 0.9376946091651917
Epoch 070 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.8761342763900757, Precision: 0.9380695819854736
Epoch 080 -- Loss(SparseCategoricalCrossentropy): 0.001, Recall: 0.876826643943786

In [18]:
model.save('Mira-v1.0.h5')

In [20]:
predictions = tft.predict_classes(model=model, features=data)

In [22]:
n = 0

for i in range(len(labels)):
    n += int(labels[i] == predictions[i])

print(n, n / len(labels))

1070 1.0
