# Загрузка сохраненной модели

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the previously saved Keras model from the working directory.
# test_on_filepath reads this module-level `model` when it predicts.
model = keras.models.load_model('voice_model1.keras')

In [2]:
def test_on_filepath(path, max_len=1505):
    """Run the loaded model on one audio file and return its raw prediction.

    The file is decoded with ``librosa.load``, converted to an MFCC matrix
    using librosa's default settings, zero-padded or truncated along the
    frame axis to exactly ``max_len`` frames, flattened into a single row
    and passed to the module-level ``model``.

    Args:
        path: Path of the audio file to score.
        max_len: Number of MFCC frames kept per file. It determines the
            width of the row given to the model, so it has to match what
            the model was built for. The default, 1505, is the value this
            notebook has always used.

    Returns:
        Whatever ``model.predict`` returns for a batch of one row. In the
        outputs recorded in this notebook that is a float32 array of
        shape (1, 1).
    """
    audio, sr = librosa.load(path)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr)

    n_frames = mfccs.shape[1]
    if n_frames < max_len:
        # Short clip: right-pad the frame axis with zeros.
        mfccs = np.pad(
            mfccs,
            pad_width=((0, 0), (0, max_len - n_frames)),
            mode='constant',
        )
    elif n_frames > max_len:
        # Long clip: keep only the first max_len frames. Without this the
        # flattened row would be wider than the rows built from padded clips.
        # NOTE(review): presumably the model expects a fixed width -- confirm
        # against the training notebook that truncation is the right policy.
        mfccs = mfccs[:, :max_len]

    # One row per file: (n_coefficients, max_len) -> (1, n_coefficients * max_len).
    features = mfccs.reshape(1, -1)
    return model.predict(features)

In [3]:
# Smoke test: score a single clip and display the raw model output.
first_test = 'parts/out00.wav'
prediction = test_on_filepath(first_test)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


array([[0.99579376]], dtype=float32)

# Проверка на тишине

In [4]:
# Repeat the preprocessing steps of test_on_filepath by hand on a silence
# recording so the padded MFCC matrix can be inspected before flattening.
path = 'big_test/dictophone_silence.wav'

max_len = 1505  # same frame count that test_on_filepath pads to
X = []
audio, sr = librosa.load(path)
mfccs = librosa.feature.mfcc(y=audio, sr=sr)
# Zero-pad axis 1 up to max_len; inputs that are already longer are left as-is.
if mfccs.shape[1] < max_len:
    pad_width = max_len - mfccs.shape[1]
    mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
X.append(mfccs)
X_arr = np.array(X)  # single-element list -> array with a leading axis of length 1

X_arr

array([[[-794.1729   , -719.861    , -696.18726  , ...,    0.       ,
            0.       ,    0.       ],
        [ 108.04326  ,  113.1749   ,  118.83409  , ...,    0.       ,
            0.       ,    0.       ],
        [ -10.671621 ,  -16.215992 ,  -19.375954 , ...,    0.       ,
            0.       ,    0.       ],
        ...,
        [  -4.8956213,   -8.358231 ,   -5.128322 , ...,    0.       ,
            0.       ,    0.       ],
        [   2.2934937,   -2.5246234,   -3.7941484, ...,    0.       ,
            0.       ,    0.       ],
        [   2.4417331,    5.2074404,    2.4346085, ...,    0.       ,
            0.       ,    0.       ]]], dtype=float32)

In [5]:
# Score the silence recording selected in the previous cell (`path`).
prd1 = test_on_filepath(path)
# Only the last expression of a cell is displayed, so this bare name shows nothing.
prd1
# Scalar score taken from the first row and first column of the prediction.
# NOTE(review): the recorded value for silence (0.99579376) is identical to the
# one recorded for 'parts/out00.wav' -- worth checking whether the model output
# is saturating.
prd1[0][0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


0.99579376

# Проверки

In [6]:
import os

# Score every file in big_test/real. A file counts as correctly classified
# only when its score is below REAL_THRESHOLD; everything else, including
# mid-range scores, is recorded as a failure.
REAL_DIR = 'big_test/real'
REAL_THRESHOLD = 0.2

succ = []   # (filename, score) for files scored below the threshold
fail = []   # (filename, score) for files scored at or above the threshold
total = []  # every (filename, score) seen; the big_test/false cell appends here too

# os.listdir returns entries in arbitrary order; a case-insensitive sort keeps
# the result lists reproducible across runs and platforms.
for filename in sorted(os.listdir(REAL_DIR), key=str.casefold):
    path = os.path.join(REAL_DIR, filename)
    if not os.path.isfile(path):
        continue  # skip sub-directories and other non-file entries

    # The prediction is a 2-D array; take the single scalar score out of it.
    score = test_on_filepath(path)[0][0]
    if score < REAL_THRESHOLD:
        succ.append((filename, score))
    else:
        fail.append((filename, score))
    total.append((filename, score))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18

In [7]:
# Number of big_test/real files whose score was below 0.2.
len(succ)

5

In [8]:
# (filename, score) pairs for the big_test/real files scored below 0.2.
succ

[('audiobook.wav', 0.0003006101),
 ('karamishev_short.wav', 0.0003006101),
 ('Stengach.wav', 0.04712905),
 ('triplewipe.wav', 0.00030074822),
 ('voeka.ogg', 0.04712905)]

In [9]:
# (filename, score) pairs for the big_test/real files scored 0.2 or higher.
fail

[('Aleksandr.ogg', 0.99579376),
 ('cats_real.wav', 0.99579376),
 ('cats_real1.wav', 0.99579376),
 ('common_voice_en_39586349.mp3', 0.99564725),
 ('common_voice_en_39586438.mp3', 0.9941737),
 ('common_voice_en_39586526.mp3', 0.99579376),
 ('common_voice_en_39586661.mp3', 0.99579376),
 ('common_voice_ru_39586754.mp3', 0.99579376),
 ('common_voice_ru_39602077.mp3', 0.99579376),
 ('common_voice_ru_39604682.mp3', 0.99564725),
 ('common_voice_ru_39622313.mp3', 0.99417126),
 ('common_voice_ru_39622341.mp3', 0.9941755),
 ('common_voice_ru_39802831.mp3', 0.99579376),
 ('common_voice_ru_39980145.mp3', 0.99579376),
 ('common_voice_ru_39980181.mp3', 0.99579376),
 ('common_voice_ru_40179331.mp3', 0.99417126),
 ('holochka.wav', 0.99579376),
 ('Koshkina_shorter.wav', 0.99579376),
 ('maelstorm.wav', 0.97348094),
 ('stopgame.wav', 0.99579376)]

In [10]:
# Score every file in big_test/false (judging by the file names, synthesized
# speech). A file counts as correctly classified only when its score is above
# FALSE_THRESHOLD; everything else, including mid-range scores, is a failure.
FALSE_DIR = 'big_test/false'
FALSE_THRESHOLD = 0.8

succ2 = []  # (filename, score) for files scored above the threshold
fail2 = []  # (filename, score) for files scored at or below the threshold

# os.listdir returns entries in arbitrary order; a case-insensitive sort keeps
# the result lists reproducible across runs and platforms.
for filename in sorted(os.listdir(FALSE_DIR), key=str.casefold):
    path = os.path.join(FALSE_DIR, filename)
    if not os.path.isfile(path):
        continue  # skip sub-directories and other non-file entries

    # The prediction is a 2-D array; take the single scalar score out of it.
    score = test_on_filepath(path)[0][0]
    if score > FALSE_THRESHOLD:
        succ2.append((filename, score))
    else:
        fail2.append((filename, score))
    # `total` is created by the big_test/real cell; re-running only this cell
    # appends the same files to it again.
    total.append((filename, score))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19

In [11]:
# Same list as displayed earlier: big_test/real files scored below 0.2.
succ

[('audiobook.wav', 0.0003006101),
 ('karamishev_short.wav', 0.0003006101),
 ('Stengach.wav', 0.04712905),
 ('triplewipe.wav', 0.00030074822),
 ('voeka.ogg', 0.04712905)]

In [12]:
# Number of big_test/false files whose score was above 0.8.
len(succ2)

22

In [13]:
# (filename, score) pairs for the big_test/false files scored above 0.8.
succ2

[('elevenlabs.mp3', 0.99579376),
 ('sber_salut1_short.wav', 0.99579376),
 ('sber_salut3_short.wav', 0.97124964),
 ('sber_salut4.wav', 0.99579376),
 ('sber_salut5.wav', 0.99579376),
 ('tts_online_short.wav', 0.99579376),
 ('voicemaker1.wav', 0.99579376),
 ('voicemaker2.wav', 0.99579376),
 ('voicemaker3.wav', 0.99579376),
 ('voicemaker4.wav', 0.99579376),
 ('vosktts1.wav', 0.99579376),
 ('vosktts2_short.wav', 0.99579376),
 ('vosktts3.wav', 0.99579376),
 ('vosktts5.wav', 0.99579376),
 ('yandex_tts1.ogg', 0.99579376),
 ('yandex_tts_2.ogg', 0.99579376),
 ('yandex_tts_3.ogg', 0.99579376),
 ('Алиса1.wav', 0.99579376),
 ('Алиса2.wav', 0.99579376),
 ('Алиса3.wav', 0.99579376),
 ('Алиса4.wav', 0.99579376),
 ('Алиса5.wav', 0.99579376)]

In [14]:
# (filename, score) pairs for the big_test/false files scored 0.8 or lower.
fail2

[('sber_salut2_short.wav', 0.0003006101),
 ('tts_short.wav', 0.04105812),
 ('vosktts4.wav', 0.0003006101)]

In [15]:
# Overall accuracy: correctly classified files from both directories divided
# by the number of files actually evaluated, rather than a hard-coded count.
# Files whose score falls between the two thresholds count as misclassified.
n_correct = len(succ) + len(succ2)
n_evaluated = n_correct + len(fail) + len(fail2)
n_correct / n_evaluated

0.54