In [2]:
#  In this notebook we use the provided data to train a convolutional network that recognises
#  the speaker's gender.
#  We rely on the librosa package and, in particular, on its cepstral-coefficient (MFCC) function,
#  because the cepstral transform makes it possible to decompose the acoustic components
#  of speech into two kinds: those produced by the vocal cords and those produced by the vocal tract.

In [8]:
# загрузим библиотеки
import librosa
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Read the user IDs together with their class labels (tab-separated file, no header row).
df = pd.read_csv(
    'train/targets.tsv',
    header=None,
    sep='\t',
)

In [773]:
# Give the two unnamed columns (0 and 1, because the file has no header) readable names,
# then preview the first rows.
df = df.rename(columns={0: 'name', 1: 'label'})
df.head()

Unnamed: 0,name,label
0,5d1f7e43366513a1d0a6ec5640c3dc24,1
1,9a701a4536a05b6610a590a9fe702ed8,1
2,cad0b8547008d1524c1a0e5fd51f9908,1
3,4bbe607e7dc95460e2cc1a6ee5f4dfa6,0
4,30fb32cba90b34af26f3f14f5d636805,0


In [5]:
# Build the sorted list of training audio files (as plain strings) that we will walk over.
paths = [str(wav_file) for wav_file in sorted(Path('train').glob('**/*.wav'))]

In [569]:
# Probe the input dimensionality on part of the data: for each of 12 batches of
# 1000 files record the largest number of MFCC frames seen in that batch.
# This helps to pick a sensible fixed length for the training vectors.
shape_list = []
for batch in range(12):
    print('now k is:', batch)
    widest = 0
    for idx in range(batch * 1000, (batch + 1) * 1000):
        signal, _rate = librosa.load(paths[idx])
        coeffs = librosa.feature.mfcc(y=signal, hop_length=2048, n_mfcc=20)
        # axis 1 of the MFCC matrix is the frame axis whose length varies per file
        widest = max(widest, coeffs.shape[1])
    shape_list.append(widest)

now k is: 0
now k is: 1
now k is: 2
now k is: 3
now k is: 4
now k is: 5
now k is: 6
now k is: 7
now k is: 8
now k is: 9
now k is: 10
now k is: 11


In [570]:
# Show the per-batch maximum MFCC frame counts collected by the probing loop.
shape_list

[110, 106, 107, 106, 101, 112, 105, 107, 106, 133, 111, 121]

In [778]:
# Display the labels table (the rendered output elides the middle rows).
df

Unnamed: 0,name,label
0,5d1f7e43366513a1d0a6ec5640c3dc24,1
1,9a701a4536a05b6610a590a9fe702ed8,1
2,cad0b8547008d1524c1a0e5fd51f9908,1
3,4bbe607e7dc95460e2cc1a6ee5f4dfa6,0
4,30fb32cba90b34af26f3f14f5d636805,0
...,...,...
13931,215443bcce2003d58d7dc6cd53af73ed,1
13932,d26e5ebee3589530a4ec735ba1baab83,0
13933,b9ae9428d4d7d279068aea06365d5736,1
13934,3da22010bcdd5b47d7f2d3ceea190195,0


In [281]:
def pad2d(a, i):
    """Bring a 2-D array to exactly ``i`` columns so all samples share one size.

    Arrays wider than ``i`` are cut to their first ``i`` columns; every other
    array gets zero-filled columns appended on the right.

    Parameters:
        a: array indexed as (rows, columns).
        i: target number of columns.

    Returns:
        An array with ``a.shape[0]`` rows and ``i`` columns.
    """
    rows, cols = a.shape[0], a.shape[1]
    if cols > i:
        return a[:, 0:i]
    filler = np.zeros((rows, i - cols))
    return np.hstack((a, filler))

In [758]:
# Walk over the audio files, extract the cepstral coefficients and build the
# feature matrix: one row per file, 20 coefficients x 120 frames flattened to 2400 values.
features = np.zeros((df.shape[0], 20 * 120))
id_list = []
for i in range(df.shape[0]):
    if i % 500 == 0:
        print('interation n:', i)
    # The ID is the file name without directory and extension. The previous fixed
    # slice [5:37] was off by one for files directly under 'train' (the prefix plus
    # separator is 6 characters), so it kept the separator and lost the last character.
    id_list.append(Path(paths[i]).stem)
    audio_data = paths[i]
    x, sr = librosa.load(audio_data)
    mfccs = librosa.feature.mfcc(y=x, hop_length=2048, n_mfcc=20)
    # Pad/truncate to 120 frames, then flatten row by row into the i-th feature row.
    features[i] = pad2d(mfccs, 120).reshape(-1)

interation n: 0
interation n: 500
interation n: 1000
interation n: 1500
interation n: 2000
interation n: 2500
interation n: 3000
interation n: 3500
interation n: 4000
interation n: 4500
interation n: 5000
interation n: 5500
interation n: 6000
interation n: 6500
interation n: 7000
interation n: 7500
interation n: 8000
interation n: 8500
interation n: 9000
interation n: 9500
interation n: 10000
interation n: 10500
interation n: 11000
interation n: 11500
interation n: 12000
interation n: 12500
interation n: 13000
interation n: 13500


In [781]:
# Build the list of user IDs, in the same order as the feature rows; it is the
# merge key used in the following steps.
# Path(...).stem takes the file name without directory and ".wav" extension, so the
# result no longer depends on hard-coded character offsets such as [6:38].
id_list = [Path(wav_path).stem for wav_path in paths[:df.shape[0]]]
    

In [782]:
# Wrap the feature matrix in a DataFrame (columns are the flattened MFCC positions).
tr = pd.DataFrame(data=features)

In [783]:
# Preview the feature table: one row per audio file, 2400 flattened MFCC columns.
tr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2390,2391,2392,2393,2394,2395,2396,2397,2398,2399
0,-628.833801,-628.833801,-628.833801,-628.833801,-606.691711,-485.312347,-485.061340,-543.183411,-550.882263,-550.932373,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-611.677490,-611.677490,-611.677490,-611.677490,-598.715271,-487.092682,-535.162170,-546.028564,-547.844604,-551.484680,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-670.714111,-670.714111,-670.714111,-670.714111,-670.714111,-589.636475,-614.829773,-614.987488,-616.455261,-608.108643,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-678.388733,-678.388733,-678.388733,-678.388733,-657.981812,-585.499695,-602.251282,-598.230957,-608.392822,-609.428345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-592.195129,-592.195129,-592.195129,-592.195129,-592.195129,-492.486145,-553.229370,-561.879639,-562.618469,-557.126709,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13931,-634.443115,-634.443115,-634.443115,-634.443115,-634.443115,-559.771912,-590.565125,-584.975403,-580.895142,-577.573120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13932,-667.716309,-667.716309,-667.716309,-667.716309,-628.381775,-606.434082,-607.160278,-610.619263,-610.280334,-606.393311,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13933,-699.919617,-699.919617,-699.919617,-699.919617,-675.964294,-590.528870,-602.023193,-615.354248,-624.475586,-620.382446,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13934,-745.411438,-745.411438,-745.411438,-745.411438,-708.458862,-626.179810,-637.009583,-646.173462,-649.032166,-654.421204,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [784]:
# Attach the user IDs as an extra "id" column.
tr = tr.assign(id=id_list)

In [785]:
# Preview the feature table again, now with the id column appended.
tr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2391,2392,2393,2394,2395,2396,2397,2398,2399,id
0,-628.833801,-628.833801,-628.833801,-628.833801,-606.691711,-485.312347,-485.061340,-543.183411,-550.882263,-550.932373,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0006238dc99eaf68957dfc81826d1071
1,-611.677490,-611.677490,-611.677490,-611.677490,-598.715271,-487.092682,-535.162170,-546.028564,-547.844604,-551.484680,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0007b9d41374f46f25cb35f13dc97e5e
2,-670.714111,-670.714111,-670.714111,-670.714111,-670.714111,-589.636475,-614.829773,-614.987488,-616.455261,-608.108643,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000ad36ce0dcbc1032a606312d5e787d
3,-678.388733,-678.388733,-678.388733,-678.388733,-657.981812,-585.499695,-602.251282,-598.230957,-608.392822,-609.428345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0012112033f3f6c687c003cee20d0ba2
4,-592.195129,-592.195129,-592.195129,-592.195129,-592.195129,-492.486145,-553.229370,-561.879639,-562.618469,-557.126709,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00122174f87c115e13f69cd685477387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13931,-634.443115,-634.443115,-634.443115,-634.443115,-634.443115,-559.771912,-590.565125,-584.975403,-580.895142,-577.573120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff5a2701953ff9df1533fe3fb3f210b
13932,-667.716309,-667.716309,-667.716309,-667.716309,-628.381775,-606.434082,-607.160278,-610.619263,-610.280334,-606.393311,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff802e714e6b3d255228a1587e4c28d
13933,-699.919617,-699.919617,-699.919617,-699.919617,-675.964294,-590.528870,-602.023193,-615.354248,-624.475586,-620.382446,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffad2684cd243805a27efc8ee351d4c
13934,-745.411438,-745.411438,-745.411438,-745.411438,-708.458862,-626.179810,-637.009583,-646.173462,-649.032166,-654.421204,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffb44f5a8f5bf40200830db37dfcc96


In [786]:
# Join features and labels on the user ID (tr.id against df.name).
trainset = pd.merge(tr, df, left_on="id", right_on="name")

In [787]:
# "name" duplicates "id" after the merge, so drop it to keep it out of the way.
trainset = trainset.drop(columns='name')

In [803]:
# The data is almost ready; what remains is to split it into X and y and to scale the features.
trainset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2392,2393,2394,2395,2396,2397,2398,2399,id,label
0,-628.833801,-628.833801,-628.833801,-628.833801,-606.691711,-485.312347,-485.061340,-543.183411,-550.882263,-550.932373,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0006238dc99eaf68957dfc81826d1071,1
1,-611.677490,-611.677490,-611.677490,-611.677490,-598.715271,-487.092682,-535.162170,-546.028564,-547.844604,-551.484680,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0007b9d41374f46f25cb35f13dc97e5e,1
2,-670.714111,-670.714111,-670.714111,-670.714111,-670.714111,-589.636475,-614.829773,-614.987488,-616.455261,-608.108643,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000ad36ce0dcbc1032a606312d5e787d,1
3,-678.388733,-678.388733,-678.388733,-678.388733,-657.981812,-585.499695,-602.251282,-598.230957,-608.392822,-609.428345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0012112033f3f6c687c003cee20d0ba2,0
4,-592.195129,-592.195129,-592.195129,-592.195129,-592.195129,-492.486145,-553.229370,-561.879639,-562.618469,-557.126709,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00122174f87c115e13f69cd685477387,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13931,-634.443115,-634.443115,-634.443115,-634.443115,-634.443115,-559.771912,-590.565125,-584.975403,-580.895142,-577.573120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff5a2701953ff9df1533fe3fb3f210b,0
13932,-667.716309,-667.716309,-667.716309,-667.716309,-628.381775,-606.434082,-607.160278,-610.619263,-610.280334,-606.393311,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff802e714e6b3d255228a1587e4c28d,0
13933,-699.919617,-699.919617,-699.919617,-699.919617,-675.964294,-590.528870,-602.023193,-615.354248,-624.475586,-620.382446,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffad2684cd243805a27efc8ee351d4c,1
13934,-745.411438,-745.411438,-745.411438,-745.411438,-708.458862,-626.179810,-637.009583,-646.173462,-649.032166,-654.421204,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffb44f5a8f5bf40200830db37dfcc96,1


In [804]:
# Target: the label column, kept as a one-column DataFrame.
y = trainset.loc[:, ['label']]

In [805]:
# Features: everything except the label (the id column is still included here).
X = trainset.drop(columns='label')

In [806]:
# Preview X; note that the id column is still present at this point.
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2391,2392,2393,2394,2395,2396,2397,2398,2399,id
0,-628.833801,-628.833801,-628.833801,-628.833801,-606.691711,-485.312347,-485.061340,-543.183411,-550.882263,-550.932373,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0006238dc99eaf68957dfc81826d1071
1,-611.677490,-611.677490,-611.677490,-611.677490,-598.715271,-487.092682,-535.162170,-546.028564,-547.844604,-551.484680,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0007b9d41374f46f25cb35f13dc97e5e
2,-670.714111,-670.714111,-670.714111,-670.714111,-670.714111,-589.636475,-614.829773,-614.987488,-616.455261,-608.108643,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000ad36ce0dcbc1032a606312d5e787d
3,-678.388733,-678.388733,-678.388733,-678.388733,-657.981812,-585.499695,-602.251282,-598.230957,-608.392822,-609.428345,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0012112033f3f6c687c003cee20d0ba2
4,-592.195129,-592.195129,-592.195129,-592.195129,-592.195129,-492.486145,-553.229370,-561.879639,-562.618469,-557.126709,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00122174f87c115e13f69cd685477387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13931,-634.443115,-634.443115,-634.443115,-634.443115,-634.443115,-559.771912,-590.565125,-584.975403,-580.895142,-577.573120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff5a2701953ff9df1533fe3fb3f210b
13932,-667.716309,-667.716309,-667.716309,-667.716309,-628.381775,-606.434082,-607.160278,-610.619263,-610.280334,-606.393311,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fff802e714e6b3d255228a1587e4c28d
13933,-699.919617,-699.919617,-699.919617,-699.919617,-675.964294,-590.528870,-602.023193,-615.354248,-624.475586,-620.382446,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffad2684cd243805a27efc8ee351d4c
13934,-745.411438,-745.411438,-745.411438,-745.411438,-708.458862,-626.179810,-637.009583,-646.173462,-649.032166,-654.421204,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fffb44f5a8f5bf40200830db37dfcc96


In [302]:
# скопипастим чей-то импорт библиотек чтобы не сходить с ума )
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
import warnings
warnings.filterwarnings('ignore')

In [807]:
# The id column was not dropped yet, so remove it here: only numeric MFCC features may remain.
# Built from trainset rather than from X, because X is rebound to a NumPy array by the
# scaling cell, after which X.drop(...) fails whenever this cell is re-run.
Xnoid = trainset.drop(columns=["id", "label"])

In [808]:
# The long-promised StandardScaler: standardise every feature column.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so the
# held-out rows influence the scaling statistics — consider fitting on the training part only.
scaler = StandardScaler()
scaler.fit(Xnoid)
X = scaler.transform(Xnoid)

In [354]:
# Baseline: a fully connected network that is supposed to lead us to a bright future (spoiler: it won't).
# The original cell sized the input from X2, which is not defined anywhere in this
# notebook (leftover kernel state). X is the scaled feature matrix built from
# trainset, so its column count is used as the input width instead.
model = Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [822]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
    shuffle=True,
)

In [306]:
# подгрузим метрики
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [None]:
# Train the baseline network.
# The original call passed X2, which is not defined anywhere in this notebook
# (leftover kernel state). X is used instead: it is row-aligned with y because both
# are derived from trainset.
classifier = model.fit(X,
                    y,
                    epochs=50,
                    batch_size=128)

In [150]:
# Predict with the baseline network.
# X2 is not defined anywhere in this notebook; X keeps the predictions row-aligned
# with y, which is what they are compared against below.
y_pred = model.predict(X)

In [166]:
# Turn the sigmoid outputs into hard 0/1 labels using a 0.5 threshold.
y_pred_log = (y_pred > 0.5).astype(int).ravel().tolist()
y_pred_log

[1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,


In [170]:
# True labels as a plain Python list, in the same row order as y.
y_lst = y['label'].values.tolist()
y_lst

[1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,


In [171]:
# Accuracy of the baseline on the same data it was fitted on, i.e. training accuracy,
# not a held-out estimate. Arguments follow scikit-learn's (y_true, y_pred) order.
accuracy_score(y_lst, y_pred_log)

1.0

In [582]:
# Number of training rows and feature columns after the split.
X_train.shape

(11646, 2400)

In [652]:
# As expected, the baseline did not reach the required 98% accuracy,
# so we go to the gym and bulk up: more and wider layers, all regularised.
def _dense_regularizers():
    """Return a fresh set of regularizer arguments for one Dense layer."""
    return dict(
        kernel_regularizer=regularizers.l1_l2(l1=1e-6, l2=1e-5),
        bias_regularizer=regularizers.l2(1e-5),
        activity_regularizer=regularizers.l2(1e-5),
    )


model2 = Sequential([
    layers.Dense(512, activation='relu', input_shape=(X_train.shape[1], ),
                 **_dense_regularizers()),
    layers.Dense(512, activation='relu', **_dense_regularizers()),
    layers.Dense(512, activation='relu', **_dense_regularizers()),
    # single sigmoid unit: probability of class 1
    layers.Dense(1, activation='sigmoid', **_dense_regularizers()),
])
model2.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

In [815]:
# Repetition is the mother of learning: train the bigger network,
# validating on the held-out split after every epoch.
classifier2 = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=40,
    shuffle=True,
    validation_data=(X_test, y_test),
)

Train on 11148 samples, validate on 2788 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [635]:
# Almost 95% on the held-out split — getting more cheerful?
y_pred2 = model2.predict(X_test)
y_lst2 = y_test['label'].tolist()
# Threshold all sigmoid outputs at once instead of converting each prediction row with int().
y_pred2log = (y_pred2 > 0.5).astype(int).ravel().tolist()
# scikit-learn's signature is accuracy_score(y_true, y_pred).
accuracy_score(y_lst2, y_pred2log)

0.9486133768352365

In [141]:
# Exploratory leftover (the author's note: unclear what it is, too lazy to untangle,
# too scary to delete). MFCCs with hop_length=512, padded/truncated to 100 frames.
# NOTE(review): features2 / id_list2 are not used by any later cell in this notebook,
# and paths2 is only assigned in a later cell, so this fails on a fresh top-to-bottom run.
# NOTE(review): 2000 columns presumably means 100 frames x librosa's default MFCC count — confirm.
features2 = np.zeros((len(paths2), 2000))
id_list2 = []
for i in range(len(paths2)):
    if i % 500 == 0:
        print('interation n:', i)
    # str.rstrip / str.lstrip remove any of the given characters, not a suffix or
    # prefix, so rstrip(".wav").lstrip("train\\") produced wrong IDs. The file stem is
    # the name without directory and extension.
    id_list2.append(Path(paths2[i]).stem)
    audio_data = paths2[i]
    x, sr = librosa.load(audio_data)
    mfccs = librosa.feature.mfcc(y=x, hop_length=512)
    features2[i] = pad2d(mfccs, 100).reshape(-1)

interation n: 0
interation n: 500
interation n: 1000
interation n: 1500
interation n: 2000
interation n: 2500
interation n: 3000


In [692]:
# Debug helper: print every path whose stripped name does not have the expected length.
# The stripped name is now computed before the check. Originally `nme` was read before
# it was ever assigned (NameError on a fresh kernel) and was only updated inside the
# check, so the loop could never inspect more than one path.
# NOTE(review): 36 is presumably 'est\\' + a 32-character ID, because lstrip("train\\")
# removes only the leading 't' of 'test\\...' — confirm. Shorter results are names that
# rstrip(".wav") damaged by also removing trailing 'a' characters of the ID.
# NOTE(review): paths2 is assigned in a later cell of this notebook.
id_list3 = []
for i in range(len(paths2)):
    nme = paths2[i].rstrip(".wav").lstrip("train\\")
    if len(nme) != 36:
        print(len(nme))
        print(paths2[i])

In [655]:
# Number of training samples.
X_train.shape[0]

7355

In [823]:
# This is the part worth talking about.
# After the unsuccessful attempts with fully connected networks we switch to a
# convolutional one. For that the flat feature rows are reshaped back into the 2-D
# layout they came from (20 coefficients along a time axis of 120 segments),
# plus a trailing channel axis of size 1.
sample_shape = (20, 120, 1)
X_train_3D = np.reshape(X_train, (-1,) + sample_shape)
X_test_3D = np.reshape(X_test, (-1,) + sample_shape)

In [824]:
# First convolutional network:
# convolve and pool three times, then flatten and feed a fully connected layer.

input_img = keras.Input(shape=(20, 120, 1))

net = input_img
for _ in range(3):
    # each stage: 32 filters of 3x3, followed by 2x2 max pooling
    net = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(net)
    net = layers.MaxPooling2D((2, 2), padding='same')(net)
net = layers.Flatten()(net)
net = layers.Dense(512, activation='relu')(net)

# single sigmoid unit for the binary label
output = layers.Dense(1, activation='sigmoid')(net)
encoder = keras.Model(input_img, output)
encoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [674]:
# Even this short run already shows that accuracy has improved radically.
encoder.fit(
    x=X_train_3D,
    y=y_train,
    batch_size=64,
    epochs=5,
    shuffle=True,
    validation_data=(X_test_3D, y_test),
)

Train on 7355 samples, validate on 4904 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x17cb4c3cdc8>

In [825]:
# The previous model had reached the required level and losing it was scary,
# so training continues on a separate copy of it.
# keras.Model(input_img, output) would wrap the very same layer objects, so fitting
# encoder2 would also change encoder's weights. clone_model builds new layers and
# set_weights copies the current weights into them, leaving encoder untouched.
encoder2 = keras.models.clone_model(encoder)
encoder2.set_weights(encoder.get_weights())
encoder2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [826]:
# More epochs this time; everything else is unchanged.
encoder2.fit(
    x=X_train_3D,
    y=y_train,
    batch_size=64,
    epochs=15,
    shuffle=True,
    validation_data=(X_test_3D, y_test),
)

Train on 11148 samples, validate on 2788 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x17cdface8c8>

In [827]:
# And here it is, the long-awaited result:
# the recorded run reached 98.42 percent accuracy on the held-out split.
y_pred3 = encoder2.predict(X_test_3D)
y_lst3 = y_test['label'].tolist()
# Threshold all sigmoid outputs at once instead of converting each prediction row with int().
y_pred3log = (y_pred3 > 0.5).astype(int).ravel().tolist()
# scikit-learn's signature is accuracy_score(y_true, y_pred).
accuracy_score(y_lst3, y_pred3log)

0.9842180774748924

In [None]:
# If you have read this far — respect.
# You can stop reading here. What follows is a beginner data scientist's battle with indices,
# loading the unlabeled test data and writing the predictions to a file to be submitted to
# everyone's favourite ******

In [814]:
# Size of the held-out label set.
y_test.shape

(2788, 1)

In [678]:
# Collect the test audio files. Sorted, like the training paths, so that the order of
# files (and therefore of features, predictions and IDs) does not depend on the
# order in which the file system happens to list them.
paths2 = [str(wav_file) for wav_file in sorted(Path('test').glob('**/*.wav'))]

In [679]:
# List the discovered test file paths.
paths2

['test\\00100026dbdffcd01cde6ee9b9a9d273.wav',
 'test\\0014278276a6cc05fe8c522af0a677df.wav',
 'test\\0026e20266ceba9cdda2c116e89d2f3b.wav',
 'test\\008b37fc832d3dfad9105961c5801c02.wav',
 'test\\00b1d7d0d6fdb25050041f6e2ae2871e.wav',
 'test\\00b23e5cbc6f652b55d0754ed5ce300f.wav',
 'test\\00d314de185e2e2425590e18e88c969f.wav',
 'test\\00d9d606720bf95d08546c4bcb19ba71.wav',
 'test\\00f3de3001b7f177384897278f905f83.wav',
 'test\\0113ca85c57aeafd828acc1e765c058b.wav',
 'test\\011ffa717faf73648cd2bc746d2ef9d7.wav',
 'test\\012d337e20f5427f550163b486f7ca00.wav',
 'test\\0148a8055c5987082bf1fd1d816943ac.wav',
 'test\\0166018d693bb79c1e6203d4a048b003.wav',
 'test\\016d86a309e33615738e0af7abdc7dc8.wav',
 'test\\017e03b789bef812d30b2e5783bbfdf2.wav',
 'test\\018205a4a74a654927ff80ca4ba892d7.wav',
 'test\\0197e87b1967402d4f331e656d5aa04d.wav',
 'test\\01a14dbda7060f8b654f4a81566ecb0e.wav',
 'test\\01a488d98948d02d376ea8848f92036f.wav',
 'test\\01a4e8ff8bd45861e6da907f97b6f024.wav',
 'test\\01bde

In [680]:
# Extract the same 20 x 120 MFCC features for the unlabeled test files,
# one flattened row of 2400 values per file, in paths2 order.
test_features = np.zeros((len(paths2), 20 * 120))
id_test = []
for i in range(len(paths2)):
    if i % 500 == 0:
        print('interation n:', i)
    # str.rstrip(".wav") / str.lstrip("test\\") strip any of those characters, not a
    # suffix or prefix, so IDs starting with 'e' or ending with 'a' were truncated.
    # The file stem is the name without directory and extension.
    id_test.append(Path(paths2[i]).stem)
    audio_data = paths2[i]
    x, sr = librosa.load(audio_data)
    mfccs = librosa.feature.mfcc(y=x, hop_length=2048, n_mfcc=20)
    # Pad/truncate to 120 frames, then flatten row by row into the i-th feature row.
    test_features[i] = pad2d(mfccs, 120).reshape(-1)

interation n: 0
interation n: 500
interation n: 1000
interation n: 1500
interation n: 2000
interation n: 2500
interation n: 3000


In [794]:
# Scale the test features with the scaler that was fitted on the training features
# (transform only, no refitting).
test_features_scaled = scaler.transform(test_features)

In [795]:
# Reshape the flat rows into the (20, 120, 1) layout the convolutional network expects.
test_features3D = np.reshape(test_features_scaled, (-1, 20, 120, 1))

In [796]:
# Predict on the test files and turn the sigmoid outputs into hard 0/1 labels
# using a 0.5 threshold.
y_ans = encoder2.predict(test_features3D)

y_anslog = (y_ans > 0.5).astype(int).ravel().tolist()

In [797]:
# Inspect the thresholded 0/1 predictions for the test files.
y_anslog

[1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,


In [738]:
# IDs of the test files, in paths2 order (the same order the test features were built in).
# Path(...).stem takes the file name without directory and ".wav" extension, so the
# result does not depend on hard-coded character offsets such as [5:37].
id_test2 = [Path(wav_path).stem for wav_path in paths2]


In [703]:
# One "<id><TAB><label>" line per prediction.
ans_list = [id_test2[pos] + "\t" + str(label) for pos, label in enumerate(y_anslog)]

In [747]:
# Same lines as above, with the ID explicitly converted to str as well.
ans_list_try = [
    "\t".join((str(id_test2[pos]), str(label)))
    for pos, label in enumerate(y_anslog)
]

In [728]:
# Write the submission lines to ans2.tsv, one per line.
# The with-block closes the file on exit, so no explicit close() is needed afterwards.
with open("ans2.tsv", "w") as file:
    file.writelines(line + '\n' for line in ans_list_try)

In [748]:
# Inspect the generated submission lines.
ans_list_try

['00100026dbdffcd01cde6ee9b9a9d273\t0',
 '0014278276a6cc05fe8c522af0a677df\t1',
 '0026e20266ceba9cdda2c116e89d2f3b\t1',
 '008b37fc832d3dfad9105961c5801c02\t1',
 '00b1d7d0d6fdb25050041f6e2ae2871e\t1',
 '00b23e5cbc6f652b55d0754ed5ce300f\t1',
 '00d314de185e2e2425590e18e88c969f\t0',
 '00d9d606720bf95d08546c4bcb19ba71\t0',
 '00f3de3001b7f177384897278f905f83\t1',
 '0113ca85c57aeafd828acc1e765c058b\t1',
 '011ffa717faf73648cd2bc746d2ef9d7\t1',
 '012d337e20f5427f550163b486f7ca00\t1',
 '0148a8055c5987082bf1fd1d816943ac\t0',
 '0166018d693bb79c1e6203d4a048b003\t1',
 '016d86a309e33615738e0af7abdc7dc8\t1',
 '017e03b789bef812d30b2e5783bbfdf2\t1',
 '018205a4a74a654927ff80ca4ba892d7\t1',
 '0197e87b1967402d4f331e656d5aa04d\t1',
 '01a14dbda7060f8b654f4a81566ecb0e\t1',
 '01a488d98948d02d376ea8848f92036f\t0',
 '01a4e8ff8bd45861e6da907f97b6f024\t0',
 '01bde3f1091f5982c0c15da9cab2ccf7\t1',
 '01d68545cdd3ecfe9fc4d23f507245a5\t0',
 '01edf1b74fb4c52ed07e2eb6f0f9ddcc\t0',
 '02047926236d84ebc92c66d66cf29a89\t0',


In [741]:
# Spot check: the extracted ID of one test file...
id_test2[37]

'034f9dc288f38c4d1b8b843395eb860a'

In [700]:
# ...the path it was extracted from...
paths2[37]

'test\\034f9dc288f38c4d1b8b843395eb860a.wav'

In [740]:
# ...and the fixed-offset slice of that path, for comparison with the ID.
paths2[37][5:37]

'034f9dc288f38c4d1b8b843395eb860a'

In [798]:
# Submission table: file ID and predicted label, both stored as strings.
ans_list4 = pd.DataFrame({
    "id": [str(file_id) for file_id in id_test2],
    "ans": [str(label) for label in y_anslog],
})

In [730]:
# Alternative: a single-column frame holding the ready-made "<id><TAB><label>" lines.
ans_list5 = pd.DataFrame(data=ans_list_try)

In [799]:
# Save the submission as a tab-separated file without header or index column.
ans_list4.to_csv(
    "abyrvalg.tsv",
    sep="\t",
    index=False,
    header=False,
)

In [735]:
# Save the pre-joined lines as plain text, again without header or index column.
ans_list5.to_csv(
    "abyrvalg2.txt",
    index=False,
    header=False,
)

In [746]:
# Sanity check: length of the first extracted ID.
len(id_test2[0])

32

In [749]:
# Sanity check: number of test IDs.
len(id_test2)

3413