In [170]:
import scipy as sc
import scipy.signal
import scipy.io.wavfile
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.decomposition import PCA
from keras.models import Sequential, Model
from keras.layers import *
from keras.initializers import glorot_uniform
from keras.optimizers import Adam
import keras.backend as K
import tensorflow as tf
import skimage as ski
import skimage.color
import skimage.io
import random
import os
import pickle

# Shared random seed: fed to np.random.seed, the Keras glorot_uniform initializers
# and train_test_split further down in this notebook.
seed=42

%matplotlib inline

Вкратце принцип такой:
- в части train была предобработка аудио и обучение CNN и PCA
- в начале этой части будет загрузка моделей и считывание мета файлов для работы с предобработанными аудио
- далее из CNN берутся мета-фичи и спектр сжимается по PCA
- эти два признака конкатенируются и подаются на полносвязную сеть
- далее делается предсказание и сохраняется результат

In [2]:
# The meta file has no header row, so the column names are attached after loading.
META_PATH = 'data/meta/meta.txt'
META_COLUMNS = ['filename', 'type', 'idk', 'duration', 'label']

df_meta = pd.read_csv(META_PATH, header=None)
df_meta.columns = META_COLUMNS

In [3]:
# Integer code -> class name, in order of first appearance in df_meta.label.
class_num2str = pd.Series(df_meta.label.unique())
# Class name -> integer code (inverse of the lookup above).
class_str2num = pd.Series(range(len(class_num2str)), index=class_num2str.values)

# Show how many recordings each class has.
df_meta.label.value_counts()

door             3416
tool             1659
knocking_door    1656
bags             1236
keyboard         1225
background       1126
ring              713
speech            276
Name: label, dtype: int64

### ---------------------------------------------------------------------------------------------------

In [4]:
# Meta table of the pre-processed training pieces.
df_proc_meta = pd.read_csv('proc_meta.csv')
# Group key: the part of each file name before its first '.'.
df_proc_meta['group'] = [str(name).split('.')[0] for name in df_proc_meta.filename]

In [5]:
# Draw 3000 row positions from df_meta with a fixed seed and look at their class balance.
# NOTE(review): np.random.choice samples with replacement by default, so the same
# row can be drawn more than once here.
np.random.seed(seed)
sampled_rows = np.random.choice(range(len(df_meta)), size=3000)
val_file = df_meta.loc[np.sort(sampled_rows)]
val_file.label.value_counts()

door             882
tool             471
knocking_door    444
keyboard         338
bags             308
background       302
ring             188
speech            67
Name: label, dtype: int64

In [6]:
BATCH_SIZE = 48
seed=42


def _conv_stage(tensor, filters, kernel_size, strides, init_seed):
    """Apply Conv1D -> ReLU -> MaxPooling1D(pool_size=2) to `tensor`.

    The convolution uses 'same' padding and a glorot_uniform initializer
    seeded with `init_seed`. Returns the pooled output tensor.
    """
    conv = Conv1D(filters, kernel_size, padding='same', strides=strides,
                  kernel_initializer=glorot_uniform(seed=init_seed))(tensor)
    activated = Activation('relu')(conv)
    return MaxPooling1D(pool_size=2)(activated)


# Network input: arrays of shape (300, 257).
input_layer = Input(shape=(300,257))

# Five convolutional stages; each stage gets its own initializer seed (seed .. seed+4).
max_pool1 = _conv_stage(input_layer, 64, 5, 2, seed)
max_pool2 = _conv_stage(max_pool1, 64, 5, 1, seed+1)
max_pool3 = _conv_stage(max_pool2, 128, 5, 1, seed+2)
max_pool4 = _conv_stage(max_pool3, 128, 3, 1, seed+3)
max_pool5 = _conv_stage(max_pool4, 256, 3, 1, seed+4)

flatten = Flatten()(max_pool5)

# 1024-unit sigmoid layer followed by the 8-way softmax output.
dense_0 = Dense(1024, activation='sigmoid', kernel_initializer=glorot_uniform(seed=seed+5))(flatten)
dense_out = Dense(8, activation='softmax', kernel_initializer=glorot_uniform(seed=seed+6))(dense_0)

model = Model(inputs=[input_layer], outputs=[dense_out])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 300, 257)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 150, 64)           82304     
_________________________________________________________________
activation_1 (Activation)    (None, 150, 64)           0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 75, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 75, 64)            20544     
_________________________________________________________________
activation_2 (Activation)    (None, 75, 64)            0         
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 37, 64)            0         
__________

In [7]:
# Load stored weights from the HDF5 file into the CNN built in the previous cell.
model.load_weights('2to5_1to2epochs.hdf5')

#### --- TEST ---

In [8]:
# Directory that is listed below to collect the test file names.
test_path='./data/test'

In [9]:
# (file-name prefix, integer class code) pairs, checked in this order for every file.
CLASS_PREFIXES = (
    ('background', 0),
    ('bags', 1),
    ('door', 2),
    ('keyboard', 3),
    ('knocking_door', 4),
    ('ring', 5),
    ('speech', 6),
    ('tool', 7),
)

test_labels = []
test_files = []
for file in os.listdir(test_path):
    # Every entry whose name does not contain 'unknown' is kept as a test file ...
    if 'unknown' not in file:
        test_files.append(file)
    # ... but a label is only recorded when the name starts with a class prefix.
    # NOTE(review): an entry that matches no prefix ends up in test_files without
    # a label, so the two lists are not guaranteed to have the same length.
    for prefix, code in CLASS_PREFIXES:
        if file.startswith(prefix):
            test_labels.append(code)

In [10]:
# Pair every labelled test file with its integer class code.
#
# test_files can contain entries that received no label (any directory entry
# without 'unknown' in its name is collected). Dropping a fixed position from the
# list only works if that entry happens to be listed first, and os.listdir
# returns names in arbitrary order. Selecting the labelled files by their class
# prefix keeps file names and labels aligned regardless of listing order.
LABELLED_PREFIXES = ('background', 'bags', 'door', 'keyboard',
                     'knocking_door', 'ring', 'speech', 'tool')
labelled_test_files = [name for name in test_files if name.startswith(LABELLED_PREFIXES)]

# Fail loudly instead of silently building a misaligned table.
if len(labelled_test_files) != len(test_labels):
    raise ValueError(
        'Found %d labelled test files but %d labels'
        % (len(labelled_test_files), len(test_labels)))

temp = dict()
temp['filename'] = labelled_test_files
temp['label'] = test_labels
df_test_meta = pd.DataFrame.from_dict(temp)

In [11]:
# Meta table of the pre-processed test pieces.
df_proc_test_meta = pd.read_csv('proc_test_meta.csv')
# Group key: the part of each file name before its first '.'.
df_proc_test_meta['group'] = [str(name).split('.')[0] for name in df_proc_test_meta.filename]

In [12]:
# Run the CNN on every processed test image, collecting one (n_images, 8)
# array of class probabilities per group.
path = './data/processed_test/'
predictions = []
files_queue = []
p = np.zeros((1,300,257))  # reusable batch buffer holding a single image
for group in tqdm_notebook(df_proc_test_meta.group.unique()):
    in_group = df_proc_test_meta.group == group
    file_groups = df_proc_test_meta.loc[in_group, 'filename']
    pred_file = np.zeros((len(file_groups), 8))
    for row, name in enumerate(file_groups):
        png_name = name + '.png'
        files_queue.append(png_name)
        # First channel only, rotated by 90 degrees (np.rot90 default) to fit the buffer slot.
        p[0] = np.rot90(plt.imread(path + png_name)[:, :, 0])
        pred_file[row] = model.predict(p)
    predictions.append(pred_file)

Widget Javascript not detected.  It may not be installed or enabled properly.





In [16]:
# Build the spectrum feature table for the training recordings:
# one row per file in df_meta, 8000 spectrum bins per row.
PATH = 'data/audio/'  # defined before the first read so the cell runs on a fresh kernel
features = np.zeros(shape=(len(df_meta), 8000))

labels = []
files = []
for i,file in enumerate(tqdm_notebook(df_meta.filename)):
    sr,wave_data = sc.io.wavfile.read(PATH+file)
    # n=sr crops / zero-pads the signal to sr samples, so rfft returns sr//2 + 1 bins;
    # a constant 10 is added to every magnitude.
    spectr = np.abs(np.fft.rfft(wave_data, n=sr))+10
    # Drop bin 0, then scale by the largest remaining bin.
    spectr = spectr[1:]/np.max(spectr[1:])
    labels.append(df_meta.iloc[i].label)
    files.append(df_meta.iloc[i].filename)
    # At sr == 16000 exactly 8000 bins remain; for other rates only the first 8000 are used.
    if sr != 16000:
        features[i] = spectr[:8000]
    else:
        features[i] = spectr

# One DataFrame column per spectrum bin, followed by the label and file name.
data_dict = dict()
for i in tqdm_notebook(range(features.shape[1])):
    data_dict[i] = features[:,i]

data_dict['label'] = labels
data_dict['filename'] = files

df_features = pd.DataFrame.from_dict(data_dict)
# Training labels are class names here; convert them to integer codes.
df_features.label = df_features.label.map(class_str2num)

Widget Javascript not detected.  It may not be installed or enabled properly.





Widget Javascript not detected.  It may not be installed or enabled properly.





In [274]:
# Load the pickled PCA object used below to compress the spectrum features.
# NOTE: pickle.load can execute arbitrary code while loading; only open
# pca_params.pkl if it comes from a trusted source.
with open('pca_params.pkl', 'rb') as handle:
    pca = pickle.load(handle)

In [275]:
# Project the spectrum columns (everything except label / filename) with the loaded PCA.
train_spectrum_columns = df_features.drop(labels=['label', 'filename'], axis=1)
train_features_transf = pca.transform(train_spectrum_columns)

# Min-max scale the whole matrix with a single global minimum and maximum.
train_shifted = train_features_transf - train_features_transf.min()
train_features_transf = train_shifted/train_shifted.max()

In [18]:
# Feature extractor: same input as the CNN, output taken from its second-to-last layer.
penultimate_layer = model.layers[-2]
intermediate_layer_model = Model(inputs=model.layers[0].input,
                                 outputs=penultimate_layer.output)

In [19]:
# Extract the 1024-dimensional intermediate CNN output for every processed
# training image, collecting one (n_images, 1024) array per group.
path = './data/processed_audio/'
train_meta_features = []
files_queue = []
p = np.zeros((1,300,257))  # reusable batch buffer holding a single image
for group in tqdm_notebook(df_proc_meta.group.unique()):
    in_group = df_proc_meta.group == group
    file_groups = df_proc_meta.loc[in_group, 'filename']
    pred_file = np.zeros((len(file_groups), 1024))
    for row, name in enumerate(file_groups):
        png_name = name + '.png'
        files_queue.append(png_name)
        # First channel only, rotated by 90 degrees (np.rot90 default) to fit the buffer slot.
        p[0] = np.rot90(plt.imread(path + png_name)[:, :, 0])
        pred_file[row] = intermediate_layer_model.predict(p)
    train_meta_features.append(pred_file)

Widget Javascript not detected.  It may not be installed or enabled properly.





In [25]:
# Collapse every group's (n_images, 1024) block into one averaged 1024-vector.
train_mean_meta_features = np.array(
    [group_block.mean(axis=0) for group_block in train_meta_features],
    dtype=np.float64,
).reshape(-1, 1024)

In [35]:
# Extract the 1024-dimensional intermediate CNN output for every processed
# test image, collecting one (n_images, 1024) array per group.
path = './data/processed_test/'
test_meta_features = []
files_queue = []
p = np.zeros((1,300,257))  # reusable batch buffer holding a single image
for group in tqdm_notebook(df_proc_test_meta.group.unique()):
    in_group = df_proc_test_meta.group == group
    file_groups = df_proc_test_meta.loc[in_group, 'filename']
    pred_file = np.zeros((len(file_groups), 1024))
    for row, name in enumerate(file_groups):
        png_name = name + '.png'
        files_queue.append(png_name)
        # First channel only, rotated by 90 degrees (np.rot90 default) to fit the buffer slot.
        p[0] = np.rot90(plt.imread(path + png_name)[:, :, 0])
        pred_file[row] = intermediate_layer_model.predict(p)
    test_meta_features.append(pred_file)

Widget Javascript not detected.  It may not be installed or enabled properly.





In [276]:
# Training matrix: PCA-compressed spectrum columns followed by the averaged CNN features.
# NOTE(review): assumes row k of both matrices belongs to the same recording
# (df_meta order vs. df_proc_meta group order) — confirm.
X = np.concatenate((train_features_transf, train_mean_meta_features), axis=1)
# One-hot encoded targets, one column per integer class code.
y = pd.get_dummies(df_features.label)

In [277]:
# Hold out 20% of the training rows for validation; random_state makes the split repeatable.
train_X, test_X, train_Y, test_Y = train_test_split(X, y, test_size=0.2, random_state=seed)

In [278]:
# Collapse every test group's (n_images, 1024) block into one averaged 1024-vector.
test_mean_meta_features = np.array(
    [group_block.mean(axis=0) for group_block in test_meta_features],
    dtype=np.float64,
).reshape(-1, 1024)

In [37]:
# Build the spectrum feature table for the labelled test recordings:
# one row per file in df_test_meta, 8000 spectrum bins per row.
PATH = 'data/test/'
features = np.zeros(shape=(len(df_test_meta), 8000))

labels = []
files = []
for i,file in enumerate(tqdm_notebook(df_test_meta.filename)):
    sr,wave_data = sc.io.wavfile.read(PATH+file)
    # n=sr crops / zero-pads the signal to sr samples, so rfft returns sr//2 + 1 bins;
    # a constant 10 is added to every magnitude.
    spectr = np.abs(np.fft.rfft(wave_data, n=sr))+10
    # Drop bin 0, then scale by the largest remaining bin.
    spectr = spectr[1:]/np.max(spectr[1:])
    labels.append(df_test_meta.iloc[i].label)
    files.append(df_test_meta.iloc[i].filename)
    # At sr == 16000 exactly 8000 bins remain; for other rates only the first 8000 are used.
    if sr != 16000:
        features[i] = spectr[:8000]
    else:
        features[i] = spectr

# One DataFrame column per spectrum bin, followed by the label and file name.
data_dict = dict()
for i in tqdm_notebook(range(features.shape[1])):
    data_dict[i] = features[:,i]

data_dict['label'] = labels
data_dict['filename'] = files

df_test_features = pd.DataFrame.from_dict(data_dict)
# The labels taken from df_test_meta are already integer class codes, so they are
# kept as they are. Mapping them through class_str2num (which is indexed by class
# name) would turn every label into NaN, and the training frame's labels must
# never be written into the test table.

Widget Javascript not detected.  It may not be installed or enabled properly.





Widget Javascript not detected.  It may not be installed or enabled properly.





In [280]:
# Project the test spectrum columns (everything except label / filename) with the loaded PCA.
test_spectrum_columns = df_test_features.drop(labels=['label', 'filename'], axis=1)
test_features_transf = pca.transform(test_spectrum_columns)

# Min-max scale the whole matrix with a single global minimum and maximum.
# NOTE(review): the minimum / maximum come from the test matrix itself, not from
# the training matrix — confirm this is intended.
test_shifted = test_features_transf - test_features_transf.min()
test_features_transf = test_shifted/test_shifted.max()

In [282]:
# Test matrix: PCA-compressed spectrum columns followed by the averaged CNN features.
# NOTE(review): assumes both matrices list the recordings in the same order — confirm.
X_TEST = np.concatenate((test_features_transf, test_mean_meta_features), axis=1)
# One-hot encoding of the label column of the test feature table.
Y_TEST = pd.get_dummies(df_test_features.label)

In [262]:
# xgb_clf = xgb.XGBClassifier(max_depth=4, n_estimators=20, objective='multi:softmax', n_jobs=4, verbose=10, booster='gblinear')
# train_X, test_X, train_Y, test_Y = train_test_split(X, df_features.label, test_size=0.2, random_state=seed)

In [263]:
# xgb_clf.fit(train_X, train_Y.values, eval_set=[(test_X, test_Y)], eval_metric='mlogloss', verbose=10)

[0]	validation_0-mlogloss:0.369565
[10]	validation_0-mlogloss:0.023428
[19]	validation_0-mlogloss:0.021325


XGBClassifier(base_score=0.5, booster='gblinear', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=4, min_child_weight=1, missing=None, n_estimators=20,
       n_jobs=4, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1, verbose=10)

In [284]:
# Per-class weights handed to perceptron.fit: class code 7 is weighted 2x, all others 1x.
class_weight = {0:1, 1:1, 2:1, 3:1, 4:1, 5:1, 6:1, 7:2}

In [283]:
# Fully connected classifier on the concatenated features:
# 2064 inputs -> 1024 (sigmoid) -> 512 (sigmoid) -> 8 (softmax).
perceptron = Sequential()
perceptron_layers = (
    Dense(1024, input_dim=(2064), kernel_initializer=glorot_uniform(seed=seed+6)),
    Activation('sigmoid'),
    Dense(512, kernel_initializer=glorot_uniform(seed=seed+7)),
    Activation('sigmoid'),
    Dense(8, kernel_initializer=glorot_uniform(seed=seed+8)),
    Activation('softmax'),
)
for layer in perceptron_layers:
    perceptron.add(layer)

perceptron.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.0001),
    metrics=['categorical_accuracy'],
)

# Train for 10 epochs, validating on the held-out split after every epoch.
hist = perceptron.fit(
    train_X, train_Y,
    validation_data=(test_X, test_Y),
    shuffle=True,
    batch_size=48,
    epochs=10,
    verbose=2,
    class_weight=class_weight,
)

Train on 9045 samples, validate on 2262 samples
Epoch 1/10
 - 9s - loss: 0.9655 - categorical_accuracy: 0.8051 - val_loss: 0.2769 - val_categorical_accuracy: 0.9408
Epoch 2/10
 - 4s - loss: 0.1781 - categorical_accuracy: 0.9776 - val_loss: 0.1118 - val_categorical_accuracy: 0.9766
Epoch 3/10
 - 5s - loss: 0.0923 - categorical_accuracy: 0.9852 - val_loss: 0.0769 - val_categorical_accuracy: 0.9828
Epoch 4/10
 - 5s - loss: 0.0661 - categorical_accuracy: 0.9883 - val_loss: 0.0591 - val_categorical_accuracy: 0.9850
Epoch 5/10
 - 5s - loss: 0.0541 - categorical_accuracy: 0.9889 - val_loss: 0.0554 - val_categorical_accuracy: 0.9854
Epoch 6/10
 - 4s - loss: 0.0460 - categorical_accuracy: 0.9904 - val_loss: 0.0432 - val_categorical_accuracy: 0.9907
Epoch 7/10
 - 4s - loss: 0.0421 - categorical_accuracy: 0.9910 - val_loss: 0.0445 - val_categorical_accuracy: 0.9876
Epoch 8/10
 - 4s - loss: 0.0377 - categorical_accuracy: 0.9909 - val_loss: 0.0411 - val_categorical_accuracy: 0.9881
Epoch 9/10
 - 4s

In [306]:
# Class probabilities for the labelled test set; display the most likely class per row.
pred = perceptron.predict(X_TEST)
np.argmax(pred, axis=1)

array([0, 0, 0, 0, 0, 5, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 5, 5, 0, 0, 0, 5, 5, 5, 6, 0, 0, 0, 6, 7, 0, 2, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 3, 1, 2, 1, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2,
       2, 2, 2, 2, 2, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 6, 2, 6,
       2, 1, 2, 2, 2, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 1, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4,
       6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 6, 4, 4, 4, 6, 4, 4, 4, 4, 4,
       4, 4, 5, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 4,
       4, 4, 4, 4, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5,
       5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

In [307]:
# F1 for each class separately (average=None returns one score per class).
f1_score(df_test_meta.label.values, pred.argmax(axis=1), average=None)

array([0.78947368, 0.91089109, 0.82608696, 0.94736842, 0.89285714,
       0.89932886, 0.9209622 , 0.8       ])

In [308]:
# Macro F1: unweighted mean of the per-class scores.
f1_score(df_test_meta.label.values, pred.argmax(axis=1), average='macro')

0.8733710440155085

In [310]:
# Prediction table for the labelled test files: file name, winning probability
# and predicted class.
df_pred_test = pd.DataFrame()
df_pred_test['filename'] = df_test_meta.filename
df_pred_test['confidence'] = pred.max(axis=1)
df_pred_test['label'] = pred.argmax(axis=1)
# Convert integer codes to class names so that this table uses the same label
# format as the table built for the 'unknown' files, with which it is
# concatenated into a single result file.
df_pred_test.label = df_pred_test.label.map(class_num2str)

In [291]:
# Collect the test-directory entries whose names start with 'unknown'.
test_path = 'data/test/'
unknown_files = [name for name in os.listdir(test_path) if name.startswith('unknown')]

In [292]:
# Meta table for the 'unknown' files; every row gets the constant label 0.
df_unknown_meta = pd.DataFrame(unknown_files, columns=['filename']).assign(label=0)

In [254]:
# UNCOMMENT THIS IF YOU DIDN'T DOWNLOAD proc_unknown_meta.csv

# fn, lb, cnt = cropping_all(df_unknown_meta, read_path='data/test/', save_path='data/processed_unknown/')

# tmp = dict()
# tmp['filename'] = fn
# tmp['label'] = lb

# df_proc_unknown_meta = pd.DataFrame.from_dict(tmp)
# df_proc_unknown_meta.to_csv('proc_unknown_meta.csv', encoding='UTF-8', index=False)




In [293]:
# Meta table of the pre-processed 'unknown' pieces.
df_proc_unknown_meta = pd.read_csv('proc_unknown_meta.csv')
# Group key: the part of each file name before its first '.'.
df_proc_unknown_meta['group'] = [str(name).split('.')[0] for name in df_proc_unknown_meta.filename]

In [294]:
# Extract the 1024-dimensional intermediate CNN output for every processed
# 'unknown' image, collecting one (n_images, 1024) array per group.
path = './data/processed_unknown/'
unknown_meta_features = []
files_queue = []
p = np.zeros((1,300,257))  # reusable batch buffer holding a single image
for group in tqdm_notebook(df_proc_unknown_meta.group.unique()):
    in_group = df_proc_unknown_meta.group == group
    file_groups = df_proc_unknown_meta.loc[in_group, 'filename']
    pred_file = np.zeros((len(file_groups), 1024))
    for row, name in enumerate(file_groups):
        png_name = name + '.png'
        files_queue.append(png_name)
        # First channel only, rotated by 90 degrees (np.rot90 default) to fit the buffer slot.
        p[0] = np.rot90(plt.imread(path + png_name)[:, :, 0])
        pred_file[row] = intermediate_layer_model.predict(p)
    unknown_meta_features.append(pred_file)

Widget Javascript not detected.  It may not be installed or enabled properly.





In [295]:
# Collapse every 'unknown' group's (n_images, 1024) block into one averaged 1024-vector.
unknown_mean_meta_features = np.array(
    [group_block.mean(axis=0) for group_block in unknown_meta_features],
    dtype=np.float64,
).reshape(-1, 1024)

In [296]:
# Build the spectrum feature table for the 'unknown' recordings:
# one row per file in df_unknown_meta, 8000 spectrum bins per row.
features = np.zeros(shape=(len(df_unknown_meta), 8000))

PATH = 'data/test/'
labels = []
files = []
for i, file in enumerate(tqdm_notebook(df_unknown_meta.filename)):
    sr, wave_data = sc.io.wavfile.read(PATH + file)
    # Magnitude spectrum over sr points with a constant 10 added to every bin.
    magnitudes = np.abs(np.fft.rfft(wave_data, n=sr)) + 10
    # Drop bin 0, then scale by the largest remaining bin.
    tail = magnitudes[1:]
    spectr = tail / np.max(tail)
    meta_row = df_unknown_meta.iloc[i]
    labels.append(meta_row.label)
    files.append(meta_row.filename)
    # Only the first 8000 bins are used when the sample rate is not 16000.
    features[i] = spectr[:8000] if sr != 16000 else spectr

PATH = 'data/test/'
# One DataFrame column per spectrum bin, followed by the label and file name.
data_dict = dict()
for i in tqdm_notebook(range(features.shape[1])):
    data_dict[i] = features[:, i]
data_dict['label'] = labels
data_dict['filename'] = files

df_unknown_features = pd.DataFrame.from_dict(data_dict)
# NOTE(review): the labels here are the constant placeholder from df_unknown_meta;
# the column is dropped again before the PCA transform.
df_unknown_features.label = df_unknown_features.label.map(class_str2num)

Widget Javascript not detected.  It may not be installed or enabled properly.





Widget Javascript not detected.  It may not be installed or enabled properly.





In [297]:
# Project the spectrum columns (everything except filename / label) with the loaded PCA.
unknown_features_transf = pca.transform(df_unknown_features.drop(labels=['filename', 'label'], axis=1))

In [298]:
# Min-max scale the whole matrix with a single global minimum and maximum.
# NOTE(review): the minimum / maximum come from this matrix itself, not from the
# training matrix — confirm this is intended.
unknown_shifted = unknown_features_transf - unknown_features_transf.min()
unknown_features_transf = unknown_shifted/unknown_shifted.max()

In [299]:
# Feature matrix for the 'unknown' files: PCA columns followed by the averaged CNN features.
X_unknown = np.concatenate((unknown_features_transf, unknown_mean_meta_features), axis=1)

In [312]:
# Class probabilities for the 'unknown' files; this rebinds `pred`, which earlier
# held the predictions for the labelled test set.
pred = perceptron.predict(X_unknown)

In [313]:
# Display the class-name -> integer-code lookup for reference.
class_str2num

background       0
bags             1
door             2
keyboard         3
knocking_door    4
ring             5
speech           6
tool             7
dtype: int32

In [314]:
# Prediction table for the 'unknown' files: file name, winning probability and
# predicted class code.
df_pred_unknown = pd.DataFrame(
    {
        'filename': df_unknown_meta.filename,
        'confidence': pred.max(axis=1),
        'label': pred.argmax(axis=1),
    },
    columns=['filename', 'confidence', 'label'],
)
# Replace the integer codes with class names.
df_pred_unknown.label = df_pred_unknown.label.map(class_num2str)

In [315]:
# Stack the labelled-test and 'unknown' predictions and write them as
# tab-separated rows without header or index.
prediction_tables = [df_pred_test, df_pred_unknown]
df_pred = pd.concat(prediction_tables)
df_pred.to_csv('result.txt', sep='\t', encoding='UTF-8', index=False, header=False)