In [1]:
import os
import sys
from os.path import isdir, join
from pathlib import Path
import pandas as pd
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa

from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import pandas as pd
import gc

module_path = os.path.abspath(os.path.join('..'))
sys.path.append(module_path)

from utils.transform_util import label_transform, pad_audio, chop_audio
from conf.configure import Configure
import pickle

%matplotlib inline

In [2]:
train_audio_path = '../input/train/audio/'

In [3]:
gc.collect()

11

In [4]:
dirs = [f for f in os.listdir(train_audio_path) if isdir(join(train_audio_path, f))]
dirs.sort()
print('Number of labels: ' + str(len(dirs)))
print(dirs)

# dirs.pop(0)
# print(dirs)
to_keep = 'yes no up down left right on off stop go'.split()
dirs = [d for d in dirs if d in to_keep]
print(dirs)

Number of labels: 31
['_background_noise_', 'bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'four', 'go', 'happy', 'house', 'left', 'marvin', 'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree', 'two', 'up', 'wow', 'yes', 'zero']
['down', 'go', 'left', 'no', 'off', 'on', 'right', 'stop', 'up', 'yes']


In [5]:
legal_labels = 'yes no up down left right on off stop go silence unknown'.split()

In [6]:
def custom_fft(y, fs):
    T = 1.0 / fs
    N = y.shape[0]
    yf = fft(y)
    xf = np.linspace(0.0, 1.0/(2.0*T), N//2)
    vals = 2.0/N * np.abs(yf[0:N//2])  # FFT is simmetrical, so we take just the first half
    # FFT is also complex, to we take just the real part (abs)
    return xf, vals

In [7]:
# fft_all = []
# names = []
# for direct in dirs:
#     waves = [f for f in os.listdir(join(train_audio_path, direct)) if f.endswith('.wav')]
#     for wav in waves:
#         sample_rate, samples = wavfile.read(train_audio_path + direct + '/' + wav)
#         if samples.shape[0] != sample_rate:
#             samples = np.append(samples, np.zeros((sample_rate - samples.shape[0], )))
#         x, val = custom_fft(samples, sample_rate)
#         fft_all.append(val)
#         names.append(direct + '/' + wav)

# fft_all = np.array(fft_all)

# # Normalization
# fft_all = (fft_all - np.mean(fft_all, axis=0)) / np.std(fft_all, axis=0)

# # Dim reduction
# pca = PCA(n_components=3)
# fft_all = pca.fit_transform(fft_all)
# # fft_all = (fft_all - np.mean(fft_all, axis=0)) / np.std(fft_all, axis=0)

# def interactive_3d_plot(data, names):
#     scatt = go.Scatter3d(x=data[:, 0], y=data[:, 1], z=data[:, 2], mode='markers', text=names)
#     data = go.Data([scatt])
#     layout = go.Layout(title="Anomaly detection")
#     figure = go.Figure(data=data, layout=layout)
#     py.iplot(figure)
    
# interactive_3d_plot(fft_all, names)

In [8]:
for direct in dirs:
    waves = [f for f in os.listdir(join(train_audio_path, direct)) if f.endswith('.wav')]
    print(direct, ' len: ', len(waves))

down  len:  2359
go  len:  2372
left  len:  2353
no  len:  2375
off  len:  2357
on  len:  2367
right  len:  2367
stop  len:  2380
up  len:  2375
yes  len:  2377


In [9]:
outlierList = []
for direct in dirs:
    fft_all = []
    names = []
    waves = [f for f in os.listdir(join(train_audio_path, direct)) if f.endswith('.wav')]
    for wav in waves:
        sample_rate, samples = wavfile.read(train_audio_path + direct + '/' + wav)
#         if samples.shape[0] != sample_rate:
#             samples = np.append(samples, np.zeros((sample_rate - samples.shape[0], )))
        samples = pad_audio(samples)
        x, val = custom_fft(samples, sample_rate)
        fft_all.append(val)
        names.append(direct + '/' + wav)
    fft_all = np.array(fft_all)
    fft_all = (fft_all - np.mean(fft_all, axis=0)) / np.std(fft_all, axis=0)

    pca = PCA(n_components=3)
    fft_all = pca.fit_transform(fft_all)
#     fft_all = (fft_all - np.mean(fft_all, axis=0)) / np.std(fft_all, axis=0)/np.sqrt(3)
    fft_all = pd.DataFrame(fft_all)
    
    threshold = ((fft_all**2).sum(axis = 1)).quantile(.95)*np.sqrt(fft_all.shape[0])
    for i in fft_all.index:
        if ((fft_all.loc[i])**2).sum() > threshold:
            print (names[i])
            outlierList.append(names[i])
print(len(outlierList))

down/f638a812_nohash_0.wav
down/a42a88ff_nohash_0.wav
down/f638a812_nohash_1.wav
go/d90b4138_nohash_4.wav
go/0487ba9b_nohash_0.wav
off/a42a88ff_nohash_0.wav
off/617de221_nohash_1.wav
on/a42a88ff_nohash_0.wav
on/617de221_nohash_0.wav
on/3ea77ede_nohash_0.wav
right/f638a812_nohash_0.wav
up/a42a88ff_nohash_0.wav
12


In [16]:
fname = outlierList[np.random.randint(len(outlierList))]
print(fname)
ipd.Audio(join(train_audio_path, fname))

go/d90b4138_nohash_4.wav


In [7]:
with open(Configure.outlierNameList, "wb") as f:
    pickle.dump(outlierList, f, -1)

In [31]:
print('Recording go/0487ba9b_nohash_0.wav')
ipd.Audio(join(train_audio_path, 'go/0487ba9b_nohash_0.wav'))

Recording go/0487ba9b_nohash_0.wav


In [32]:
print('Recording yes/e4b02540_nohash_0.wav')
ipd.Audio(join(train_audio_path, 'yes/e4b02540_nohash_0.wav'))

Recording yes/e4b02540_nohash_0.wav


In [33]:
print('Recording seven/e4b02540_nohash_0.wav')
ipd.Audio(join(train_audio_path, 'seven/b1114e4f_nohash_0.wav'))

Recording seven/e4b02540_nohash_0.wav
