In [13]:
import os
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler

import librosa
import librosa.display

from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(7)

In [14]:
# Root folder of the dataset; each split lives in its own sub-folder.
# Every path keeps a trailing slash because later cells build file paths
# by plain string concatenation.
base_dir = '../data/'
train_dir, val_dir, test_dir = (base_dir + split + '/'
                                for split in ('train', 'val', 'test'))

In [15]:
def load_wavs(filenames):
    """Load every audio file in ``filenames`` and stack the results.

    Each file is read with ``librosa.load`` using its default arguments and
    only the first element of the returned pair (the samples) is kept.  A
    tqdm progress bar is shown while iterating.

    Returns the result of ``np.asarray`` over the list of per-file sample
    arrays, in the same order as ``filenames``.
    NOTE(review): this is a regular 2-D array only if all clips have the
    same number of samples -- confirm for the dataset in use.
    """
    waveforms = []
    for path in tqdm(filenames):
        samples, _sample_rate = librosa.load(path)
        waveforms.append(samples)
    return np.asarray(waveforms)

### If you are short on memory, use this instead
#     wav = librosa.load(filenames[0])
#     wavs = np.zeros( (len(filenames), wav.shape[0]) )
#     for i, filename in enumerate(filenames):
#         wavs[i][:] = librosa.load(filename)[:]
#     return wavs
    
def find_y_by_filename(filename, y_dict):
    """Return the label stored in ``y_dict`` for the file at ``filename``.

    Only the final path component is used as the lookup key, so the
    directory part of ``filename`` is ignored.

    Raises:
        KeyError: if the file's base name is not a key of ``y_dict``.
    """
    _directory, key = os.path.split(filename)
    return y_dict[key]

def make_y_by_filenames(filenames, y_dict):
    """Look up the label of every path in ``filenames``.

    Returns a NumPy array with one label per path, in input order.  A path
    whose base name is missing from ``y_dict`` raises ``KeyError`` (from
    ``find_y_by_filename``).
    """
    labels = []
    for path in filenames:
        labels.append(find_y_by_filename(path, y_dict))
    return np.asarray(labels)

def make_xy_data(filenames, y_dict):
    """Build the ``(waveforms, labels)`` pair for a list of audio files.

    The audio is loaded first via ``load_wavs``; the labels are then looked
    up via ``make_y_by_filenames``.  Both results follow the order of
    ``filenames``.
    """
    wavs = load_wavs(filenames)
    labels = make_y_by_filenames(filenames, y_dict)
    return wavs, labels

In [17]:
### Make Y data
# Read the annotation table and map each file name to its integer label.
annotations_filename = '1200_data_annotations.csv'
df = pd.read_csv(annotations_filename)
# Each row from itertuples() is unpacked as (index, filename, label, <unused>),
# so the CSV must have exactly three columns.
y_dict = dict((filename, int(label))
              for _, filename, label, _ in df.itertuples())

In [18]:
print('Make train data.......')
# os.listdir() returns entries in arbitrary order; sorting the listing gives
# the samples the same order on every run and on every machine.
x_train_wav_filenames = [os.path.join(train_dir, filename)
                         for filename in sorted(os.listdir(train_dir))
                         if filename.endswith('.wav')]
x_val_wav_filenames = [os.path.join(val_dir, filename)
                       for filename in sorted(os.listdir(val_dir))
                       if filename.endswith('.wav')]
# The validation clips are appended to the training list, so the models are
# fitted on train + val together.
x_train_wav_filenames += x_val_wav_filenames
x_train_wavs, y_train = make_xy_data(x_train_wav_filenames, y_dict)

print(x_train_wavs.shape, y_train.shape)

Make train data.......


HBox(children=(IntProgress(value=0, max=1856), HTML(value='')))

KeyboardInterrupt: 

In [None]:
print('Make test data.......')
# os.listdir() returns entries in arbitrary order; sorting the listing gives
# the test samples the same order on every run.
x_test_wav_filenames = [os.path.join(test_dir, filename)
                        for filename in sorted(os.listdir(test_dir))
                        if filename.endswith('.wav')]
x_test_wavs, y_test = make_xy_data(x_test_wav_filenames, y_dict)

In [11]:
def preprocess(wav):
    """Summarise one waveform as a vector of 13 averaged MFCC values.

    Steps:
      1. Scale the waveform with ``sklearn.preprocessing.maxabs_scale``.
      2. Compute 13 MFCCs with librosa's default settings.
      3. Fit a fresh ``StandardScaler`` on the MFCC matrix and transform it.
         NOTE(review): StandardScaler standardises per column; with
         librosa's (coefficient, frame) layout that is per frame -- confirm
         this is the intended axis.
      4. Average along axis 1, giving one value per MFCC coefficient.
    """
    scaled = sklearn.preprocessing.maxabs_scale(wav)
    mfcc = librosa.feature.mfcc(y=scaled, n_mfcc=13)
    mfcc_standardised = StandardScaler().fit_transform(mfcc)
    mfcc_summary = np.mean(mfcc_standardised, axis=1)

    # Kept as a concatenation so further feature groups can be appended.
    return np.concatenate([mfcc_summary])

def train(x_train_wavs, y_train):
    """Fit three classifiers on the training clips and pickle each one.

    ``preprocess`` is applied to every row of ``x_train_wavs`` to obtain the
    feature matrix.  A logistic regression, a linear SVM and a kernel SVM
    are then fitted in that order; for each one the accuracy on the
    training data itself is printed and the fitted model is written to disk
    with ``joblib.dump``.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    # (printed label, unfitted estimator, output pickle) -- processed in order.
    estimators = (
        ('Logistic Regression Score:',
         linear_model.LogisticRegression(penalty='l2', C=0.5),
         'logreg_chainsaw_mfcc_13.pkl'),
        ('Linear SVM Score:', LinearSVC(), 'linear_svc_chainsaw.pkl'),
        ('Kernel SVM Score:', SVC(), 'kernel_svc_chainsaw.pkl'),
    )
    for banner, clf, pkl_name in estimators:
        clf.fit(features, y_train)
        print(banner, clf.score(features, y_train))
        joblib.dump(clf, pkl_name)

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('logreg_chainsaw.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('linear_svc_chainsaw.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Linear SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Linear SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('kernel_svc_chainsaw.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Kernel SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [12]:
# Fit and pickle the classifiers, then score the pickled models on the test clips.
train(x_train_wavs=x_train_wavs, y_train=y_train)
test(x_test_wavs=x_test_wavs, y_test=y_test)



Logistic Regression Score: 0.9308817013735047
Linear SVM Score: 0.9308817013735047




Kernel SVM Score: 0.9481612760301285



FileNotFoundError: [Errno 2] No such file or directory: 'logreg_chainsaw.pkl'

In [225]:
# Evaluate the saved classifiers on three ESC-50 categories.  Labels are not
# looked up in y_dict: every clip is assigned label 0, so the printed accuracy
# is the fraction of clips predicted as class 0.  Because there are no
# positive labels, the printed F1 score is 0.0 and carries no information.
# NOTE(review): presumably label 0 means "not the target sound" -- confirm.
base_dir = '../ESC-50-master/split_wav/'

for category in ('wind', 'rain', 'engine'):
    # This rebinds the notebook-level test_dir (and base_dir above).
    test_dir = base_dir + category + '/'

    print('Make test data.......')
    x_temp_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                                if filename.endswith('.wav')]

    x_temp_wavs = load_wavs(x_temp_wav_filenames)
    y_temp = np.zeros(x_temp_wavs.shape[0])

    test(x_temp_wavs, y_temp)

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

66.6 µs ± 20.5 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.85


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

The slowest run took 4.31 times longer than the fastest. This could mean that an intermediate result is being cached.
130 µs ± 63.3 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.85


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

682 µs ± 32.7 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.85


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

52.1 µs ± 4.82 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.475


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

78.8 µs ± 30.3 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.5


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

680 µs ± 138 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.475


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

68.9 µs ± 15.9 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.675


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

183 µs ± 57.3 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

838 µs ± 121 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0



In [9]:
def preprocess(wav):
    """Summarise one waveform as 13 MFCC means followed by 128 log-mel means.

    Steps:
      1. Scale the waveform with ``sklearn.preprocessing.maxabs_scale``.
      2. Compute 13 MFCCs and average them along axis 1.
      3. Compute a 128-band mel spectrogram, convert it to decibels and
         average it along axis 1.
      4. Concatenate both summaries into one vector of 13 + 128 = 141 values.

    No standardisation is applied to either feature group.
    """
    wav = sklearn.preprocessing.maxabs_scale(wav)

    mfcc = librosa.feature.mfcc(y=wav, n_mfcc=13)
    mfcc_mean = mfcc.mean(axis=1)

    # Pass the signal by keyword, like the mfcc call above: newer librosa
    # releases accept the audio argument only as ``y=...``.
    mel = librosa.feature.melspectrogram(y=wav, n_mels=128)
    # NOTE(review): melspectrogram's default output appears to be a power
    # spectrogram, for which librosa.power_to_db would be the matching
    # conversion.  amplitude_to_db is kept so the feature values stay the same
    # as before -- confirm which scale is intended.
    log_mel = librosa.amplitude_to_db(mel)
    log_mel_mean = log_mel.mean(axis=1)

    return np.concatenate([mfcc_mean, log_mel_mean])

def train(x_train_wavs, y_train):
    """Fit three classifiers on the training clips and pickle each one.

    ``preprocess`` is applied to every row of ``x_train_wavs`` to obtain the
    feature matrix.  A logistic regression, a linear SVM and a kernel SVM
    are then fitted in that order; for each one the accuracy on the
    training data itself is printed and the fitted model is written to disk
    with ``joblib.dump``.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    # (printed label, unfitted estimator, output pickle) -- processed in order.
    estimators = (
        ('Logistic Regression Score:',
         linear_model.LogisticRegression(C=0.5),
         'logreg_chainsaw_mfcc_logmel_C0.5.pkl'),
        ('Linear SVM Score:', LinearSVC(), 'linear_svc_chainsaw.pkl'),
        ('Kernel SVM Score:', SVC(), 'kernel_svc_chainsaw.pkl'),
    )
    for banner, clf, pkl_name in estimators:
        clf.fit(features, y_train)
        print(banner, clf.score(features, y_train))
        joblib.dump(clf, pkl_name)

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('logreg_chainsaw.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('linear_svc_chainsaw.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Linear SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Linear SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('kernel_svc_chainsaw.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Kernel SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [10]:
# Fit and pickle the classifiers, then score the pickled models on the test clips.
train(x_train_wavs=x_train_wavs, y_train=y_train)
test(x_test_wavs=x_test_wavs, y_test=y_test)



Logistic Regression Score: 0.9712007089056269




Linear SVM Score: 0.9587948604342047




Kernel SVM Score: 1.0



FileNotFoundError: [Errno 2] No such file or directory: 'logreg_chainsaw.pkl'

In [229]:
# Evaluate the saved classifiers on three ESC-50 categories.  Labels are not
# looked up in y_dict: every clip is assigned label 0, so the printed accuracy
# is the fraction of clips predicted as class 0.  Because there are no
# positive labels, the printed F1 score is 0.0 and carries no information.
# NOTE(review): presumably label 0 means "not the target sound" -- confirm.
base_dir = '../ESC-50-master/split_wav/'

for category in ('wind', 'rain', 'engine'):
    # This rebinds the notebook-level test_dir (and base_dir above).
    test_dir = base_dir + category + '/'

    print('Make test data.......')
    x_temp_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                                if filename.endswith('.wav')]

    x_temp_wavs = load_wavs(x_temp_wav_filenames)
    y_temp = np.zeros(x_temp_wavs.shape[0])

    test(x_temp_wavs, y_temp)

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

65.3 µs ± 14.7 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

75.1 µs ± 17.3 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.875


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

25.8 ms ± 521 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

63.7 µs ± 8.03 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.7


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

The slowest run took 7.73 times longer than the fastest. This could mean that an intermediate result is being cached.
155 µs ± 147 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.55


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

25.7 ms ± 3.22 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

63.7 µs ± 6.27 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.95


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

76.4 µs ± 12.5 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Linear SVM Accuracy: 0.975


  'recall', 'true', average, warn_for)


Linear SVM F1 Score: 0.0

27.4 ms ± 4.04 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0



In [231]:
# Show the shape of the feature vector produced for the first loaded clip.
preprocess(x_temp_wavs[0]).shape

(141,)