In [35]:
import os
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler

import librosa
import librosa.display

from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt

%matplotlib inline
np.random.seed(7)

In [36]:
# Root of the dataset and its three split folders (paths keep a trailing slash
# so filenames can be appended directly).
base_dir = '../data/'
train_dir, val_dir, test_dir = (base_dir + split
                                for split in ('train/', 'val/', 'test/'))

In [37]:
def load_wavs(filenames):
    """Decode every file in ``filenames`` with ``librosa.load`` defaults.

    A notebook progress bar is shown while loading. Only the sample array of
    each ``librosa.load`` result is kept; the sample rate is discarded.

    Returns:
        ``np.asarray`` over the list of per-file sample arrays. This is a 2-D
        (n_files, n_samples) array only when all clips have equal length.
    """
    waveforms = []
    for path in tqdm(filenames):
        samples, _sample_rate = librosa.load(path)
        waveforms.append(samples)
    return np.asarray(waveforms)

### If you run out of memory, use this instead (fills one preallocated array;
### assumes every clip has the same length as the first one)
#     wav = librosa.load(filenames[0])[0]
#     wavs = np.zeros( (len(filenames), wav.shape[0]) )
#     for i, filename in enumerate(filenames):
#         wavs[i][:] = librosa.load(filename)[0]
#     return wavs
    
def find_y_by_filename(filename, y_dict):
    """Look up the label for one audio file.

    Args:
        filename: Path to the file; only its final path component is used.
        y_dict: Mapping from bare file name to label.

    Returns:
        The value stored in ``y_dict`` for the file's basename.

    Raises:
        KeyError: If the basename is not a key of ``y_dict``.
    """
    _, leaf = os.path.split(filename)
    return y_dict[leaf]

def make_y_by_filenames(filenames, y_dict):
    """Build the label vector for ``filenames``, in the same order.

    Each path is resolved through ``find_y_by_filename``, so a file whose
    basename is missing from ``y_dict`` raises ``KeyError``.
    """
    labels = []
    for path in filenames:
        labels.append(find_y_by_filename(path, y_dict))
    return np.asarray(labels)

def make_xy_data(filenames, y_dict):
    """Return ``(waveforms, labels)`` for ``filenames``.

    Audio is loaded first via ``load_wavs``; labels are then looked up via
    ``make_y_by_filenames``. Both outputs follow the order of ``filenames``.
    """
    return load_wavs(filenames), make_y_by_filenames(filenames, y_dict)

In [38]:
### Make Y data
# Build a lookup from wav file name to integer label out of the annotation CSV.
annotations_filename = '1200_data_annotations.csv'
df = pd.read_csv(annotations_filename)
# itertuples() yields (index, *columns), so the 4-way unpack requires the CSV
# to have exactly three columns; the first two are used as file name and label.
# NOTE(review): the third column is ignored here - confirm what it holds.
y_dict = {filename:int(label) for _, filename, label, _ in df.itertuples()}
# y_dict

In [39]:
# Gather every .wav under the train and validation folders. The validation
# files are appended to the training list, so both splits are used for fitting.
print('Make train data.......')
x_train_wav_filenames = [train_dir+filename for filename in os.listdir(train_dir)
                            if filename.endswith('.wav')]
x_val_wav_filenames = [val_dir+filename for filename in os.listdir(val_dir)
                            if filename.endswith('.wav')]
x_train_wav_filenames += x_val_wav_filenames
# Load the waveforms and look up one label per file (KeyError if a file is
# missing from y_dict).
x_train_wavs, y_train = make_xy_data(x_train_wav_filenames, y_dict)

# Sanity check: number of clips must match number of labels.
print(x_train_wavs.shape, y_train.shape)

Make train data.......


HBox(children=(IntProgress(value=0, max=1856), HTML(value='')))

(1856, 110250) (1856,)


In [40]:
# Load the held-out test split: every .wav under test_dir plus its label.
print('Make test data.......')
x_test_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                            if filename.endswith('.wav')]
x_test_wavs, y_test = make_xy_data(x_test_wav_filenames, y_dict)

Make test data.......


HBox(children=(IntProgress(value=0, max=884), HTML(value='')))

In [7]:
def preprocess(wav):
    """Turn one waveform into a 13-value MFCC summary vector.

    Steps: scale the signal by its maximum absolute value, compute 13 MFCCs,
    run the MFCC matrix through a freshly fitted ``StandardScaler``, then
    average over axis 1.

    Note: ``StandardScaler`` standardizes column-wise, and the columns of a
    librosa MFCC matrix are time frames - so each frame is standardized across
    its coefficients, not each coefficient across time.
    """
    normalized = sklearn.preprocessing.maxabs_scale(wav)
    mfcc = librosa.feature.mfcc(y=normalized, n_mfcc=13)
    frame_standardized = StandardScaler().fit_transform(mfcc)
    return np.concatenate([frame_standardized.mean(axis=1)])

def train(x_train_wavs, y_train):
    """Fit and pickle a logistic regression and an RBF-default ``SVC``.

    Each row of ``x_train_wavs`` is converted with the current ``preprocess``.
    For every model the training-set accuracy is printed and the fitted
    estimator is written to its ``.pkl`` file in the working directory.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    experiments = (
        ('Logistic Regression Score:',
         linear_model.LogisticRegression(penalty='l2', C=0.5),
         '1200_logreg_chainsaw_mfcc_13.pkl'),
        ('Kernel SVM Score:',
         SVC(),
         '1200_kernel_svc_chainsaw_mfcc_13.pkl'),
    )
    for banner, model, pickle_path in experiments:
        model.fit(features, y_train)
        print(banner, model.score(features, y_train))
        joblib.dump(model, pickle_path)

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_13.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('1200_kernel_svc_chainsaw_mfcc_13.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Kernel SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [8]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled models on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)



Logistic Regression Score: 0.9116379310344828




Kernel SVM Score: 0.9423491379310345

The slowest run took 4.39 times longer than the fastest. This could mean that an intermediate result is being cached.
200 µs ± 120 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9321266968325792
Logistic Regression F1 Score: 0.933920704845815

10.7 ms ± 242 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.9457013574660633
Kernel SVM F1 Score: 0.947939262472885



In [9]:
# Run the current models on three ESC-50 categories, one folder at a time.
# Every clip is labelled 0 (presumably "not the target sound" - TODO confirm),
# so accuracy is the share of clips predicted 0 and F1 is always reported as 0.
# NOTE: this rebinds the notebook-level `base_dir` / `test_dir`; re-run the
# data-directory cell before reloading the original train/test splits.
base_dir = '../ESC-50-master/split_wav/'

for category in ('wind', 'rain', 'engine'):
    test_dir = base_dir + category + '/'

    print('Make test data.......')
    x_temp_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                                if filename.endswith('.wav')]

    x_temp_wavs = load_wavs(x_temp_wav_filenames)
    y_temp = np.zeros(x_temp_wavs.shape[0])

    test(x_temp_wavs, y_temp)

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


The slowest run took 4.79 times longer than the fastest. This could mean that an intermediate result is being cached.
82.5 µs ± 45.4 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.925


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

536 µs ± 9.56 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.925


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


45 µs ± 4.75 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.525


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

544 µs ± 17.7 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.525


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


46.5 µs ± 6.21 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.675


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

557 µs ± 33.9 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.725


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0



In [10]:
def preprocess(wav, n_mfcc=13, n_mels=128):
    """Turn one waveform into a time-averaged MFCC + log-mel feature vector.

    Args:
        wav: 1-D array of audio samples.
        n_mfcc: Number of MFCC coefficients (default 13).
        n_mels: Number of mel bands (default 128).

    Returns:
        1-D array of length ``n_mfcc + n_mels``: the per-coefficient MFCC means
        followed by the per-band means of the dB-scaled mel spectrogram. No
        standardization is applied in this variant.
    """
    # Scale by the maximum absolute sample value.
    wav = sklearn.preprocessing.maxabs_scale(wav)

    mfcc_mean = librosa.feature.mfcc(y=wav, n_mfcc=n_mfcc).mean(axis=1)

    # Audio is passed as `y=` (as for mfcc above); newer librosa releases no
    # longer accept it positionally.
    mel_spec = librosa.feature.melspectrogram(y=wav, n_mels=n_mels)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the features stay identical to earlier runs.
    log_mel_mean = librosa.amplitude_to_db(mel_spec).mean(axis=1)

    return np.concatenate([mfcc_mean, log_mel_mean])

def train(x_train_wavs, y_train):
    """Fit a logistic regression (C=0.5) on ``preprocess`` features and pickle it.

    Prints the training-set accuracy, writes the fitted model to
    '1200_logreg_chainsaw_mfcc_logmel_C0.5.pkl', then prints a blank line.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    classifier = linear_model.LogisticRegression(C=0.5)
    classifier.fit(features, y_train)
    train_accuracy = classifier.score(features, y_train)
    print('Logistic Regression Score:', train_accuracy)
    joblib.dump(classifier, '1200_logreg_chainsaw_mfcc_logmel_C0.5.pkl')

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [11]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled model on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)



Logistic Regression Score: 0.9665948275862069

330 µs ± 69.4 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9423076923076923
Logistic Regression F1 Score: 0.9437706725468578



In [12]:
# Run the current model on three ESC-50 categories, one folder at a time.
# Every clip is labelled 0 (presumably "not the target sound" - TODO confirm),
# so accuracy is the share of clips predicted 0 and F1 is always reported as 0.
# NOTE: this rebinds the notebook-level `base_dir` / `test_dir`; re-run the
# data-directory cell before reloading the original train/test splits.
base_dir = '../ESC-50-master/split_wav/'

for category in ('wind', 'rain', 'engine'):
    test_dir = base_dir + category + '/'

    print('Make test data.......')
    x_temp_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                                if filename.endswith('.wav')]

    x_temp_wavs = load_wavs(x_temp_wav_filenames)
    y_temp = np.zeros(x_temp_wavs.shape[0])

    test(x_temp_wavs, y_temp)

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


70.9 µs ± 31.4 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.95


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


55.9 µs ± 14.1 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.7


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


53.6 µs ± 6.98 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.95


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0



In [16]:
def preprocess(wav, n_mfcc=13, n_mels=128):
    """Turn one waveform into a standardized MFCC + log-mel feature vector.

    Args:
        wav: 1-D array of audio samples.
        n_mfcc: Number of MFCC coefficients (default 13).
        n_mels: Number of mel bands (default 128).

    Returns:
        1-D array of length ``n_mfcc + n_mels``: the time-averaged standardized
        MFCCs followed by the time-averaged standardized dB mel spectrogram.

    Note: ``StandardScaler`` standardizes column-wise, and the columns of the
    librosa matrices are time frames - so each frame is standardized across
    its coefficients/bands before the average over time is taken.
    """
    # Scale by the maximum absolute sample value.
    wav = sklearn.preprocessing.maxabs_scale(wav)

    wav_mfcc = librosa.feature.mfcc(y=wav, n_mfcc=n_mfcc)
    wav_mfcc_std_mean = StandardScaler().fit_transform(wav_mfcc).mean(axis=1)

    # Audio is passed as `y=` (as for mfcc above); newer librosa releases no
    # longer accept it positionally.
    S = librosa.feature.melspectrogram(y=wav, n_mels=n_mels)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the features stay identical to earlier runs.
    log_S = librosa.amplitude_to_db(S)
    log_S_std_mean = StandardScaler().fit_transform(log_S).mean(axis=1)

    return np.concatenate([wav_mfcc_std_mean, log_S_std_mean])

def train(x_train_wavs, y_train):
    """Fit and pickle a logistic regression (C=0.1) and a default ``SVC``.

    Each row of ``x_train_wavs`` is converted with the current ``preprocess``.
    For every model the training-set accuracy is printed and the fitted
    estimator is written to its ``.pkl`` file in the working directory.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    experiments = (
        ('Logistic Regression Score:',
         linear_model.LogisticRegression(C=0.1),
         '1200_logreg_chainsaw_mfcc_logmel_C0.1_std.pkl'),
        ('Kernel SVM Score:',
         SVC(),
         '1200_kernel_svc_chainsaw_mfcc_13_std.pkl'),
    )
    for banner, model, pickle_path in experiments:
        model.fit(features, y_train)
        print(banner, model.score(features, y_train))
        joblib.dump(model, pickle_path)

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.1_std.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('1200_kernel_svc_chainsaw_mfcc_13_std.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Kernel SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [17]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled models on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)

331 µs ± 85.5 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9615384615384616
Logistic Regression F1 Score: 0.9626373626373628

99.4 ms ± 7.55 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.9660633484162896
Kernel SVM F1 Score: 0.9672489082969432



In [18]:
# Run the current models on three ESC-50 categories, one folder at a time.
# Every clip is labelled 0 (presumably "not the target sound" - TODO confirm),
# so accuracy is the share of clips predicted 0 and F1 is always reported as 0.
# NOTE: this rebinds the notebook-level `base_dir` / `test_dir`; re-run the
# data-directory cell before reloading the original train/test splits.
base_dir = '../ESC-50-master/split_wav/'

for category in ('wind', 'rain', 'engine'):
    test_dir = base_dir + category + '/'

    print('Make test data.......')
    x_temp_wav_filenames = [test_dir+filename for filename in os.listdir(test_dir)
                                if filename.endswith('.wav')]

    x_temp_wavs = load_wavs(x_temp_wav_filenames)
    y_temp = np.zeros(x_temp_wavs.shape[0])

    test(x_temp_wavs, y_temp)

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


71.8 µs ± 39.2 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

3.93 ms ± 148 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.9


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


56.2 µs ± 12.1 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.65


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

5.39 ms ± 1.69 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.65


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

Make test data.......


HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


118 µs ± 19.8 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.95


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

4.18 ms ± 598 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.95


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0



In [33]:
# Run the current models on the presentation clips and list the files whose
# prediction differs from the all-zero labels.
presentation_audio_path = '../presentation_audio/'

print('Make test data.......')
x_temp_wav_filenames = [presentation_audio_path+filename for filename in os.listdir(presentation_audio_path)
                            if filename.endswith('.wav')]

x_temp_wavs = load_wavs(x_temp_wav_filenames)
# Every clip is labelled 0, so a "mismatch" below is simply a clip predicted
# as non-zero. NOTE(review): if the folder holds genuine positive clips, those
# entries are detections rather than errors - confirm the intended labels.
y_temp = np.zeros(x_temp_wavs.shape[0])

test(x_temp_wavs, y_temp)


x_test = np.apply_along_axis(preprocess, 1, x_temp_wavs)
clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.1_std.pkl')
y_test_estimated = clf.predict(x_test)
result_idx = np.argwhere(y_test_estimated != y_temp)
print(result_idx)
# argwhere returns an (n, 1) column of ascending indices; flatten it and index
# the filename list directly instead of testing membership for every file.
result_names = [x_temp_wav_filenames[i] for i in result_idx.ravel()]
print(result_names)

Make test data.......


HBox(children=(IntProgress(value=0, max=76), HTML(value='')))

156 µs ± 80.1 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.8026315789473685


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

7.29 ms ± 231 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Kernel SVM Accuracy: 0.7894736842105263


  'recall', 'true', average, warn_for)


Kernel SVM F1 Score: 0.0

[[42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [56]
 [57]]
['../presentation_audio/전기톱 (1).wav', '../presentation_audio/전기톱 (10).wav', '../presentation_audio/전기톱 (11).wav', '../presentation_audio/전기톱 (12).wav', '../presentation_audio/전기톱 (13).wav', '../presentation_audio/전기톱 (14).wav', '../presentation_audio/전기톱 (15).wav', '../presentation_audio/전기톱 (16).wav', '../presentation_audio/전기톱 (2).wav', '../presentation_audio/전기톱 (3).wav', '../presentation_audio/전기톱 (4).wav', '../presentation_audio/전기톱 (5).wav', '../presentation_audio/전기톱 (6).wav', '../presentation_audio/전기톱 (8).wav', '../presentation_audio/전기톱 (9).wav']


In [41]:
def preprocess(wav, n_mfcc=13, n_mels=64):
    """Turn one waveform into a time-averaged MFCC + log-mel feature vector.

    Args:
        wav: 1-D array of audio samples.
        n_mfcc: Number of MFCC coefficients (default 13).
        n_mels: Number of mel bands (default 64).

    Returns:
        1-D array of length ``n_mfcc + n_mels``: the per-coefficient MFCC means
        followed by the per-band means of the dB-scaled mel spectrogram. No
        standardization is applied in this variant.
    """
    # Scale by the maximum absolute sample value.
    wav = sklearn.preprocessing.maxabs_scale(wav)

    mfcc_mean = librosa.feature.mfcc(y=wav, n_mfcc=n_mfcc).mean(axis=1)

    # Audio is passed as `y=` (as for mfcc above); newer librosa releases no
    # longer accept it positionally.
    mel_spec = librosa.feature.melspectrogram(y=wav, n_mels=n_mels)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the features stay identical to earlier runs.
    log_mel_mean = librosa.amplitude_to_db(mel_spec).mean(axis=1)

    return np.concatenate([mfcc_mean, log_mel_mean])

def train(x_train_wavs, y_train):
    """Fit a logistic regression (C=0.5) on ``preprocess`` features and pickle it.

    Prints the training-set accuracy and writes the fitted model to
    '1200_logreg_chainsaw_mfcc_logmel_C0.5_64.pkl'.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    classifier = linear_model.LogisticRegression(C=0.5)
    classifier.fit(features, y_train)
    train_accuracy = classifier.score(features, y_train)
    print('Logistic Regression Score:', train_accuracy)
    joblib.dump(classifier, '1200_logreg_chainsaw_mfcc_logmel_C0.5_64.pkl')
     
def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_64.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    

In [42]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled model on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)



Logistic Regression Score: 0.9595905172413793
The slowest run took 4.59 times longer than the fastest. This could mean that an intermediate result is being cached.
273 µs ± 206 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9490950226244343
Logistic Regression F1 Score: 0.9502762430939227



In [44]:
# Run the current model on the presentation clips and list the files whose
# prediction differs from the all-zero labels.
presentation_audio_path = '../presentation_audio/'

print('Make test data.......')
x_temp_wav_filenames = [presentation_audio_path+filename for filename in os.listdir(presentation_audio_path)
                            if filename.endswith('.wav')]

x_temp_wavs = load_wavs(x_temp_wav_filenames)
# Every clip is labelled 0, so a "mismatch" below is simply a clip predicted
# as non-zero. NOTE(review): if the folder holds genuine positive clips, those
# entries are detections rather than errors - confirm the intended labels.
y_temp = np.zeros(x_temp_wavs.shape[0])

test(x_temp_wavs, y_temp)


x_test = np.apply_along_axis(preprocess, 1, x_temp_wavs)
clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_64.pkl')
y_test_estimated = clf.predict(x_test)
result_idx = np.argwhere(y_test_estimated != y_temp)
print(result_idx)
# argwhere returns an (n, 1) column of ascending indices; flatten it and index
# the filename list directly instead of testing membership for every file.
result_names = [x_temp_wav_filenames[i] for i in result_idx.ravel()]
print(result_names)

Make test data.......


HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

50.9 µs ± 6.45 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.7733333333333333


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

[[27]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]
 [58]]
['../presentation_audio/비행기 (1).wav', '../presentation_audio/전기톱 (1).wav', '../presentation_audio/전기톱 (10).wav', '../presentation_audio/전기톱 (11).wav', '../presentation_audio/전기톱 (12).wav', '../presentation_audio/전기톱 (13).wav', '../presentation_audio/전기톱 (14).wav', '../presentation_audio/전기톱 (15).wav', '../presentation_audio/전기톱 (2).wav', '../presentation_audio/전기톱 (3).wav', '../presentation_audio/전기톱 (4).wav', '../presentation_audio/전기톱 (5).wav', '../presentation_audio/전기톱 (6).wav', '../presentation_audio/전기톱 (7).wav', '../presentation_audio/전기톱 (8).wav', '../presentation_audio/전기톱 (9).wav', '../presentation_audio/찌르레기 (2).wav']


In [45]:
def preprocess(wav, n_mfcc=13, n_mels=32):
    """Turn one waveform into a time-averaged MFCC + log-mel feature vector.

    Args:
        wav: 1-D array of audio samples.
        n_mfcc: Number of MFCC coefficients (default 13).
        n_mels: Number of mel bands (default 32).

    Returns:
        1-D array of length ``n_mfcc + n_mels``: the per-coefficient MFCC means
        followed by the per-band means of the dB-scaled mel spectrogram. No
        standardization is applied in this variant.
    """
    # Scale by the maximum absolute sample value.
    wav = sklearn.preprocessing.maxabs_scale(wav)

    mfcc_mean = librosa.feature.mfcc(y=wav, n_mfcc=n_mfcc).mean(axis=1)

    # Audio is passed as `y=` (as for mfcc above); newer librosa releases no
    # longer accept it positionally.
    mel_spec = librosa.feature.melspectrogram(y=wav, n_mels=n_mels)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the features stay identical to earlier runs.
    log_mel_mean = librosa.amplitude_to_db(mel_spec).mean(axis=1)

    return np.concatenate([mfcc_mean, log_mel_mean])

def train(x_train_wavs, y_train):
    """Fit a logistic regression (C=0.5) on ``preprocess`` features and pickle it.

    Prints the training-set accuracy and writes the fitted model to
    '1200_logreg_chainsaw_mfcc_logmel_C0.5_32.pkl'.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    classifier = linear_model.LogisticRegression(C=0.5)
    classifier.fit(features, y_train)
    train_accuracy = classifier.score(features, y_train)
    print('Logistic Regression Score:', train_accuracy)
    joblib.dump(classifier, '1200_logreg_chainsaw_mfcc_logmel_C0.5_32.pkl')
     
def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_32.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    

In [46]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled model on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)



Logistic Regression Score: 0.9558189655172413
348 µs ± 136 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9581447963800905
Logistic Regression F1 Score: 0.9593852908891329



In [47]:
# Run the current model on the presentation clips and list the files whose
# prediction differs from the all-zero labels.
presentation_audio_path = '../presentation_audio/'

print('Make test data.......')
x_temp_wav_filenames = [presentation_audio_path+filename for filename in os.listdir(presentation_audio_path)
                            if filename.endswith('.wav')]

x_temp_wavs = load_wavs(x_temp_wav_filenames)
# Every clip is labelled 0, so a "mismatch" below is simply a clip predicted
# as non-zero. NOTE(review): if the folder holds genuine positive clips, those
# entries are detections rather than errors - confirm the intended labels.
y_temp = np.zeros(x_temp_wavs.shape[0])

test(x_temp_wavs, y_temp)


x_test = np.apply_along_axis(preprocess, 1, x_temp_wavs)
clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_32.pkl')
y_test_estimated = clf.predict(x_test)
result_idx = np.argwhere(y_test_estimated != y_temp)
print(result_idx)
# argwhere returns an (n, 1) column of ascending indices; flatten it and index
# the filename list directly instead of testing membership for every file.
result_names = [x_temp_wav_filenames[i] for i in result_idx.ravel()]
print(result_names)

Make test data.......


HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

61 µs ± 12.5 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.7866666666666666


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

[[27]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]]
['../presentation_audio/비행기 (1).wav', '../presentation_audio/전기톱 (1).wav', '../presentation_audio/전기톱 (10).wav', '../presentation_audio/전기톱 (11).wav', '../presentation_audio/전기톱 (12).wav', '../presentation_audio/전기톱 (13).wav', '../presentation_audio/전기톱 (14).wav', '../presentation_audio/전기톱 (15).wav', '../presentation_audio/전기톱 (2).wav', '../presentation_audio/전기톱 (3).wav', '../presentation_audio/전기톱 (4).wav', '../presentation_audio/전기톱 (5).wav', '../presentation_audio/전기톱 (6).wav', '../presentation_audio/전기톱 (7).wav', '../presentation_audio/전기톱 (8).wav', '../presentation_audio/전기톱 (9).wav']


In [48]:
def preprocess(wav, n_mfcc=13, n_mels=16):
    """Turn one waveform into a time-averaged MFCC + log-mel feature vector.

    Args:
        wav: 1-D array of audio samples.
        n_mfcc: Number of MFCC coefficients (default 13).
        n_mels: Number of mel bands (default 16).

    Returns:
        1-D array of length ``n_mfcc + n_mels``: the per-coefficient MFCC means
        followed by the per-band means of the dB-scaled mel spectrogram. No
        standardization is applied in this variant.
    """
    # Scale by the maximum absolute sample value.
    wav = sklearn.preprocessing.maxabs_scale(wav)

    mfcc_mean = librosa.feature.mfcc(y=wav, n_mfcc=n_mfcc).mean(axis=1)

    # Audio is passed as `y=` (as for mfcc above); newer librosa releases no
    # longer accept it positionally.
    mel_spec = librosa.feature.melspectrogram(y=wav, n_mels=n_mels)
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the features stay identical to earlier runs.
    log_mel_mean = librosa.amplitude_to_db(mel_spec).mean(axis=1)

    return np.concatenate([mfcc_mean, log_mel_mean])

def train(x_train_wavs, y_train):
    """Fit a logistic regression (C=0.5) on ``preprocess`` features and pickle it.

    Prints the training-set accuracy and writes the fitted model to
    '1200_logreg_chainsaw_mfcc_logmel_C0.5_16.pkl'.
    """
    features = np.apply_along_axis(preprocess, 1, x_train_wavs)

    classifier = linear_model.LogisticRegression(C=0.5)
    classifier.fit(features, y_train)
    train_accuracy = classifier.score(features, y_train)
    print('Logistic Regression Score:', train_accuracy)
    joblib.dump(classifier, '1200_logreg_chainsaw_mfcc_logmel_C0.5_16.pkl')
     
def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_16.pkl')
    %timeit -n 10 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    

In [49]:
# Fit with the currently defined train()/preprocess() on train+val clips, then
# score the pickled model on the held-out test clips.
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)



Logistic Regression Score: 0.9558189655172413
227 µs ± 69.3 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.9547511312217195
Logistic Regression F1 Score: 0.956140350877193



In [51]:
# Run the current model on the presentation clips and list the files whose
# prediction differs from the all-zero labels.
presentation_audio_path = '../presentation_audio/'

print('Make test data.......')
x_temp_wav_filenames = [presentation_audio_path+filename for filename in os.listdir(presentation_audio_path)
                            if filename.endswith('.wav')]

x_temp_wavs = load_wavs(x_temp_wav_filenames)
# Every clip is labelled 0, so a "mismatch" below is simply a clip predicted
# as non-zero. NOTE(review): if the folder holds genuine positive clips, those
# entries are detections rather than errors - confirm the intended labels.
y_temp = np.zeros(x_temp_wavs.shape[0])

test(x_temp_wavs, y_temp)


x_test = np.apply_along_axis(preprocess, 1, x_temp_wavs)
clf = joblib.load('1200_logreg_chainsaw_mfcc_logmel_C0.5_16.pkl')
y_test_estimated = clf.predict(x_test)
result_idx = np.argwhere(y_test_estimated != y_temp)
print(result_idx)
# argwhere returns an (n, 1) column of ascending indices; flatten it and index
# the filename list directly instead of testing membership for every file.
result_names = [x_temp_wav_filenames[i] for i in result_idx.ravel()]
print(result_names)

Make test data.......


HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

55.5 µs ± 6.12 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)
Logistic Regression Accuracy: 0.7866666666666666


  'recall', 'true', average, warn_for)


Logistic Regression F1 Score: 0.0

[[27]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]]
['../presentation_audio/비행기 (1).wav', '../presentation_audio/전기톱 (1).wav', '../presentation_audio/전기톱 (10).wav', '../presentation_audio/전기톱 (11).wav', '../presentation_audio/전기톱 (12).wav', '../presentation_audio/전기톱 (13).wav', '../presentation_audio/전기톱 (14).wav', '../presentation_audio/전기톱 (15).wav', '../presentation_audio/전기톱 (2).wav', '../presentation_audio/전기톱 (3).wav', '../presentation_audio/전기톱 (4).wav', '../presentation_audio/전기톱 (5).wav', '../presentation_audio/전기톱 (6).wav', '../presentation_audio/전기톱 (7).wav', '../presentation_audio/전기톱 (8).wav', '../presentation_audio/전기톱 (9).wav']
