In [1]:
import os
import librosa
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
import librosa
import librosa.display

import scipy

from tqdm import tqdm
import matplotlib.pyplot as plt

% matplotlib inline
np.random.seed(7)

In [2]:
base_dir = '/data/private/SU/bbchip13/chainsaw_classification/data/'
chainsaw_wav_dir = base_dir+'chainsaw/'
other_wav_dir = base_dir+'no_chainsaw/'

In [3]:
chainsaw_list = [chainsaw_wav_dir+filename for filename in os.listdir(chainsaw_wav_dir) 
                 if filename.endswith('.wav')]
chainsaw_list = sklearn.utils.shuffle(chainsaw_list)
# chainsaw_list = chainsaw_list[:1200]
chainsaw_list = chainsaw_list
chainsaw_labels = np.ones(len(chainsaw_list))

no_chainsaw_list = [other_wav_dir+filename for filename in os.listdir(other_wav_dir)
                    if filename.endswith('.wav')]
no_chainsaw_list = sklearn.utils.shuffle(no_chainsaw_list)
# no_chainsaw_list = no_chainsaw_list[:1200]
no_chainsaw_list = no_chainsaw_list
no_chainsaw_labels = np.zeros(len(no_chainsaw_list))

In [4]:
def load_wavs(filenames):
    return np.asarray([librosa.load(filename)[0] for filename in tqdm(filenames)])

### If you have lack of memory, Use this
#     wav = librosa.load(filenames[0])
#     wavs = np.zeros( (len(filenames), wav.shape[0]) )
#     for i, filename in enumerate(filenames):
#         wavs[i][:] = librosa.load(filename)[:]
#     return wavs

In [5]:
x_train_chainsaw, x_test_chainsaw, y_train_chainsaw, y_test_chainsaw \
    = train_test_split(chainsaw_list, chainsaw_labels, test_size = 0.33, random_state = 7)
x_train_no_chainsaw, x_test_no_chainsaw, y_train_no_chainsaw, y_test_no_chainsaw \
    = train_test_split(no_chainsaw_list, no_chainsaw_labels, test_size = 0.33, random_state = 7)

x_train_filenames = x_train_chainsaw+x_train_no_chainsaw
y_train = np.concatenate([y_train_chainsaw, y_train_no_chainsaw])

x_test_filenames = x_test_chainsaw+x_test_no_chainsaw
y_test = np.concatenate([y_test_chainsaw, y_test_no_chainsaw])

x_train_filenames, y_train = sklearn.utils.shuffle(x_train_filenames, y_train)
x_test_filenames, y_test = sklearn.utils.shuffle(x_test_filenames, y_test)

x_train_wavs = load_wavs(x_train_filenames)
x_test_wavs = load_wavs(x_test_filenames)

100%|██████████| 2218/2218 [11:16<00:00,  3.26it/s]
100%|██████████| 1093/1093 [05:33<00:00,  3.26it/s]


In [6]:
def preprocess(wav):
    wav = sklearn.preprocessing.minmax_scale(wav, feature_range=(-1, 1))
    wav_mfcc = librosa.feature.mfcc(y=wav, n_mfcc=13)
    wav_mfcc_std = StandardScaler().fit_transform(wav_mfcc)
    wav_mfcc_std = wav_mfcc_std.mean(axis=1)
    
    features = np.concatenate([wav_mfcc_std])
    return features

def train(x_train_wavs, y_train):
    x_train = np.apply_along_axis(preprocess, 1, x_train_wavs)

    logreg = linear_model.LogisticRegression()
    logreg.fit(x_train, y_train)
    print('Logistic Regression Score:', logreg.score(x_train, y_train))
    joblib.dump(logreg, 'logreg_chainsaw.pkl')
    
    linear_svc = LinearSVC()
    linear_svc.fit(x_train, y_train)
    print('Linear SVM Score:', linear_svc.score(x_train, y_train))
    joblib.dump(linear_svc, 'linear_svc_chainsaw.pkl') 

    kernel_svc = SVC()
    kernel_svc.fit(x_train, y_train)
    print('Kernel SVM Score:', kernel_svc.score(x_train, y_train))
    joblib.dump(kernel_svc, 'kernel_svc_chainsaw.pkl')


def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('logreg_chainsaw.pkl')
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    
    clf = joblib.load('linear_svc_chainsaw.pkl')
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Linear SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    
    clf = joblib.load('kernel_svc_chainsaw.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))

In [7]:
train(x_train_wavs, y_train)
test(x_test_wavs, y_test)

Logistic Regression Score: 0.9337240757439135
Linear SVM Score: 0.9350766456266907
Kernel SVM Score: 0.9517583408476105
27.3 ms ± 7.81 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
Logistic Regression Accuracy: 0.9277218664226898
35.3 ms ± 8.48 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
Linear SVM Accuracy: 0.9277218664226898
19.3 ms ± 302 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
Kernel SVM Accuracy: 0.9451052150045746
