In [1]:
import os

import numpy as np
import pandas as pd

import sklearn
from sklearn import linear_model
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC

# Newer scikit-learn releases no longer ship joblib as sklearn.externals.joblib;
# prefer the standalone package and fall back to the bundled copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

import librosa
import librosa.display

import matplotlib.pyplot as plt
from tqdm import tqdm

% matplotlib inline
np.random.seed(7)

In [2]:
# Dataset root and its per-split sub-directories. Every path keeps a trailing
# slash because later cells build file paths by plain string concatenation.
base_dir = '/data/private/SU/bbchip13/chainsaw_classification/data/'
train_dir, val_dir, test_dir = (
    base_dir + split for split in ('train/', 'val/', 'test/')
)

In [3]:
def load_wavs(filenames):
    """Load every audio file in ``filenames`` and return the samples as one array.

    Only the first element of what ``librosa.load`` returns is kept for each
    file; the second element is discarded. A tqdm progress bar tracks the loop.
    The per-file results are handed to ``np.asarray`` as a list, in input order
    (presumably all clips have the same length so this yields a 2-D array --
    confirm for new data).
    """
    waveforms = []
    for path in tqdm(filenames):
        samples, _unused = librosa.load(path)
        waveforms.append(samples)
    return np.asarray(waveforms)

### If you run out of memory, preallocate the output array and fill it row by row instead:
#     first_wav = librosa.load(filenames[0])[0]
#     wavs = np.zeros( (len(filenames), first_wav.shape[0]), dtype=first_wav.dtype )
#     for i, filename in enumerate(filenames):
#         wavs[i][:] = librosa.load(filename)[0]
#     return wavs
    
def find_y_by_filename(filename, y_dict):
    """Return the label stored in ``y_dict`` for the file at ``filename``.

    Only the final path component is used as the lookup key, so the directory
    part of ``filename`` does not matter. A name that is missing from a plain
    dict raises ``KeyError``.
    """
    return y_dict[os.path.basename(filename)]

def make_y_by_filenames(filenames, y_dict):
    """Build the label array for ``filenames``: one entry per file, in input order."""
    labels = []
    for path in filenames:
        labels.append(find_y_by_filename(path, y_dict))
    return np.asarray(labels)

def make_xy_data(filenames, y_dict):
    """Return the ``(waveforms, labels)`` pair for a list of audio file paths.

    Waveforms come from ``load_wavs`` and labels from ``make_y_by_filenames``;
    both are built from the same ``filenames`` list, waveforms first.
    """
    return load_wavs(filenames), make_y_by_filenames(filenames, y_dict)

In [4]:
### Build the filename -> label lookup used to create the Y data
annotations_filename = 'data_annotations.csv'
df = pd.read_csv(annotations_filename)
# Each itertuples() row must unpack into exactly four fields: the index, the
# filename, the label (cast to int) and one trailing field that is ignored.
y_dict = dict(
    (filename, int(label)) for _, filename, label, _ in df.itertuples()
)
# y_dict

In [5]:
print('Make train data.......')
# os.listdir returns entries in arbitrary order; sort them so the sample order
# (and anything fitted on it) is the same on every run and machine.
x_train_wav_filenames = [train_dir+filename for filename in sorted(os.listdir(train_dir))
                            if filename.endswith('.wav')]
x_val_wav_filenames = [val_dir+filename for filename in sorted(os.listdir(val_dir))
                            if filename.endswith('.wav')]
# The validation clips are appended to the training list, so the models are
# fitted on train + val together.
x_train_wav_filenames = x_train_wav_filenames + x_val_wav_filenames
x_train_wavs, y_train = make_xy_data(x_train_wav_filenames, y_dict)

print(x_train_wavs.shape, y_train.shape)

  0%|          | 0/2257 [00:00<?, ?it/s]

Make train data.......


100%|██████████| 2257/2257 [10:43<00:00,  3.45it/s]


(2257, 110250) (2257,)


In [6]:
def preprocess(wav):
    """Turn one waveform into a fixed-length MFCC feature vector.

    Steps:
      1. Rescale the samples to the range [-1, 1].
      2. Compute 13 MFCCs; all other librosa parameters are left at their defaults.
      3. Standardize the MFCC matrix with a ``StandardScaler`` fitted on that
         same matrix (the scaler works per column -- presumably one column per
         frame, given librosa's layout; confirm).
      4. Average along axis 1.
    """
    scaled = sklearn.preprocessing.minmax_scale(wav, feature_range=(-1, 1))
    mfcc = librosa.feature.mfcc(y=scaled, n_mfcc=13)
    mfcc_mean = StandardScaler().fit_transform(mfcc).mean(axis=1)

    # The list currently holds a single feature group.
    return np.concatenate([mfcc_mean])

def train(x_train_wavs, y_train):
    """Fit three classifiers on the preprocessed waveforms and save each to disk.

    ``preprocess`` is applied to every row of ``x_train_wavs``. A logistic
    regression, a linear SVM and a kernel SVM (all with default settings) are
    then fitted in that order. For each one, the score on the training data
    itself is printed and the fitted model is dumped to a ``.pkl`` file in the
    current working directory. Returns nothing.
    """
    x_train = np.apply_along_axis(preprocess, 1, x_train_wavs)

    # (printed label, estimator factory, output file), processed in order.
    recipes = (
        ('Logistic Regression Score:', linear_model.LogisticRegression, 'logreg_chainsaw.pkl'),
        ('Linear SVM Score:', LinearSVC, 'linear_svc_chainsaw.pkl'),
        ('Kernel SVM Score:', SVC, 'kernel_svc_chainsaw.pkl'),
    )
    for label, make_model, path in recipes:
        model = make_model()
        model.fit(x_train, y_train)
        print(label, model.score(x_train, y_train))
        joblib.dump(model, path)

    print()

def test(x_test_wavs, y_test):
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)

    clf = joblib.load('logreg_chainsaw.pkl')
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Logistic Regression Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Logistic Regression F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('linear_svc_chainsaw.pkl')
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Linear SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Linear SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()
    
    clf = joblib.load('kernel_svc_chainsaw.pkl')
    x_test = np.apply_along_axis(preprocess, 1, x_test_wavs)
    %timeit -n 100 -r 10 clf.predict(x_test)
    y_test_estimated = clf.predict(x_test)
    print('Kernel SVM Accuracy:', accuracy_score(y_test_estimated, y_test))
    print('Kernel SVM F1 Score:', f1_score(y_test, y_test_estimated))
    print()

In [7]:
# Fit and save the three classifiers. The file list given to make_xy_data holds
# both train and val clips, and the scores train() prints are computed on this
# same data, not on held-out data.
train(x_train_wavs, y_train)



Logistic Regression Score: 0.9317678334071776
Linear SVM Score: 0.9322108994240141
Kernel SVM Score: 0.9472751439964555



In [8]:
print('Make test data.......')
# Every .wav entry of the test directory, prefixed with the directory path,
# then loaded together with its label.
x_test_wav_filenames = [
    test_dir + entry
    for entry in os.listdir(test_dir)
    if entry.endswith('.wav')
]
x_test_wavs, y_test = make_xy_data(x_test_wav_filenames, y_dict)

  0%|          | 0/1054 [00:00<?, ?it/s]

Make test data.......


100%|██████████| 1054/1054 [05:00<00:00,  3.50it/s]


In [9]:
# Load the three saved models and report prediction time, accuracy and F1 score
# on the clips from the test directory.
test(x_test_wavs, y_test)

The slowest run took 8.44 times longer than the fastest. This could mean that an intermediate result is being cached.
906 µs ± 262 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
Logistic Regression Accuracy: 0.9364326375711575
Logistic Regression F1 Score: 0.9477786438035852

The slowest run took 21.93 times longer than the fastest. This could mean that an intermediate result is being cached.
7.5 ms ± 5.48 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
Linear SVM Accuracy: 0.9335863377609108
Linear SVM F1 Score: 0.9454828660436139

20 ms ± 146 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
Kernel SVM Accuracy: 0.9516129032258065
Kernel SVM F1 Score: 0.9606177606177606

