In [1]:
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import scipy
import sklearn
import sklearn.decomposition
import sklearn.metrics
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from tqdm import tqdm

from dataset import ALCDataset

%matplotlib inline

In [2]:
# Audio sampling rate handed to ALCFeature, which forwards it as librosa's `sr` argument.
SR = 16000

### Load data

In [3]:
# Dataset root passed to ALCDataset. Set the ALC_DATA_DIR environment variable to point
# at your own copy; the fallback is the path the notebook was originally written against.
DATA_DIR = os.environ.get('ALC_DATA_DIR', '/Users/mazeyu/Desktop/CMU/20fall/18797/project/code/data')

alc_dataset = ALCDataset(DATA_DIR)

# Load every split in full (percentage=1.0) using 4 loader threads.
data_train, label_train = alc_dataset.load_data('train', percentage=1.0, num_threads=4)
data_dev1, label_dev1 = alc_dataset.load_data('d1', percentage=1.0, num_threads=4)
data_dev2, label_dev2 = alc_dataset.load_data('d2', percentage=1.0, num_threads=4)
data_test, label_test = alc_dataset.load_data('test', percentage=1.0, num_threads=4)

00002 left.

In [4]:
# Every split must carry exactly one label per clip; verify all splits first,
# then report their sizes.
split_sizes = {}
for split_name, (split_data, split_labels) in (
    ('train', (data_train, label_train)),
    ('dev1', (data_dev1, label_dev1)),
    ('dev2', (data_dev2, label_dev2)),
    ('test', (data_test, label_test)),
):
    assert len(split_data) == len(split_labels)
    split_sizes[split_name] = len(split_data)

for split_name, split_size in split_sizes.items():
    print('#{}: {}'.format(split_name, split_size))

#train: 5400
#dev1: 3960
#dev2: 1500
#test: 3000


### Feature extraction

In [81]:
class ALCFeature:
    """Feature extraction for a list of raw audio clips.

    Parameters
    ----------
    sr : int
        Sampling rate forwarded to librosa as `sr`; also used to convert the
        analysis window length from seconds to samples.
    """

    def __init__(self, sr):
        self.sr = sr

    def delete_silence(self, audio, top_db=20):
        """Return `audio` with its silent stretches removed.

        Non-silent intervals are found with `librosa.effects.split` and glued
        back together in their original order.

        Parameters
        ----------
        audio : np.ndarray
            Audio signal.
        top_db : float
            Threshold forwarded to `librosa.effects.split`.

        Returns
        -------
        np.ndarray
            The concatenated non-silent samples. When no non-silent interval is
            found, an empty slice of `audio` is returned (same dtype) instead of
            failing inside `np.concatenate`.
        """
        intervals = librosa.effects.split(audio, top_db=top_db, frame_length=2048, hop_length=512)
        if len(intervals) == 0:
            # np.concatenate([]) raises ValueError; an empty slice keeps the dtype.
            return audio[:0]
        return np.concatenate([audio[start:end] for start, end in intervals])

    def get_mfcc(self, data, label, n_mfcc=20, wsize=0.05, concat=20, scale=True):
        """Turn each clip into fixed-size, flattened windows of MFCC frames.

        For every clip: silence is removed, the MFCC matrix is computed, and the
        frames are cut into consecutive non-overlapping groups of `concat`
        frames. Each group is flattened into one feature vector that inherits
        the clip's label. Trailing frames that do not fill a whole group are
        dropped.

        Parameters
        ----------
        data : sequence of np.ndarray
            Audio clips.
        label : sequence
            One label per clip, indexed in parallel with `data`.
        n_mfcc : int
            Number of MFCC coefficients per frame.
        wsize : float
            Analysis window length in seconds (converted to samples with `self.sr`).
        concat : int
            Number of consecutive frames stacked into one feature vector.
        scale : bool
            If true, subtract each coefficient's minimum over the clip so every
            coefficient starts at zero.

        Returns
        -------
        (np.ndarray, np.ndarray)
            Feature matrix of shape (n_windows, n_mfcc * concat) and the
            matching label vector of length n_windows. Both are empty when no
            clip yields a full window.
        """
        win_length = int(wsize * self.sr)
        x_mfcc = []
        y_mfcc = []
        for i in tqdm(range(len(data)), ncols=100, ascii=True, desc='MFCC feature'):
            audio = self.delete_silence(data[i])
            if len(audio) == 0:
                # Entirely silent clip: nothing to extract.
                continue
            # `y` is passed by keyword: newer librosa releases reject a positional signal.
            x = librosa.feature.mfcc(y=audio, sr=self.sr, n_mfcc=n_mfcc, n_fft=2048,
                                     hop_length=512, win_length=win_length, window='hann')
            if scale:
                x = x - np.min(x, axis=1, keepdims=True)
            for j in range(x.shape[1] // concat):
                window = x[:, j * concat: (j + 1) * concat]
                x_mfcc.append(window.flatten())
                y_mfcc.append(label[i])
        if not x_mfcc:
            # np.stack([]) raises; return a correctly shaped empty matrix instead.
            return np.empty((0, n_mfcc * concat)), np.array(y_mfcc)
        return np.stack(x_mfcc), np.array(y_mfcc)

    def pncc(self, data, label):
        """Placeholder for PNCC features; not implemented yet."""
        # Fail loudly instead of returning None, which callers would try to unpack.
        raise NotImplementedError('PNCC feature extraction is not implemented')

    def get_cqt(self, data, label):
        """Placeholder for constant-Q transform features; not implemented yet."""
        # Fail loudly instead of returning None, which callers would try to unpack.
        raise NotImplementedError('CQT feature extraction is not implemented')

In [6]:
alc_feature = ALCFeature(SR)

# Extract MFCC windows for each split, in the order train, dev1, dev2, test.
(x_train, y_train), (x_dev1, y_dev1), (x_dev2, y_dev2), (x_test, y_test) = (
    alc_feature.get_mfcc(clips, clip_labels)
    for clips, clip_labels in (
        (data_train, label_train),
        (data_dev1, label_dev1),
        (data_dev2, label_dev2),
        (data_test, label_test),
    )
)

MFCC feature: 100%|#############################################| 5400/5400 [01:01<00:00, 88.01it/s]
MFCC feature: 100%|#############################################| 3960/3960 [00:43<00:00, 91.24it/s]
MFCC feature: 100%|#############################################| 1500/1500 [00:15<00:00, 95.57it/s]
MFCC feature: 100%|#############################################| 3000/3000 [00:31<00:00, 95.65it/s]


In [7]:
# Project the flattened MFCC windows onto 50 principal components. The projection is
# fitted on the training split only and then applied unchanged to every split.
# random_state is fixed so the result is reproducible even when scikit-learn's 'auto'
# solver policy selects the randomized SVD.
# NOTE: this cell overwrites x_* in place; re-running it without first re-running the
# feature-extraction cell would project already-projected data.
pca = sklearn.decomposition.PCA(n_components=50, random_state=0)
pca.fit(x_train)
x_train = pca.transform(x_train)
x_dev1 = pca.transform(x_dev1)
x_dev2 = pca.transform(x_dev2)
x_test = pca.transform(x_test)

### Classification model

In [16]:
class ALCModel:
    """Uniform fit / predict / evaluate wrapper around one scikit-learn classifier.

    Parameters
    ----------
    method : str
        Which classifier to build: 'lr' (LogisticRegression), 'svm' (RBF SVC),
        'forest' (RandomForestClassifier) or 'adaboost' (AdaBoostClassifier).
    verbose : int or bool, optional
        Verbosity forwarded to the classifiers that accept it (all but
        AdaBoost). Defaults to 0 so `ALCModel(method)` works on its own.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """

    def __init__(self, method, verbose=0):
        if method == 'lr':
            self.clf = LogisticRegression(verbose=verbose)
        elif method == 'svm':
            self.clf = SVC(C=1.0, kernel='rbf', verbose=verbose)
        elif method == 'forest':
            self.clf = RandomForestClassifier(n_estimators=100, verbose=verbose)
        elif method == 'adaboost':
            self.clf = AdaBoostClassifier(n_estimators=100)
        else:
            # Previously this fell through and left `self.clf` undefined, which only
            # surfaced later as an AttributeError in fit/predict.
            raise ValueError(
                "unknown method {!r}; expected one of 'lr', 'svm', 'forest', 'adaboost'".format(method)
            )

    def fit(self, x, y):
        """Train the wrapped classifier on features `x` and labels `y`."""
        self.clf.fit(x, y)

    def predict(self, x):
        """Return the wrapped classifier's predicted labels for `x`."""
        return self.clf.predict(x)

    def _positive_scores(self, x):
        """Return a continuous score per sample for the class `clf.classes_[1]`.

        Uses `decision_function` when the classifier provides it and falls back
        to the second column of `predict_proba` otherwise.
        """
        if hasattr(self.clf, 'decision_function'):
            return self.clf.decision_function(x)
        return self.clf.predict_proba(x)[:, 1]

    def evaluate(self, x, y, roc=False):
        """Score the classifier on `x` / `y`.

        Parameters
        ----------
        x, y
            Features and true labels.
        roc : bool
            If true, also plot a ROC curve (binary problems only).

        Returns
        -------
        (float, str)
            Accuracy and the text produced by
            `sklearn.metrics.classification_report`.
        """
        pred = self.clf.predict(x)
        # Same value as clf.score(x, y), without running prediction a second time.
        acc = sklearn.metrics.accuracy_score(y, pred)
        report = sklearn.metrics.classification_report(y, pred)
        if roc:
            # A ROC curve needs continuous scores; hard labels give a single
            # operating point and a degenerate curve.
            scores = self._positive_scores(x)
            fpr, tpr, _ = sklearn.metrics.roc_curve(y, scores, pos_label=self.clf.classes_[1])
            plt.figure()
            plt.plot(fpr, tpr)
            plt.title('ROC Curve')
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.show()
        return acc, report

In [None]:
# RBF-kernel SVM. ALCModel's constructor takes a verbosity argument; pass it
# explicitly (0 keeps the classifier quiet).
model = ALCModel('svm', verbose=0)
model.fit(x_train, y_train)
acc, report = model.evaluate(x_test, y_test, roc=True)
print(report)

In [None]:
# Logistic regression. ALCModel's constructor takes a verbosity argument; pass it
# explicitly (0 keeps the classifier quiet).
model = ALCModel('lr', verbose=0)
model.fit(x_train, y_train)
acc, report = model.evaluate(x_test, y_test, roc=True)
print(report)

In [None]:
# Random forest. ALCModel's constructor takes a verbosity argument; pass it
# explicitly (0 keeps the classifier quiet).
model = ALCModel('forest', verbose=0)
model.fit(x_train, y_train)
acc, report = model.evaluate(x_test, y_test, roc=True)
print(report)

In [None]:
# AdaBoost. ALCModel's constructor takes a verbosity argument; it is passed for
# consistency with the other runs even though this branch does not forward it.
model = ALCModel('adaboost', verbose=0)
model.fit(x_train, y_train)
acc, report = model.evaluate(x_test, y_test, roc=True)
print(report)