# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [1]:
import os
import numpy as np
import scipy as sp
from scipy.stats import mode
import pandas as pd
import matplotlib.pyplot as plt
from time import time

import librosa
import librosa.display

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import BaggingClassifier

FILEROOT = '/tsi/plato/sons/sd207/'

In [2]:
# Define some useful functions
def load_sound_file(file_path):
    """Load the audio file ``file_path`` (relative to FILEROOT) and return its samples.

    The file is read with ``sr=None``; the sampling rate returned by
    librosa is discarded, only the sample array is returned.
    """
    full_path = os.path.join(FILEROOT, file_path)
    samples, _ = librosa.load(full_path, sr=None)
    return samples

def extract_feature(file_name): # Late fusion
    """Compute per-frame MFCCs and MFCC deltas for one audio file.

    The two feature sets are returned separately (late fusion) rather than
    stacked into a single matrix.

    Parameters
    ----------
    file_name : path of the audio file, relative to FILEROOT.

    Returns
    -------
    (mfcc, mfcc_delta) : two arrays with one row per frame and ``n_mfcc``
        columns (``n_mfcc`` is read from the module-level variable).
    """
    X, sample_rate = librosa.load(os.path.join(FILEROOT, file_name), sr=None)
    # Alternative left by the authors: MFCCs of the harmonic component only,
    # i.e. y=librosa.effects.harmonic(X) instead of y=X.
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc)
    # delta() differentiates along the last axis by default. It must therefore
    # run on the (n_mfcc, frames) matrix, *before* transposing; running it on
    # the transposed matrix differentiates across coefficients instead of time.
    mfcc_delta = librosa.feature.delta(mfcc, width=5, order=1, trim=True)
    return mfcc.T, mfcc_delta.T

def parse_audio_files(file_names, file_labels):
    """Extract frame-level features and frame-level labels for a list of files.

    Parameters
    ----------
    file_names : iterable of file names handed to extract_feature().
    file_labels : iterable of integer labels, aligned with ``file_names``.

    Returns
    -------
    (mfcc_features, mfcc_delta_features, labels) :
        the MFCC frames and MFCC-delta frames of all files stacked row-wise,
        and an int array that repeats each file's label once per frame.

    Files whose feature extraction raises are reported and skipped
    (best effort), so they contribute no rows.
    """
    # Seed every list with an empty array so the results keep their
    # (0, n_mfcc) / (0,) shapes when no file could be parsed.
    mfcc_chunks = [np.empty((0, n_mfcc))]
    delta_chunks = [np.empty((0, n_mfcc))]
    label_chunks = [np.empty(0)]
    for fn, fl in zip(file_names, file_labels):
        try:
            mfcc, mfcc_delta = extract_feature(fn)
        except Exception as e:
            # Keep going, but say why the file was dropped.
            print("Error encountered while parsing file: %s (%s)" % (fn, e))
            continue
        mfcc_chunks.append(mfcc)
        delta_chunks.append(mfcc_delta)
        label_chunks.append(fl * np.ones(mfcc.shape[0]))
    # Stack once at the end instead of re-copying the growing arrays per file.
    return (np.vstack(mfcc_chunks),
            np.vstack(delta_chunks),
            np.concatenate(label_chunks).astype(int))

def predict(clf, X_test, feature_index=0):
    """Predict one label per file with three frame-level voting schemes.

    Every file is converted to per-frame features by extract_feature(); the
    classifier is applied to each frame and the frame outputs are combined.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``, ``predict_proba`` and
        ``classes_``.
    X_test : iterable of file names, passed one by one to extract_feature().
    feature_index : int, optional
        Which element of the tuple returned by extract_feature() is fed to
        ``clf``: 0 for the MFCCs (default), 1 for the MFCC deltas. It has to
        match the features ``clf`` was trained on.

    Returns
    -------
    (y_pred, y_pred_sum, y_pred_prod) : three int arrays with one entry per
        file: majority vote over frame predictions, argmax of the summed
        frame probabilities, argmax of the product of frame probabilities.
    """
    classes = np.asarray(clf.classes_)
    tiny = np.finfo(float).tiny
    y_pred, y_pred_sum, y_pred_prod = [], [], []
    for x in X_test:
        # extract_feature() returns a tuple of matrices; the classifier needs
        # exactly one of them, not the tuple itself.
        x_frames = extract_feature(x)[feature_index]
        frame_preds = clf.predict(x_frames)
        frame_probas = clf.predict_proba(x_frames)

        # Majority vote; ties go to the smallest label (np.unique sorts).
        values, counts = np.unique(frame_preds, return_counts=True)
        y_pred.append(values[np.argmax(counts)])

        # predict_proba columns follow clf.classes_, so map the winning
        # column back to its label instead of using the column index.
        y_pred_sum.append(classes[np.argmax(np.sum(frame_probas, axis=0))])

        # Summing logs ranks classes like the product does, without the
        # product underflowing to 0 for files with many frames.
        log_probas = np.log(np.maximum(frame_probas, tiny))
        y_pred_prod.append(classes[np.argmax(np.sum(log_probas, axis=0))])
    return (np.array(y_pred, dtype=int),
            np.array(y_pred_sum, dtype=int),
            np.array(y_pred_prod, dtype=int))

def predict_maj(clf, X_test, feature_index=0):
    """Predict one label per file by majority vote over its frames.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``.
    X_test : iterable of file names, passed one by one to extract_feature().
    feature_index : int, optional
        Which element of the tuple returned by extract_feature() is fed to
        ``clf``: 0 for the MFCCs (default), 1 for the MFCC deltas. It has to
        match the features ``clf`` was trained on.

    Returns
    -------
    int array with one predicted label per file.
    """
    y_pred = []
    for x in X_test:
        # extract_feature() returns a tuple of matrices; pick the one to use.
        x_frames = extract_feature(x)[feature_index]
        frame_preds = clf.predict(x_frames)
        # Most frequent frame label; ties go to the smallest label because
        # np.unique returns the labels sorted.
        values, counts = np.unique(frame_preds, return_counts=True)
        y_pred.append(values[np.argmax(counts)])
    return np.array(y_pred, dtype=int)

def predict_sum(clf, X_test, feature_index=0):
    """Predict one label per file from the sum of its frame probabilities.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba`` and ``classes_``.
    X_test : iterable of file names, passed one by one to extract_feature().
    feature_index : int, optional
        Which element of the tuple returned by extract_feature() is fed to
        ``clf``: 0 for the MFCCs (default), 1 for the MFCC deltas. It has to
        match the features ``clf`` was trained on.

    Returns
    -------
    int array with one predicted label per file.
    """
    classes = np.asarray(clf.classes_)
    y_pred = []
    for x in X_test:
        # extract_feature() returns a tuple of matrices; pick the one to use.
        x_frames = extract_feature(x)[feature_index]
        summed = np.sum(clf.predict_proba(x_frames), axis=0)
        # predict_proba columns follow clf.classes_; map column -> label.
        y_pred.append(classes[np.argmax(summed)])
    return np.array(y_pred, dtype=int)

def predict_prod(clf, X_test, feature_index=0):
    """Predict one label per file from the product of its frame probabilities.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba`` and ``classes_``.
    X_test : iterable of file names, passed one by one to extract_feature().
    feature_index : int, optional
        Which element of the tuple returned by extract_feature() is fed to
        ``clf``: 0 for the MFCCs (default), 1 for the MFCC deltas. It has to
        match the features ``clf`` was trained on.

    Returns
    -------
    int array with one predicted label per file.
    """
    classes = np.asarray(clf.classes_)
    tiny = np.finfo(float).tiny
    y_pred = []
    for x in X_test:
        # extract_feature() returns a tuple of matrices; pick the one to use.
        x_frames = extract_feature(x)[feature_index]
        frame_probas = clf.predict_proba(x_frames)
        # The sum of logs orders the classes exactly like the product, but
        # does not underflow to 0 when a file has many frames. Clamp at the
        # smallest positive float so a zero probability does not yield -inf.
        log_scores = np.sum(np.log(np.maximum(frame_probas, tiny)), axis=0)
        # predict_proba columns follow clf.classes_; map column -> label.
        y_pred.append(classes[np.argmax(log_scores)])
    return np.array(y_pred, dtype=int)

def plot_wave(title, raw_sound, sr=16000):
    """Plot the waveform of ``raw_sound`` in a new 20x6 figure.

    Parameters
    ----------
    title : figure title.
    raw_sound : sequence of audio samples.
    sr : sampling rate used for the time axis. Defaults to 16000, the value
        that used to be hard-coded; pass the real rate of the signal when it
        differs.
    """
    # Close any figure left open so only the new plot is shown.
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    librosa.display.waveplot(np.array(raw_sound), sr=sr)
    plt.show()
    
def plot_specgram(title,raw_sound, sr=16000):
    """Plot a matplotlib spectrogram of ``raw_sound`` in a new 20x6 figure.

    Parameters
    ----------
    title : figure title.
    raw_sound : sequence of audio samples.
    sr : sampling frequency passed to ``plt.specgram`` as ``Fs``. Defaults to
        16000, the value that used to be hard-coded; pass the real rate of
        the signal when it differs.
    """
    # Close any figure left open so only the new plot is shown.
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    plt.specgram(np.array(raw_sound), Fs=sr)
    plt.show()

def plot_log_power_specgram(title,raw_sound):
    """Plot the log-power spectrogram of ``raw_sound`` in a new 20x6 figure.

    Parameters
    ----------
    title : figure title.
    raw_sound : array of audio samples, passed to ``librosa.stft``.
    """
    # Close any figure left open so only the new plot is shown.
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    # Power spectrogram |STFT|^2 on a log scale, referenced to its maximum.
    D = librosa.logamplitude(np.abs(librosa.stft(raw_sound))**2, ref_power=np.max)
    librosa.display.specshow(D, x_axis='time' ,y_axis='log')
    # A trailing plt.specgram(np.array(f), Fs=16000) call was removed here:
    # `f` is not defined in this function, so it raised NameError before the
    # figure could be shown.
    plt.show()

In [3]:
# Read data and preprocessing
print("Loading files...")
t0 = time()
# Parse each list once; every line is "<file> <label>" separated by whitespace.
train_table = pd.read_csv('train.txt', sep=r'\s+', header=None)
val_table = pd.read_csv('dev.txt', sep=r'\s+', header=None)

files_train = train_table[0].values
# factorize() numbers the labels in order of first appearance. Keep the list
# of names it returns, so that labels[code] is the name belonging to `code`
# (a separately sorted np.unique() list does not follow that numbering).
labels_train, labels = pd.factorize(train_table[1])
labels = np.asarray(labels)
n_labels = len(labels)

files_val = val_table[0].values
# Encode the validation labels with the *training* mapping. Factorizing
# dev.txt on its own would number the classes by their order in that file.
# A label that never occurs in train.txt is encoded as -1.
labels_val = pd.Index(labels).get_indexer(val_table[1].values)

files_test = pd.read_csv('test_files.txt', header=None)[0].values

print("Training set size: %d" % len(files_train))
print("Validation set size: %d" % len(files_val))
print("Test set size: %d" % len(files_test))
print("Done in %0.3fs." % (time()-t0))

Loading files...
Training set size: 582
Validation set size: 290
Test set size: 298
Done in 0.007s.


In [None]:
# Show one training file together with its harmonic / percussive separation.
y = load_sound_file(files_train[42])
y_harmonic, y_percussive = librosa.effects.hpss(y)
sample_label = labels[labels_train[42]]
for title_format, signal in (("%s: original signal", y),
                             ("%s: harmonic signal", y_harmonic),
                             ("%s: percussive signal", y_percussive)):
    plot_wave(title_format % sample_label, signal)

In [None]:
# Feature extraction
print("Extracting features...")
# Number of MFCC coefficients; read as a global by the feature helpers.
n_mfcc = 40
t0 = time()
X_mfcc, X_mfcc_delta, y_train = parse_audio_files(files_train, labels_train)
# Report the shapes of the arrays that were actually created
# (the former `X_train` name is not defined anywhere in this cell).
print("%s %s %s" % (X_mfcc.shape, X_mfcc_delta.shape, y_train.shape))
print("Done in %0.3fs." % (time()-t0))

In [None]:
# Train classifier
print("Training classifier...")
# Seed NumPy's global generator before the two fits below.
np.random.seed(42)
t0 = time()
# Both models share the same settings; `(40)` is just the integer 40.
mlp_params = dict(hidden_layer_sizes=40, alpha=0.1)
# One model per feature set: clf1 on the MFCCs, clf2 on the MFCC deltas.
clf1 = MLPClassifier(**mlp_params).fit(X_mfcc, y_train)
clf2 = MLPClassifier(**mlp_params).fit(X_mfcc_delta, y_train)
print("Done in %0.3fs." % (time()-t0))

In [None]:
# Predicting on validation set...
# NOTE(review): clf1 was fitted on X_mfcc and clf2 on X_mfcc_delta, yet both
# are evaluated through the same predict() call below - confirm that predict()
# feeds each model the feature set it was trained on.
t0 = time()
score_formats = (
    "Score on validation test (vote by majority): %f",
    "Score on validation test (vote by proba sum): %f",
    "Score on validation test (vote by proba product): %f",
)
for heading, model in (("Prediction result via mfcc features:", clf1),
                       ("Prediction result via mfcc delta features:", clf2)):
    print(heading)
    y_val_pred, y_val_pred_sum, y_val_pred_prod = predict(model, files_val)
    # One accuracy line and one per-class report for each voting scheme.
    for score_format, y_hat in zip(score_formats,
                                   (y_val_pred, y_val_pred_sum, y_val_pred_prod)):
        print(score_format % np.mean(y_hat == labels_val))
        print(classification_report(labels_val, y_hat, target_names=labels))

"""print "Prediction result via mfcc and mfcc delta calibration:"
y_val_pred, y_val_pred_sum, y_val_pred_prod = predict_calibration(clf1, clf2, files_val)
print "Score on validation test (vote by majority): %f" % np.mean(y_val_pred == labels_val)
print classification_report(labels_val, y_val_pred, target_names=labels)

print "Score on validation test (vote by proba sum): %f" % np.mean(y_val_pred_sum == labels_val)
print classification_report(labels_val, y_val_pred_sum, target_names=labels)

print "Score on validation test (vote by proba product): %f" % np.mean(y_val_pred_prod == labels_val)
print classification_report(labels_val, y_val_pred_prod, target_names=labels)"""
print("Done in %0.3fs." % (time()-t0))

In [None]:
# Predict the test files and write one prediction file per voting scheme.
# `clf` was never defined in this notebook (NameError); only clf1 and clf2
# exist. clf1 is used here.
# NOTE(review): the output file names mention "delta_mfcc" - confirm that
# clf1 (trained on X_mfcc) is the model meant for this submission.
y_test_pred, y_test_pred_sum, y_test_pred_prod = predict(clf1, files_test)
np.savetxt('y_test_pred_delta_mfcc_mlp.txt', y_test_pred, fmt='%d')
np.savetxt('y_test_pred_delta_mfcc_mlp_sum.txt', y_test_pred_sum, fmt='%d')
np.savetxt('y_test_pred_delta_mfcc_mlp_prod.txt', y_test_pred_prod, fmt='%d')
print(y_test_pred)