# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [50]:
import numpy as np
import scipy as sp
from scipy.stats import mode
import pandas as pd
import matplotlib.pyplot as plt
from time import time

import librosa
import librosa.display

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import BaggingClassifier
from sklearn import linear_model
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC

In [51]:
# Define some usefull functions
def load_sound_file(file_path):
    X, sr = librosa.load(file_path, sr=None)
    return X

def extract_feature(file_name): # Late fusion
    X, sample_rate = librosa.load(file_name, sr=None)
    #mfcc = librosa.feature.mfcc(y=librosa.effects.harmonic(X), sr=sample_rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc).T
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc).T
    mfcc_delta = librosa.feature.delta(mfcc, width=7, order=1, trim=True)
    return mfcc, mfcc_delta

def parse_audio_files(file_names, file_labels):
    features, features_delta, labels = np.empty((0,n_mfcc)), np.empty((0,n_mfcc)), np.empty(0)
    for fn, fl in zip(file_names, file_labels):
        try:
            mfcc, mfcc_delta = extract_feature(fn)
        except Exception as e:
            print "Error encountered while parsing file: ", fn
            continue
        features = np.vstack([features, mfcc])
        features_delta = np.vstack([features_delta, mfcc_delta])
        labels = np.append(labels, fl*np.ones(mfcc.shape[0]))
    return np.array(features), np.array(features_delta), np.array(labels, dtype = np.int)

def predict_proba(clf1, clf2, X_val):
    pred_proba = np.empty((0,30))
    for x in X_val:
        x_mfcc, x_mfcc_delta = extract_feature(x)
        y_pred_proba1 = np.sum(clf1.predict_proba(x_mfcc), axis=0).reshape(15)
        y_pred_proba2 = np.sum(clf2.predict_proba(x_mfcc_delta), axis=0).reshape(15)
        pred_proba = np.vstack([pred_proba,np.hstack([y_pred_proba1,y_pred_proba2])])
        #print pred_proba.shape
    return np.array(pred_proba, dtype=np.int)


"""
def predict(clf, X_test):
    y_pred = np.empty(0)
    y_pred_sum = np.empty(0)
    y_pred_prod = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_predict_probas = clf.predict_proba(x_mfccs)
        y_pred = np.append(y_pred, mode(y_predicts).mode[0])
        y_pred_sum = np.append(y_pred_sum, np.argmax(np.sum(y_predict_probas, axis=0)))
        y_pred_prod = np.append(y_pred_prod, np.argmax(np.prod(y_predict_probas, axis=0)))
    return np.array(y_pred, dtype=np.int), np.array(y_pred_sum, dtype=np.int), np.array(y_pred_prod, dtype=np.int)

def predict_maj(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_pred = np.append(y_pred, mode(y_predicts).mode[0])
    return np.array(y_pred, dtype = np.int)

def predict_sum(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.sum(clf.predict_proba(x_mfccs), axis=0)
        y_pred = np.append(y_pred, np.argmax(y_predicts))
    return np.array(y_pred, dtype = np.int)

def predict_prod(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.prod(clf.predict_proba(x_mfccs), axis=0)
        y_pred = np.append(y_pred, np.argmax(y_predicts))
    return np.array(y_pred, dtype = np.int)
"""

def plot_wave(title, raw_sound):
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    librosa.display.waveplot(np.array(raw_sound), sr=16000)
    plt.show()
    
def plot_specgram(title,raw_sound):
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    plt.specgram(np.array(raw_sound), Fs=16000)
    plt.show()

def plot_log_power_specgram(title,raw_sound):
    plt.close('all')
    plt.figure(figsize=(20,6))
    plt.title(title)
    D = librosa.logamplitude(np.abs(librosa.stft(raw_sound))**2, ref_power=np.max)
    librosa.display.specshow(D, x_axis='time' ,y_axis='log')
    plt.specgram(np.array(f), Fs=16000)
    plt.show()

In [4]:
# Read data and preprocessing
print "Loading files..."
t0 = time()
FILEROOT = './'

files_train = pd.read_csv('train.txt', sep='\s+', header=None)[0].values
labels = np.unique(pd.read_csv('train.txt', sep='\s+', header=None)[1])
n_labels = len(labels)
labels_train = pd.factorize(pd.read_csv('train.txt', sep='\s+', header=None)[1])[0]
files_val = pd.read_csv('dev.txt', sep='\s+', header=None)[0].values
labels_val = pd.factorize(pd.read_csv('dev.txt', sep='\s+', header=None)[1])[0]
files_test = pd.read_csv('test_files.txt', header=None)[0].values

print "Training set size: %d" % len(files_train)
print "Validation set size: %d" % len(files_val)
print "Test set size: %d" % len(files_test)
print "Done in %0.3fs." % (time()-t0)

Loading files...
Training set size: 582
Validation set size: 290
Test set size: 298
Done in 0.015s.


In [6]:
"""
y = load_sound_file(files_train[42])
y_harmonic, y_percussive = librosa.effects.hpss(y)
plot_wave("%s: original signal" % labels[labels_train[42]], y)
plot_wave("%s: harmonic signal" % labels[labels_train[42]], y_harmonic)
plot_wave("%s: percussive signal" % labels[labels_train[42]], y_percussive)
"""

'\ny = load_sound_file(files_train[42])\ny_harmonic, y_percussive = librosa.effects.hpss(y)\nplot_wave("%s: original signal" % labels[labels_train[42]], y)\nplot_wave("%s: harmonic signal" % labels[labels_train[42]], y_harmonic)\nplot_wave("%s: percussive signal" % labels[labels_train[42]], y_percussive)\n'

In [7]:
# Feature extraction
print "Extracting features..."
n_mfcc = 40
t0 = time()
X_train1, X_train2, y_train = parse_audio_files(files_train, labels_train)
print X_train1.shape, X_train2.shape, y_train.shape
print "Done in %0.3fs." % (time()-t0)

Extracting features...
(136770, 40) (136770, 40) (136770,)
Done in 35.808s.


In [10]:
# Train classifier
print "Training classifier..."
np.random.seed(42)
t0 = time()
clf1 = MLPClassifier(hidden_layer_sizes=(40), alpha=0.1)
clf1.fit(X_train1, y_train)
print "Training on MFCC done in %0.3fs." % (time()-t0)
t0 = time()
clf2 = MLPClassifier(hidden_layer_sizes=(40), alpha=0.1)
clf2.fit(X_train2, y_train)
print "Training on MFCC delta done in %0.3fs." % (time()-t0)

Training classifier...
Training on MFCC done in 30.530s.
Training on MFCC delta done in 39.967s.


In [14]:
print pred_proba_val.shape
print labels_val.shape

(8700,)
(290,)


In [33]:
pred_proba_val = predict_proba(clf1, clf2, files_val)
pred_proba_test = predict_proba(clf1, clf2, files_test)

In [70]:
print "Score calibration..."
t0 = time()
logistic = linear_model.LogisticRegression(C=0.000085)
logistic.fit(pred_proba_val, labels_val)
print "Done in %0.3fs." % (time()-t0)

Score calibration...
Done in 0.013s.


In [71]:
y_test_pred = logistic.predict(pred_proba_test)
np.savetxt('y_test_pred_mfcc_delta_mfcc_mlp_logistic.txt', y_test_pred, fmt='%d')
print y_test_pred

[10  8  3  7 14  5  7  9  3  7 10 12  6 14 11  5  4  8  0  8  8  6  7  7 11
 11  2  7  2  3 14  1 10 12  8  5  3  3  0  4  7  0  0  7  6 12  4  6  3  7
 12  6  7 12 12  7  8  6  5 12  7  4  9 12  2 13  6  7  3  4  0 13 12  7  4
  2  1  9  1 10 12  0  4  2  4  3 14 13  1  9  4 14 10  6 13  1  7 10  5  9
  1 13  6  6 10  0  7  7  5 13  4 12 13  2  3 11  0  9  9 10 13  3  4  9  1
  1  1 12  4  5 13  7  1 10  2  2 12 12 12 14 12  5 12  8  3  3  8  7  1  8
  7  3  3 13  9 13 12  0  1  6  7  4  8  9 13 13  9  5  7  7 14 13  0  0 12
  9  8  7 12  5  5  6  4  2  5  6  4 13  5  9 13  7  4 12  9  3  9  9  8  0
  1 10 14  3 12  0 13 14 13  4  9  6  2 10 12  4  2 14  2 14  2  7  3 13  8
  4 13  1  2  2  9 12  4 10  4  7  8  5  4  5  8  7  3 12  7  5  4  7 12 13
  4  2 14  6 10  5 14  5  1  8 10  7 12  7  8 12  5  0 14  4 10  3  6 13  4
 12  2 14  8  7  9  5  3 14  2  6 10 14  4  0  9 10  0  9  8  9  5 12]
