# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [1]:
# Basic libraries
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from sys import platform
import os
from time import time
from scipy.stats import mode

# Librosa related: audio feature extraction
import librosa
import librosa.display

# Sklearn related: data preprocessing and classifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

In [2]:
# Define some useful functions
def load_sound_file(file_name):
    """Load an audio file stored under FILEROOT and return its samples.

    ``sr=None`` is passed to librosa so the file is not resampled; the
    sampling rate returned by librosa is discarded.
    """
    full_path = os.path.join(FILEROOT, file_name)
    samples, _ = librosa.load(full_path, sr=None)
    return samples

def extract_feature(file_name):  # Late fusion
    """Return the MFCC matrix of one audio file, transposed.

    The file is looked up under FILEROOT and loaded with ``sr=None`` (no
    resampling). The number of coefficients comes from the module-level
    ``n_mfcc``. The librosa result is transposed before being returned.
    """
    audio_path = os.path.join(FILEROOT, file_name)
    signal, rate = librosa.load(audio_path, sr=None)
    coefficients = librosa.feature.mfcc(
        y=signal, sr=rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc
    )
    # Delta-MFCC features are not computed here; only the static
    # coefficients are returned.
    return coefficients.T

def parse_audio_files(file_names, file_labels, test_fold):
    """Extract frame-level features for a list of audio files.

    Every row returned by ``extract_feature`` becomes one sample that
    inherits the label and the fold index of the file it comes from. Files
    whose feature extraction raises are reported and skipped.

    Parameters
    ----------
    file_names : iterable
        File names, each passed to ``extract_feature``.
    file_labels : iterable
        One numeric label per file.
    test_fold : iterable
        One numeric fold index per file.

    Returns
    -------
    features : ndarray with ``n_features`` columns (module-level constant)
        All feature rows stacked vertically.
    labels : ndarray of int
        Label of each feature row.
    tf : ndarray of int
        Fold index of each feature row.
    """
    # Each list is seeded with an empty array so that the result keeps the
    # same shape and dtype when no file could be parsed, and so that a feature
    # width different from n_features is still rejected by np.vstack.
    feature_chunks = [np.empty((0, n_features))]
    label_chunks = [np.empty(0)]
    fold_chunks = [np.empty(0)]
    for fn, fl, f in zip(file_names, file_labels, test_fold):
        try:
            ff = extract_feature(fn)
        except Exception as e:
            # Best effort: keep going, but say why the file was dropped.
            print("Error encountered while parsing file:  %s (%s)" % (fn, e))
            continue
        n_rows = ff.shape[0]
        feature_chunks.append(ff)
        label_chunks.append(fl * np.ones(n_rows))
        fold_chunks.append(f * np.ones(n_rows))
    # Stack once at the end instead of re-copying the arrays for every file.
    features = np.vstack(feature_chunks)
    labels = np.concatenate(label_chunks)
    tf = np.concatenate(fold_chunks)
    # The builtin int replaces the np.int alias, which recent NumPy removed.
    return features, np.array(labels, dtype=int), np.array(tf, dtype=int)

def predict(clf, X_test):
    """Predict one class per audio file using three fusion rules.

    Each file is turned into feature rows by ``extract_feature``; ``clf``
    classifies every row and the row-level results are fused per file.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict`` and ``predict_proba``.
    X_test : iterable of file names.

    Returns
    -------
    y_pred : ndarray of int
        Majority vote over the row-level predictions (the smallest label
        wins a tie).
    y_pred_sum : ndarray of int
        Argmax of the summed row-level probabilities.
    y_pred_prod : ndarray of int
        Argmax of the product of the row-level probabilities.

    Notes
    -----
    The last two outputs are column indices of ``predict_proba``. They equal
    class labels only if the classifier's classes are 0..n-1 -- presumably
    true for the factorized labels used in this file; confirm if the labels
    change.
    """
    y_pred, y_pred_sum, y_pred_prod = [], [], []
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_predict_probas = clf.predict_proba(x_mfccs)

        # Majority vote: np.unique sorts the labels, so argmax on the counts
        # returns the smallest most-frequent label. This avoids indexing the
        # result of scipy.stats.mode, whose shape differs between versions.
        values, counts = np.unique(y_predicts, return_counts=True)
        y_pred.append(values[np.argmax(counts)])

        y_pred_sum.append(np.argmax(np.sum(y_predict_probas, axis=0)))

        # The product is evaluated as a sum of logs: multiplying many small
        # probabilities underflows to 0.0 for every class, which would make
        # argmax return class 0 regardless of the data.
        with np.errstate(divide='ignore'):  # log(0) = -inf is intended
            log_probas = np.log(y_predict_probas)
        y_pred_prod.append(np.argmax(np.sum(log_probas, axis=0)))
    # The builtin int replaces the np.int alias, which recent NumPy removed.
    return (np.array(y_pred, dtype=int),
            np.array(y_pred_sum, dtype=int),
            np.array(y_pred_prod, dtype=int))

def predict_maj(clf, X_test):
    """Predict one class per audio file by majority vote.

    Each file is turned into feature rows by ``extract_feature``; ``clf``
    predicts a label for every row and the most frequent label is kept
    (the smallest label wins a tie).

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``.
    X_test : iterable of file names.

    Returns
    -------
    ndarray of int, one predicted label per file.
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        # np.unique sorts the labels, so argmax on the counts returns the
        # smallest most-frequent label. This avoids indexing the result of
        # scipy.stats.mode, whose shape differs between versions.
        values, counts = np.unique(y_predicts, return_counts=True)
        y_pred.append(values[np.argmax(counts)])
    # The builtin int replaces the np.int alias, which recent NumPy removed.
    return np.array(y_pred, dtype=int)

def predict_sum(clf, X_test):
    """Predict one class per audio file by summing row-level probabilities.

    Each file is turned into feature rows by ``extract_feature``; the
    probabilities returned by ``clf.predict_proba`` are summed over the rows
    and the column with the largest total is kept.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``.
    X_test : iterable of file names.

    Returns
    -------
    ndarray of int, one column index of ``predict_proba`` per file. It equals
    the class label only if the classifier's classes are 0..n-1 (presumably
    the case for the factorized labels used in this file; confirm).
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        summed_probas = np.sum(clf.predict_proba(x_mfccs), axis=0)
        y_pred.append(np.argmax(summed_probas))
    # The builtin int replaces the np.int alias, which recent NumPy removed.
    return np.array(y_pred, dtype=int)

def predict_prod(clf, X_test):
    """Predict one class per audio file from the product of probabilities.

    Each file is turned into feature rows by ``extract_feature``; the
    probabilities returned by ``clf.predict_proba`` are multiplied over the
    rows and the column with the largest product is kept.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``.
    X_test : iterable of file names.

    Returns
    -------
    ndarray of int, one column index of ``predict_proba`` per file. It equals
    the class label only if the classifier's classes are 0..n-1 (presumably
    the case for the factorized labels used in this file; confirm).
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        # The product is evaluated as a sum of logs: multiplying many small
        # probabilities underflows to 0.0 for every class, which would make
        # argmax return class 0 regardless of the data.
        with np.errstate(divide='ignore'):  # log(0) = -inf is intended
            log_probas = np.log(clf.predict_proba(x_mfccs))
        y_pred.append(np.argmax(np.sum(log_probas, axis=0)))
    # The builtin int replaces the np.int alias, which recent NumPy removed.
    return np.array(y_pred, dtype=int)

In [18]:
# Read data and preprocessing
# Reads the fold lists, the validation list and the test list, and encodes
# the scene names as integer labels.
print("Loading files...")
t0 = time()

# Define FILEROOT according to the platform
# My personal computer
if platform == "darwin":
    FILEROOT = './'
# The machines of Telecom
else:
    FILEROOT = '/tsi/plato/sons/sd207/'

# Load the cross validation folds
N_FOLDS = 3
train_files, train_scenes, test_fold = np.empty(0, dtype=str), np.empty(0), np.empty(0)
for i in range(N_FOLDS):
    # Parse each fold file once; column 0 holds the file names and column 1
    # the scene names.
    fold = pd.read_csv('fold%s.txt' % str(i), sep=r'\s+', header=None)
    files = fold[0].values
    fold_scenes = fold[1].values
    n_sources = len(set(f.split('_')[0] for f in files))
    print("Fold #%d: %d files from %d sources" % (i, len(files), n_sources))
    train_files = np.append(train_files, files)
    train_scenes = np.append(train_scenes, fold_scenes)
    test_fold = np.append(test_fold, np.full(len(files), i))

# Encode the scenes as integers. `scenes` is kept in the order used by the
# encoding, so scenes[k] is the name of label k.
train_labels, scenes = pd.factorize(train_scenes)
n_scenes = len(scenes)
labels = np.arange(n_scenes)
n_labels = len(labels)

# The validation list is read with the same whitespace separator as the fold
# files, so the file-name column does not swallow the scene column.
dev = pd.read_csv('dev.txt', sep=r'\s+', header=None)
val_files = dev[0].values
val_scenes = dev[1].values
# Reuse the training encoding: factorizing the validation scenes on their own
# would number them by order of appearance in dev.txt, which need not match
# the training labels. Scenes unseen during training are encoded as -1.
val_labels = pd.Index(scenes).get_indexer(val_scenes)
test_files = pd.read_csv('test_files.txt', header=None)[0].values

print("Training set size: %d" % len(train_files))
print("Validation set size: %d" % len(val_files))
print("Test set size: %d" % len(test_files))
print("Done in %0.3fs." % (time()-t0))

Loading files...
Fold #0: 194 files from 29 sources
Fold #1: 194 files from 29 sources
Fold #2: 194 files from 30 sources
Training set size: 582
Validation set size: 290
Test set size: 298
Done in 0.021s.


In [None]:
# Feature extraction
# Builds the frame-level training matrix and the predefined CV split.
print("Extracting features...")
t0 = time()

n_mfcc = 40
# extract_feature returns n_mfcc columns, so the feature width is derived
# from n_mfcc instead of repeating the constant: the two values must match
# for the stacking in parse_audio_files to succeed.
n_features = n_mfcc
X_train, y_train, test_fold_train = parse_audio_files(train_files, train_labels, test_fold)
# One fold index per feature row, so all rows of a file stay in the same fold.
ps = PredefinedSplit(test_fold_train)
print("%s %s %s" % (X_train.shape, y_train.shape, test_fold_train.shape))
print("Done in %0.3fs." % (time()-t0))

In [None]:
# Train classifier
# Grid-searches an estimator over `params`, using the predefined folds `ps`
# for cross-validation, then fits it on the frame-level training data.
print "Training classifier..."
# Seed NumPy's global random generator before training.
np.random.seed(42)
t0 = time()

# TODO: the estimator and its parameter grid were left blank, so this cell is
# not runnable as is. Assign an estimator below (e.g. one of the classifiers
# imported at the top of the file) and fill in `params`.
# NOTE(review): `eatimator` looks like a typo for `estimator`.
eatimator = 
params = {
          }
# cv=ps makes the search use the folds from PredefinedSplit; n_jobs=-1 and
# verbose=1 are passed straight to GridSearchCV.
clf = GridSearchCV(eatimator, params, cv=ps, n_jobs=-1, verbose=1)
clf.fit(X_train, y_train)
print "Done in %0.3fs." % (time()-t0)

In [None]:
# Predicting on validation set...
# Scores the three fusion rules returned by predict() against val_labels.
t0 = time()
y_pred, y_pred_sum, y_pred_prod = predict(clf, val_files)

# All three reports use the same arguments: `labels` (integer class codes) is
# passed as the list of classes to report and `scenes` as their display names.
# Passing the integer codes as target_names, as two of the reports did, is
# inconsistent with the third and is not a list of name strings.
# NOTE(review): assumes scenes[k] is the name of label code k -- confirm
# against the cell that builds `scenes` and `labels`.
print("Score on validation test (vote by majority): %f" % np.mean(y_pred == val_labels))
print(classification_report(val_labels, y_pred, labels=labels, target_names=scenes))

print("Score on validation test (vote by proba sum): %f" % np.mean(y_pred_sum == val_labels))
print(classification_report(val_labels, y_pred_sum, labels=labels, target_names=scenes))

print("Score on validation test (vote by proba product): %f" % np.mean(y_pred_prod == val_labels))
print(classification_report(val_labels, y_pred_prod, labels=labels, target_names=scenes))
print("Done in %0.3fs." % (time()-t0))

In [None]:
# Predict the test files with the three fusion rules and save each result
# to its own text file, one integer per line.
y_test_pred, y_test_pred_sum, y_test_pred_prod = predict(clf, test_files)
for out_path, y_out in (
    ('y_test_pred_delta_mfcc_mlp.txt', y_test_pred),
    ('y_test_pred_delta_mfcc_mlp_sum.txt', y_test_pred_sum),
    ('y_test_pred_delta_mfcc_mlp_prod.txt', y_test_pred_prod),
):
    np.savetxt(out_path, y_out, fmt='%d')
print(y_test_pred)