# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [5]:
# Basic libraries
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from sys import platform
import os
from time import time
from scipy.stats import mode

# Librosa related: audio feature extraction
import librosa
import librosa.display

# Sklearn related: data preprocessing and classifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier

In [2]:
# Define some usefull functions
def load_sound_file(file_name):
    X, sr = librosa.load(os.path.join(FILEROOT, file_name), sr=None)
    return X

def extract_feature(file_name): # Late fusion
    X, sample_rate = librosa.load(os.path.join(FILEROOT, file_name), sr=None)
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc).T
    delta_mfcc = librosa.feature.delta(mfcc, width=5, order=1, trim=True)
    return delta_mfcc

def parse_audio_files(file_names, file_labels, test_fold):
    features, labels, tf = np.empty((0,n_features)), np.empty(0), np.empty(0)
    for fn, fl, f in zip(file_names, file_labels, test_fold):
        try:
            ff = extract_feature(fn)
        except Exception as e:
            print "Error encountered while parsing file: ", fn
            continue
        features = np.vstack([features, ff])
        labels = np.append(labels, fl*np.ones(ff.shape[0]))
        tf = np.append(tf, f*np.ones(ff.shape[0]))
    return np.array(features), np.array(labels, dtype = np.int), np.array(tf, dtype=np.int)

def predict(clf, X_test):
    y_pred = np.empty(0)
    y_pred_sum = np.empty(0)
    y_pred_prod = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_predict_probas = clf.predict_proba(x_mfccs)
        y_pred = np.append(y_pred, mode(y_predicts).mode[0])
        y_pred_sum = np.append(y_pred_sum, np.argmax(np.sum(y_predict_probas, axis=0)))
        y_pred_prod = np.append(y_pred_prod, np.argmax(np.prod(y_predict_probas, axis=0)))
    return np.array(y_pred, dtype=np.int), np.array(y_pred_sum, dtype=np.int), np.array(y_pred_prod, dtype=np.int)

def predict_maj(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_pred = np.append(y_pred, mode(y_predicts).mode[0])
    return np.array(y_pred, dtype = np.int)

def predict_sum(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.sum(clf.predict_proba(x_mfccs), axis=0)
        y_pred = np.append(y_pred, np.argmax(y_predicts))
    return np.array(y_pred, dtype = np.int)

def predict_prod(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.prod(clf.predict_proba(x_mfccs), axis=0)
        y_pred = np.append(y_pred, np.argmax(y_predicts))
    return np.array(y_pred, dtype = np.int)

In [3]:
# Read data and preprocessing
print "Loading files..."
t0 = time()

# Define FILEROOT according to the platform
# My personal computer
if platform == "darwin":
    FILEROOT = './'
# The machines of Telecom
else:
    FILEROOT = '/tsi/plato/sons/sd207/'

# Load the cross validation folds
N_FOLDS = 3
train_files, train_labels, test_fold = np.empty(0, dtype=str), np.empty(0), np.empty(0)
for i in range(N_FOLDS):
    files = pd.read_csv('fold%s.txt' % str(i), sep='\s+', header=None)[0].values
    labels = pd.read_csv('fold%s.txt' % str(i), sep='\s+', header=None)[1].values
    train_files = np.append(train_files, files)
    train_labels = np.append(train_labels, labels)
    test_fold = np.append(test_fold, i*np.ones_like(labels))

labels = np.unique(train_labels)
n_labels = len(labels)
test_files = pd.read_csv('test_files.txt', header=None)[0].values

print "Training set size: %d" % len(train_files)
print "Test set size: %d" % len(test_files)
print "Done in %0.3fs." % (time()-t0)

Loading files...
Training set size: 872
Test set size: 298
Done in 0.016s.


In [5]:
# Feature extraction
print "Extracting features..."
n_mfcc = 40
t0 = time()
X_train, y_train, test_fold_train = parse_audio_files(train_files, train_labels, test_fold)
print X_train.shape, y_train.shape
print "Done in %0.3fs." % (time()-t0)

Extracting features...
(136770, 40) (136770,)
Done in 27.128s.


In [6]:
# Train classifier
print "Training classifier..."
np.random.seed(42)
t0 = time()
clf = MLPClassifier(hidden_layer_sizes=(40), alpha=0.1)
clf.fit(X_train, y_train)
print "Done in %0.3fs." % (time()-t0)

Training classifier...
Done in 56.006s.


In [7]:
# Predicting on validation set...
t0 = time()
y_val_pred, y_val_pred_sum, y_val_pred_prod = predict(clf, files_val)
print "Score on validation test (vote by majority): %f" % np.mean(y_val_pred == labels_val)
print classification_report(labels_val, y_val_pred, target_names=labels)

print "Score on validation test (vote by proba sum): %f" % np.mean(y_val_pred_sum == labels_val)
print classification_report(labels_val, y_val_pred_sum, target_names=labels)

print "Score on validation test (vote by proba product): %f" % np.mean(y_val_pred_prod == labels_val)
print classification_report(labels_val, y_val_pred_prod, target_names=labels)
print "Done in %0.3fs." % (time()-t0)

Score on validation test (vote by majority): 0.593103
                  precision    recall  f1-score   support

           beach       0.62      1.00      0.76        21
             bus       0.12      0.05      0.07        20
 cafe/restaurant       0.61      0.58      0.59        19
             car       0.90      0.95      0.92        19
     city_center       0.77      0.53      0.62        19
     forest_path       0.63      0.94      0.76        18
   grocery_store       0.72      1.00      0.84        21
            home       0.89      0.44      0.59        18
         library       0.50      0.56      0.53        18
   metro_station       0.73      0.44      0.55        18
          office       0.96      1.00      0.98        23
            park       0.26      0.33      0.29        18
residential_area       0.14      0.10      0.11        21
           train       1.00      0.05      0.10        19
            tram       0.38      0.83      0.53        18

     avg / total

In [8]:
y_test_pred, y_test_pred_sum, y_test_pred_prod = predict(clf, files_test)
np.savetxt('y_test_pred_delta_mfcc_mlp.txt', y_test_pred, fmt='%d')
np.savetxt('y_test_pred_delta_mfcc_mlp_sum.txt', y_test_pred_sum, fmt='%d')
np.savetxt('y_test_pred_delta_mfcc_mlp_prod.txt', y_test_pred_prod, fmt='%d')
print y_test_pred

[ 7  8  3  7 14  5  7 14  3  7  7 11  6 14 12  5 12  8  0  8  7  6  7  0 11
 12  2  7  2  3 14 14 10 12  8  5  3  3  0  4  7  0  0  7  6  4  1  6  3  8
  0  6  0 12 12  7  8  6  5 11  7  4 14 11  2 14  6  7  3  4  0  6 11  7 14
  2 14  9  1  7 12  0  4  2  2  3 14 13  1  9 12 14 10  6  1 14  7 10  5 14
  1  1  6  6 10  0  7  7  5 14  4 12  0  2  3  0  0  9  5 10 13  3  4  9  1
 14 14 12  4  5 14  7  1 10  2  2 11 12 12 14 12  5 12  8  3  3  5  7 14  8
  7  3  3 13  9 13 12  0 14  6  2  4  5  9 14  1  9  5  7  7 14 13  0  0 11
 14  8  5 12  5  5  6  4  2  5  2  4 14  5  9 14  7  4 11  9  3  9  2  7  0
 14  7 14  3 11  0 14 14  0  4  9  6  6  7 11  2  6 14  2 14  2  7  3  2  5
 12 13 14  2  4  9  0 12  7 12  7  8  5  4  5  7  7  3 11  9  5  4  7  0  1
  4  2 14  6 10  5 14  5 14  8 10  7 12  8  8 12  7  0 14  4 10  3  6 14  4
 11  2 14  8  7  9  5  3 14  2  6 10 14  4  0  9  7  0  9  7  9  5 12]
