# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [1]:
# Basic libraries
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from sys import platform
import os
from time import time
from scipy.stats import mode

# Librosa related: audio feature extraction
import librosa
import librosa.display

# Sklearn related: data preprocessing and classifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.base import clone

In [2]:
# Define some useful functions
def load_sound_file(file_name):
    """Load one audio file located under FILEROOT and return its samples.

    The file is read with ``sr=None`` (librosa then keeps the file's own
    sampling rate instead of resampling); the sampling rate returned by
    librosa is discarded.
    """
    audio_path = os.path.join(FILEROOT, file_name)
    samples, _ = librosa.load(audio_path, sr=None)
    return samples

def extract_feature(file_name): # Late fusion
    """Return the MFCC matrix of one audio file, computing it at most once.

    Results are memoised in the module-level ``file_features`` dict, keyed by
    ``file_name``. On a cache miss the file is loaded from FILEROOT with its
    own sampling rate (``sr=None``) and ``n_mfcc`` coefficients are computed
    with a 4096-sample FFT window and a 2048-sample hop. The librosa result
    is transposed before being stored.
    """
    if file_name in file_features:
        return file_features[file_name]

    audio_path = os.path.join(FILEROOT, file_name)
    signal, sample_rate = librosa.load(audio_path, sr=None)
    coefficients = librosa.feature.mfcc(
        y=signal, sr=sample_rate, n_fft=4096, hop_length=2048, n_mfcc=n_mfcc)
    # Transposed so that, assuming librosa's (n_mfcc, n_frames) layout,
    # each row is one analysis frame.
    file_features[file_name] = coefficients.T
    return file_features[file_name]

def parse_audio_files(file_names, file_labels):
    """Stack the per-frame features of several files into one training set.

    Every row returned by ``extract_feature`` for a file becomes one sample,
    tagged with the label of the file it came from.

    Parameters
    ----------
    file_names : iterable of str
        Audio file names, passed one by one to ``extract_feature``.
    file_labels : iterable
        One numeric label per file, in the same order as ``file_names``.

    Returns
    -------
    features : ndarray of float, shape (n_rows_total, n_features)
    labels : ndarray of int, shape (n_rows_total,)

    Files for which feature extraction raises are reported and skipped.
    """
    # Seed both lists with empty float arrays: this keeps the float64 result
    # dtype, enforces the n_features column count, and gives well-formed
    # empty outputs when no file could be parsed.
    feature_blocks = [np.empty((0, n_features))]
    label_blocks = [np.empty(0)]
    for fn, fl in zip(file_names, file_labels):
        try:
            ff = extract_feature(fn)
        except Exception as e:
            # Best effort: skip the file, but say why it failed.
            print("Error encountered while parsing file: %s (%s)" % (fn, e))
            continue
        feature_blocks.append(ff)
        label_blocks.append(fl * np.ones(ff.shape[0]))
    # Concatenate once at the end instead of re-copying the whole matrix for
    # every file (the loop used to be quadratic in the number of frames).
    features = np.vstack(feature_blocks)
    # Built-in int replaces the np.int alias removed in NumPy 1.24.
    labels = np.concatenate(label_blocks).astype(int)
    return features, labels

def cross_validation(clf, X, y, test_fold):
    """Run predefined-fold cross validation, then refit ``clf`` on all data.

    Parameters
    ----------
    clf : estimator
        Left untouched during the folds (a fresh clone is trained for each
        one) and finally trained on the whole of ``X``/``y``.
    X : array-like of file names
    y : array-like of labels, aligned with ``X``
    test_fold : array-like
        Fold id of every entry of ``X``. Each distinct value defines one
        fold; the ids need not be 0..n-1.

    Returns
    -------
    (y_pred, y_pred_sum, y_pred_prod) : tuple of ndarray
        Out-of-fold predictions for every entry of ``X``, one array per
        output of ``clf_predict``.
    """
    X, y, test_fold = np.asarray(X), np.asarray(y), np.asarray(test_fold)
    y_pred, y_pred_sum, y_pred_prod = np.empty_like(y), np.empty_like(y), np.empty_like(y)
    # Iterate over the fold ids actually present. Assuming they are
    # range(n_folds) would leave the uninitialised np.empty_like values in
    # place for any fold whose id is outside that range.
    fold_ids = np.unique(test_fold)
    n_folds = len(fold_ids)
    for fold_no, fold_id in enumerate(fold_ids, 1):
        print("Launching fold #%d/%d" % (fold_no, n_folds))
        t0 = time()
        new_clf = clone(clf, safe=True)
        held_out = test_fold == fold_id
        X_train, X_test = X[~held_out], X[held_out]
        y_train, y_test = y[~held_out], y[held_out]
        print("train set size: %d, test set size: %d" % (len(X_train), len(X_test)))
        clf_train(new_clf, X_train, y_train)
        test_pred, test_pred_sum, test_pred_prod = clf_predict(new_clf, X_test)
        y_pred[held_out] = test_pred
        y_pred_sum[held_out] = test_pred_sum
        y_pred_prod[held_out] = test_pred_prod
        # Same 1-based numbering as the "Launching" line above.
        print("fold#%d done in %0.3fs, score: %0.3f." % (fold_no, time()-t0, accuracy_score(y_test, test_pred)))
    t0 = time()
    print("Retraining classifier with whole train set...")
    clf_train(clf, X, y)
    print("Done in %0.3fs." % (time() - t0))
    return y_pred, y_pred_sum, y_pred_prod

def clf_train(clf, files, file_labels):
    """Fit ``clf`` in place on the features extracted from ``files``.

    ``parse_audio_files`` turns the files and their labels into a feature
    matrix and a label vector, which are handed directly to ``clf.fit``.
    Nothing is returned.
    """
    frame_features, frame_labels = parse_audio_files(files, file_labels)
    clf.fit(frame_features, frame_labels)
        
def predict_maj(clf, X_test):
    """Predict one label per file by majority vote over its frames.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``
    X_test : iterable of file names (resolved through ``extract_feature``)

    Returns
    -------
    ndarray of int, shape (n_files,)
        For each file, the most frequent per-frame prediction; ties go to
        the smallest label (behaviour of ``scipy.stats.mode``).
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        # Depending on the SciPy version, mode() of a 1-D input is either a
        # length-1 array or a scalar; atleast_1d makes [0] valid for both.
        y_pred.append(np.atleast_1d(mode(y_predicts).mode)[0])
    # Built-in int replaces the np.int alias removed in NumPy 1.24.
    return np.array(y_pred, dtype=int)

def predict_sum(clf, X_test):
    """Predict one class index per file by summing frame probabilities.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``
    X_test : iterable of file names (resolved through ``extract_feature``)

    Returns
    -------
    ndarray of int, shape (n_files,)
        For each file, the column index of ``predict_proba`` with the largest
        probability summed over all frames.
        NOTE(review): this is a column index, which equals the class label
        only if ``clf.classes_`` is 0..n-1 -- confirm for other label sets.
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.sum(clf.predict_proba(x_mfccs), axis=0)
        y_pred.append(np.argmax(y_predicts))
    # Built-in int replaces the np.int alias removed in NumPy 1.24.
    return np.array(y_pred, dtype=int)

def predict_prod(clf, X_test):
    """Predict one class index per file from the product of frame probabilities.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba``
    X_test : iterable of file names (resolved through ``extract_feature``)

    Returns
    -------
    ndarray of int, shape (n_files,)
        For each file, the column index of ``predict_proba`` whose
        probabilities have the largest product over all frames.
        NOTE(review): this is a column index, which equals the class label
        only if ``clf.classes_`` is 0..n-1 -- confirm for other label sets.
    """
    y_pred = []
    for x in X_test:
        x_mfccs = extract_feature(x)
        # Multiplying hundreds of probabilities underflows to 0.0 for every
        # class, after which argmax always answers 0. Summing logs ranks the
        # classes identically without underflow. An exact zero probability
        # maps to -inf, i.e. still a zero product.
        with np.errstate(divide='ignore'):
            y_predicts = np.sum(np.log(clf.predict_proba(x_mfccs)), axis=0)
        y_pred.append(np.argmax(y_predicts))
    # Built-in int replaces the np.int alias removed in NumPy 1.24.
    return np.array(y_pred, dtype=int)

def clf_predict(clf, X_test):
    """Predict one label per file with three frame-fusion rules at once.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict`` and ``predict_proba``
    X_test : iterable of file names (resolved through ``extract_feature``)

    Returns
    -------
    (y_pred, y_pred_sum, y_pred_prod) : tuple of int ndarrays, shape (n_files,)
        * y_pred      -- majority vote over the per-frame ``predict`` output
        * y_pred_sum  -- argmax of the frame probabilities summed per class
        * y_pred_prod -- argmax of the frame probabilities multiplied per class
        NOTE(review): the last two are column indices of ``predict_proba``;
        they equal class labels only if ``clf.classes_`` is 0..n-1.
    """
    y_pred, y_pred_sum, y_pred_prod = [], [], []
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = clf.predict(x_mfccs)
        y_predict_probas = clf.predict_proba(x_mfccs)
        # Depending on the SciPy version, mode() of a 1-D input is either a
        # length-1 array or a scalar; atleast_1d makes [0] valid for both.
        y_pred.append(np.atleast_1d(mode(y_predicts).mode)[0])
        y_pred_sum.append(np.argmax(np.sum(y_predict_probas, axis=0)))
        # The product is taken in log space: a raw product over hundreds of
        # frames underflows to 0.0 for every class and argmax then always
        # answers 0. log(0) = -inf keeps "zero product" semantics.
        with np.errstate(divide='ignore'):
            log_product = np.sum(np.log(y_predict_probas), axis=0)
        y_pred_prod.append(np.argmax(log_product))
    # Built-in int replaces the np.int alias removed in NumPy 1.24.
    return np.array(y_pred, dtype=int), np.array(y_pred_sum, dtype=int), np.array(y_pred_prod, dtype=int)

In [3]:
# Read data and preprocessing
print("Loading files...")
t0 = time()

# Define FILEROOT according to the platform
# My personal computer
if platform == "darwin":
    FILEROOT = './'
# The machines of Telecom
else:
    FILEROOT = '/tsi/plato/sons/sd207/'

# Load the cross validation folds: train<i>.txt lists the files (column 0)
# and their scene names (column 1) that form fold i.
N_FOLDS = 3
train_files, train_scenes = np.empty(0, dtype=str), np.empty(0)
test_fold = np.empty(0, dtype=int)
for i in range(N_FOLDS):
    # Parse each fold file once and take both columns from the same table.
    fold_table = pd.read_csv('train%s.txt' % str(i), sep=r'\s+', header=None)
    files = fold_table[0].values
    scenes = fold_table[1].values
    print("Fold #%d: %d files from %d sources" % (i+1, len(files), len(np.unique([f.split('_')[0] for f in files]))))
    train_files = np.append(train_files, files)
    train_scenes = np.append(train_scenes, scenes)
    # Plain integer fold ids; np.ones_like on the string column would give
    # an object-dtype vector.
    test_fold = np.append(test_fold, np.full(len(scenes), i, dtype=int))

scenes = np.unique(train_scenes)
n_scenes = len(scenes)
labels = pd.factorize(scenes)[0]
n_labels = len(labels)
# sort=True numbers the classes in sorted order, so that label k is
# scenes[k]. Without it factorize numbers classes by first appearance, which
# need not match the sorted `scenes` used later as report target names.
train_labels = pd.factorize(train_scenes, sort=True)[0]
test_files = pd.read_csv('test_files.txt', header=None)[0].values

print("%d scenes: %s" % (n_scenes, scenes))
print("Training set size: %d" % len(train_files))
print("Test set size: %d" % len(test_files))
print("Done in %0.3fs." % (time()-t0))

Loading files...
Fold #1: 290 files from 45 sources
Fold #2: 292 files from 43 sources
Fold #3: 290 files from 45 sources
15 scenes: ['beach' 'bus' 'cafe/restaurant' 'car' 'city_center' 'forest_path'
 'grocery_store' 'home' 'library' 'metro_station' 'office' 'park'
 'residential_area' 'train' 'tram']
Training set size: 872
Test set size: 298
Done in 0.016s.


In [4]:
# Train classifier
print("Doing cross validation...")
t0 = time()

np.random.seed(42)
n_mfcc = 40
# One feature per MFCC coefficient: parse_audio_files stacks extract_feature
# output into an (n, n_features) matrix, so the two values must stay equal.
n_features = n_mfcc
# Start from an empty feature cache so that no MFCCs computed with a
# different n_mfcc are reused.
file_features = {}

# (40,) is a one-element tuple: a single hidden layer of 40 units.
# The previous spelling (40) was just the integer 40.
clf = MLPClassifier(hidden_layer_sizes=(40,), alpha=0.1)
y_pred, y_pred_sum, y_pred_prod = cross_validation(clf, train_files, train_labels, test_fold)
print("Done in %0.3fs." % (time()-t0))

Doing cross validation...
Launching fold #1/3
train set size: 582, test set size: 290
fold#0 done in 62.610s, score: 0.566.
Launching fold #2/3
train set size: 580, test set size: 292
fold#1 done in 37.846s, score: 0.651.
Launching fold #3/3
train set size: 582, test set size: 290
fold#2 done in 38.880s, score: 0.624.
Retraining classifier with whole train set...
Done in 68.253s.
Done in 207.592s.


In [5]:
# Report the cross-validated performance of each frame-fusion rule.
# The predictions themselves were produced by cross_validation(); this cell
# only scores them against train_labels.
t0 = time()
for score_line, fused_pred in [
        ("Score on validation test (vote by majority): %f", y_pred),
        ("Score on validation test (vote by proba sum): %f", y_pred_sum),
        ("Score on validation test (vote by proba product): %f", y_pred_prod)]:
    print(score_line % accuracy_score(train_labels, fused_pred))
    print(classification_report(train_labels, fused_pred, target_names=scenes))
    print("Confusion matrix:")
    print(confusion_matrix(train_labels, fused_pred))
print("Done in %0.3fs." % (time()-t0))

Score on validation test (vote by majority): 0.613532
                  precision    recall  f1-score   support

           beach       0.79      0.51      0.62        59
             bus       0.61      0.68      0.64        59
 cafe/restaurant       0.86      0.54      0.67        57
             car       0.78      0.68      0.73        59
     city_center       0.83      0.90      0.86        59
     forest_path       0.74      0.83      0.78        60
   grocery_store       0.70      0.80      0.75        59
            home       0.62      0.34      0.44        58
         library       0.63      0.84      0.72        58
   metro_station       0.46      0.57      0.51        56
          office       0.67      0.68      0.68        60
            park       0.28      0.21      0.24        58
residential_area       0.36      0.53      0.43        59
           train       0.57      0.22      0.32        55
            tram       0.51      0.84      0.63        56

     avg / total

In [6]:
# Predict the test files with the classifier refitted on the whole training
# set, and write one label per line for each fusion rule.
y_test_pred, y_test_pred_sum, y_test_pred_prod = clf_predict(clf, test_files)
for out_name, predictions in [
        ('y_test_pred_mfcc_mlp.txt', y_test_pred),
        ('y_test_pred_mfcc_mlp_sum.txt', y_test_pred_sum),
        ('y_test_pred_mfcc_mlp_prod.txt', y_test_pred_prod)]:
    np.savetxt(out_name, predictions, fmt='%d')
print(y_test_pred)

[10  8  3  7 14  5  7  0  3  7 10 12  6 14 12  5 12  8  0  8  8  6  7  7 11
 12  2  7  2  3 14 14 10 12  8  5  3  3  0  4  7  0  0  7  6 11  0  6  3  8
  0  6 11 12 12 10 11  6  5 11  7  4  0 11  2 14  9  7  3  4  0 14 11  7 14
  2  1  9  1 10 12  0  4  2  4  3 14 13  1  9 12 14 10  6  1  1  7 10  5  0
  1  1  6  6 10  0  7  5  5 14  4 12 14  2  3  0  0  9  9 10 13  3  4  9  1
  1 14 12  4  5 14  7  1 10  2  2 11 12 12 14 12  5 12  9  3  3  9  7  1 11
  7  3 14 13  9 13 12  0 14  6  7  4 11  9 14  1  9  5 10  7 14 13  0 12 11
  0  8  7 12  5  5  6  4  2  5  2  4 14  7  9 14  7  4 11  9  3  9  2  8  0
  1 10 14  3 11  0  4 14  0  4  9  6  6 10 11  2  6 14  2 14  2  7  3  2 11
 12 13  1  2  2  9 11 12 10  4 10 11  5  4  7  8  7  3 12  9  5  4  7  0  1
  4  2 14  6 10  5 14  5  1  8 10  7 12  7 11 12  5 12 14  4 10  3  6 14  4
 11  2 14  8  7  9  5  3 14  2  6 10 14  4  0  9  7  0  9  8  9  5 12]
