# Challenge SD207 - 2017
*<p>Author: Pengfei MI, Rui SONG</p>*
*<p>Date: 06/06/2017</p>*

In [1]:
import numpy as np
import scipy as sp
from scipy.stats import mode
import pandas as pd
import matplotlib.pyplot as plt
from time import time

import librosa
import librosa.display

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import BaggingClassifier

In [2]:
# Define some useful functions
def load_sound_files(file_paths):
    """Load every audio file in ``file_paths`` and return the raw waveforms.

    Each path is read with ``librosa.load(path, sr=None)``. The sample rate
    that librosa returns next to the waveform is discarded.

    Parameters
    ----------
    file_paths : iterable
        Paths handed one by one to ``librosa.load``.

    Returns
    -------
    list
        One waveform per input path, in input order.
    """
    # librosa.load yields (waveform, sample_rate); only the waveform is kept.
    return [librosa.load(path, sr=None)[0] for path in file_paths]

def extract_feature(file_name): # Late fusion
    """Return the transposed mel spectrogram of one audio file.

    The file is read with ``librosa.load(file_name, sr=None)`` and passed to
    ``librosa.feature.melspectrogram`` with ``n_fft=4096``,
    ``hop_length=2048`` and ``power=2.0``. The number of mel bands comes from
    the module-level ``n_mels``, which must be defined before the first call.

    Parameters
    ----------
    file_name : str
        Path of the audio file to analyse.

    Returns
    -------
    numpy.ndarray
        The spectrogram transposed, so that the mel bands end up on the last
        axis (callers stack these rows into an ``(n, n_mels)`` matrix).
    """
    signal, native_sr = librosa.load(file_name, sr=None)
    mel_kwargs = dict(
        y=signal,
        sr=native_sr,
        n_mels=n_mels,
        n_fft=4096,
        hop_length=2048,
        power=2.0,
    )
    return librosa.feature.melspectrogram(**mel_kwargs).T

def parse_audio_files(file_names, file_labels):
    """Build the frame-level training matrix for a list of audio files.

    Every file is converted to its per-frame features with
    ``extract_feature`` and each frame receives the label of the file it
    came from.

    Parameters
    ----------
    file_names : iterable
        Paths passed one by one to ``extract_feature``.
    file_labels : iterable
        One numeric label per entry of ``file_names`` (paired with ``zip``).

    Returns
    -------
    features : numpy.ndarray
        All frames stacked row-wise, ``n_mels`` columns (module-level value).
        Shape ``(0, n_mels)`` when there is nothing to stack.
    labels : numpy.ndarray
        Integer label of every row of ``features``.
    """
    # Seed both lists with the same empty float arrays the stacking used to
    # start from: this keeps the float64 result dtype, still validates the
    # column count against n_mels and makes the empty-input case fall out
    # naturally.
    feature_blocks = [np.empty((0, n_mels))]
    label_blocks = [np.empty(0)]
    for fn, fl in zip(file_names, file_labels):
        frames = extract_feature(fn)
        feature_blocks.append(frames)
        label_blocks.append(np.full(frames.shape[0], fl, dtype=float))
    # Stack once at the end instead of re-copying the growing arrays on every
    # iteration (the repeated vstack/append was quadratic in the frame count).
    features = np.vstack(feature_blocks)
    labels = np.concatenate(label_blocks)
    # Builtin int replaces the np.int alias, which recent NumPy removed.
    return features, np.array(labels, dtype=int)

def predict_maj(clf, X_test):
    """Predict one label per audio file by majority vote over its frames.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict``.
    X_test : iterable
        Audio file paths; each one is passed to ``extract_feature``.

    Returns
    -------
    numpy.ndarray
        Integer array holding, for every file, the label predicted for the
        largest number of its frames. Ties go to the smallest label.
    """
    y_pred = []
    for x in X_test:
        frame_preds = clf.predict(extract_feature(x))
        # np.unique returns the distinct labels sorted, so argmax over the
        # counts picks the smallest label among equally frequent ones. This
        # avoids scipy.stats.mode(...).mode[0], which breaks on SciPy
        # releases where mode() returns a scalar for 1-D input.
        values, counts = np.unique(frame_preds, return_counts=True)
        y_pred.append(values[np.argmax(counts)])
    # Builtin int replaces the np.int alias, which recent NumPy removed.
    return np.array(y_pred, dtype=int)

def predict_sum(clf, X_test):
    """Predict one label per audio file by summing frame probabilities.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict_proba``. If it also exposes
        ``classes_``, the winning column is translated to the matching label.
    X_test : iterable
        Audio file paths; each one is passed to ``extract_feature``.

    Returns
    -------
    numpy.ndarray
        Integer array with, for every file, the class whose summed
        per-frame probability is largest.
    """
    # predict_proba columns follow clf.classes_; without that attribute the
    # column index itself is returned, as before.
    classes = getattr(clf, 'classes_', None)
    y_pred = []
    for x in X_test:
        scores = np.sum(clf.predict_proba(extract_feature(x)), axis=0)
        best = np.argmax(scores)
        y_pred.append(best if classes is None else classes[best])
    # Builtin int replaces the np.int alias, which recent NumPy removed.
    return np.array(y_pred, dtype=int)

def predict_prod(clf, X_test):
    y_pred = np.empty(0)
    for x in X_test:
        x_mfccs = extract_feature(x)
        y_predicts = np.prod(clf.predict_proba(x_mfccs), axis=0)
        y_pred = np.append(y_pred, np.argmax(y_predicts))
    return np.array(y_pred, dtype = np.int)

def plot_waves(sound_names, raw_sounds, sr=16000):
    """Draw one waveform subplot per sound, stacked vertically.

    Parameters
    ----------
    sound_names : sequence
        Titles of the subplots (converted with ``str``); its length sets the
        number of subplot rows.
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position.
    sr : int, optional
        Sampling rate used to scale the time axis. Defaults to 16000, the
        value that used to be hard-coded; pass the real rate of the signals
        when it differs.
    """
    # Older librosa exposes waveplot, newer releases only provide waveshow.
    draw_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow
    n_plots = len(sound_names)
    plt.figure(figsize=(20, 5*n_plots))
    for i, (n, f) in enumerate(zip(sound_names, raw_sounds), 1):
        plt.subplot(n_plots, 1, i)
        draw_wave(np.array(f), sr=sr)
        plt.title(str(n))
    plt.suptitle("Figure 1: Waveplot")
    plt.show()
    
def plot_specgram(sound_names, raw_sounds, sr=16000):
    """Draw one matplotlib spectrogram subplot per sound, stacked vertically.

    Parameters
    ----------
    sound_names : sequence
        Titles of the subplots (converted with ``str``); its length sets the
        number of subplot rows.
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position.
    sr : int, optional
        Sampling frequency passed to ``plt.specgram`` as ``Fs``. Defaults to
        16000, the value that used to be hard-coded; pass the real rate of
        the signals when it differs.
    """
    n_plots = len(sound_names)
    plt.figure(figsize=(20, 5*n_plots))
    for i, (n, f) in enumerate(zip(sound_names, raw_sounds), 1):
        plt.subplot(n_plots, 1, i)
        plt.specgram(np.array(f), Fs=sr)
        plt.title(str(n))
    plt.suptitle("Figure 2: Spectrogram")
    plt.show()

def plot_log_power_specgram(sound_names, raw_sounds):
    """Draw one log-power spectrogram subplot per sound, stacked vertically.

    For each waveform the squared magnitude of ``librosa.stft`` is converted
    to a log scale relative to its maximum and shown with
    ``librosa.display.specshow`` on a time / log-frequency grid.

    Parameters
    ----------
    sound_names : sequence
        Titles of the subplots (converted with ``str``); its length sets the
        number of subplot rows.
    raw_sounds : iterable
        Waveforms, paired with ``sound_names`` by position.
    """
    # librosa renamed logamplitude(ref_power=...) to power_to_db(ref=...) and
    # later dropped the old name; use the new one whenever it exists.
    power_to_db = getattr(librosa, 'power_to_db', None)
    n_plots = len(sound_names)
    plt.figure(figsize=(20, 5*n_plots))
    for i, (n, f) in enumerate(zip(sound_names, raw_sounds), 1):
        plt.subplot(n_plots, 1, i)
        power = np.abs(librosa.stft(f))**2
        if power_to_db is not None:
            D = power_to_db(power, ref=np.max)
        else:
            D = librosa.logamplitude(power, ref_power=np.max)
        librosa.display.specshow(D,x_axis='time' ,y_axis='log')
        plt.title(str(n))
    plt.suptitle("Figure 3: Log power spectrogram")
    plt.show()

In [3]:
# Read data and preprocessing
print("Loading files...")
t0 = time()
FILEROOT = './'

# Parse each list once; column 0 holds the audio path, column 1 the label.
train_list = pd.read_csv('train.txt', sep=r'\s+', header=None)
val_list = pd.read_csv('dev.txt', sep=r'\s+', header=None)

files_train = train_list[0].values
# Integer codes follow the order in which labels first appear in train.txt.
labels_train, label_index = pd.factorize(train_list[1])
label_index = pd.Index(label_index)
# Keep the names in code order so that labels[code] is the name of `code`
# (this is what classification_report's target_names expects).
labels = np.asarray(label_index)
n_labels = len(labels)

files_val = val_list[0].values
# Encode the validation labels with the TRAINING code table. Factorizing
# dev.txt on its own would number the classes by their order of appearance in
# that file, which need not match the codes the classifier was trained on.
labels_val = label_index.get_indexer(val_list[1])
if (labels_val < 0).any():
    raise ValueError("dev.txt contains labels that never appear in train.txt")
files_test = pd.read_csv('test_files.txt', header=None)[0].values

print("Training set size: %d" % len(files_train))
print("Validation set size: %d" % len(files_val))
print("Test set size: %d" % len(files_test))
print("Done in %0.3fs." % (time()-t0))

Loading files...
Training set size: 582
Validation set size: 290
Test set size: 298
Done in 0.018s.


In [4]:
#raw_sounds = load_sound_files(files_train[:2])

#plot_waves(labels_train[:2], raw_sounds)
#plot_specgram(labels_train[:2],raw_sounds)
#plot_log_power_specgram(labels_train[:2],raw_sounds)

In [5]:
# Feature extraction
# n_mels is read as a global by extract_feature and parse_audio_files, so it
# has to be set before either of them is called.
n_mels = 64
print("Extracting features...")
t0 = time()
X_train, y_train = parse_audio_files(files_train, labels_train)
# Single-argument print calls produce the same text on Python 2 and 3.
print("%s %s" % (X_train.shape, y_train.shape))
print("Done in %0.3fs." % (time()-t0))

Extracting features...
(136770, 64) (136770,)
Done in 39.340s.


In [6]:
# Train classifier
print("Training classifier...")
np.random.seed(42)
t0 = time()
# random_state pins the seed on the estimator itself, so the fit no longer
# depends on nothing else having drawn from NumPy's global generator between
# the seed call above and fit().
clf = MLPClassifier(hidden_layer_sizes=(64, 128, 64), alpha=0.01, random_state=42)
clf.fit(X_train, y_train)
print("Done in %0.3fs." % (time()-t0))

Training classifier...
Done in 209.221s.


In [7]:
# Score the three late-fusion voting rules on the validation files.
# Each predict_* call re-extracts the features of every validation file.
voting_rules = (
    ("Score on validation test (vote by majority): %f", predict_maj),
    ("Score on validation test (vote by proba sum): %f", predict_sum),
    ("Score on validation test (vote by proba product): %f", predict_prod),
)
for score_fmt, vote_fn in voting_rules:
    y_val_pred = vote_fn(clf, files_val)
    # Single-argument print calls produce the same text on Python 2 and 3.
    print(score_fmt % np.mean(y_val_pred == labels_val))
    print(classification_report(labels_val, y_val_pred, target_names=labels))

Score on validation test (vote by majority): 0.520690
                  precision    recall  f1-score   support

           beach       0.57      1.00      0.72        21
             bus       0.70      0.70      0.70        20
 cafe/restaurant       0.00      0.00      0.00        19
             car       0.58      0.79      0.67        19
     city_center       0.68      1.00      0.81        19
     forest_path       0.48      0.89      0.63        18
   grocery_store       0.71      0.57      0.63        21
            home       0.14      0.17      0.15        18
         library       0.50      0.67      0.57        18
   metro_station       0.42      0.56      0.48        18
          office       1.00      0.26      0.41        23
            park       0.57      0.44      0.50        18
residential_area       0.29      0.10      0.14        21
           train       0.00      0.00      0.00        19
            tram       0.54      0.72      0.62        18

     avg / total

In [8]:
# Predict the test files with the probability-sum vote and save one integer
# class code per line.
y_test_pred = predict_sum(clf, files_test)
np.savetxt('y_test_pred_mfcc_mlp.txt', y_test_pred, fmt='%d')
# Single-argument print call: same output on Python 2 and 3.
print(y_test_pred)

[ 7  8  3  7  1  5  5  8  3  7  7 12  6 14 12  5  4  8  0  8  8 14  7 11 12
 12 12  8 14  3 14  1  7 11  8  5  3  3  0  4  7  0  0  7 14 11 12  6  3  7
  0  9 11 12  0  7  8 14  5 11  7  4  8 11 12 14 14  7  3  4  0 14 11  7  9
  2  1  9  1  7 12  0  4  6  4  3 14 14 14  9 12  1  7  6  1  1  7  7  5  9
  1  1  2 14  7  3  7  7  5  1  4 12 14  4  3  0  0 14  9  7  1  3  4  8  1
  1  1 11  4  5 14  9  1  7  6 12 11 11 12 14 11  5 12  9  3  3  8  7  1  8
  9  3  3  1  9  1 12  0 14  9  7  4  8  9 14  1  9  5  7  7  1  0  0  0 11
  9  8  7 12  0  5  6  4  2  5  6  4 14  7  9  8  7  4 11  9  3  9 12  8  0
  1  7 14  3 11  0  9 14 14  4  9  6  9  7 11  4  9 14 12  1 12  7  3  4  7
  4  1  1  2  4  9 11 12  7  4  7 11  5  4  7  8  9  3 11  7  5  4  7 11  1
  4  4 14  6  7  5 14  5 14  8  7  7 11 11  8 12  7  0 14  4  7  3 14  1  4
  4  6 14  8  8  9  5  3  3  2 14  7 14  4  0  9 10  0  9  8  9  5 11]
