In [1]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from time import time
from IPython.display import display # Allows the use of display() for DataFrames

# Pretty display for notebooks
%matplotlib inline
random_seed = 777

# Paths
data_path = '../data'
train = pd.read_csv(data_path + "/train_noisy.csv")

# Success - Display the first record
display(train.head(n=1))

Unnamed: 0,fname,labels
0,00097e21.wav,Bathtub_(filling_or_washing)


In [2]:
# Import train_test_split
from sklearn.cross_validation import train_test_split

# Split the 'features' and 'income' data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(train['fname'], train['labels'], test_size = 0.2, random_state = 0)

# Show the results of the split
print("Training set has {} samples.".format(X_train.shape[0]))
print("Testing set has {} samples.".format(X_test.shape[0]))

Training set has 15852 samples.
Testing set has 3963 samples.




In [3]:
import matplotlib.pyplot as pl
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from time import time
from sklearn.metrics import f1_score, accuracy_score

def evaluate(results, accuracy, f1):
    """
    Visualization code to display results of various learners.
    
    inputs:
      - learners: a list of supervised learners
      - stats: a list of dictionaries of the statistic results from 'train_predict()'
      - accuracy: The score for the naive predictor
      - f1: The score for the naive predictor
    """
  
    # Create figure
    fig, ax = pl.subplots(2, 3, figsize = (11,7))

    # Constants
    bar_width = 0.3
    colors = ['#A00000','#00A0A0','#00A000']
    
    # Super loop to plot four panels of data
    for k, learner in enumerate(results.keys()):
        for j, metric in enumerate(['train_time', 'acc_train', 'f_train', 'pred_time', 'acc_test', 'f_test']):
            for i in np.arange(3):
                
                # Creative plot code
                ax[j//3, j%3].bar(i+k*bar_width, results[learner][i][metric], width = bar_width, color = colors[k])
                ax[j//3, j%3].set_xticks([0.45, 1.45, 2.45])
                ax[j//3, j%3].set_xticklabels(["1%", "10%", "100%"])
                ax[j//3, j%3].set_xlabel("Training Set Size")
                ax[j//3, j%3].set_xlim((-0.1, 3.0))
    
    # Add unique y-labels
    ax[0, 0].set_ylabel("Time (in seconds)")
    ax[0, 1].set_ylabel("Accuracy Score")
    ax[0, 2].set_ylabel("F-score")
    ax[1, 0].set_ylabel("Time (in seconds)")
    ax[1, 1].set_ylabel("Accuracy Score")
    ax[1, 2].set_ylabel("F-score")
    
    # Add titles
    ax[0, 0].set_title("Model Training")
    ax[0, 1].set_title("Accuracy Score on Training Subset")
    ax[0, 2].set_title("F-score on Training Subset")
    ax[1, 0].set_title("Model Predicting")
    ax[1, 1].set_title("Accuracy Score on Testing Set")
    ax[1, 2].set_title("F-score on Testing Set")
    
    # Add horizontal lines for naive predictors
    ax[0, 1].axhline(y = accuracy, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
    ax[1, 1].axhline(y = accuracy, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
    ax[0, 2].axhline(y = f1, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
    ax[1, 2].axhline(y = f1, xmin = -0.1, xmax = 3.0, linewidth = 1, color = 'k', linestyle = 'dashed')
    
    # Set y-limits for score panels
    ax[0, 1].set_ylim((0, 1))
    ax[0, 2].set_ylim((0, 1))
    ax[1, 1].set_ylim((0, 1))
    ax[1, 2].set_ylim((0, 1))

    # Create patches for the legend
    patches = []
    for i, learner in enumerate(results.keys()):
        patches.append(mpatches.Patch(color = colors[i], label = learner))
    pl.legend(handles = patches, bbox_to_anchor = (-.80, 2.53), \
               loc = 'upper center', borderaxespad = 0., ncol = 3, fontsize = 'x-large')
    
    # Aesthetics
    pl.suptitle("Performance Metrics for Three Supervised Learning Models", fontsize = 16, y = 1.10)
    pl.tight_layout()
    pl.show()
    

In [4]:
class Config(object):
    def __init__(self,
                 sampling_rate=16000,
                 audio_duration=2, 
                 n_classes=80,
                 use_mfcc=False,
                 n_folds=10,
                 learning_rate=0.0001, 
                 max_epochs=50,
                 n_mfcc=20):
        self.sampling_rate = sampling_rate
        self.audio_duration = audio_duration
        self.n_classes = n_classes
        self.use_mfcc = use_mfcc
        self.n_mfcc = n_mfcc
        self.n_folds = n_folds
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs

        self.audio_length = self.sampling_rate * self.audio_duration
        if self.use_mfcc:
            self.dim = (self.n_mfcc, 1 + int(np.floor(self.audio_length/512)), 1)
        else:
            self.dim = (self.audio_length, 1)
            
config = Config(sampling_rate=16000, audio_duration=2, n_folds=10, learning_rate=0.001)

In [5]:
import librosa

def audio_norm(data):
    max_data = np.max(data)
    min_data = np.min(data)
    data = (data-min_data)/(max_data-min_data+1e-6)
    return data - 0.5

def open_audio(fn):
    data, _ = librosa.core.load(data_path + '/train_noisy/' + fn, sr=config.sampling_rate, res_type='kaiser_fast')
    # Random offset / Padding
    input_length = config.audio_length
    if len(data) > input_length:
        max_offset = len(data) - input_length
        offset = np.random.randint(max_offset)
        data = data[offset:(input_length+offset)]
    else:
        if input_length > len(data):
            max_offset = input_length - len(data)
            offset = np.random.randint(max_offset)
        else:
            offset = 0
        data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    return audio_norm(data)[:, np.newaxis]

In [6]:
# open_audio('0006ae4e.wav')

In [7]:
from sklearn.metrics import accuracy_score, fbeta_score

def train_predict(learner, sample_size, X_train, y_train, X_test, y_test): 
    results = {}
    
    start = time()
    file_names_train = X_train[:sample_size]
    file_names_test = X_test[:sample_size]
    audio_samples_train = np.array([open_audio(fn) for fn in file_names_train])
    audio_samples_test = np.array([open_audio(fn) for fn in file_names_test])
    
    nsamples, nx, ny = audio_samples_train.shape
    d2_audio_samples_train = audio_samples_train.reshape((nsamples,nx*ny))
    d2_audio_samples_test = audio_samples_test.reshape((nsamples,nx*ny))
    
    learner = learner.fit(d2_audio_samples_train, y_train[:sample_size])
    end = time()
    
    results['train_time'] = end - start
    start = time()
    predictions_test = learner.predict(d2_audio_samples_test)
    predictions_train = learner.predict(d2_audio_samples_train)
    end = time() 
    results['pred_time'] = end - start
    results['acc_train'] = accuracy_score(y_train[:sample_size], predictions_train)
    results['acc_test'] = accuracy_score(y_test[:sample_size], predictions_test)
    results['f_train'] = fbeta_score(y_train[:sample_size], predictions_train, beta=0.5, average=None)
    results['f_test'] = fbeta_score(y_test[:sample_size], predictions_test, beta=0.5, average=None)
    print("{} trained on {} samples.".format(learner.__class__.__name__, sample_size))
    return results

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

random_seed = 777
clf_A = RandomForestClassifier(random_state=random_seed)

samples_1 = int(X_train.shape[0] * .1)

# Collect results on the learners
results = {}
for clf in [clf_A]:
    clf_name = clf.__class__.__name__
    results[clf_name] = {}
    for i, samples in enumerate([samples_1]):
        results[clf_name][i] = train_predict(clf, samples, X_train, y_train, X_test, y_test)

RandomForestClassifier trained on 1585 samples.


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [10]:
results['RandomForestClassifier']

{0: {'train_time': 504.5403187274933,
  'pred_time': 0.1318514347076416,
  'acc_train': 0.9949526813880126,
  'acc_test': 0.012618296529968454,
  'f_train': array([1.        , 1.        , 1.        , 1.        , 0.96899225,
         1.        , 0.96491228, 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         0.96153846, 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 0.97014925, 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.        , 1.        , 1.        , 1.        , 1.        ,
         1.      