In [1]:
import pandas as pd
from preprocess import Audio_Processor
from sklearn import metrics
from classification_plots import plot_confusion_matrix, plot_learning_curve
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier

Using TensorFlow backend.


Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.


  return f(*args, **kwds)


In [2]:
# Expose only GPU index 1 to CUDA for this kernel.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

# Audio framing parameters handed to the preprocessor in later cells.
# NOTE(review): the values look like sample counts (blocksize == 2 * SR,
# overlap == SR / 2) — confirm against Audio_Processor.
SR, blocksize, overlap = 44100, 88200, 22050

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Setup

In [4]:
# Root of the FSDKaggle2018 dataset, relative to this notebook. The trailing slash matters:
# later cells build sub-paths from it by plain string concatenation.
path_to_db = '../../FSDKaggle2018/'

In [5]:
# Project-local audio processor pointed at the dataset's train/ directory;
# later cells call ps.preprocess_fold(...) on it to obtain feature tables.
ps = Audio_Processor(path_to_db + 'train/')

# Load Dataset
Here we load the CSV that describes each file in the dataset. We add a high-level category that is defined in the ESC-50 documentation. We realize this is antithetical to true training; it is a stopgap until we use NLP to classify tags into these categories.

In [None]:
# Load the per-clip metadata and discard the bookkeeping columns that are not used below.
dataset = pd.read_csv(path_to_db + 'meta/train.csv').drop(
    columns=['manually_verified', 'freesound_id', 'license'])
# Distinct fine-grained labels present in the metadata.
classes = dataset['label'].unique()
print(dataset.head())

          fname         label
0  00044347.wav        Hi-hat
1  001ca53d.wav     Saxophone
2  002d256b.wav       Trumpet
3  0033e230.wav  Glockenspiel
4  00353774.wav         Cello


In [None]:
# The three coarse categories. List position is significant: the confusion-matrix
# cells pass this list as the class labels.
h_classes = ['Human & Animal', 'Interacting Materials', 'Musical Instruments']

# Fine-grained label -> coarse category, built by listing the labels that belong
# to each entry of h_classes (same order as h_classes).
mapping = {
    label: h_class
    for h_class, labels in zip(h_classes, (
        # Human & Animal
        ('Laughter', 'Cough', 'Bark', 'Squeak', 'Burping_or_eructation', 'Meow'),
        # Interacting Materials
        ('Knock', 'Gunshot_or_gunfire', 'Computer_keyboard', 'Keys_jangling',
         'Writing', 'Tearing', 'Fart', 'Telephone', 'Bus', 'Scissors',
         'Microwave_oven', 'Shatter', 'Fireworks', 'Drawer_open_or_close',
         'Applause', 'Finger_snapping'),
        # Musical Instruments
        ('Hi-hat', 'Saxophone', 'Trumpet', 'Glockenspiel', 'Cello', 'Clarinet',
         'Snare_drum', 'Oboe', 'Flute', 'Chime', 'Bass_drum', 'Harmonica',
         'Gong', 'Double_bass', 'Tambourine', 'Cowbell', 'Electric_piano',
         'Acoustic_guitar', 'Violin_or_fiddle'),
    ))
    for label in labels
}

In [None]:
from data_utils import enumerate_strings

# Attach the coarse category for every clip and give the columns the names the
# rest of the notebook uses ('fname' -> 'filename', 'label' -> 'target').
#
# The previous per-row loop also assigned row['target'] = np.where(...), but
# iterrows() hands out a copy of each row, so that value never reached the
# DataFrame; the assignment has been removed as dead code.
#
# mapping[...] is indexed directly (rather than Series.map) so an unmapped label
# still fails loudly with KeyError instead of silently producing NaN.
dataset = (
    dataset
    .assign(h_category=[mapping[label] for label in dataset['label']])
    .rename(columns={'fname': 'filename', 'label': 'target'})
)

In [None]:
# NOTE(review): enumerate_strings is a project helper from data_utils. Judging by the
# dataset.head() output recorded further down, it appears to replace the string-valued
# columns with integer codes in place (leaving the listed 'filename' column alone) and to
# return the encoding it used — confirm against data_utils.
enum_map = enumerate_strings(dataset, ['filename'])

In [None]:
# Preview the first rows of the prepared metadata table.
dataset.head()

Unnamed: 0,filename,target,h_category
0,00044347.wav,23,2
1,001ca53d.wav,30,2
2,002d256b.wav,38,2
3,0033e230.wav,19,2
4,00353774.wav,6,2


## Getting Preprocessed Data
We allow previously preprocessed data to be retrieved for a faster training turnaround. If the fold has already been preprocessed, it is loaded; otherwise it is processed and saved.

In [None]:
# Build (or load from cache) the MFCC feature table for the dataset.
# NOTE(review): fld=None presumably means "do not restrict to a single fold" — confirm
# against Audio_Processor.preprocess_fold.
df = ps.preprocess_fold(
    dataset,
    kind='mfcc',
    fld=None,
    blocksize=blocksize,
    overlap=overlap,
)

In [None]:
# Summary statistics for every feature column plus the two target columns (l_target, h_target).
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,106,107,108,109,110,111,112,113,l_target,h_target
count,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,...,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0,44810.0
mean,52.147663,49.042768,34.044342,28.592786,25.257585,23.30937,21.786458,21.422385,20.47207,19.808558,...,-0.004034,0.002897,-0.004107,0.000633,-0.002784654,0.000304,-0.001229,0.000227,1.288976,19.323522
std,31.098928,29.891698,17.780059,14.404772,12.171209,11.246079,10.363928,10.054391,9.60932,9.396413,...,0.033203,0.031584,0.030881,0.030562,0.02981827,0.029159,0.028121,0.027795,0.726143,12.799254
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.187329,-0.178769,-0.174579,-0.192774,-0.1731542,-0.175207,-0.18134,-0.179204,0.0,0.0
25%,28.446815,25.563715,21.444611,18.701112,17.265588,15.84898,15.049056,14.967373,14.264389,13.772265,...,-0.019268,-0.011237,-0.01763,-0.012756,-0.01623025,-0.013106,-0.014336,-0.01269,1.0,8.0
50%,49.651886,45.817881,34.123636,28.947119,25.885107,23.791175,22.283264,21.990376,20.943076,20.238885,...,-0.000238,0.000277,-0.000323,0.0,-4.63025e-09,0.0,0.0,0.0,1.0,18.0
75%,73.039497,70.176756,46.170658,38.628279,33.838917,31.265005,28.909249,28.349496,26.969666,26.177079,...,0.012015,0.018537,0.011232,0.015854,0.01171952,0.014751,0.012433,0.014088,2.0,31.0
max,184.837936,160.923843,104.124741,106.353622,96.328018,100.486008,84.271973,86.573502,72.514648,75.306274,...,0.172847,0.185924,0.161228,0.171234,0.1726877,0.186002,0.167953,0.172679,2.0,40.0


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is taken over rows of df. If each row is one (overlapping)
# block of a recording, blocks from the same clip can land on both sides of the
# split — confirm whether a per-file split is needed.
train, test = train_test_split(df, test_size=0.20, random_state=125)

In [None]:
from sklearn.preprocessing import normalize

# Features: everything except the two target columns, scaled with sklearn's
# normalize() at its default settings.
train_X = normalize(train.drop(columns=['l_target', 'h_target']))
test_X = normalize(test.drop(columns=['l_target', 'h_target']))

# Labels: l_target, which spans 0..2 in the describe() output above and therefore
# lines up with the three entries of h_classes.
train_y = train['l_target']
test_y = test['l_target']

# Shallow Net
Separate into classes

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid: forest sizes 5..24 crossed with both split criteria.
params = {
    'n_estimators': range(5, 25, 1),
    'criterion': ('gini', 'entropy'),
}

# Seed the forest (same seed as the train/test split) so the search and the
# reported scores are reproducible after a kernel restart; an unseeded
# RandomForestClassifier yields different trees on every run.
rfc = RandomForestClassifier(random_state=125)

# Exhaustive search with 5-fold cross-validation on the training split.
clf = GridSearchCV(rfc, params, cv=5)

clf.fit(train_X, train_y)

In [None]:
# Score the tuned forest on the held-out split and draw its confusion matrix.
pred = clf.predict(test_X)
cm = metrics.confusion_matrix(test_y, pred)
print(metrics.accuracy_score(test_y, pred))

plt.figure(figsize=(10, 10))
plot_confusion_matrix(cm, h_classes)
plt.show()

# All Data

In [None]:
# Build (or load from cache) the MFCC feature table again, this time with fld=1.
# NOTE(review): what fold 1 contains is decided inside Audio_Processor.preprocess_fold —
# confirm how it relates to the rows used for training above.
all_df = ps.preprocess_fold(
    dataset,
    kind='mfcc',
    fld=1,
    blocksize=blocksize,
    overlap=overlap,
)

In [None]:
# Apply the same preprocessing as the training features (train_X / test_X are
# passed through normalize()); feeding raw, un-normalized MFCC values to a model
# fitted on normalized rows would make its learned split thresholds meaningless.
X = normalize(all_df.drop(['l_target', 'h_target'], axis=1))
y = all_df['l_target']

In [None]:
# Evaluate the fitted grid-search estimator on the full feature table.
# The original cell called `anim.predict`, but no `anim` is defined anywhere in this
# notebook (NameError on a fresh kernel); `clf` is the estimator fitted above.
# NOTE(review): this table may include rows the model was trained on, so the score
# is not a held-out estimate — confirm what fld=1 selects.
pred = clf.predict(X)
print(metrics.accuracy_score(y, pred))
cm = metrics.confusion_matrix(y, pred)
plt.figure(figsize=(20,20))
plot_confusion_matrix(cm, h_classes)
plt.show()