In [1]:
import pandas as pd
from preprocess import Audio_Processor
import data_utils as du
from sklearn import metrics
from keras.wrappers.scikit_learn import KerasClassifier
import snn_classifier as snn_clas

Using TensorFlow backend.


In [2]:
import warnings
warnings.filterwarnings('ignore')

# Setup

In [3]:
path_to_db='../ESC-50/'
ps = Audio_Processor(path_to_db + 'audio/')

# Load Dataset
Here we load the csv that describes each file in the dataset. We add a high level category that is defined in the ESC-50 documentation. This we realize is anthetical to true training, it is a stopgap for when we use NLP to classify tags into these categories.

In [4]:
dataset = pd.read_csv(path_to_db + 'meta/esc50.csv')
classes = [None] * 50
h_classes = ['animal', 'nature', 'human', 'domestic', 'urban']

In [5]:
dataset['h_category'] = None
for index, row in dataset.iterrows():
    target = row['target']
    classes[target] = row['category']
    if target < 10:
        dataset.loc[index, 'h_category'] = 0
    elif target < 20:
        dataset.loc[index, 'h_category'] = 1
    elif target < 30:
        dataset.loc[index, 'h_category'] = 2
    elif target < 40:
        dataset.loc[index, 'h_category'] = 3
    elif target < 50:
        dataset.loc[index, 'h_category'] = 4

In [6]:
dataset.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take,h_category
0,1-100032-A-0.wav,1,0,dog,True,100032,A,0
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A,1
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A,3
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B,3
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A,1


## Getting Preprocessed Data
We allow for previously preprocessed data to be retrieved for faster training turnaround. If the fold has been preprocessed, it is loaded but if not it is processed and saved.

In [7]:
c_data = pd.concat([ps.preprocess_fold(fld, dataset) for fld in range(1,6)], ignore_index=True)

In [8]:
c_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,target
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.323366,5.238711,0.887285,-0.479714,-2.101468,-2.279721,-0.546664,0.781288,2.636188,0
1,83.621621,-50.48301,-31.115186,-3.566867,-19.398316,-14.354486,-4.658161,-14.146847,-16.949019,-1.723476,...,2.323366,5.238711,0.887285,-0.479714,-2.101468,-2.279721,-0.546664,0.781288,2.636188,0
2,109.848331,-110.08382,-54.56892,-30.153585,-58.98546,-5.598281,-5.862913,17.490401,12.544187,13.053928,...,2.323366,5.238711,0.887285,-0.479714,-2.101468,-2.279721,-0.546664,0.781288,2.636188,0
3,77.69867,-123.925469,-64.058169,-27.170063,-57.60693,-9.980594,6.718248,21.293678,15.17349,-1.113725,...,2.323366,5.238711,0.887285,-0.479714,-2.101468,-2.279721,-0.546664,0.781288,2.636188,0
4,58.790968,-133.719666,-76.282456,-25.140746,-54.37115,-13.053846,14.737484,17.361937,12.619366,-8.285872,...,2.323366,5.238711,0.887285,-0.479714,-2.101468,-2.279721,-0.546664,0.781288,2.636188,0


In [9]:
n_categories = c_data.target.unique().size

In [10]:
c_data, scalar = du.normalize_data(c_data, 'target')
train, test = du.split_training_test(c_data, 0.2)
train_X = train.drop(columns=['target'])
train_y = train.target
test_X = test.drop(columns=['target'])
test_y = test.target

## High-Level Shallow Nets
Train binary shallow nets for high level categories(animals, natural, human, domestic, urban)

In [11]:
hl_shallow = {}
for i in range(0,5):
    start_i = i * 10
    end_i = ((i + 1) * 10) - 1
#     Get equally distributed set of positive and negative instances
    xx, yy = du.balanced_supersample(train_X, train_y.between(start_i, end_i))
    clas = KerasClassifier(build_fn=snn_clas.create_baseline, epochs=20, batch_size=128, verbose=1)
    clas.fit(xx, yy)
    clas.score(test_X, test_y.between(start_i,end_i))
    hl_shallow[h_classes[i]] = clas

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 38)                1482      
_________________________________________________________________
dropout_1 (Dropout)          (None, 38)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 39        
Total params: 1,521
Trainable params: 1,521
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


ValueError: The model is not configured to compute accuracy. You should pass `metrics=["accuracy"]` to the `model.compile()` method.

In [None]:
from classification_plots import plot_confusion_matrix
import matplotlib.pyplot as plt
for i in range(0,5):
    start_i = i * 10
    end_i = ((i + 1) * 10) - 1
    pred = list(hl_shallow[h_classes[i]].predict(test_X))
    print(str(h_classes[i]) + ": " + 
          str(metrics.accuracy_score(test_y.between(start_i, end_i), pred)))
    
    cm = metrics.confusion_matrix(test_y.between(start_i, end_i), pred)
    plot_confusion_matrix(cm, ['False', 'True'])
    plt.show()

# Low-Level Deep Net

In [None]:
import reduced_dnn_classifier as dnn_clas
hl_deep = {}
for i in range(0,5):
    start_i = i * 10
    end_i = ((i + 1) * 10) - 1
    tmp = train_y.between(start_i, end_i)
    this_train_y = train_y[tmp] - start_i
    this_train_X = train_X[tmp]
    tmp = test_y.between(start_i, end_i)
    this_test_y = test_y[tmp] - start_i
    this_test_X = test_X[tmp]
    clas = KerasClassifier(build_fn=dnn_clas.create_baseline, epochs=20, batch_size=128, verbose=1)
    clas.fit(this_train_X, this_train_y)
    clas.score(this_test_X, this_test_y)
    hl_deep[h_classes[i]] = clas

## Scoring
Here we get the log likelihood of the categories when matched with its training data

In [None]:
import numpy as np
for i in range(0,5):
    start_i = i * 10
    end_i = ((i + 1) * 10) - 1
    tmp = test_y.between(start_i, end_i)
    this_test_y = test_y[tmp] - start_i
    this_test_X = test_X[tmp]
    pred = list(hl_deep[h_classes[i]].predict(this_test_X))
    print(str(h_classes[i]) + ": " + 
          str(metrics.accuracy_score(this_test_y, pred)))
    
    cm = metrics.confusion_matrix(test_y[tmp], np.array(pred) + start_i)
    plot_confusion_matrix(cm, classes[start_i:end_i])
    plt.show()

## Full Evaluation
We combine the classifiers to determine overall performance

In [None]:
res_shallow_y = [None] * 5
res_deep_y = [None] * 5
for i in range(0,5):
    res_shallow_y[i] = list(hl_shallow[h_classes[i]].predict(test_X))
    res_deep_y[i] = list(hl_deep[h_classes[i]].predict(test_X))

In [None]:
print(res_shallow_y)

In [None]:
print(res_deep_y)

In [None]:
res_y = [0] * len(res_shallow_y[0])
for i in range(0, len(res_y)):
    if res_shallow_y[0][i] == 1:
        res_y[i] = res_deep_y[0][i]
    elif res_shallow_y[1][i] == 1:
        res_y[i] = res_deep_y[1][i] + 10
    elif res_shallow_y[2][i] == 1:
        res_y[i] = res_deep_y[2][i] + 20
    elif res_shallow_y[3][i] == 1:
        res_y[i] = res_deep_y[3][i] + 30
    elif res_shallow_y[4][i] == 1:
        res_y[i] = res_deep_y[4][i] + 40

In [None]:
print(res_y)

In [None]:
from classification_plots import plot_confusion_matrix
cm = metrics.confusion_matrix(test_y, res_y)

In [None]:
print(str("Accuracy: " + 
      str(metrics.accuracy_score(test_y, res_y))))

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(25,25))
plot_confusion_matrix(cm, classes)

## Basic Probability of File Category
We run our classifier over the file's audio frames and use basic averages to determine the probability of it belonging to a class. (Should in future have classifier give probability scores for each category for each frame but... thats future work)

In [None]:
print(classes)

In [None]:
from collections import Counter
def file_probability(filename, clas, preprocessor, class_list):
    test_file = pd.DataFrame(preprocessor.preprocess(filename))
    predictions = [class_list[i] for i in list(clas.predict(test_file))]
    sns.countplot(predictions)
    c = Counter(predictions)
    return [(i, c[i] / len(predictions) * 100.0) for i, count in c.most_common()]

In [None]:
plt.figure(figsize=(10,10))
file = dataset.filename.sample(n=1).values[0]
file_probability(file, clas, preprocess, classes)

In [None]:
dataset[dataset.filename == file]