# Incremental Evaluation

## 1 - Load the data

In [1]:
# Imports

import json
import os
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Location of the extracted OpenMIC-2018 dataset.
# Be sure to set this after downloading the dataset! Either edit the default
# below or set the OPENMIC_DATA_ROOT environment variable to override it.
# The default is a raw string so the Windows backslashes are never parsed as
# escape sequences.
DATA_ROOT = os.environ.get('OPENMIC_DATA_ROOT', r'C:\openmic-2018\openmic-2018')

# Fail early with a clear message instead of a file-not-found error further down
if not os.path.exists(DATA_ROOT):
    raise ValueError('Did you forget to set `DATA_ROOT`?')


In [2]:
# Open the dataset archive stored under DATA_ROOT.
# NOTE: allow_pickle=True lets np.load unpickle object arrays, so only point this
# at an archive you trust.
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'), allow_pickle=True)

# Pull each stored array out into its own top-level name
X, Y_true, Y_mask, sample_key = (
    OPENMIC[array_name] for array_name in ('X', 'Y_true', 'Y_mask', 'sample_key')
)


In [3]:
# Read the JSON file that maps instrument names to class indices
class_map_path = os.path.join(DATA_ROOT, 'class-map.json')
with open(class_map_path, 'r') as f:
    class_map = json.load(f)

In [4]:
# Display the mapping loaded from class-map.json (instrument name -> class index)
class_map

{'accordion': 0,
 'banjo': 1,
 'bass': 2,
 'cello': 3,
 'clarinet': 4,
 'cymbals': 5,
 'drums': 6,
 'flute': 7,
 'guitar': 8,
 'mallet_percussion': 9,
 'mandolin': 10,
 'organ': 11,
 'piano': 12,
 'saxophone': 13,
 'synthesizer': 14,
 'trombone': 15,
 'trumpet': 16,
 'ukulele': 17,
 'violin': 18,
 'voice': 19}

## 2 - Load OpenMIC's train-test splits

In [5]:
def _read_partition(relative_csv_path):
    """Read a headerless partition CSV located under DATA_ROOT.

    The file is read without a header row and then squeezed along the column
    axis, so a single-column file comes back as a Series rather than a DataFrame.
    """
    partition_frame = pd.read_csv(os.path.join(DATA_ROOT, relative_csv_path), header=None)
    return partition_frame.squeeze("columns")


# Load the sample keys listed in the train, test, and unlabeled partition files
split_train = _read_partition('partitions/split01_train.csv')
split_test = _read_partition('partitions/split01_test.csv')
split_unlabeled = _read_partition('partitions/split01_unlabeled.csv')

In [6]:
# How many examples does each partition (train, test, unlabeled) contain?
partition_sizes = (len(split_train), len(split_test), len(split_unlabeled))
print('# Train: {},  # Test: {}, # Unlabeled: {}'.format(*partition_sizes))

# Train: 9993,  # Test: 5085, # Unlabeled: 4922


In [7]:
# Turn each partition into a set so the membership checks below are cheap
train_set, test_set, unlabeled_set = map(set, (split_train, split_test, split_unlabeled))

In [8]:
# Split the data into arrays
#
# Walk sample_key once and record, per partition, the positions of the keys that
# belong to it. These positions are later used to slice X, Y_true and Y_mask.

idx_train, idx_test, idx_unlabeled = [], [], []

for idx, n in enumerate(sample_key):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    elif n in unlabeled_set:
        idx_unlabeled.append(idx)
    else:
        # `n` already is the sample key. Indexing sample_key with it would raise
        # an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

# Cast the idx_* lists to integer numpy arrays. The explicit dtype keeps an empty
# partition usable for indexing (an empty list would otherwise become float64).
idx_train = np.asarray(idx_train, dtype=int)
idx_test = np.asarray(idx_test, dtype=int)
idx_unlabeled = np.asarray(idx_unlabeled, dtype=int)

In [9]:
# Slice features, labels, and masks with the per-partition index arrays.
# Each line yields the (train, test, unlabeled) view of one source array.
partition_rows = (idx_train, idx_test, idx_unlabeled)

X_train, X_test, X_unlabeled = (X[rows] for rows in partition_rows)

Y_true_train, Y_true_test, Y_true_unlabeled = (Y_true[rows] for rows in partition_rows)

Y_mask_train, Y_mask_test, Y_mask_unlabeled = (Y_mask[rows] for rows in partition_rows)

In [10]:
# Print the shape of each feature slice (train, test, unlabeled) as a sanity check
for features_split in (X_train, X_test, X_unlabeled):
    print(features_split.shape)

(9993, 10, 128)
(5085, 10, 128)
(4922, 10, 128)


## 3 - Fit the models

In [11]:
def _labeled_mean_features(features, labels, mask, column):
    """Select the rows marked in `mask[:, column]` and prepare them for sklearn.

    Returns a pair:
      * the selected feature rows averaged over axis 1 (the time axis), and
      * a boolean vector that is True where the label in `column` is >= 0.5.
    """
    has_label = mask[:, column]
    mean_features = np.mean(features[has_label], axis=1)
    is_present = labels[has_label, column] >= 0.5
    return mean_features, is_present


# instrument name -> [random forest, k-nearest-neighbours] fitted for that instrument
models = dict()

for instrument, inst_num in class_map.items():

    # Rows that carry a label for this instrument, for train and test separately
    X_train_inst_sklearn, Y_true_train_inst = _labeled_mean_features(
        X_train, Y_true_train, Y_mask_train, inst_num)
    X_test_inst_sklearn, Y_true_test_inst = _labeled_mean_features(
        X_test, Y_true_test, Y_mask_test, inst_num)

    # One fresh classifier of each kind per instrument
    rfc = RandomForestClassifier(max_depth=8, n_estimators=100, random_state=0)
    knn = KNeighborsClassifier(n_neighbors=3)

    rfc.fit(X_train_inst_sklearn, Y_true_train_inst)
    knn.fit(X_train_inst_sklearn, Y_true_train_inst)

    # Predict on both splits with both classifiers
    Y_pred_train_rfc = rfc.predict(X_train_inst_sklearn)
    Y_pred_test_rfc = rfc.predict(X_test_inst_sklearn)

    Y_pred_train_knn = knn.predict(X_train_inst_sklearn)
    Y_pred_test_knn = knn.predict(X_test_inst_sklearn)

    # Report in a fixed order: RFC train/test, then knn train/test
    print('-' * 52)
    print(instrument)
    report_inputs = (
        ('\tTRAIN RFC', Y_true_train_inst, Y_pred_train_rfc),
        ('\tTEST RFC', Y_true_test_inst, Y_pred_test_rfc),
        ('\tTRAIN knn', Y_true_train_inst, Y_pred_train_knn),
        ('\tTEST knn', Y_true_test_inst, Y_pred_test_knn),
    )
    for heading, expected, predicted in report_inputs:
        print(heading)
        print(classification_report(expected, predicted))

    # Keep both fitted classifiers for this instrument
    models[instrument] = [rfc, knn]

----------------------------------------------------
accordion
	TRAIN RFC
              precision    recall  f1-score   support

       False       0.98      1.00      0.99       772
        True       1.00      0.93      0.96       231

    accuracy                           0.98      1003
   macro avg       0.99      0.96      0.98      1003
weighted avg       0.98      0.98      0.98      1003

	TEST RFC
              precision    recall  f1-score   support

       False       0.84      0.97      0.90       423
        True       0.72      0.31      0.44       115

    accuracy                           0.83       538
   macro avg       0.78      0.64      0.67       538
weighted avg       0.81      0.83      0.80       538

	TRAIN knn
              precision    recall  f1-score   support

       False       0.92      0.95      0.93       772
        True       0.82      0.71      0.76       231

    accuracy                           0.90      1003
   macro avg       0.87      0.83

## Applying model to new data

### This is where the algorithmic disagreement process will commence

In [12]:
# We need soundfile to load audio data
import soundfile as sf
import sys

# And the openmic-vggish preprocessor
# ('../openmic/' is appended to the module search path before the import)
sys.path.append('../openmic/')
# NOTE(review): star import; the waveform_to_features call further down presumably
# resolves through it. Confirm, and consider importing that name explicitly.
from openmic.vggish import *

# For audio playback
from IPython.display import Audio



In [63]:
# Upper bound on how many unlabeled tracks are scored.
# TODO: Change to full unlabeled set when ready
N_TRACKS_TO_SCORE = 50

trackUncertScores = {}  # track index -> mean |RF - KNN| probability gap over the disagreed instruments
instrumentDiffs = {}    # track index -> {instrument: [rfcPred, knnPred]}, disagreed instruments only
allInstProbs = {}       # track index -> {instrument: [rfcPred, knnPred]}, every instrument

# Clamp to the number of available tracks so a smaller unlabeled set cannot
# trigger an IndexError.
for i in range(min(N_TRACKS_TO_SCORE, len(X_unlabeled))):
    track = X_unlabeled[i]
    # Average over axis 0; keepdims keeps a 2-D single-row input for predict_proba
    feature_mean = np.mean(track, axis=0, keepdims=True)

    instrDict = {}
    allInsts = {}

    # Predict for each instrument with both of its classifiers
    for instrument, (rfc, knn) in models.items():
        # Row 0, column 1 of predict_proba.
        # NOTE(review): assumes column 1 is the "instrument present" class, i.e.
        # both classes occurred in that instrument's training labels. Confirm.
        rfcPred = rfc.predict_proba(feature_mean)[0, 1]
        knnPred = knn.predict_proba(feature_mean)[0, 1]

        allInsts[instrument] = [rfcPred, knnPred]

        # Check if the models agree that the instrument is present or not.
        # A score of over 0.5 indicates the instrument is present
        if round(rfcPred) != round(knnPred):
            instrDict[instrument] = [rfcPred, knnPred]

    # Save the instrument differences and the full probability table
    instrumentDiffs[i] = instrDict
    allInstProbs[i] = allInsts

    # Average the absolute differences to get the uncertainty score. Tracks on
    # which the models never disagree get no entry, so they are left out of the
    # ranking.
    if instrDict:
        total_gap = sum(abs(rfc_p - knn_p) for rfc_p, knn_p in instrDict.values())
        trackUncertScores[i] = total_gap / len(instrDict)



In [64]:
# Rebuild the score dictionary ordered from highest to lowest uncertainty score
sorted_dict = {
    track_index: trackUncertScores[track_index]
    for track_index in sorted(trackUncertScores, key=trackUncertScores.get, reverse=True)
}

In [65]:
# The first key of the ranked dictionary is the track with the highest score
ranked_track_indices = list(sorted_dict)
trackidx = ranked_track_indices[0]

# Features of that track
highestTrack = X_unlabeled[trackidx]


In [66]:
# Look up the sample key of the selected track.
# trackidx is a row of X_unlabeled, and X_unlabeled was built as X[idx_unlabeled],
# so its rows follow sample_key order. Map back through idx_unlabeled instead of
# through the row order of the partition CSV, which need not match.
trackID = sample_key[idx_unlabeled[trackidx]]
print(trackID)


000473_138240


The cells below are carried over from the original notebook.

In [67]:
# Load the audio clip of the selected track instead of a hard-coded file.
# NOTE(review): assumes clips are stored as audio/<first three characters of the
# key>/<key>.ogg, which is the layout of the path this cell used to hard-code. Confirm.
audio_path = os.path.join(DATA_ROOT, 'audio', trackID[:3], '{}.ogg'.format(trackID))
audio, rate = sf.read(audio_path)

# Run the openmic-vggish preprocessor on the waveform
time_points, features = waveform_to_features(audio, rate)



INFO:tensorflow:Restoring parameters from c:\Users\18607\Github\inc-eval-openmic\openmic\vggish\_model\vggish_model.ckpt


In [68]:
# Let's listen to the example
# The audio array is transposed before it is handed to the player.
# NOTE(review): presumably sf.read returned (frames, channels) while Audio expects
# (channels, frames). Confirm.
Audio(data=audio.T, rate=rate)

In [69]:
# Show [rfcPred, knnPred] for the instruments the two models disagreed on for the selected track
instrumentDiffs.get(trackidx)

{'accordion': [0.09218509921908369, 0.6666666666666666],
 'cymbals': [0.3669606123255063, 1.0],
 'flute': [0.2749067962781873, 0.6666666666666666],
 'guitar': [0.261933514726357, 1.0],
 'saxophone': [0.4011621905735017, 1.0],
 'synthesizer': [0.18985176341524937, 1.0]}

In [70]:
# Show [rfcPred, knnPred] for every instrument for the selected track
allInstProbs.get(trackidx)

{'accordion': [0.09218509921908369, 0.6666666666666666],
 'banjo': [0.3356600971172971, 0.3333333333333333],
 'bass': [0.17588881705465742, 0.3333333333333333],
 'cello': [0.2512680628907145, 0.3333333333333333],
 'clarinet': [0.17680841354152455, 0.0],
 'cymbals': [0.3669606123255063, 1.0],
 'drums': [0.4781021440953075, 0.3333333333333333],
 'flute': [0.2749067962781873, 0.6666666666666666],
 'guitar': [0.261933514726357, 1.0],
 'mallet_percussion': [0.5848769860119273, 0.6666666666666666],
 'mandolin': [0.3003631696242374, 0.3333333333333333],
 'organ': [0.07364822108088392, 0.0],
 'piano': [0.5022898504882792, 1.0],
 'saxophone': [0.4011621905735017, 1.0],
 'synthesizer': [0.18985176341524937, 1.0],
 'trombone': [0.23522444344019106, 0.3333333333333333],
 'trumpet': [0.1534626842527355, 0.3333333333333333],
 'ukulele': [0.22046737052008436, 0.3333333333333333],
 'violin': [0.456517700624774, 0.3333333333333333],
 'voice': [0.541003079686212, 0.6666666666666666]}

Update
* Algorithmic disagreement is largely done

To Do:
* Refactor the listening step so the audio file is located programmatically from the selected track instead of a hard-coded path
* Simulate annotation process