# Incremental Evaluation

## 1 - Load the data

In [None]:
# Imports

import json
import os
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Be sure to set this after downloading the dataset!
# Written as a raw string so the backslashes of the Windows path are taken
# literally ('\o' is not a valid escape sequence and triggers a warning on
# recent Python versions). The resulting value is unchanged.
DATA_ROOT = r'C:\openmic-2018\openmic-2018'

# Fail fast if the dataset directory is missing, before any cell tries to read it
if not os.path.exists(DATA_ROOT):
    raise ValueError('Did you forget to set `DATA_ROOT`?')


In [None]:
# Open the dataset archive stored under DATA_ROOT.
# allow_pickle=True lets NumPy unpickle object arrays found in the file, so
# only point this at a trusted copy of the dataset.
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'), allow_pickle=True)

# Pull each array out of the archive under its own name
X = OPENMIC['X']
Y_true = OPENMIC['Y_true']
Y_mask = OPENMIC['Y_mask']
sample_key = OPENMIC['sample_key']


In [None]:
# Read the JSON file that maps each instrument name to its column index
class_map_path = os.path.join(DATA_ROOT, 'class-map.json')
with open(class_map_path, 'r') as fh:
    class_map = json.load(fh)

In [None]:
# Display the loaded mapping (instrument name -> column index)
class_map

## 2 - Load OpenMIC's train-test splits

In [None]:
# Read the partition files that list the sample keys of each split.
# Every CSV is read without a header row; .squeeze("columns") then collapses
# the resulting DataFrame to a Series when it has a single column.


def _load_split(relative_path):
    """Read one partition CSV under DATA_ROOT and squeeze it along columns."""
    frame = pd.read_csv(os.path.join(DATA_ROOT, relative_path), header=None)
    return frame.squeeze("columns")


split_train = _load_split('partitions/split01_train.csv')
split_test = _load_split('partitions/split01_test.csv')

# Partition holding the keys that are treated as unlabeled
split_unlabeled = _load_split('partitions/split01_unlabeled.csv')

In [None]:
# Report how many sample keys each partition holds
split_sizes = [len(part) for part in (split_train, split_test, split_unlabeled)]
print('# Train: {},  # Test: {}, # Unlabeled: {}'.format(*split_sizes))

In [None]:
# Turn each partition into a set so the membership tests below are cheap
train_set, test_set, unlabeled_set = map(set, (split_train, split_test, split_unlabeled))

In [None]:
# Split the data into arrays
#
# Walk the sample keys in dataset order and record, per partition, the row
# positions that belong to it. A key is assigned to the first partition that
# contains it; a key found in none of them aborts the run.

idx_train, idx_test, idx_unlabeled = [], [], []

for idx, n in enumerate(sample_key):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    elif n in unlabeled_set:
        idx_unlabeled.append(idx)
    else:
        # `n` is already the key. Indexing sample_key with it would raise an
        # IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

# Cast the idx_* lists to integer index arrays. The explicit dtype keeps an
# empty partition usable as an index (an empty list would otherwise become a
# float array, which cannot index another array).
idx_train = np.asarray(idx_train, dtype=np.intp)
idx_test = np.asarray(idx_test, dtype=np.intp)
idx_unlabeled = np.asarray(idx_unlabeled, dtype=np.intp)

In [None]:
# Slice features, labels and masks with the per-partition row indices.
# Each statement yields (features, labels, mask) for one partition.
X_train, Y_true_train, Y_mask_train = [arr[idx_train] for arr in (X, Y_true, Y_mask)]
X_test, Y_true_test, Y_mask_test = [arr[idx_test] for arr in (X, Y_true, Y_mask)]
X_unlabeled, Y_true_unlabeled, Y_mask_unlabeled = [
    arr[idx_unlabeled] for arr in (X, Y_true, Y_mask)
]

In [None]:
# Sanity check: print the shape of each feature slice (train, test, unlabeled)
for features in (X_train, X_test, X_unlabeled):
    print(features.shape)

## 3 - Fit the models

In [None]:
# Train one random forest and one k-NN classifier per instrument, print a
# classification report for each on the train and test partitions, and keep
# the fitted pair in `models` under the instrument's name.
models = {}

for instrument, inst_num in class_map.items():

    # Mask columns for this instrument; they select the rows used below
    train_inst = Y_mask_train[:, inst_num]
    test_inst = Y_mask_test[:, inst_num]

    # Features: keep the selected rows, then average over axis 1
    # (the time axis, going by the original notebook's note)
    X_train_inst_sklearn = X_train[train_inst].mean(axis=1)
    X_test_inst_sklearn = X_test[test_inst].mean(axis=1)

    # Binary targets: the instrument counts as present at 0.5 or above
    Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5
    Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

    # Fresh classifiers for this instrument
    rfc = RandomForestClassifier(max_depth=8, n_estimators=100, random_state=0)
    knn = KNeighborsClassifier(n_neighbors=3)

    # Fit both models on the same training slice
    for clf in (rfc, knn):
        clf.fit(X_train_inst_sklearn, Y_true_train_inst)

    # Collect all predictions first, then print, so nothing is printed if a
    # prediction step fails
    reports = []
    for clf, train_title, test_title in (
        (rfc, '\tTRAIN RFC', '\tTEST RFC'),
        (knn, '\tTRAIN knn', '\tTEST knn'),
    ):
        reports.append((train_title, Y_true_train_inst, clf.predict(X_train_inst_sklearn)))
        reports.append((test_title, Y_true_test_inst, clf.predict(X_test_inst_sklearn)))

    print('-' * 52)
    print(instrument)
    for title, y_true, y_pred in reports:
        print(title)
        print(classification_report(y_true, y_pred))

    # Store both classifiers: [random forest, k-NN]
    models[instrument] = [rfc, knn]

## Algorithmic Disagreement

### Let's start with an example

In [None]:
# We need soundfile to load audio data
import soundfile as sf
import sys

# And the openmic-vggish preprocessor.
# The module search path is extended first so the import on the next line can
# be resolved — presumably from a checkout located at ../openmic/ relative to
# the working directory; confirm for your setup.
sys.path.append('../openmic/')
from openmic.vggish import *
# NOTE(review): every name used in the visible cells is imported or defined
# explicitly, so this star import may no longer be needed — confirm.

# For audio playback
from IPython.display import Audio

from prioritize import compare

In [None]:
# Run algorithmic disagreement process.
# compare() receives the unlabeled features and the fitted models. Its first
# result is used below as {row index: uncertainty score}; the other two are
# looked up by the same index in later cells.
comparisons, instrumentDiffs, allInstProbs = compare(X_unlabeled, models)

if not comparisons:
    raise ValueError('compare() returned no scores; is X_unlabeled empty?')

# Sort the dictionary to get the highest uncertainty score
sorted1 = dict(sorted(comparisons.items(), key=lambda item: item[1], reverse=True))
highest_idx = next(iter(sorted1))

# Get trackID.
# Row i of X_unlabeled is row idx_unlabeled[i] of the full dataset, so the key
# is read from sample_key through idx_unlabeled. Indexing split_unlabeled
# directly is only right when the CSV lists its keys in dataset order.
trackID = sample_key[idx_unlabeled[highest_idx]]

In [None]:
# Display the sample key of the top-ranked track selected in the previous cell
trackID

Let's hear what this difficult-to-label track sounds like!

In [None]:
# Load the audio of the selected track instead of a fixed example file.
# NOTE(review): assumes clips are stored as
# audio/<first three characters of the key>/<key>.ogg, which matches the
# example path 'audio/000/000046_3840.ogg' — confirm against the dataset README.
track_key = str(trackID)
audio, rate = sf.read(os.path.join(DATA_ROOT, 'audio', track_key[:3], track_key + '.ogg'))

In [None]:
# Let's listen to the example.
# The sample array is transposed before playback — presumably because the
# reader returns (frames, channels) while the player expects
# (channels, frames); TODO confirm.
Audio(data=audio.T, rate=rate)

In [None]:
# Display the instruments the models disagreed on for the selected track.
# instrumentDiffs is the second value returned by compare(); assuming it is a
# dict, .get() yields None when the index is absent.
instrumentDiffs.get(highest_idx)

In [None]:
# Display all the instrument evaluations for the selected track.
# allInstProbs is the third value returned by compare(); assuming it is a
# dict, .get() yields None when the index is absent.
allInstProbs.get(highest_idx)

### Simulate Annotation (Iterate)

In [None]:
# Simulated annotation loop.
# Each round ranks the remaining unlabeled rows by model disagreement, takes
# the NUM_TO_LABEL highest-ranked rows plus NUM_RANDOM random ones, records
# their track IDs as labeled, and removes them from the unlabeled pool.
X_labeled = []      # Track IDs for labeled tracks will be added here
NUM_TO_LABEL = 5    # Batch size
NUM_RANDOM = 2      # Number of random tracks to include
EPOCHS = 5          # Number of times to annotate

lenUnl = len(X_unlabeled)

# Track IDs aligned row-for-row with X_unlabeled. This array shrinks together
# with X_unlabeled, so a row index always maps to the right ID even after
# earlier rounds have removed rows.
unlabeled_ids = sample_key[idx_unlabeled]
if len(unlabeled_ids) != lenUnl:
    # Happens when this cell is re-run after X_unlabeled was already reduced
    raise RuntimeError(
        'X_unlabeled no longer matches idx_unlabeled; re-run the partition cell first'
    )

for epoch in range(EPOCHS):
    if len(X_unlabeled) == 0:
        # Nothing left to annotate
        break

    # get instrument predictions
    predictions = compare(X_unlabeled, models)[0]

    # Rank row indices by uncertainty score, highest first
    ranked = sorted(predictions.items(), key=lambda item: item[1], reverse=True)

    # get highest tracks (just the indices)
    track_indices = [row for row, _ in ranked[:NUM_TO_LABEL]]

    # add random tracks for labeling; never ask for more than are available,
    # otherwise the loop below could not terminate
    chosen = set(track_indices)
    still_needed = min(NUM_RANDOM, len(ranked) - len(chosen))
    while still_needed > 0:
        rand_idx = int(np.random.randint(0, len(ranked)))
        if rand_idx not in chosen:
            chosen.add(rand_idx)
            track_indices.append(rand_idx)
            still_needed -= 1

    # add track IDs to the labeled list
    X_labeled.extend(unlabeled_ids[track_indices])

    # simulate annotation, remove tracks from unlabeled set (features and
    # IDs are deleted together to keep them aligned)
    X_unlabeled = np.delete(X_unlabeled, track_indices, 0)
    unlabeled_ids = np.delete(unlabeled_ids, track_indices, 0)
    print("prev:", lenUnl, ", new:", len(X_unlabeled), ", labeled:", len(X_labeled))

In [None]:
# Compare sizes: unlabeled pool before the simulation vs. what remains now
print(lenUnl, len(X_unlabeled))

Update
* Annotation process is simulated

To Do
* Train model on incrementally evaluated dataset