In [73]:
import json
import os
import warnings

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPClassifier

# Root directory of the OpenMIC-2018 dataset.
# Set the OPENMIC_DATA_ROOT environment variable to point at your own copy;
# when it is unset, the original hard-coded local path is used as the fallback.
DATA_ROOT = os.environ.get(
    'OPENMIC_DATA_ROOT',
    'C:/Users/mzazu/OneDrive/Documents/USD papers/504/Project/openmic-2018-v1.0.0/openmic-2018-v1.0.0/openmic-2018',
)

# Fail fast here rather than with a less obvious file error in a later cell.
if not os.path.exists(DATA_ROOT):
    raise ValueError('Did you forget to set `DATA_ROOT`?')

### Loading the data

In [2]:
# Location of the archive that bundles the dataset arrays.
openmic_npz_path = os.path.join(DATA_ROOT, 'openmic-2018.npz')

# NOTE(review): allow_pickle=True permits unpickling object arrays, which can run
# arbitrary code if the file is untrusted -- only load archives from a trusted source.
OPENMIC = np.load(openmic_npz_path, allow_pickle=True)

In [3]:
# List the names of the arrays stored in the archive
print([*OPENMIC.keys()])

['X', 'Y_true', 'Y_mask', 'sample_key']


In [4]:
# Pull each stored array out of the archive by name
X = OPENMIC['X']
Y_true = OPENMIC['Y_true']
Y_mask = OPENMIC['Y_mask']
sample_key = OPENMIC['sample_key']

### Loading the class map

In [5]:
# Read the JSON file that maps instrument names to label columns
class_map_path = os.path.join(DATA_ROOT, 'class-map.json')
with open(class_map_path, 'r') as class_map_file:
    class_map = json.load(class_map_file)

In [6]:
# Show the instrument-name -> column-number mapping loaded from class-map.json
class_map

{'accordion': 0,
 'banjo': 1,
 'bass': 2,
 'cello': 3,
 'clarinet': 4,
 'cymbals': 5,
 'drums': 6,
 'flute': 7,
 'guitar': 8,
 'mallet_percussion': 9,
 'mandolin': 10,
 'organ': 11,
 'piano': 12,
 'saxophone': 13,
 'synthesizer': 14,
 'trombone': 15,
 'trumpet': 16,
 'ukulele': 17,
 'violin': 18,
 'voice': 19}

### Loading the train-test splits

In [7]:
# The partition files are read without a header row. `.squeeze('columns')` collapses
# a one-column DataFrame into a Series, so each split is a single array-like of keys
# rather than a full DataFrame. It replaces the `squeeze=True` keyword of `read_csv`,
# which was deprecated in pandas 1.4 and removed in pandas 2.0.

split_train = pd.read_csv(os.path.join(DATA_ROOT, 'partitions/split01_train.csv'),
                          header=None).squeeze('columns')
split_test = pd.read_csv(os.path.join(DATA_ROOT, 'partitions/split01_test.csv'),
                         header=None).squeeze('columns')

In [8]:
# train and test examples are about 75%/25%
n_train, n_test = len(split_train), len(split_test)
print('# Train: {},  # Test: {}'.format(n_train, n_test))

# Train: 14915,  # Test: 5085


In [9]:
# Sets give fast membership tests when assigning sample keys to a partition
train_set, test_set = set(split_train), set(split_test)

### Splitting data

In [10]:
# Walk over every sample key and record its row number in either idx_train or
# idx_test, so the array data can be sliced by partition in the next step.
idx_train, idx_test = [], []

for idx, key in enumerate(sample_key):
    if key in train_set:
        idx_train.append(idx)
    elif key in test_set:
        idx_test.append(idx)
    else:
        # This should never happen, but better safe than sorry.
        # Report the offending key itself; `sample_key` must not be indexed with the
        # key, since that would raise an indexing error and mask this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key))

# Finally, cast the idx_* lists to numpy arrays
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [11]:
# Use the row indices to carve the features, labels, and masks into train/test parts
X_train, X_test = X[idx_train], X[idx_test]
Y_true_train, Y_true_test = Y_true[idx_train], Y_true[idx_test]
Y_mask_train, Y_mask_test = Y_mask[idx_train], Y_mask[idx_test]

### Fitting Baseline Model (Perceptron)

In [40]:
# This dictionary holds the fitted classifier for each instrument, keyed by name
models = dict()

# We'll iterate over all instrument classes, and fit a model for each one
# After training, we'll print a classification report for each instrument
for instrument in class_map:

    # Map the instrument name to its column number
    inst_num = class_map[instrument]

    # Step 1: sub-sample the data

    # First, we need to select down to the data for which we have annotations
    # This is what the mask arrays are for
    train_inst = Y_mask_train[:, inst_num]
    test_inst = Y_mask_test[:, inst_num]

    # Here, we're using the Y_mask_train array to slice out only the training examples
    # for which we have annotations for the given class
    X_train_inst = X_train[train_inst]

    # Step 2: simplify the data by averaging over time

    # Let's arrange the data for a sklearn Perceptron
    # Instead of having time-varying features, we'll summarize each track by its mean feature vector over time
    X_train_inst_sklearn = np.mean(X_train_inst, axis=1)

    # Again, we slice the labels to the annotated examples
    # We threshold the label likelihoods at 0.5 to get binary labels
    Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5

    # Repeat the above slicing and dicing but for the test set
    X_test_inst = X_test[test_inst]
    X_test_inst_sklearn = np.mean(X_test_inst, axis=1)
    Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

    # Step 3.
    # Initialize a new classifier
    clf = Perceptron(random_state = 0)

    # Step 4.
    # Fit it, and keep the fitted classifier so it stays available after the loop
    clf.fit(X_train_inst_sklearn, Y_true_train_inst)
    models[instrument] = clf

    # Step 5.
    # Finally, we'll evaluate the model on both train and test
    Y_pred_train = clf.predict(X_train_inst_sklearn)
    Y_pred_test = clf.predict(X_test_inst_sklearn)

    print('-' * 52)
    print(instrument)
    print('\tTRAIN')
    print(classification_report(Y_true_train_inst, Y_pred_train))
    print('\tTEST')
    print(classification_report(Y_true_test_inst, Y_pred_test))

----------------------------------------------------
accordion
	TRAIN
              precision    recall  f1-score   support

       False       0.93      0.81      0.86      1159
        True       0.58      0.81      0.67       374

    accuracy                           0.81      1533
   macro avg       0.75      0.81      0.77      1533
weighted avg       0.84      0.81      0.82      1533

	TEST
              precision    recall  f1-score   support

       False       0.90      0.74      0.81       423
        True       0.43      0.70      0.53       115

    accuracy                           0.73       538
   macro avg       0.66      0.72      0.67       538
weighted avg       0.80      0.73      0.75       538

----------------------------------------------------
banjo
	TRAIN
              precision    recall  f1-score   support

       False       0.81      0.93      0.87      1148
        True       0.81      0.58      0.67       592

    accuracy                           0

### Random Forest

In [77]:
# Train and evaluate one random-forest classifier per instrument
for instrument, inst_num in class_map.items():

    # Masks selecting the examples that carry an annotation for this instrument
    train_inst = Y_mask_train[:, inst_num]
    test_inst = Y_mask_test[:, inst_num]

    # Keep the annotated examples only, then average each one over the time axis (axis 1)
    X_train_inst, X_test_inst = X_train[train_inst], X_test[test_inst]
    X_train_inst_sklearn = X_train_inst.mean(axis=1)
    X_test_inst_sklearn = X_test_inst.mean(axis=1)

    # Binary targets: label likelihoods thresholded at 0.5
    Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5
    Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

    clf = RandomForestClassifier(n_estimators=250, max_depth=8, random_state=0)
    clf.fit(X_train_inst_sklearn, Y_true_train_inst)

    # Predictions on both partitions
    Y_pred_train = clf.predict(X_train_inst_sklearn)
    Y_pred_test = clf.predict(X_test_inst_sklearn)

    # Per-instrument report, followed by the test-set confusion matrix
    print('-' * 52)
    print(instrument)
    print('\tTRAIN')
    print(classification_report(Y_true_train_inst, Y_pred_train))
    print('\tTEST')
    print(classification_report(Y_true_test_inst, Y_pred_test))
    print('\tConfusion Matrix')
    print(confusion_matrix(Y_true_test_inst, Y_pred_test))

----------------------------------------------------
accordion
	TRAIN
              precision    recall  f1-score   support

       False       0.96      1.00      0.98      1159
        True       1.00      0.87      0.93       374

    accuracy                           0.97      1533
   macro avg       0.98      0.93      0.95      1533
weighted avg       0.97      0.97      0.97      1533

	TEST
              precision    recall  f1-score   support

       False       0.84      0.97      0.90       423
        True       0.78      0.33      0.46       115

    accuracy                           0.84       538
   macro avg       0.81      0.65      0.68       538
weighted avg       0.83      0.84      0.81       538

	Confusion Matrix
[[412  11]
 [ 77  38]]
----------------------------------------------------
banjo
	TRAIN
              precision    recall  f1-score   support

       False       0.98      0.99      0.98      1148
        True       0.98      0.96      0.97       592


### Logistic Regression

In [43]:
# Train and evaluate one L1-penalized logistic regression per instrument
for instrument, inst_num in class_map.items():

    # --- training partition: annotated examples only ---
    train_inst = Y_mask_train[:, inst_num]
    X_train_inst = X_train[train_inst]
    # Summarize each example by its mean feature vector over the time axis (axis 1)
    X_train_inst_sklearn = X_train_inst.mean(axis=1)
    # Label likelihoods thresholded at 0.5 give the binary targets
    Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5

    # --- test partition: same treatment ---
    test_inst = Y_mask_test[:, inst_num]
    X_test_inst = X_test[test_inst]
    X_test_inst_sklearn = X_test_inst.mean(axis=1)
    Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

    clf = LogisticRegression(penalty='l1', solver='liblinear', random_state=0)
    clf.fit(X_train_inst_sklearn, Y_true_train_inst)

    # Predictions on both partitions
    Y_pred_train = clf.predict(X_train_inst_sklearn)
    Y_pred_test = clf.predict(X_test_inst_sklearn)

    # Per-instrument report
    print('-' * 52)
    print(instrument)
    print('\tTRAIN')
    print(classification_report(Y_true_train_inst, Y_pred_train))
    print('\tTEST')
    print(classification_report(Y_true_test_inst, Y_pred_test))

----------------------------------------------------
accordion
	TRAIN
              precision    recall  f1-score   support

       False       0.88      0.94      0.91      1159
        True       0.75      0.61      0.67       374

    accuracy                           0.86      1533
   macro avg       0.82      0.77      0.79      1533
weighted avg       0.85      0.86      0.85      1533

	TEST
              precision    recall  f1-score   support

       False       0.85      0.89      0.87       423
        True       0.52      0.43      0.47       115

    accuracy                           0.79       538
   macro avg       0.68      0.66      0.67       538
weighted avg       0.78      0.79      0.78       538

----------------------------------------------------
banjo
	TRAIN
              precision    recall  f1-score   support

       False       0.87      0.89      0.88      1148
        True       0.77      0.74      0.75       592

    accuracy                           0

### Neural Network

In [72]:
# Fit and evaluate one small multi-layer perceptron per instrument.
# Warnings are silenced only while this cell runs: `catch_warnings` restores the
# previous warning filters on exit, so other cells still surface their warnings.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')

    for instrument in class_map:

        # Map the instrument name to its column number
        inst_num = class_map[instrument]

        # Masks selecting the examples that carry an annotation for this instrument
        train_inst = Y_mask_train[:, inst_num]
        test_inst = Y_mask_test[:, inst_num]

        X_train_inst = X_train[train_inst]

        # Summarize each example by its mean feature vector over the time axis (axis 1)
        X_train_inst_sklearn = np.mean(X_train_inst, axis=1)

        # Label likelihoods thresholded at 0.5 give the binary targets
        Y_true_train_inst = Y_true_train[train_inst, inst_num] >= 0.5

        # Same treatment for the test partition
        X_test_inst = X_test[test_inst]
        X_test_inst_sklearn = np.mean(X_test_inst, axis=1)
        Y_true_test_inst = Y_true_test[test_inst, inst_num] >= 0.5

        clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5,2), max_iter=100, random_state=0)

        clf.fit(X_train_inst_sklearn, Y_true_train_inst)

        # Evaluate the model on both train and test
        Y_pred_train = clf.predict(X_train_inst_sklearn)
        Y_pred_test = clf.predict(X_test_inst_sklearn)

        print('-' * 52)
        print(instrument)
        print('\tTRAIN')
        print(classification_report(Y_true_train_inst, Y_pred_train))
        print('\tTEST')
        print(classification_report(Y_true_test_inst, Y_pred_test))

----------------------------------------------------
accordion
	TRAIN
              precision    recall  f1-score   support

       False       0.84      0.90      0.87      1159
        True       0.62      0.48      0.54       374

    accuracy                           0.80      1533
   macro avg       0.73      0.69      0.71      1533
weighted avg       0.79      0.80      0.79      1533

	TEST
              precision    recall  f1-score   support

       False       0.85      0.90      0.87       423
        True       0.53      0.42      0.47       115

    accuracy                           0.80       538
   macro avg       0.69      0.66      0.67       538
weighted avg       0.78      0.80      0.79       538

----------------------------------------------------
banjo
	TRAIN
              precision    recall  f1-score   support

       False       0.80      0.86      0.83      1148
        True       0.69      0.59      0.64       592

    accuracy                           0