In [1]:
import numpy as np
from keras.models import load_model
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import roc_curve, auc

Using TensorFlow backend.


In [12]:
# NOTE(review): every np.load below uses allow_pickle=True, which unpickles
# arbitrary objects from the file -- only point these paths at trusted data.

# hyena training data
train = np.load('hyena_train.npy', allow_pickle=True)

# laughter data used for scaling (loaded in the order ch, de, en)
train_ch, train_de, train_en = (
    np.load(laughter_path, allow_pickle=True)
    for laughter_path in (
        'ch_train_6_6_64_ds.npy',
        'de_train_6_6_64_ds.npy',
        'en_train_6_6_64_ds.npy',
    )
)

# test data
# NOTE(review): absolute, machine-specific path -- consider a configurable data dir.
test = np.load('/import/c4dm-04/jackr/hyena_test_wnegs.npy', allow_pickle=True)

# saved models to evaluate (loaded in the order baseline, unfrozen, frozen)
baseline_model, unfrozen_model, frozen_model = (
    load_model(model_path)
    for model_path in (
        'baseline_nonorm.h5',
        'unfrozen_nonorm.h5',
        'frozen_nonorm.h5',
    )
)

In [3]:
def reformat(dataset):
    """Split a dataset into network inputs and labels.

    Column 1 (spectros) and column 2 (labels) of `dataset` are each stacked
    along a new leading axis, then axis 1 is moved to the last position.
    The spectros additionally get a singleton axis inserted at position 3.

    # Arguments
        dataset: 2-D array whose rows are (id, spectro, label)

    # Returns
        x: spectros in format (n, timesteps, mel bands, 1)
        y: labels in format (n, timesteps, k), where k is the size of axis 0
           of each label array
           (NOTE(review): earlier docs said both k=1 and k=8 -- confirm)
    """
    # stack the per-row spectros -> (n, mel bands, timesteps)
    spectros = np.stack(dataset[:, 1])
    # put timesteps before mel bands, then add the trailing channel axis
    time_major = np.moveaxis(spectros, 1, -1)
    x = np.expand_dims(time_major, axis=3)

    # same stacking / axis move for the labels (no channel axis added)
    labels = np.stack(dataset[:, 2])
    y = np.moveaxis(labels, 1, -1)
    return x, y

In [14]:
def plot_ROC(model, x, y):
    """Plot the ROC with AUC.

    Predictions and targets are both flattened, so every element of `y`
    is scored as an individual binary decision.

    # Arguments
        model: model after training (must provide `predict`).
        x: inputs to the network for testing.
        y: actual outputs for testing; must flatten to the same length
           as `model.predict(x)`.

    # Output
        plot of ROC (displayed via `plt.show()`); nothing is returned.
    """
    predicted = model.predict(x).ravel()
    actual = y.ravel()
    # the thresholds returned by roc_curve are not needed for the plot
    fpr, tpr, _ = roc_curve(actual, predicted, pos_label=None)
    roc_auc = auc(fpr, tpr)

    # Use a dedicated figure/axes instead of the implicit pyplot "current"
    # figure, so the curve can never be drawn onto a stale open figure.
    fig, ax = plt.subplots()
    ax.set_title('Test ROC AUC')
    ax.plot(fpr, tpr, 'b', label='AUC = %0.3f' % roc_auc)
    ax.legend(loc='lower right')
    # diagonal reference line from (0, 0) to (1, 1)
    ax.plot([0, 1], [0, 1], 'r--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.0])
    ax.set_ylabel('True Positive Rate')
    ax.set_xlabel('False Positive Rate')
    plt.show()

    
def scale(original_train, new_train):
    """Find value to scale by, based on magnitudes.

    The magnitude of a dataset is the norm (np.linalg.norm with default
    arguments) of all of its column-1 entries stacked into one array.

    # Arguments
        original_train: data that was used to originally train the network.
        new_train: data that will be used to fine tune network.

    # Returns
        magnitude of `original_train` divided by magnitude of `new_train`.
    """
    # magnitude of the original data first, then of the new data
    original_mag, new_mag = (
        np.linalg.norm(np.stack(data[:, 1]))
        for data in (original_train, new_train)
    )
    # multiplying the new data by this ratio matches the original magnitude
    return original_mag / new_mag

In [5]:
# reformat test data into network inputs and labels
x_test_all_bands, y_test = reformat(test)
# reduce mel bands: keep only the first 45 entries along axis 2
x_test = x_test_all_bands[:, :, :45, :]

In [None]:
# baseline: ROC / AUC of baseline_model on the test set
# NOTE(review): uses whatever x_test is currently bound to; presumably meant
# to run before the "scale data" cell rescales x_test -- confirm.
plot_ROC(baseline_model, x_test, y_test)

In [15]:
# scale data
# stack the three laughter training sets into one array
human_pretrain = np.vstack((train_ch, train_de, train_en))
# Keep the result under its own name: assigning it to `scale` would replace
# the scale() function with a float and break any re-run of this cell.
scale_factor = scale(human_pretrain, train)
# Rebuild the cropped test inputs from `test` before scaling, so re-running
# this cell applies the factor exactly once instead of compounding it.
x_test = reformat(test)[0][:, :, :45, :] * scale_factor

In [None]:
# unfrozen: ROC / AUC of unfrozen_model on the test set
# NOTE(review): presumably expects x_test to have been rescaled by the
# "scale data" cell first -- confirm execution order.
plot_ROC(unfrozen_model, x_test, y_test)

In [None]:
# frozen: ROC / AUC of frozen_model on the test set
# NOTE(review): presumably expects x_test to have been rescaled by the
# "scale data" cell first -- confirm execution order.
plot_ROC(frozen_model, x_test, y_test)