In [9]:
import numpy as np
from keras.models import load_model
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import roc_curve, auc

In [2]:
# Trained network to evaluate: point this at the saved Keras model file.
model = load_model('DE_EN_8.h5')

# Training data. `train` is the array the evaluation cells read from.
# With several languages, stack the per-language arrays with np.vstack (as done here);
# with a single language, set train = np.load(dataset_path) instead.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# so only point these paths at files from a trusted source.
train_de, train_en = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in ('de_train_6_6_64_ds.npy', 'en_train_6_6_64_ds.npy')
)
train = np.vstack([train_en, train_de])

# Held-out test data, one array per language.
test_de, test_ch, test_en = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        'de_test_6_6_64_ds.npy',
        'ch_test_6_6_64_ds.npy',
        'en_test_6_6_64_ds.npy',
    )
)

# Number of mel bands fed to the network:
# use 64 for 22,050 Hz experiments or 45 for 8,000 Hz experiments.
mels = 45

In [3]:
def find_mean_stdd(dataset):
    """Compute per-mel-band mean and standard deviation over a dataset.

    # Arguments
        dataset: array whose rows are (id, spectro, label); only column 1
        (the spectros) is read. The spectros are stacked into one array that
        is treated as (n, mel bands, timesteps).

    # Returns
        mean: per-band mean over all samples and timesteps, shape (mel bands, 1).
        stdd: per-band standard deviation over the same axes, shape (mel bands, 1).
    """
    spectros = np.stack(dataset[:, 1])  # (n, mel bands, timesteps)
    reduce_axes = (0, 2)  # collapse samples and timesteps, keep the band axis
    # the trailing unit axis lets the statistics broadcast against the stacked spectros
    mean = spectros.mean(axis=reduce_axes)[:, np.newaxis]
    stdd = spectros.std(axis=reduce_axes)[:, np.newaxis]
    return mean, stdd


def normalise_and_reformat(dataset, mean, stdd):
    """Standardise spectros with the given statistics and lay out network inputs/targets.

    # Arguments
        dataset: array whose rows are (id, spectro, label); columns 1 and 2 are read.
        mean: per-mel-band mean to subtract (as returned by find_mean_stdd).
        stdd: per-mel-band standard deviation to divide by (as returned by find_mean_stdd).

    # Returns
        x: normalised spectros in format (n, timesteps, mel bands, 1)
        y: labels in format (n, timesteps, 1)
    """
    # inputs: stack to (n, mel bands, timesteps) and standardise each band
    spectros = np.stack(dataset[:, 1])
    spectros = (spectros - mean) / (stdd + 1e-8)  # epsilon guards against zero-variance bands
    spectros = np.moveaxis(spectros, 1, -1)  # -> (n, timesteps, mel bands)
    x = np.expand_dims(spectros, axis=3)  # -> (n, timesteps, mel bands, 1)

    # targets: stack the per-sample label arrays and append a unit axis
    labels = np.stack(dataset[:, 2])
    labels = np.moveaxis(labels, 1, -1)
    y = np.expand_dims(labels, axis=2)  # -> (n, timesteps, 1)
    return x, y

In [4]:
def metrics(x, y, threshold):
    """Calculate the TPR, TNR, FPR, FNR and F1 score at a given threshold.

    Predictions come from the module-level `model` (this function does not
    take the model as an argument), so `model` must be defined before calling.

    # Arguments
        x: inputs to the network.
        y: actual outputs; compared element-wise against the predictions,
           with 1 counted as positive and 0 as negative.
        threshold: scores greater than threshold are predicted positive, scores
                   less than or equal to the threshold are predicted negative.

    # Returns
        dictionary with TPR, TNR, FPR, FNR, F1 values (rounded to 3 decimals)
        dependent on threshold.
    """
    scores = model.predict(x)
    # Derive both masks from the untouched scores. Overwriting the array in two
    # passes (set 1, then set 0) wipes out the freshly written 1s whenever
    # threshold >= 1, because 1 <= threshold then matches the second pass.
    predicted_pos = scores > threshold
    predicted_neg = scores <= threshold
    actual_pos = y == 1
    actual_neg = y == 0
    TP = np.sum(np.logical_and(predicted_pos, actual_pos))
    FN = np.sum(np.logical_and(predicted_neg, actual_pos))
    TN = np.sum(np.logical_and(predicted_neg, actual_neg))
    FP = np.sum(np.logical_and(predicted_pos, actual_neg))
    # the 1e-8 terms keep every ratio finite when a class is absent
    TPR = TP / (TP + FN + 1e-8)
    TNR = TN / (TN + FP + 1e-8)
    FPR = FP / (FP + TN + 1e-8)
    FNR = FN / (FN + TP + 1e-8)
    precision = TP / (TP + FP + 1e-8)
    recall = TPR
    F1 = 2 * precision * recall / (precision + recall + 1e-8)
    metrics_dict = {'TPR': np.round(TPR, 3),
                    'TNR': np.round(TNR, 3),
                    'FPR': np.round(FPR, 3),
                    'FNR': np.round(FNR, 3),
                    'F1 Score': np.round(F1, 3)
                   }
    return metrics_dict

def plot_ROC(model, x, y):
    """Draw the ROC curve for a model's predictions, with the AUC in the legend.

    # Arguments
        model: model after training; its `predict` method is called on x.
        x: inputs to the network for testing.
        y: actual outputs for testing; flattened and paired with the
           flattened predictions.

    # Output
        shows the ROC plot; nothing is returned.
    """
    scores = model.predict(x).ravel()
    labels = y.ravel()
    # the thresholds returned by roc_curve are not needed for the plot
    false_pos_rate, true_pos_rate, _ = roc_curve(labels, scores, pos_label=None)
    area = auc(false_pos_rate, true_pos_rate)

    plt.title('Test ROC AUC')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.plot(false_pos_rate, true_pos_rate, 'b', label='AUC = %0.3f' % area)
    plt.legend(loc='lower right')
    # dashed red diagonal from (0, 0) to (1, 1) as a reference line
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.show()

In [5]:
# Per-band statistics come from the training data only, so the test sets
# are normalised with exactly the same mean and stdd.
mean, stdd = find_mean_stdd(train)

# Normalise and reformat each language's test set (english, chinese, german).
(x_test_en, y_test_en), (x_test_ch, y_test_ch), (x_test_de, y_test_de) = (
    normalise_and_reformat(test_set, mean, stdd)
    for test_set in (test_en, test_ch, test_de)
)

In [6]:
# Keep only the first `mels` mel bands (axis 2) of each test input.
x_test_en, x_test_ch, x_test_de = (
    features[:, :, :mels, :]
    for features in (x_test_en, x_test_ch, x_test_de)
)

In [7]:
# Decision threshold passed to metrics(): scores strictly greater than it
# count as positive predictions, everything else as negative.
threshold = 0.5

In [None]:
# Chinese test set: print the thresholded metrics, then draw the ROC curve.
metrics_ch = metrics(x_test_ch, y_test_ch, threshold)
print(metrics_ch)
plot_ROC(model, x_test_ch, y_test_ch)

In [None]:
# German test set: print the thresholded metrics, then draw the ROC curve.
metrics_de = metrics(x_test_de, y_test_de, threshold)
print(metrics_de)
plot_ROC(model, x_test_de, y_test_de)

In [None]:
# English test set: print the thresholded metrics, then draw the ROC curve.
metrics_en = metrics(x_test_en, y_test_en, threshold)
print(metrics_en)
plot_ROC(model, x_test_en, y_test_en)