In [1]:
import csv
import numpy as np
import os
import pandas as pd
import scipy.interpolate
import sklearn.metrics
import sys
sys.path.append("../src")
import localmodule


if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO


from matplotlib import pyplot as plt
%matplotlib inline

# Define constants.
dataset_name = localmodule.get_dataset_name()
models_dir = localmodule.get_models_dir()
units = localmodule.get_units()
n_units = len(units)
n_trials = 10


In [42]:
# Define model directory
for aug_kind_str in ["none", "all"]:
#for aug_kind_str in ["all"]:
    for model_name in ["icassp-convnet", "pcen-convnet", "icassp-ntt-convnet"]:
    #for model_name in ["icassp-convnet", "icassp-add-convnet", "icassp-ntt-convnet"]:

        if model_name == "icassp-ntt-convnet" and aug_kind_str == "none":
            aug_kind_str = "all-but-noise"
        
        if not aug_kind_str == "none":
            model_name = "_".join([model_name, "aug-" + aug_kind_str])
        model_dir = os.path.join(models_dir, model_name)

        fns, fps, tns, tps, accs = [], [], [], [], []

        for test_unit_id in range(6):

            test_unit_str = units[test_unit_id]
            test_unit_dir = os.path.join(model_dir, test_unit_str)

            val_losses = []
            for trial_id in range(n_trials):
                trial_str = "trial-" + str(trial_id)
                trial_dir = os.path.join(test_unit_dir, trial_str)
                history_name = "_".join([
                    dataset_name,
                    model_name,
                    test_unit_str,
                    trial_str,
                    "history.csv"
                ])
                history_path = os.path.join(
                    trial_dir, history_name)
                try:
                    history_df = pd.read_csv(history_path)
                    val_loss = min(history_df["Validation loss"])
                except:
                    val_loss = np.inf
                val_losses.append(val_loss)

            val_losses = np.array(val_losses)
            best_trial_id = np.argmin(val_losses)
            best_trial_str = "trial-" + str(best_trial_id)
            best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
            predictions_name = "_".join([
                dataset_name,
                model_name,
                "test-" + test_unit_str,
                best_trial_str,
                "predict-" + test_unit_str,
                "clip-predictions.csv"
            ])
            #print(test_unit_str, best_trial_str)
            prediction_path = os.path.join(
                best_trial_dir, predictions_name)

            # Load prediction.
            with open(prediction_path, 'r') as f:
                reader = csv.reader(f)
                rows = list(reader)
            rows = [",".join(row) for row in rows]
            rows = rows[1:]
            rows = "\n".join(rows)

            # Parse rows with correct header.
            df = pd.read_csv(StringIO(rows),
                names=[
                    "Dataset",
                    "Test unit",
                    "Prediction unit",
                    "Timestamp",
                    "Center Freq (Hz)",
                    "Augmentation",
                    "Key",
                    "Ground truth",
                    "Predicted probability"])
            y_pred = np.array(df["Predicted probability"])
            y_pred = (y_pred > 0.5).astype('int')

            # Load ground truth.
            y_true = np.array(df["Ground truth"])

            # Compute confusion matrix.
            tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
                y_true, y_pred).ravel()

            acc = 100 * (tn+tp) / (tn+tp+fn+fp)

            tns.append(tn)
            fps.append(fp)
            fns.append(fn)
            tps.append(tp)
            accs.append(acc)

        fn = sum(fns)
        fp = sum(fps)
        tn = sum(tns)
        tp = sum(tps)
        acc = np.mean(accs)

        print(model_name, "{:5.2f}%".format(100 * (tn+tp) / (tn+tp+fn+fp)))
    print("")

icassp-convnet 92.77%
pcen-convnet 94.72%
icassp-ntt-convnet_aug-all-but-noise 93.84%

icassp-convnet_aug-all 93.56%


FileNotFoundError: [Errno 2] No such file or directory: '/scratch/vl1019/BirdVox-70k_models/pcen-convnet_aug-all/unit03/trial-2/BirdVox-70k_pcen-convnet_aug-all_test-unit03_trial-2_predict-unit03_clip-predictions.csv'

In [11]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "none"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    print(test_unit_str, best_trial_str)
    prediction_path = os.path.join(
        best_trial_dir, predictions_name)

    # Load prediction.
    with open(prediction_path, 'r') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows = [",".join(row) for row in rows]
    rows = rows[1:]
    rows = "\n".join(rows)

    # Parse rows with correct header.
    df = pd.read_csv(StringIO(rows),
        names=[
            "Dataset",
            "Test unit",
            "Prediction unit",
            "Timestamp",
            "Center Freq (Hz)",
            "Augmentation",
            "Key",
            "Ground truth",
            "Predicted probability"])
    y_pred = np.array(df["Predicted probability"])
    y_pred = (y_pred > 0.5).astype('int')

    # Load ground truth.
    y_true = np.array(df["Ground truth"])

    # Compute confusion matrix.
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
        y_true, y_pred).ravel()

    acc = 100 * (tn+tp) / (tn+tp+fn+fp)
    print(acc)
    
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    tps.append(tp)
    accs.append(acc)

fn = sum(fns)
fp = sum(fps)
tn = sum(tns)
tp = sum(tps)
acc = np.mean(accs)

print("")
print(acc)
print(100 * (tn+tp) / (tn+tp+fn+fp))

unit01 trial-2
74.5727956254
unit02 trial-1
93.3509513742
unit03 trial-6
98.4198397893
unit05 trial-2
97.4734042553
unit07 trial-5
95.3445635528
unit10 trial-9
96.666179266

92.6379556438
94.7248743009


In [6]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "all"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps = [], [], [], []

test_unit_id = 0

test_unit_str = units[test_unit_id]
test_unit_dir = os.path.join(model_dir, test_unit_str)

for trial_id in range(n_trials):
    trial_str = "trial-" + str(trial_id)
    trial_dir = os.path.join(test_unit_dir, trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    
    prediction_path = os.path.join(
        trial_dir, predictions_name)

    # Load prediction.
    try:
        with open(prediction_path, 'r') as f:
            reader = csv.reader(f)
            rows = list(reader)
        rows = [",".join(row) for row in rows]
        rows = rows[1:]
        rows = "\n".join(rows)

        # Parse rows with correct header.
        df = pd.read_csv(StringIO(rows),
            names=[
                "Dataset",
                "Test unit",
                "Prediction unit",
                "Timestamp",
                "Center Freq (Hz)",
                "Augmentation",
                "Key",
                "Ground truth",
                "Predicted probability"])
        y_pred = np.array(df["Predicted probability"])
        y_pred = (y_pred > 0.5).astype('int')

        # Load ground truth.
        y_true = np.array(df["Ground truth"])

        # Compute confusion matrix.
        tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
            y_true, y_pred).ravel()
        print(100 * (tn+tp) / (tn+tp+fn+fp))
    except:
        pass

63.1920710868
63.4483937116


In [7]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "none"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    print(test_unit_str, best_trial_str)

unit01 trial-2
unit02 trial-1
unit03 trial-6
unit05 trial-2
unit07 trial-5
unit10 trial-9


In [106]:
# Define model directory
model_name = "icassp-ntt-convnet"
aug_kind_str = "all-but-noise"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

# Loop over units.
for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_accs, val_losses = [], []
    
    # Loop over trials.
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_acc = max(history_df["Validation accuracy (%)"])
            val_loss = min(history_df["Validation loss"])
        except:
            val_acc = 0
            val_loss = np.inf
        val_accs.append(val_acc)
        val_losses.append(val_loss)

    val_accs = np.array(val_accs)
    val_losses = np.array(val_losses)
    
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    predictions_path = os.path.join(
        best_trial_dir, predictions_name)

    # Remove header, which has too few columns (hack).
    with open(predictions_path, 'r') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows = [",".join(row) for row in rows]
    rows = rows[1:]
    rows = "\n".join(rows)

    # Parse rows with correct header.
    df = pd.read_csv(StringIO(rows),
        names=[
            "Dataset",
            "Test unit",
            "Prediction unit",
            "Timestamp",
            "Center Freq (Hz)",
            "Augmentation",
            "Key",
            "Ground truth",
            "Predicted probability"])

    # Extract y_pred and y_true.
    y_pred = np.array((df["Predicted probability"] > 0.2)).astype("int")
    y_true = np.array(df["Ground truth"])

    # Compute confusion matrix.
    test_tn, test_fp, test_fn, test_tp =\
        sklearn.metrics.confusion_matrix(
            y_true, y_pred).ravel()

    # Compute accuracy.
    acc = 100 * (test_tn + test_tp) /\
        (test_tn + test_tp + test_fn + test_fp)

    print(acc)
    accs.append(acc)

print("")
print(np.mean(accs))

56.3055365687
96.1733615222
99.2154065621
96.248100304
96.4854517611
93.9391723936

89.7278381853


In [5]:
# Define model directory
model_name = "icassp-ntt-convnet"
aug_kind_str = "all"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

# Loop over units.
for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_accs, val_losses = [], []
    
    # Loop over trials.
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_acc = max(history_df["Validation accuracy (%)"])
            val_loss = min(history_df["Validation loss"])
        except:
            val_acc = 0
            val_loss = np.inf
        val_accs.append(val_acc)
        val_losses.append(val_loss)

    val_accs = np.array(val_accs)
    val_losses = np.array(val_losses)
    
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    predictions_path = os.path.join(
        best_trial_dir, predictions_name)

    # Remove header, which has too few columns (hack).
    with open(predictions_path, 'r') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows = [",".join(row) for row in rows]
    rows = rows[1:]
    rows = "\n".join(rows)

    # Parse rows with correct header.
    df = pd.read_csv(StringIO(rows),
        names=[
            "Dataset",
            "Test unit",
            "Prediction unit",
            "Timestamp",
            "Center Freq (Hz)",
            "Augmentation",
            "Key",
            "Ground truth",
            "Predicted probability"])

    # Extract y_pred and y_true.
    y_pred = np.array((df["Predicted probability"] > 0.2)).astype("int")
    y_true = np.array(df["Ground truth"])

    # Compute confusion matrix.
    test_tn, test_fp, test_fn, test_tp =\
        sklearn.metrics.confusion_matrix(
            y_true, y_pred).ravel()

    # Compute accuracy.
    acc = 100 * (test_tn + test_tp) /\
        (test_tn + test_tp + test_fn + test_fp)

    print(acc)
    accs.append(acc)

print("")
print(np.mean(accs))

73.3424470267
97.3572938689
91.210358828
86.132218845
94.1730474732
83.0092118731

87.5374296525
