In [1]:
import csv
import numpy as np
import os
import pandas as pd
import scipy.interpolate
import sklearn.metrics
import sys
sys.path.append("../src")
import localmodule


if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO


from matplotlib import pyplot as plt
%matplotlib inline

# Define constants.
dataset_name = localmodule.get_dataset_name()
models_dir = localmodule.get_models_dir()
units = localmodule.get_units()
n_units = len(units)
n_trials = 10


In [11]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "none"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    print(test_unit_str, best_trial_str)
    prediction_path = os.path.join(
        best_trial_dir, predictions_name)

    # Load prediction.
    with open(prediction_path, 'r') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows = [",".join(row) for row in rows]
    rows = rows[1:]
    rows = "\n".join(rows)

    # Parse rows with correct header.
    df = pd.read_csv(StringIO(rows),
        names=[
            "Dataset",
            "Test unit",
            "Prediction unit",
            "Timestamp",
            "Center Freq (Hz)",
            "Augmentation",
            "Key",
            "Ground truth",
            "Predicted probability"])
    y_pred = np.array(df["Predicted probability"])
    y_pred = (y_pred > 0.5).astype('int')

    # Load ground truth.
    y_true = np.array(df["Ground truth"])

    # Compute confusion matrix.
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
        y_true, y_pred).ravel()

    acc = 100 * (tn+tp) / (tn+tp+fn+fp)
    print(acc)
    
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    tps.append(tp)
    accs.append(acc)

fn = sum(fns)
fp = sum(fps)
tn = sum(tns)
tp = sum(tps)
acc = np.mean(accs)

print("")
print(acc)
print(100 * (tn+tp) / (tn+tp+fn+fp))

unit01 trial-2
74.5727956254
unit02 trial-1
93.3509513742
unit03 trial-6
98.4198397893
unit05 trial-2
97.4734042553
unit07 trial-5
95.3445635528
unit10 trial-9
96.666179266

92.6379556438
94.7248743009


In [12]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "none"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps = [], [], [], []

test_unit_id = 0

test_unit_str = units[test_unit_id]
test_unit_dir = os.path.join(model_dir, test_unit_str)

for trial_id in range(n_trials):
    trial_str = "trial-" + str(trial_id)
    trial_dir = os.path.join(test_unit_dir, trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    
    prediction_path = os.path.join(
        trial_dir, predictions_name)

    # Load prediction.
    try:
        with open(prediction_path, 'r') as f:
            reader = csv.reader(f)
            rows = list(reader)
        rows = [",".join(row) for row in rows]
        rows = rows[1:]
        rows = "\n".join(rows)

        # Parse rows with correct header.
        df = pd.read_csv(StringIO(rows),
            names=[
                "Dataset",
                "Test unit",
                "Prediction unit",
                "Timestamp",
                "Center Freq (Hz)",
                "Augmentation",
                "Key",
                "Ground truth",
                "Predicted probability"])
        y_pred = np.array(df["Predicted probability"])
        y_pred = (y_pred > 0.5).astype('int')

        # Load ground truth.
        y_true = np.array(df["Ground truth"])

        # Compute confusion matrix.
        tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
            y_true, y_pred).ravel()
        print(100 * (tn+tp) / (tn+tp+fn+fp))
    except:
        pass

74.5727956254
75.5126452495
73.8209159262


In [7]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "none"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    print(test_unit_str, best_trial_str)

unit01 trial-2
unit02 trial-1
unit03 trial-6
unit05 trial-2
unit07 trial-5
unit10 trial-9


In [27]:
# Define model directory
model_name = "icassp-ntt-convnet"
aug_kind_str = "all"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)

fns, fps, tns, tps, accs = [], [], [], [], []

for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    predictions_path = os.path.join(
        best_trial_dir, predictions_name)
    pd.read_csv(predictions_path)
    #print(test_unit_str, best_trial_str)

EmptyDataError: No columns to parse from file

In [31]:
pd.read_csv(predictions_path, ",")

Unnamed: 0,Unnamed: 1,Unnamed: 2,Dataset,Test unit,Prediction unit,Timestamp,Key,Predicted probability
BirdVox-70k,unit01,unit01,23161,0,original,unit01_000023161_00000_0_original,0,0.458793
BirdVox-70k,unit01,unit01,226742,0,original,unit01_000226742_00000_0_original,0,0.458793
BirdVox-70k,unit01,unit01,442514,0,original,unit01_000442514_00000_0_original,0,0.458793
BirdVox-70k,unit01,unit01,466895,0,original,unit01_000466895_00000_0_original,0,0.098814
BirdVox-70k,unit01,unit01,548571,0,original,unit01_000548571_00000_0_original,0,0.458793
BirdVox-70k,unit01,unit01,816761,0,original,unit01_000816761_00000_0_original,0,0.458793
BirdVox-70k,unit01,unit01,1483580,0,original,unit01_001483580_00000_0_original,0,0.998421
BirdVox-70k,unit01,unit01,1499428,0,original,unit01_001499428_00000_0_original,0,0.860690
BirdVox-70k,unit01,unit01,2972038,0,original,unit01_002972038_00000_0_original,0,0.414230
BirdVox-70k,unit01,unit01,3190247,0,original,unit01_003190247_00000_0_original,0,0.458793


In [19]:
predictions_path = os.path.join(trial_dir, predictions_name)
pd.read_csv(predictions_path)

FileNotFoundError: File b'/scratch/vl1019/BirdVox-70k_models/icassp-ntt-convnet_aug-all-but-noise/unit10/trial-9/BirdVox-70k_icassp-ntt-convnet_aug-all-but-noise_test-unit10_trial-6_predict-unit10_clip-predictions.csv' does not exist

In [16]:
predictions_name

'BirdVox-70k_icassp-ntt-convnet_aug-all-but-noise_test-unit10_trial-6_predict-unit10_clip-predictions.csv'

In [21]:
trial_dir

'/scratch/vl1019/BirdVox-70k_models/icassp-ntt-convnet_aug-all-but-noise/unit10/trial-9'