In [2]:
import csv
import numpy as np
import os
import pandas as pd
import scipy.interpolate
import sklearn.metrics
import sys
sys.path.append("../src")
import localmodule


if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO


from matplotlib import pyplot as plt
%matplotlib inline

# Define constants.
dataset_name = localmodule.get_dataset_name()
models_dir = localmodule.get_models_dir()
units = localmodule.get_units()
n_units = len(units)
n_trials = 10


In [77]:
# Define model directory
model_name = "pcen-convnet"
aug_kind_str = "all"
if not aug_kind_str == "none":
    model_name = "_".join([model_name, "aug-" + aug_kind_str])
model_dir = os.path.join(models_dir, model_name)


for test_unit_id in range(6):

    test_unit_str = units[test_unit_id]
    test_unit_dir = os.path.join(model_dir, test_unit_str)

    val_losses = []
    for trial_id in range(n_trials):
        trial_str = "trial-" + str(trial_id)
        trial_dir = os.path.join(test_unit_dir, trial_str)
        history_name = "_".join([
            dataset_name,
            model_name,
            test_unit_str,
            trial_str,
            "history.csv"
        ])
        history_path = os.path.join(
            trial_dir, history_name)
        try:
            history_df = pd.read_csv(history_path)
            val_loss = min(history_df["Validation loss"])
        except:
            val_loss = np.inf
        val_losses.append(val_loss)

    val_losses = np.array(val_losses)
    best_trial_id = np.argmin(val_losses)
    best_trial_str = "trial-" + str(best_trial_id)
    best_trial_dir = os.path.join(test_unit_dir, best_trial_str)
    predictions_name = "_".join([
        dataset_name,
        model_name,
        "test-" + test_unit_str,
        best_trial_str,
        "predict-" + test_unit_str,
        "clip-predictions.csv"
    ])
    print(val_losses)
    print(test_unit_str, best_trial_str)
    prediction_path = os.path.join(
        best_trial_dir, predictions_name)

    # Load prediction.
    with open(prediction_path, 'r') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows = [",".join(row) for row in rows]
    rows = rows[1:]
    rows = "\n".join(rows)

    # Parse rows with correct header.
    df = pd.read_csv(StringIO(rows),
        names=[
            "Dataset",
            "Test unit",
            "Prediction unit",
            "Timestamp",
            "Center Freq (Hz)",
            "Augmentation",
            "Key",
            "Ground truth",
            "Predicted probability"])
    y_pred = np.array(df["Predicted probability"])
    y_pred = (y_pred > 0.5).astype('int')

    # Load ground truth.
    y_true = np.array(df["Ground truth"])

    # Compute confusion matrix.
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(
        y_true, y_pred).ravel()

    print(100 * (tn+tp) / (tn+fp+fn+tp))
    print()

[ 0.70647608  0.70191752  4.80288719  0.70334759  0.63155138  0.15075385
  3.54959925         inf         inf         inf]
unit01 trial-5
63.1920710868

[ 4.43504432  0.70762155  1.05082178  0.69717627  4.21015886  0.40485319
  3.23831678         inf         inf         inf]
unit02 trial-5
92.177589852

[ 0.75255397  0.72310425  0.67721544  0.39027634  0.4300449   0.40504112
  3.02420958         inf         inf         inf]
unit03 trial-3
97.7559530341

[ 0.69350966  4.5609372   2.95516487  0.71740825  0.72541278  4.40426246
  0.71833662         inf         inf         inf]
unit05 trial-0


FileNotFoundError: [Errno 2] No such file or directory: '/scratch/vl1019/BirdVox-70k_models/pcen-convnet_aug-all/unit05/trial-0/BirdVox-70k_pcen-convnet_aug-all_test-unit05_trial-0_predict-unit05_clip-predictions.csv'

In [16]:
prediction_path

'/scratch/vl1019/BirdVox-70k_models/pcen-convnet/unit01/trial-2/BirdVox-70k_pcen-convnet_test-unit01_trial-2_predict-unit01_clip-predictions.csv'