In [None]:
from math import fsum
import keepsake

In [None]:
# Load the experiment list from keepsake; the cell output is the number of
# experiments returned.
experiments = keepsake.experiments.list()
len(experiments)

In [None]:
# Show the types of a checkpoint and of its experiment together. A notebook only
# renders the final expression of a cell, so the two are paired in one tuple
# (checkpoint type first, experiment type second).
type(experiments[-1].checkpoints[0]), type(experiments[-1])

In [None]:
# Display the last experiment in the list.
experiments[-1]

In [None]:
# Count the "ap" metric values across the last experiment's checkpoints,
# leaving out its final checkpoint.
# NOTE(review): this relies on the sliced checkpoint list still exposing
# `.metrics` — confirm against the keepsake version in use.
steps = len(experiments[-1].checkpoints[:-1].metrics["ap"])
# Sum of the same "ap" values; `s` is not read again in the visible cells.
s = sum(experiments[-1].checkpoints[:-1].metrics["ap"])
print(steps)

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator equals zero."""
    return 0.0 if denominator == 0 else numerator / denominator


# recall, precision, F-1, avg. iteration duration, total duration
def collect_aggregate_metrics(experiment: keepsake.experiment.Experiment):
    """Summarise a run from the metrics stored on its checkpoints.

    The final checkpoint supplies the overall figures; every earlier checkpoint
    contributes one value to the average iteration duration.

    Args:
        experiment: object whose ``checkpoints`` sequence holds items with a
            ``metrics`` dict. The last item must provide
            ``metrics["calculated_metrics"]["ALL"]`` with the keys ``"r"``,
            ``"rels_found"`` and ``"last_rel"``; every item must provide
            ``"iteration_duration_seconds"``.

    Returns:
        dict with the keys, in this order:
          - ``"recall"``: ``ALL["r"]`` from the final checkpoint.
          - ``"precision"``: ``ALL["rels_found"] / ALL["last_rel"]``.
          - ``"f-1"``: ``2 * precision * recall / (precision + recall)``.
          - ``"total_duration"``: the final checkpoint's
            ``"iteration_duration_seconds"`` value.
            NOTE(review): presumably the final checkpoint stores the whole-run
            duration under this key — confirm against the code that writes it.
          - ``"avg_iteration_duration"``: mean ``"iteration_duration_seconds"``
            over all checkpoints except the final one.
        Any ratio whose denominator is zero is reported as 0.0 instead of
        raising ZeroDivisionError.

    Raises:
        IndexError: if the experiment has no checkpoints.
        KeyError: if one of the metric keys listed above is missing.
    """
    final_metrics = experiment.checkpoints[-1].metrics
    overall = final_metrics["calculated_metrics"]["ALL"]

    recall = overall["r"]
    precision = _safe_ratio(overall["rels_found"], overall["last_rel"])

    # Read each step checkpoint's own metrics dict, the same access used for the
    # final checkpoint above, so the average does not depend on what type
    # slicing the checkpoint list returns.
    step_durations = [
        checkpoint.metrics["iteration_duration_seconds"]
        for checkpoint in experiment.checkpoints[:-1]
    ]

    aggregate_metrics = {}
    aggregate_metrics["recall"] = recall
    aggregate_metrics["precision"] = precision
    # With no precision and no recall the harmonic mean is reported as 0.0.
    aggregate_metrics["f-1"] = 2 * _safe_ratio(precision * recall, precision + recall)
    aggregate_metrics["total_duration"] = final_metrics["iteration_duration_seconds"]
    # A run with only the final checkpoint has no step durations to average.
    aggregate_metrics["avg_iteration_duration"] = _safe_ratio(fsum(step_durations), len(step_durations))
    return aggregate_metrics

# per-step iteration number, recall, sample count and precision
def collect_stepwise_metrics(experiment: keepsake.experiment.Experiment):
    """Build one metrics record per checkpoint, leaving out the final checkpoint.

    Args:
        experiment: object whose ``checkpoints`` sequence holds items with a
            ``metrics`` dict providing ``"iteration"``, ``"running_true_recall"``,
            ``"sampled_num"`` and ``"running_true_r"``.

    Returns:
        list of dicts, in checkpoint order, each with the keys ``"iteration"``,
        ``"recall"`` (the ``"running_true_recall"`` value), ``"sampled_num"``
        and ``"precision"`` (``"running_true_r"`` divided by ``"sampled_num"``).
    """

    def as_record(raw):
        # Keys are read in the same order as they are written to the record.
        return {
            "iteration": raw["iteration"],
            "recall": raw["running_true_recall"],
            "sampled_num": raw["sampled_num"],
            "precision": raw["running_true_r"] / raw["sampled_num"],
        }

    return [as_record(chk.metrics) for chk in experiment.checkpoints[:-1]]


def collect_metrics_from_experiment(experiment: keepsake.experiment.Experiment):
    """Bundle the aggregate and per-step metrics of one experiment.

    Args:
        experiment: the experiment handed on to collect_aggregate_metrics and
            collect_stepwise_metrics.

    Returns:
        dict with two entries: ``"aggregate"`` (the result of
        collect_aggregate_metrics) and ``"stepwise"`` (the result of
        collect_stepwise_metrics).
    """
    # Delegate to collect_aggregate_metrics; calling this function itself here
    # would recurse until RecursionError.
    aggregate_metrics = collect_aggregate_metrics(experiment)
    stepwise_metrics = collect_stepwise_metrics(experiment)
    return {"aggregate": aggregate_metrics, "stepwise": stepwise_metrics}
    

In [None]:
# Collect metrics for every baseline AutoTAR and experimental Fuzzy ARTMAP ("fam") run.
# experiment.params["vectorizer_type"]
collected_metrics = []
for experiment in experiments:
    run_group = experiment.params["run_group"]
    if "AutoTAR" in run_group or "fam" in run_group:
        metrics = collect_metrics_from_experiment(experiment)
        # `metrics` is rebound on every match and so only keeps the latest
        # result; the list retains the result of each selected run.
        collected_metrics.append(metrics)


In [None]:
import matplotlib.pyplot as plt

In [None]:
# experiments[:-1].plot("running_true_recall")
# experiments.plot("running_true_recall")
# Recall curve for experiments[0]: x is "sampled_num", y is "running_true_recall",
# taken only from checkpoints whose "metric_type" is "step".
exp_zero_data_x = []
exp_zero_data_y = []
for chk in experiments[0].checkpoints:
    if chk.metrics["metric_type"] != "step":
        continue
    exp_zero_data_y.append(chk.metrics["running_true_recall"])
    exp_zero_data_x.append(chk.metrics["sampled_num"])

In [None]:
# Recall curve for experiments[1]: x is "sampled_num", y is "running_true_recall",
# taken only from checkpoints whose "metric_type" is "step".
exp_one_data_x = []
exp_one_data_y = []
for chk in experiments[1].checkpoints:
    if chk.metrics["metric_type"] != "step":
        continue
    exp_one_data_y.append(chk.metrics["running_true_recall"])
    exp_one_data_x.append(chk.metrics["sampled_num"])

In [None]:
# Overlay the recall curves collected for experiments 0 and 1.
plt.plot(exp_zero_data_x, exp_zero_data_y, label="0")
plt.plot(exp_one_data_x, exp_one_data_y, label="1")
# The x values are read from the "sampled_num" metric, so the axis carries that key.
plt.xlabel("sampled_num")
plt.ylabel("running_true_recall")
# The trailing semicolon keeps the notebook from echoing the Legend object's repr.
plt.legend(bbox_to_anchor=(1, 1));
