In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pickle
import glob
# import tabulate
# from IPython.display import HTML, display
# import tabletext
import pandas as pd

from runstatistics import RunStatistics

In [None]:
# Configuration: which experiment to analyse and where the experiment folders live.
experiment_name = 'FDENSER256_10/FDENSER*'
# experiment_name = 'DECAY256X/DECAY*'

experiments_path = 'D:/experiments/'
# experiments_path = '/content/gdrive/MyDrive/experiments/'

# A '*' in the experiment name selects several run folders at once;
# sorting keeps the order of the runs (and thus their indices) deterministic.
if '*' in experiment_name:
	folders = sorted(glob.glob(experiments_path + experiment_name))
else:
	folders = [experiments_path + experiment_name]

# NOTE: pickle.load can execute arbitrary code - only load statistics files from a trusted source.
stats = []
for path in folders:
	with open(path + '/statistics.pkl', 'rb') as f:
		stats.append(pickle.load(f))

if not stats:
	# Report the search pattern: when nothing matched, the loop never ran and no `path` exists.
	raise FileNotFoundError(f"No statistics found in {experiments_path + experiment_name}")
stat = stats[0]
while stats[-1].run_generation < stat.run_generation:   # delete last stat if incomplete
	del stats[-1]

def hms(seconds):
	"""Format a duration in seconds as ``HH:MM:SS``.

	The hour field is not wrapped at 24, so durations of a day or more
	(e.g. run times summed over several runs) are shown in full: 90000 s
	gives ``25:00:00``. Fractional seconds are truncated. A negative
	duration is rendered with a leading ``-``.
	"""
	total = int(seconds)
	sign = "-" if total < 0 else ""
	hours, remainder = divmod(abs(total), 3600)
	minutes, secs = divmod(remainder, 60)
	return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"

def print_statistics(stats):
	"""Print evaluation counts and timings summed over all runs in `stats`.

	The header line uses the module-level `experiment_name` and the generation
	count of the first run. The k-fold parts of both summary lines are only
	printed when their totals are non-zero. Durations are formatted with `hms`.
	"""
	def total(attribute):
		# sum one numeric attribute over all runs
		return sum(getattr(run, attribute) for run in stats)

	first = stats[0]
	print(f"\n{experiment_name}: {first.run_generation+1} generations")

	total_evaluations = total('evaluations_total')
	k_fold_evaluations = total('evaluations_k_folds')
	cache_hits = total('evaluations_cache_hits')
	invalid = total('evaluations_invalid')
	# the optional k-fold count goes into a balanced pair of parentheses
	k_fold_part = f" ({k_fold_evaluations} for k-folds)" if k_fold_evaluations else ""
	print(f"{total_evaluations} evaluations{k_fold_part}, {cache_hits} cache hits, {invalid} invalid")

	run_time = total('run_time')
	eval_time = total('eval_time')
	eval_time_this_run = total('eval_time_this_run')
	eval_time_k_folds = total('eval_time_k_folds')
	eval_time_k_folds_this_run = total('eval_time_k_folds_this_run')
	k_fold_time_part = f", k-folds: {hms(eval_time_k_folds)} (this run {hms(eval_time_k_folds_this_run)})" if eval_time_k_folds else ""
	print(f"runtime {hms(run_time)}, evaluation time {hms(eval_time)} (this run {hms(eval_time_this_run)})" + k_fold_time_part)

def reduced_legend(ax, population_size, additional_entries=1):
	"""Draw a legend that keeps only one of the per-individual population entries.

	Of all legend entries registered on `ax`, entry 0 and the
	`additional_entries` entries starting at index `population_size` are kept;
	every other entry is left out. The grid of `ax` is switched on as well.
	"""
	all_handles, all_labels = ax.get_legend_handles_labels()
	keep = {0, *range(population_size, population_size + additional_entries)}

	def pick(items):
		# keep only the items whose position is one of the selected legend slots
		return [item for position, item in enumerate(items) if position in keep]

	ax.legend(pick(all_handles), pick(all_labels), loc='best', fontsize=15)
	ax.grid(True)

def default_ax():
	"""Create a new 8x6 figure and return its single axes object."""
	_, axes = plt.subplots(figsize=(8, 6))
	return axes

def plot_metric(stat, m, ax=None):
	"""Plot metric `m` of a single run over its generations.

	Draws one grey marker series per individual of the population, the k-fold
	values of the best individual with error bars (only if there are any), the
	best individual of each generation and the overall best individual. If no
	`ax` is given, a new figure is created with `default_ax`.
	"""
	ax = default_ax() if ax is None else ax

	# rows are generations, columns are the individuals of the population
	per_generation = np.array(stat.metric_generation(m))
	ngenerations, population_size = per_generation.shape
	generations = np.arange(0, ngenerations)

	best_values = stat.best.metric(m)
	ax.set_title(f"{stat.metric_name(m)} (best: {round(best_values[-1], 4)})", fontsize=25)

	# every individual gets the same label; reduced_legend() keeps only the first one
	for individual in per_generation.T:
		ax.plot(individual, 'o', markersize=4, color='#C0C0C0', alpha=0.5, label='population', zorder=-32)

	k_fold_values = stat.best.metric_k_fold(m)
	if len(k_fold_values):
		ax.plot(generations, k_fold_values, '*-', color='cyan', alpha=1, label="K-folds of best")
		ax.errorbar(generations, k_fold_values, yerr=stat.best.metric_k_fold_std(m), color='cyan', alpha=1, zorder=10)

	ax.plot(stat.best_in_gen.metric(m), '*-', color='magenta', alpha=0.5, label='best in generation')
	ax.plot(best_values, '*-', color='blue', alpha=0.5, label='best')

	ax.set_xlim(0, ngenerations)
	ax.set_ylim(RunStatistics.metric_ylimits(m))
	if m == 1:
		# metric 1 gets a fixed major tick spacing of 10000 on the y axis
		ax.yaxis.set_major_locator(ticker.MultipleLocator(10000))
	reduced_legend(ax, population_size, 2)

def plot_different_accuracies(stat, ax=None):
	"""Plot the accuracy measures recorded for the best individual of a run.

	Training, validation, (test) and final test accuracy are always drawn.
	The averaged k-fold accuracy (with error bars) and the averaged k-fold
	final accuracy are added only when k-fold accuracies are present.
	The y axis is fixed to the range 0.95 .. 1.0. If no `ax` is given,
	a new figure is created with `default_ax`.
	"""
	ax = default_ax() if ax is None else ax
	best = stat.best
	ngenerations = stat.run_generation + 1

	ax.set_title('Different Accuracy Measures', fontsize=25)

	# (values, plot options) of the curves that are always drawn, in legend order
	curves = (
		(best.train_accuracy, {'label': "training accuracy"}),
		(best.val_accuracy, {'label': "validation accuracy"}),
		(best.accuracy, {'color': 'blue', 'label': "(test) accuracy"}),
		(best.final_test_accuracy, {'label': "final test accuracy"}),
	)
	for values, options in curves:
		ax.plot(values, **options)

	if len(best.k_fold_accuracy):
		generations = np.arange(0, ngenerations)
		ax.plot(generations, best.k_fold_accuracy, '*-', color='cyan', label="avg K-fold accuracy")
		ax.errorbar(generations, best.k_fold_accuracy, yerr=best.k_fold_accuracy_std, color='cyan', zorder=10)
		ax.plot(best.k_fold_final_accuracy, label="avg K-fold final accuracy")

	ax.set_xlim(0, ngenerations)
	ax.set_ylim(0.95, 1.0)
	ax.legend(fontsize=12)
	ax.grid(True)

def multi_plot_metric(stats, m, ax=None):
	"""Plot metric `m` for all runs in `stats` in a single diagram.

	The per-generation values of all runs are stacked side by side and drawn as
	grey markers; on top, the best-individual curve of every run is drawn.
	If no `ax` is given, a new figure is created with `default_ax`.
	"""
	ax = default_ax() if ax is None else ax
	run_count = len(stats)

	# rows are generations, columns are the individuals of all runs next to each other
	combined = np.hstack([run.metric_generation(m) for run in stats])
	ngenerations, all_population_size = combined.shape

	ax.set_title(f"{stats[0].metric_name(m)} over {run_count} runs", fontsize=25)
	for individual in combined.T:
		ax.plot(individual, 'o', markersize=4, color='#C0C0C0', alpha=0.5, label='population', zorder=-32)
	for run in stats:
		ax.plot(run.best.metric(m), '-', color='blue', alpha=0.5, label='best')

	ax.set_xlim(0, ngenerations)
	ax.set_ylim(RunStatistics.metric_ylimits(m))
	if m == 1:
		# metric 1 gets a fixed major tick spacing of 10000 on the y axis
		ax.yaxis.set_major_locator(ticker.MultipleLocator(10000))
	reduced_legend(ax, all_population_size)

def calculate_statistics(stat, m, all_stats=None):
	"""Summarise the final best value of metric `m` over several runs.

	Parameters:
		stat: not used for the calculation; kept so that existing calls
			`calculate_statistics(stat, m)` keep working.
		m: index of the metric. For m == 1 the smallest value counts as the
			best one, for every other metric the largest value does.
		all_stats: the runs to summarise. When omitted, the module-level
			list `stats` is used (what the function has always read).

	Returns:
		A tuple (mean, standard deviation, worst value, best value,
		index of the run with the best value).
	"""
	runs = stats if all_stats is None else all_stats
	# final entry of each run's best-individual history for this metric
	values = [run.best.metric(m)[-1] for run in runs]
	if m == 1:
		worst, best = np.max(values), np.min(values)
		best_index = int(np.argmin(values))
	else:
		worst, best = np.min(values), np.max(values)
		best_index = int(np.argmax(values))
	return np.mean(values), np.std(values), worst, best, best_index

# Summary over all loaded runs
print_statistics(stats)
if len(stats) > 1:
	# one combined diagram per metric
	for m in (0, 1, 2):
		multi_plot_metric(stats, m)

	print()
	columns = ['_', 'Average', 'Std', 'Worst', 'Best', 'Best run']
	data = []
	for m in (0, 1, 2):
		mean, std, worst, best, best_index = calculate_statistics(stat, m)
		row = [stat.metric_name(m), mean, std, worst, best, best_index]
		data.append(row)
	df = pd.DataFrame(data, columns=columns)
	display(df)
	# continue with the run listed as 'Best run' in the last table row (metric 2)
	best_parameter_index = data[-1][-1]
	stat = stats[best_parameter_index]

# Detail diagrams for the selected run
for m in (0, 1, 2):
	plot_metric(stat, m)
plot_different_accuracies(stat)

In [None]:
# Plot the recorded step width values of the selected run, one line per (module, variable) pair.
stepwidth_series = stat.stepwidth_stats
if stepwidth_series:
	fig, ax = plt.subplots()
	for module, var, values in stepwidth_series:
		series_label = f"{module}/{var}"
		ax.plot(values, label=series_label)
	ax.legend()