In [None]:
from matplotlib.ticker import MaxNLocator
from statistics import mean, median, stdev
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to them are picked up.
%reload_ext autoreload
%autoreload 2

# Do not truncate rows when pandas objects are displayed.
pd.set_option('display.max_rows', None)

In [None]:
# Dataset names as they appear in the `dataset` column of the performance logs;
# the plots below use this order for their rows.
datasets = [
    "abalone", "adult", "cancer", "card", "covtype", "gene",
    "glass", "heart", "horse", "madelon", "optdigits", "page-blocks",
    "pendigits", "poker", "satimage", "segmentation", "shuttle", "soybean",
    "spect", "thyroid", "vehicle", "waveform",
]

# The three lists below are parallel: entry i of each describes the same
# stopping reason (log value, legend label, scatter marker).
reasons = [
    "consecincrease",
    "noprogress",
    "maxiter",
]
fullreasons = [
    "$UP_4$",
    "$P_k(t) < 0.1$",
    "$t > 3000$",
]
reasonmarkers = [
    "o",
    "s",
    "d",
]

In [None]:
# Load the MLP performance log and derive `epochoverhead`: the number of epochs
# run after the best epoch (totalepochs - bestepoch).
df = pd.read_csv("../log/performance_mlp.txt").assign(
    epochoverhead=lambda runs: runs["totalepochs"] - runs["bestepoch"]
)
# Disabled alternative that collapses the log to per-dataset medians:
# df = pd.pivot_table(df, index="dataset", aggfunc=median)

In [None]:
# Epoch overhead box plots: one horizontal box plot per dataset, stacked in a
# single column, each row with its own x axis.
ZOOM3000 = False  # True: clip every x axis to [0, 3000]; False: only pin the left edge at 0
zoomtext = "3000" if ZOOM3000 else "max"

# Size the grid from `datasets` instead of a hard-coded row count, so that the
# figure stays consistent if the dataset list changes. squeeze=False keeps
# `axs` two-dimensional even when there is only a single dataset.
fig, axs = plt.subplots(len(datasets), 1, figsize=(12, 18), squeeze=False)
fig.tight_layout()

# Shared styling for every box plot.
boxprops = dict(color="b")
flierprops = dict(markeredgecolor="#D3691D", markersize=8)
medianprops = dict(color="darkred")
whiskerprops = dict(color="b")

for ax, dataset in zip(axs[:, 0], datasets):
    data = df[df.dataset == dataset].epochoverhead.to_list()
    bp = ax.boxplot(
        data,
        sym=".",
        widths=.5,
        boxprops=boxprops,
        medianprops=medianprops,
        whiskerprops=whiskerprops,
        flierprops=flierprops,
        patch_artist=True,  # required so the box is a patch whose face colour can be set
        vert=False,
    )
    bp["boxes"][0].set_facecolor("azure")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(labelsize=12)
    # Each row holds exactly one box, so its single y tick is labelled with the dataset name.
    ax.set_yticklabels([dataset])
    if ZOOM3000:
        ax.set_xlim(left=0, right=3000)
    else:
        ax.set_xlim(left=0)
    ax.grid(True, color="#DDDDDD")

# Only the bottom row carries the x-axis label.
axs[-1, 0].set_xlabel("Number of epochs", fontsize=12)

fig.suptitle(f"Distributions of epoch overheads per dataset (0 to {zoomtext} zoom)", fontsize=24)
# Leave room for the suptitle and space the rows out; this overrides the
# `top` and `hspace` values chosen by tight_layout() above.
fig.subplots_adjust(top=0.96, hspace=0.65)
plt.show()

In [None]:
# Epoch overhead scatter plot: one row per dataset, one colour/marker per
# stopping reason.
rlimit = 3000
fig = plt.figure(figsize=(14, 14))
plt.rcParams.update({"font.size": 13})
ax = fig.add_subplot(1, 1, 1)

# Plot and immediately remove a placeholder series whose only purpose is to
# fix the order of the dataset names on the Y axis.
placeholder = ax.scatter(
    np.linspace(0, len(datasets), len(datasets)),
    list(reversed(datasets)),
)
placeholder.remove()

for idx, (reason, marker) in enumerate(zip(reasons, reasonmarkers)):
    subset = df[df.stopreason == reason]
    ax.scatter(
        subset.epochoverhead,
        subset.dataset,
        c=f"C{idx}",
        marker=marker,
        s=60,
        alpha=0.5,
    )

ax.legend(fullreasons, title="Stopping reason")
# Pad the visible range by 5% of rlimit on both sides.
ax.set_xlim(left=-rlimit * 0.05, right=rlimit * 1.05)
ax.set_xlabel("epoch number")
ax.set_title(f"Distributions of epoch overheads per dataset (0 to {rlimit} zoom)", fontsize=24)
ax.grid(True, axis="x", color="#DDDDDD")
plt.show()

In [None]:
# Best epoch scatter plot: one row per dataset, one colour/marker per
# stopping reason.
rlimit = 3000
fig = plt.figure(figsize=(14, 14))
plt.rcParams.update({"font.size": 13})
ax = fig.add_subplot(1, 1, 1)

# Plot and immediately remove a placeholder series whose only purpose is to
# fix the order of the dataset names on the Y axis.
placeholder = ax.scatter(
    np.linspace(0, len(datasets), len(datasets)),
    list(reversed(datasets)),
)
placeholder.remove()

for idx, (reason, marker) in enumerate(zip(reasons, reasonmarkers)):
    subset = df[df.stopreason == reason]
    ax.scatter(
        subset.bestepoch,
        subset.dataset,
        c=f"C{idx}",
        marker=marker,
        s=60,
        alpha=0.5,
    )

ax.legend(fullreasons, title="Stopping reason")
# Pad the visible range by 5% of rlimit on both sides.
ax.set_xlim(left=-rlimit * 0.05, right=rlimit * 1.05)
ax.set_xlabel("epoch number")
ax.set_title(f"Distributions of $t_{{best}}$ values per dataset (0 to {rlimit} zoom)", fontsize=24)
ax.grid(True, axis="x", color="#DDDDDD")
plt.show()

In [None]:
# Per-dataset medians of the MLP runs.
# `fullmeddf` keeps the unrounded medians; `meddf` is the rounded/formatted
# version that is written to mlp_results.csv.
excluded_columns = ["stopreason", "nin", "nout", "run", "atest", "atrain", "loss"]
reported_columns = ["ftrain", "ftest", "time", "bestepoch", "totalepochs", "epochoverhead"]

meddf = pd.pivot_table(df.drop(excluded_columns, axis=1), index="dataset", aggfunc=median)
meddf = meddf[reported_columns]  # fix the column order of the exported table
fullmeddf = meddf.copy()

# Round both ftrain and ftest to three decimals.
for score_column in ("ftrain", "ftest"):
    meddf[score_column] = meddf[score_column].apply(round, args=(3,))

# Format the time with four significant digits; the round trip through float()
# turns the "%.4g" text back into a plain float representation.
meddf["time"] = meddf["time"].apply(lambda value: "%s" % float("%.4g" % value))

meddf.to_csv("mlp_results.csv")

In [None]:
# Median time per dataset from the least-squares regression log, and the ratio
# of the median MLP time to it (the division aligns on the dataset index).
llsq_runs = pd.read_csv("../log/performance_llsqreg.txt")
llsqtime = pd.pivot_table(
    llsq_runs[["dataset", "time"]],
    index="dataset",
    aggfunc=median,
)["time"]
reltimes = fullmeddf["time"] / llsqtime
reltimes
# Disabled: mean of the ratios over all datasets.
# reltimes.aggregate(mean, axis="index")