In [None]:
from scipy.stats import ttest_rel, ttest_ind
from statistics import stdev
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%reload_ext autoreload
%autoreload 2

In [None]:
# Names of the benchmark datasets compared in this notebook; they are matched
# against the `dataset` column of the result logs and reused as plot labels.
datasets = [
    "cancer",
    "card",
    "gene",
    "glass",
    "heart",
    "horse",
    "mushroom",
    "soybean",
    "thyroid",
]

In [None]:
# Preliminary GA result logs, one file per crossover operator.  Both are parsed
# as CSV; later cells read the columns `dataset`, `ftest`, `ftrain`, `time`,
# `nin`, `nout` and `sel` from them.
uniform_log = "../../log/prelim_ga/ga_uniform_preliminary.txt"
twopoint_log = "../../log/prelim_ga/ga_twopoint_preliminary.txt"

uniform = pd.read_csv(uniform_log)
twopoint = pd.read_csv(twopoint_log)

In [None]:
# Paired t-test (scipy `ttest_rel`) of uniform vs. two-point crossover, run once
# per dataset on the test F1 score and once on the run time.  Each verdict is
# either "equal" (statistic is 0 or NaN) or "<winner>, <p-value below 0.05?>".
diffsf1, diffstime = [], []
for name in datasets:
    uni_runs = uniform[uniform.dataset == name]
    two_runs = twopoint[twopoint.dataset == name]

    # F1: a positive statistic means the uniform mean is higher, so uniform wins.
    f1_stat, f1_p = ttest_rel(uni_runs.ftest, two_runs.ftest)
    if np.isnan(f1_stat) or f1_stat == 0:
        f1_verdict = "equal"
    else:
        f1_winner = "Uniform" if f1_stat > 0 else "Two-point"
        f1_verdict = f"{f1_winner}, {f1_p < 0.05}"
    diffsf1.append(f1_verdict)

    # Time: the direction is flipped, a negative statistic means uniform is faster.
    time_stat, time_p = ttest_rel(uni_runs.time, two_runs.time)
    if np.isnan(time_stat) or time_stat == 0:
        time_verdict = "equal"
    else:
        time_winner = "Uniform" if time_stat < 0 else "Two-point"
        time_verdict = f"{time_winner}, {time_p < 0.05}"
    diffstime.append(time_verdict)

# One row per dataset, one column per compared metric; persisted next to the notebook.
verdicts = np.array([diffsf1, diffstime]).T
diffs = pd.DataFrame(verdicts, index=datasets, columns=["f1", "time"])
diffs.to_csv("ga_ux-vs-2x.csv")

In [None]:
# Per-dataset summary tables for each crossover operator.  The `sel` column is
# dropped before aggregating; `pivot_table` then yields one row per dataset
# (indexed by dataset name) with the mean (its default aggregation) or the
# sample standard deviation (`statistics.stdev`) of every remaining column.
unimeans = pd.pivot_table(uniform.drop(columns=["sel"]), index="dataset")
unistds = pd.pivot_table(uniform.drop(columns=["sel"]), index="dataset", aggfunc=stdev)
twomeans = pd.pivot_table(twopoint.drop(columns=["sel"]), index="dataset")
twostds = pd.pivot_table(twopoint.drop(columns=["sel"]), index="dataset", aggfunc=stdev)

# Tick labels of the form "<dataset>\n(<nin>-<nout>)".  The values are looked up
# by dataset *label*: the tables are indexed by name, so an integer key would
# rely on pandas' deprecated positional fallback and would attach the wrong
# numbers whenever `datasets` is not in the same order/length as the table rows.
# NOTE(review): nin/nout are presumably #features/#classes (per the x-axis label
# used in the plots) -- confirm against the log format.
nclass = [
    name + "\n(" + str(unimeans.loc[name, "nin"]) + "-" + str(unimeans.loc[name, "nout"]) + ")"
    for name in datasets
]

In [None]:
# Grouped bar chart of the mean F1 score per dataset: uniform crossover (UX) on
# the left of each group, two-point crossover (2X) on the right.  Train scores
# are drawn as nearly transparent bars over the same positions as the test
# scores; error bars show the standard deviation.
spc = np.arange(len(datasets))  # x position of the left bar in each group
w = 0.45  # bar width; also the offset of the right bar
errstyle = dict(elinewidth=1, capsize=5)
errstyletr = dict(elinewidth=1, capsize=5, alpha=0.4)  # fainter error bars for train
fig = plt.figure(figsize=(12, 7))
plt.rcParams.update({"font.size": 13})
plt.bar(spc, unimeans.ftest, width=w, label="UX (test)", edgecolor="k", yerr=unistds.ftest, error_kw=errstyle)
plt.bar(spc, unimeans.ftrain, width=w, label="UX (train)", edgecolor="k", yerr=unistds.ftrain, alpha=0.1, error_kw=errstyletr)
plt.bar(spc + w, twomeans.ftest, width=w, label="2X (test)", edgecolor="k", yerr=twostds.ftest, error_kw=errstyle)
plt.bar(spc + w, twomeans.ftrain, width=w, label="2X (train)", edgecolor="k", yerr=twostds.ftrain, alpha=0.1, error_kw=errstyletr)
plt.legend(loc="lower left")
plt.xticks(spc + w / 2, nclass)  # centre each tick between the two bars of a group
plt.xlabel("Dataset (#features - #classes)", fontsize=18)
# Raw string: "\p" is not a valid Python escape, so the non-raw form triggers an
# invalid-escape warning; the text handed to matplotlib is unchanged.
plt.ylabel(r"$F_1$-score (mean $\pm$ stdev)", fontsize=18)
plt.title("Uniform crossover vs Two-point crossover in GA - $F_1$", fontsize=24)

In [None]:
# Grouped bar chart of the mean run time per dataset for uniform (UX) and
# two-point (2X) crossover, with standard-deviation error bars.  Reuses the bar
# positions (`spc`), width (`w`) and error-bar style set up for the F1 chart.
fig = plt.figure(figsize=(12, 7))
plt.rcParams.update({"font.size": 13})
plt.bar(spc, unimeans.time, width=w, label="UX", edgecolor="k", yerr=unistds.time, error_kw=errstyle)
plt.bar(spc + w, twomeans.time, width=w, label="2X", edgecolor="k", color="C2", yerr=twostds.time, error_kw=errstyle)
plt.legend(loc="upper left")
plt.xticks(spc + w / 2, nclass)  # centre each tick between the two bars of a group
plt.xlabel("Dataset (#features - #classes)", fontsize=18)
# Raw string: "\p" is not a valid Python escape, so the non-raw form triggers an
# invalid-escape warning; the text handed to matplotlib is unchanged.
plt.ylabel(r"Time in seconds (mean $\pm$ stdev)", fontsize=18)
plt.title("Uniform crossover vs Two-point crossover in GA - training time", fontsize=24)

In [None]:
# Same run-time chart as before, but with the "gene" dataset removed.
# NOTE(review): presumably gene's run time dwarfs the others and hides their
# bars -- confirm the motivation.
# Caution: this cell rebinds the summary tables and mutates `datasets`/`nclass`
# in place, so cells executed afterwards no longer see "gene".
unimeans, unistds, twomeans, twostds = (
    table[table.index != "gene"] for table in (unimeans, unistds, twomeans, twostds)
)
# Find "gene" by value rather than assuming it sits at index 2: this keeps the
# cell safe to re-run (no-op once removed) and avoids an IndexError on a short
# list.  `nclass` was built in the same order as `datasets`, so the same
# position is removed from both.
if "gene" in datasets:
    gene_pos = datasets.index("gene")
    del datasets[gene_pos]
    del nclass[gene_pos]
spc = np.arange(len(datasets))  # recompute bar positions for the shorter list
fig = plt.figure(figsize=(12, 7))
plt.rcParams.update({"font.size": 13})
plt.bar(spc, unimeans.time, width=w, label="UX", edgecolor="k", yerr=unistds.time, error_kw=errstyle)
plt.bar(spc + w, twomeans.time, width=w, label="2X", edgecolor="k", color="C2", yerr=twostds.time, error_kw=errstyle)
plt.legend(loc="upper left")
plt.xticks(spc + w / 2, nclass)  # centre each tick between the two bars of a group
plt.xlabel("Dataset (#features - #classes)", fontsize=18)
# Raw string: "\p" is not a valid Python escape, so the non-raw form triggers an
# invalid-escape warning; the text handed to matplotlib is unchanged.
plt.ylabel(r"Time in seconds (mean $\pm$ stdev)", fontsize=18)
plt.title("Uniform crossover vs Two-point crossover in GA - training time", fontsize=24)