In [1]:
import pandas as pd
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import ScalarFormatter
import numpy as np

In [2]:
def compute_mean_std_table(data: pd.DataFrame, column: str) -> pd.DataFrame:
    """Summarise "Min Fidelity" for every distinct value of ``column``.

    Args:
        data: Frame containing a "Min Fidelity" column and ``column``.
        column: Name of the column whose values define the groups.

    Returns:
        A frame indexed by the values of ``column`` (index named ``column``)
        with the two-level columns ("Min Fidelity", "mean") and
        ("Min Fidelity", "std"), rounded to 4 decimal places.
    """
    per_group = data.groupby(column).agg({"Min Fidelity": ["mean", "std"]})
    per_group = per_group.rename_axis(column)
    return per_group.round(4)


def compute_f_statistic_p_value(
    data: pd.DataFrame, column: str
) -> tuple[float, float]:
    """Run a one-way ANOVA of "Min Fidelity" across the groups of ``column``.

    Args:
        data: Frame containing a "Min Fidelity" column and ``column``.
        column: Name of the column whose distinct values define the groups.

    Returns:
        ``(f_statistic, p_value)`` as reported by ``scipy.stats.f_oneway``,
        each rounded to 4 decimal places. Because of that rounding, any
        p-value below 5e-5 is returned as 0.0.
    """
    # One array of "Min Fidelity" samples per group, in groupby order.
    group_values = [
        group["Min Fidelity"].values for _, group in data.groupby(column)
    ]

    f_statistic, p_value = f_oneway(*group_values)

    return round(f_statistic, 4), round(p_value, 4)

In [3]:
# Location of the results spreadsheet, given relative to the working directory;
# it is read into `data` with pd.read_excel.
excel_file_name = './excel_files/hyperparameter_tuning_results_genetic_algorithms.xlsx'

In [4]:
# Load the results spreadsheet and round every numeric column to 5 decimals.
data = pd.read_excel(excel_file_name).round(5)

In [5]:
# Order the runs from highest to lowest minimum fidelity. The name is rebound
# to the sorted frame instead of mutating it with inplace=True, which pandas
# discourages and which prevents method chaining.
data = data.sort_values(by="Min Fidelity", ascending=False)
# Show the three best-performing configurations.
data.head(3)

Unnamed: 0,Sequence Length,Std,Population Size,Num Generations,Crossover Rate,Elitism Rate,Init Sol Type,Num of Experiments,Min Fidelity,I,X,Y,Z,H,R_X_PI/4
0,32,0.03125,40,25,0.2,0.24,noiseless_ideal,1000,0.99736,0.99831,0.99789,0.99746,0.99736,0.9982,0.998
1,4,0.03125,40,25,0.8,0.12,noiseless_ideal,1000,0.99725,0.99785,0.99844,0.99767,0.99839,0.9974,0.99725
2,32,0.03125,40,25,0.4,0.24,noiseless_ideal,1000,0.99629,0.99757,0.99809,0.99872,0.9975,0.99629,0.99751


In [6]:
# Overall mean and standard deviation of "Min Fidelity" across all runs,
# each rounded to 4 decimals before being reported.
min_fidelity = data["Min Fidelity"]
mean_min_fidelity = round(min_fidelity.mean(), 4)
std_min_fidelity = round(min_fidelity.std(), 4)
print(f"Mean min fidelity: {mean_min_fidelity} +- {std_min_fidelity}")

Mean min fidelity: 0.8694 +- 0.1934


In [7]:
# Columns of `data` treated as hyperparameters in the per-factor analysis.
hyperparameters = [
    "Sequence Length",
    "Std",
    "Population Size",
    "Num Generations",
    "Crossover Rate",
    "Init Sol Type",
    "Elitism Rate",
    "Num of Experiments",
]


# For each hyperparameter: show the per-level mean/std table of "Min Fidelity",
# then print the one-way ANOVA result across its levels.
for hp in hyperparameters:
    summary_table = compute_mean_std_table(data, hp)
    display(summary_table)
    anova_result = compute_f_statistic_p_value(data, hp)
    f_statistic, p_value = anova_result
    print(f"F-statistic: {f_statistic}, P-value: {p_value}")

Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Sequence Length,Unnamed: 1_level_2,Unnamed: 2_level_2
4,0.911,0.0744
8,0.9121,0.0778
16,0.9143,0.0838
32,0.9089,0.1176
64,0.8905,0.1575
128,0.8685,0.1982
256,0.8147,0.2663
512,0.7356,0.3219


F-statistic: 87.464, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Std,Unnamed: 1_level_2,Unnamed: 2_level_2
0.03125,0.8329,0.2711
0.0625,0.8813,0.1674
0.125,0.8941,0.093


F-statistic: 55.1345, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Population Size,Unnamed: 1_level_2,Unnamed: 2_level_2
10,0.8211,0.2144
20,0.8787,0.1843
40,0.9085,0.1684


F-statistic: 106.2798, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Num Generations,Unnamed: 1_level_2,Unnamed: 2_level_2
6,0.7906,0.2447
13,0.8846,0.1704
25,0.9331,0.1132


F-statistic: 301.1008, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Crossover Rate,Unnamed: 1_level_2,Unnamed: 2_level_2
0.2,0.8706,0.1959
0.4,0.8705,0.1915
0.8,0.8672,0.1928


F-statistic: 0.2022, P-value: 0.8169


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Init Sol Type,Unnamed: 1_level_2,Unnamed: 2_level_2
noiseless_ideal,0.9465,0.048
normal_distro,0.7586,0.2863
uniform_distro,0.9033,0.0927


F-statistic: 608.0267, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Elitism Rate,Unnamed: 1_level_2,Unnamed: 2_level_2
0.06,0.8694,0.192
0.12,0.8722,0.1926
0.24,0.8667,0.1956


F-statistic: 0.3946, P-value: 0.674


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Num of Experiments,Unnamed: 1_level_2,Unnamed: 2_level_2
60,0.7219,0.2559
120,0.8012,0.2355
130,0.8378,0.1929
240,0.8487,0.2249
250,0.9037,0.1355
260,0.8955,0.16
500,0.9395,0.1042
520,0.9207,0.1442
1000,0.9561,0.0886


F-statistic: 111.6837, P-value: 0.0


In [8]:
# Tukey HSD post-hoc test: pairwise comparison of mean "Min Fidelity" between
# the levels of "Std" at a 5% family-wise error rate.
posthoc = pairwise_tukeyhsd(
    endog=data["Min Fidelity"],
    groups=data["Std"],
    alpha=0.05,
)
print(posthoc)

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower  upper  reject
----------------------------------------------------
0.03125 0.0625   0.0484    0.0   0.034 0.0628   True
0.03125  0.125   0.0612    0.0  0.0468 0.0756   True
 0.0625  0.125   0.0128 0.0944 -0.0016 0.0272  False
----------------------------------------------------


In [9]:
# Output format switch: exactly one of PDF / PNG is active.
use_pdf = True
use_png = not use_pdf
file_extension = "pdf" if use_pdf else "png"

# Only the PDF path changes the matplotlib backend.
if use_pdf:
    mpl.use("pdf")

In [10]:
# Candidate output directories for saved figures; `path_to_save` picks one of
# them based on `use_pdf`. Both end in "/" because file names are appended by
# plain string concatenation.
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# not save figures on another machine without editing them.
report_path= "/home/chriswise/github/Honours-Research-ML-for-QC/Report/sections/grad_free_results/figures/"
windows_path = "/mnt/c/Users/ChrisWiseLocal/OneDrive/Documents/Uni/UNSW/2023/Honours Research/Seminars/report_photos/"

In [11]:
# PDFs go to the report directory; PNGs go to the Windows-side directory.
if use_pdf:
    path_to_save = report_path
else:
    path_to_save = windows_path

In [12]:
# Default figure geometry and text styling shared by the plots.
width = 3.487
aspect_ratio = 1.618
height = width / aspect_ratio
alpha_value = 0.75
title_font_size = 9
axes_label_size = title_font_size - 2

# Serif font rendered through LaTeX, with axis labels slightly smaller than titles.
plt.rc("font", family="serif", serif="cm10")
plt.rc("text", usetex=True)
plt.rc("axes", labelsize=axes_label_size)

In [13]:
# Mean "Min Fidelity" for each distinct sequence length.
fidelity_by_sequence_length = data.groupby("Sequence Length")["Min Fidelity"]
mean_min_fidelity = fidelity_by_sequence_length.mean()

In [14]:
# List the distinct sequence lengths present in the data, in ascending order.
unique_sequence_lengths = data["Sequence Length"].unique()
print(sorted(unique_sequence_lengths))

[4, 8, 16, 32, 64, 128, 256, 512]


In [15]:
# First colour of matplotlib's active property cycle, reused for every series.
cycle_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
default_blue = cycle_colors[0]

In [16]:
# Figure: mean minimum fidelity (+/- one std, clipped to [0, 1]) per sequence length.

# width as measured in inkscape
width = 3.487
height = width / 1.618
alpha_value = 0.85
title_font_size = 9
label_font_size = title_font_size - 2

fig, ax = plt.subplots(figsize=(width, height))

plt.rc("font", family="serif", serif="Times")
plt.rc("text", usetex=True)
plt.rc("axes", labelsize=label_font_size)

fig.suptitle(
    "Mean Minimum Fidelity for Sequence Lengths (Genetic Algorithms)",
    fontsize=title_font_size,
)

sequence_lengths = sorted(data["Sequence Length"].unique())

# Per-sequence-length statistics of "Min Fidelity".
fidelity_by_length = data.groupby("Sequence Length")["Min Fidelity"]
mean_min_fidelity = fidelity_by_length.mean()
std_min_fidelity = fidelity_by_length.std()

# Clip mean +/- std to [0, 1], then convert the clipped bounds back into
# asymmetric error-bar lengths measured from the mean.
upper_bounds = np.clip(mean_min_fidelity + std_min_fidelity, 0, 1)
lower_bounds = np.clip(mean_min_fidelity - std_min_fidelity, 0, 1)
corrected_std_positive = upper_bounds - mean_min_fidelity
corrected_std_negative = mean_min_fidelity - lower_bounds

# Same colour and transparency for the connecting line and the error bars.
series_style = dict(color=default_blue, alpha=alpha_value)

ax.plot(sequence_lengths, mean_min_fidelity, **series_style)
ax.errorbar(
    sequence_lengths,
    mean_min_fidelity,
    yerr=[corrected_std_negative, corrected_std_positive],
    fmt=".",
    capsize=2,
    capthick=1,
    **series_style,
)
ax.set_xscale("log", base=2)

plt.xticks(sequence_lengths, fontsize=label_font_size)
plt.yticks(fontsize=label_font_size)

ax.set_xlabel("Sequence Length", fontsize=label_font_size)
ax.set_ylabel("Mean Minimum Fidelity", fontsize=label_font_size)
ax.set_ylim(-0.1, 1.1)

# Print tick values as plain numbers rather than in scientific notation.
formatter = ScalarFormatter()
formatter.set_scientific(False)
ax.xaxis.set_major_formatter(formatter)


# fig.tight_layout()
output_file = path_to_save + f"ga_results_sequence_length.{file_extension}"
plt.savefig(output_file, dpi=500, bbox_inches="tight")
# plt.show()

In [17]:
# Figure: mean minimum fidelity (+/- one std, clipped to [0, 1]) per number of experiments.

# width as measured in inkscape
width = 3.487
height = width / 1.618
alpha_value = 0.85
title_font_size = 9
label_font_size = title_font_size - 2

fig, ax = plt.subplots(figsize=(width, height))

plt.rc("font", family="serif", serif="Times")
plt.rc("text", usetex=True)
plt.rc("axes", labelsize=label_font_size)

fig.suptitle(
    "Mean Minimum Fidelity for Experiments Performed (Genetic Algorithms)",
    fontsize=title_font_size,
)

num_experiments = sorted(data["Num of Experiments"].unique())

# Per-experiment-count statistics of "Min Fidelity".
fidelity_by_experiments = data.groupby("Num of Experiments")["Min Fidelity"]
mean_min_fidelity = fidelity_by_experiments.mean()
std_min_fidelity = fidelity_by_experiments.std()

# Clip mean +/- std to [0, 1], then convert the clipped bounds back into
# asymmetric error-bar lengths measured from the mean.
upper_bounds = np.clip(mean_min_fidelity + std_min_fidelity, 0, 1)
lower_bounds = np.clip(mean_min_fidelity - std_min_fidelity, 0, 1)
corrected_std_positive = upper_bounds - mean_min_fidelity
corrected_std_negative = mean_min_fidelity - lower_bounds

# Same colour and transparency for the connecting line and the error bars.
series_style = dict(color=default_blue, alpha=alpha_value)

ax.plot(num_experiments, mean_min_fidelity, **series_style)
ax.errorbar(
    num_experiments,
    mean_min_fidelity,
    yerr=[corrected_std_negative, corrected_std_positive],
    fmt=".",
    capsize=2,
    capthick=1,
    **series_style,
)

# ax.set_xscale("log", base=2)
# Linear x axis with a tick every 100 from 0 to 1000.
plt.xticks(list(range(0, 1001, 100)), fontsize=label_font_size)
plt.yticks(fontsize=label_font_size)

ax.set_xlabel("Relative Number of Experiments Performed", fontsize=label_font_size)
ax.set_ylabel("Mean Minimum Fidelity", fontsize=label_font_size)
ax.set_ylim(-0.1, 1.1)

# Print tick values as plain numbers rather than in scientific notation.
formatter = ScalarFormatter()
formatter.set_scientific(False)
ax.xaxis.set_major_formatter(formatter)


# fig.tight_layout()
output_file = path_to_save + f"ga_results_num_experiments.{file_extension}"
plt.savefig(output_file, dpi=500, bbox_inches="tight")
# plt.show()