In [1]:
import pandas as pd
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import ScalarFormatter
import numpy as np

In [2]:
def compute_mean_std_table(data: pd.DataFrame, column: str) -> pd.DataFrame:
    """Summarise "Min Fidelity" for every distinct value of ``column``.

    Args:
        data: Frame holding a "Min Fidelity" column and the ``column`` to
            group by.
        column: Name of the column whose values define the groups.

    Returns:
        Frame indexed by ``column`` with two-level columns
        ("Min Fidelity", "mean") and ("Min Fidelity", "std"), rounded to
        four decimal places.
    """
    aggregations = {"Min Fidelity": ["mean", "std"]}
    summary = data.groupby(column).agg(aggregations)
    # Name the index after the grouping column explicitly.
    summary = summary.rename_axis(column)
    return summary.round(4)


def compute_f_statistic_p_value(
    data: pd.DataFrame, column: str
) -> "tuple[float, float]":
    """Run a one-way ANOVA of "Min Fidelity" across the groups of ``column``.

    Args:
        data: Frame holding a "Min Fidelity" column and the ``column`` to
            group by.
        column: Name of the column whose distinct values define the groups
            passed to ``scipy.stats.f_oneway``.

    Returns:
        ``(f_statistic, p_value)``, each rounded to four decimal places.
        Because of the rounding, any p-value below 0.00005 is reported
        as ``0.0``.
    """
    # One array of "Min Fidelity" values per group, in groupby order.
    samples = [
        group["Min Fidelity"].values for _, group in data.groupby(column)
    ]

    f_statistic, p_value = f_oneway(*samples)

    return round(f_statistic, 4), round(p_value, 4)

In [3]:
# Spreadsheet with the hill-climbing hyperparameter tuning results
# (path is relative to the notebook's working directory).
excel_file_name = "./excel_files/hyperparameter_tuning_results_hill_climbing.xlsx"

In [4]:
# Load the tuning results and round them to four decimal places.
data = pd.read_excel(excel_file_name).round(4)

In [5]:
# Order the runs from highest to lowest minimum fidelity. Reassigning the
# sorted frame (instead of sorting with inplace=True) avoids mutating the
# loaded frame behind other cells' backs while leaving `data` bound to the
# same sorted contents.
data = data.sort_values(by="Min Fidelity", ascending=False)
# Preview the three best-performing configurations.
data.head(3)

Unnamed: 0,Sequence Length,Num Iters,Std,Init Sol Type,Num of Experiments,Min Fidelity,I,X,Y,Z,H,R_X_PI/4
0,32,1000,0.0312,noiseless_ideal,1000,0.998,0.998,0.9987,0.9997,0.9991,0.9986,0.9985
1,32,500,0.0312,noiseless_ideal,500,0.9979,0.9983,0.9982,0.9994,0.9991,0.9987,0.9979
2,8,1000,0.0312,noiseless_ideal,1000,0.9973,0.9987,0.9982,0.9974,0.9985,0.9975,0.9973


In [6]:
# Mean and standard deviation of "Min Fidelity" over all runs.
min_fidelity = data["Min Fidelity"]
mean_min_fidelity = round(min_fidelity.mean(), 4)
std_min_fidelity = round(min_fidelity.std(), 4)
print(f"Mean min fidelity: {mean_min_fidelity} +- {std_min_fidelity}")

Mean min fidelity: 0.9286 +- 0.1301


In [7]:
# Columns used, one at a time, as the grouping factor.
hyperparameters = [
    "Sequence Length",
    "Num Iters",
    "Std",
    "Init Sol Type",
    "Num of Experiments",
]

# For each hyperparameter: show the per-group mean/std table, then the
# one-way ANOVA result across its groups.
for hp in hyperparameters:
    summary_table = compute_mean_std_table(data, hp)
    display(summary_table)

    f_statistic, p_value = compute_f_statistic_p_value(data, hp)
    print(f"F-statistic: {f_statistic}, P-value: {p_value}")

Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Sequence Length,Unnamed: 1_level_2,Unnamed: 2_level_2
4,0.944,0.0808
8,0.9527,0.0758
16,0.9658,0.0385
32,0.9666,0.0392
64,0.9722,0.039
128,0.9273,0.1469
256,0.8716,0.1968
512,0.8288,0.2043


F-statistic: 8.6571, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Num Iters,Unnamed: 1_level_2,Unnamed: 2_level_2
125,0.8983,0.1401
250,0.9237,0.1358
500,0.9503,0.1056
1000,0.9422,0.1321


F-statistic: 3.0717, P-value: 0.0278


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Std,Unnamed: 1_level_2,Unnamed: 2_level_2
0.0312,0.8644,0.2228
0.0625,0.9528,0.0978
0.125,0.9548,0.0379
0.25,0.9426,0.0448


F-statistic: 11.4297, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Init Sol Type,Unnamed: 1_level_2,Unnamed: 2_level_2
noiseless_ideal,0.9714,0.0728
normal_distro,0.902,0.1512
uniform_distro,0.9125,0.1417


F-statistic: 11.1402, P-value: 0.0


Unnamed: 0_level_0,Min Fidelity,Min Fidelity
Unnamed: 0_level_1,mean,std
Num of Experiments,Unnamed: 1_level_2,Unnamed: 2_level_2
125,0.8983,0.1401
250,0.9237,0.1358
500,0.9503,0.1056
1000,0.9422,0.1321


F-statistic: 3.0717, P-value: 0.0278


In [8]:
# Tukey HSD pairwise comparison of "Min Fidelity" between the
# "Init Sol Type" groups, with alpha = 0.05.
posthoc = pairwise_tukeyhsd(
    endog=data["Min Fidelity"],
    groups=data["Init Sol Type"],
    alpha=0.05,
)

display(posthoc.summary())

group1,group2,meandiff,p-adj,lower,upper,reject
noiseless_ideal,normal_distro,-0.0694,0.0,-0.1067,-0.0321,True
noiseless_ideal,uniform_distro,-0.0589,0.0007,-0.0962,-0.0216,True
normal_distro,uniform_distro,0.0105,0.7838,-0.0268,0.0478,False


In [9]:
# Output format toggle: PDF when True, PNG otherwise.
use_pdf = True
use_png = not use_pdf
file_extension = "pdf" if use_pdf else "png"

# Switch matplotlib to the PDF backend only when PDF output is requested.
if use_pdf:
    mpl.use("pdf")

In [10]:
# First colour of matplotlib's current property cycle.
color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
default_blue = color_cycle[0]

In [11]:
# Absolute, machine-specific directories that saved figures are written to.
# Both end with "/" because file names are appended by string concatenation.
report_path = "/home/chriswise/github/Honours-Research-ML-for-QC/Report/sections/grad_free_results/figures/"
windows_path = "/mnt/c/Users/ChrisWiseLocal/OneDrive/Documents/Uni/UNSW/2023/Honours Research/Seminars/report_photos/"

In [12]:
# PDF figures are saved under report_path; PNG figures under windows_path.
if use_pdf:
    path_to_save = report_path
else:
    path_to_save = windows_path

In [13]:
# Figure geometry: height follows from the width via a 1.618 aspect ratio
# (approximately the golden ratio).
width = 3.487
height = width / 1.618
alpha_value = 0.75
title_font_size = 9

# Global text styling: LaTeX rendering, serif font, axis labels two points
# smaller than the title.
plt.rc("text", usetex=True)
plt.rc("font", family="serif", serif="cm10")
plt.rc("axes", labelsize=title_font_size - 2)

In [14]:
# Mean "Min Fidelity" for each sequence length (Series indexed by length).
fidelity_by_sequence_length = data.groupby("Sequence Length")["Min Fidelity"]
mean_min_fidelity = fidelity_by_sequence_length.mean()

In [15]:
# List the distinct sequence lengths in ascending order.
sequence_length_values = sorted(data["Sequence Length"].unique())
print(sequence_length_values)

[4, 8, 16, 32, 64, 128, 256, 512]


In [16]:
# Figure geometry; the width was measured in Inkscape.
width = 3.487
height = width / 1.618
alpha_value = 0.85
title_font_size = 9
label_font_size = title_font_size - 2  # ticks and axis labels

fig, ax = plt.subplots(figsize=(width, height))

plt.rc("font", family="serif", serif="Times")
plt.rc("text", usetex=True)
plt.rc("axes", labelsize=label_font_size)

fig.suptitle(
    "Mean Minimum Fidelity for Sequence Lengths (Hill Climbing)",
    fontsize=title_font_size,
)

sequence_lengths = sorted(data["Sequence Length"].unique())

# Per-sequence-length mean and standard deviation of "Min Fidelity".
fidelity_by_length = data.groupby("Sequence Length")["Min Fidelity"]
mean_min_fidelity = fidelity_by_length.mean()
std_min_fidelity = fidelity_by_length.std()

# Clip mean +/- std to [0, 1] so the error bars never extend outside that
# interval, then convert the clipped bounds back into asymmetric bar lengths.
upper_bounds = (mean_min_fidelity + std_min_fidelity).clip(lower=0, upper=1)
lower_bounds = (mean_min_fidelity - std_min_fidelity).clip(lower=0, upper=1)
corrected_std_positive = upper_bounds - mean_min_fidelity
corrected_std_negative = mean_min_fidelity - lower_bounds

# Connecting line plus point markers with error bars, in the same colour.
line_style = dict(color=default_blue, alpha=alpha_value)
ax.plot(sequence_lengths, mean_min_fidelity, **line_style)
ax.errorbar(
    sequence_lengths,
    mean_min_fidelity,
    yerr=[corrected_std_negative, corrected_std_positive],
    fmt=".",
    capsize=2,
    capthick=1,
    **line_style,
)
ax.set_xscale("log", base=2)

# One tick per sequence length actually present in the data.
plt.xticks(sequence_lengths, fontsize=label_font_size)
plt.yticks(fontsize=label_font_size)

ax.set_xlabel("Sequence Length", fontsize=label_font_size)
ax.set_ylabel("Mean Minimum Fidelity", fontsize=label_font_size)
ax.set_ylim(-0.1, 1.1)

# Show plain integers on the log-2 axis instead of scientific notation.
formatter = ScalarFormatter()
formatter.set_scientific(False)
ax.xaxis.set_major_formatter(formatter)

fig.savefig(
    path_to_save + f"hc_results_sequence_length.{file_extension}",
    dpi=500,
    bbox_inches="tight",
)

In [17]:
# Figure geometry; the width was measured in Inkscape.
width = 3.487
height = width / 1.618
alpha_value = 0.85
title_font_size = 9
label_font_size = title_font_size - 2  # ticks and axis labels

fig, ax = plt.subplots(figsize=(width, height))

plt.rc("font", family="serif", serif="Times")
plt.rc("text", usetex=True)
plt.rc("axes", labelsize=label_font_size)

fig.suptitle(
    "Mean Minimum Fidelity for Experiments Performed (Hill Climbing)",
    fontsize=title_font_size,
)

num_experiments = sorted(data["Num of Experiments"].unique())

# Per-experiment-count mean and standard deviation of "Min Fidelity".
fidelity_by_experiments = data.groupby("Num of Experiments")["Min Fidelity"]
mean_min_fidelity = fidelity_by_experiments.mean()
std_min_fidelity = fidelity_by_experiments.std()

# Clip mean +/- std to [0, 1] so the error bars never extend outside that
# interval, then convert the clipped bounds back into asymmetric bar lengths.
upper_bounds = (mean_min_fidelity + std_min_fidelity).clip(lower=0, upper=1)
lower_bounds = (mean_min_fidelity - std_min_fidelity).clip(lower=0, upper=1)
corrected_std_positive = upper_bounds - mean_min_fidelity
corrected_std_negative = mean_min_fidelity - lower_bounds

# Connecting line plus point markers with error bars, in the same colour.
line_style = dict(color=default_blue, alpha=alpha_value)
ax.plot(num_experiments, mean_min_fidelity, **line_style)
ax.errorbar(
    num_experiments,
    mean_min_fidelity,
    yerr=[corrected_std_negative, corrected_std_positive],
    fmt=".",
    capsize=2,
    capthick=1,
    **line_style,
)

# The x-axis stays linear here, with a tick every 100 from 0 to 1000.
plt.xticks([100 * step for step in range(0, 11)], fontsize=label_font_size)
plt.yticks(fontsize=label_font_size)

ax.set_xlabel("Relative Number of Experiments Performed", fontsize=label_font_size)
ax.set_ylabel("Mean Minimum Fidelity", fontsize=label_font_size)
ax.set_ylim(-0.1, 1.1)

# Show plain numbers on the x-axis instead of scientific notation.
formatter = ScalarFormatter()
formatter.set_scientific(False)
ax.xaxis.set_major_formatter(formatter)

fig.savefig(
    path_to_save + f"hc_results_num_experiments.{file_extension}",
    dpi=500,
    bbox_inches="tight",
)