In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Global matplotlib default: base font size used by the figures below.
# Optional LaTeX text rendering, currently disabled:
#    "text.usetex": True,
#    "font.family": "mathptmx",
plt.rcParams["font.size"] = 14

In [2]:
def get_config_label(config) -> str:
    """Build the label ``"<name>, constr."`` or ``"<name>, unconstr."``.

    ``config[0]`` is used verbatim as the name. ``config[1]`` is only tested
    for truthiness to choose between the two suffixes.

    NOTE(review): any non-empty string in ``config[1]`` selects 'constr.' --
    confirm this is intended if callers pass an algorithm name there.
    """
    suffix = 'unconstr.'
    if config[1]:
        suffix = 'constr.'
    return config[0] + ', ' + suffix

In [3]:
# Load the results table and drop every row of the 'Wave power' problem.
df = pd.read_csv('../results/thesis/perf_KBPGP_vs_GPL.csv')
keep_rows = df['Problem'] != 'Wave power'
df = df[keep_rows]

# Negative R2 scores are raised to 0.0 and missing ones are filled with 0.0.
for r2_col in ('Train-R2', 'Test-R2', 'Extra-R2'):
    is_negative = df[r2_col] < 0.0
    df.loc[is_negative, r2_col] = 0.0
    df[r2_col] = df[r2_col].fillna(0.0)


def plot_subgroup(i, config, group, axs):
    """Draw the two box plots of one configuration into column ``i`` of ``axs``.

    Row 0 shows the feasibility-ratio columns and row 1 the R2 columns of
    ``group``; both rows use a fixed y range of [0, 1].

    Parameters
    ----------
    i : int
        Column index used for both rows of ``axs``. The y-axis captions are
        only written for ``i == 0``.
    config
        Passed to ``get_config_label`` to build the caption shown above the
        top panel.
    group
        Object offering a pandas-style ``boxplot(column=..., ax=...)`` method
        and containing the columns 'Train-Fea-Ratio', 'Avg-Train-Fea-Ratio',
        'Train-R2', 'Test-R2', 'Extra-R2' and 'Avg-Train-R2'.
    axs
        Two-row container of axes, indexed as ``axs[row][i]``.
    """
    # Row 0: feasibility ratio.
    ax = axs[0][i]

    if i == 0:
        ax.set_ylabel('Fea. ratio')
    # The configuration caption is an x label moved to the top of the panel.
    ax.set_xlabel(get_config_label(config))
    ax.xaxis.set_label_position('top')
    ax.xaxis.labelpad = 1
    ax.set_ylim(bottom=0.0, top=1.0)
    ax.tick_params(direction='in', length=5)

    x_features = ['Train-Fea-Ratio', 'Avg-Train-Fea-Ratio']
    # One tick label per plotted column; a third label ('Worst') without a
    # matching column makes set_xticklabels fail on a label-count mismatch.
    x_labels = ['Best', 'Avg']
    group.boxplot(column=x_features, ax=ax)
    ax.set_xticklabels(x_labels, rotation=0)

    # Row 1: data fitting.
    ax = axs[1][i]

    # NOTE(review): the caption says 'NMSE' while the plotted columns are R2
    # scores -- confirm which one is intended.
    if i == 0:
        ax.set_ylabel('NMSE')
    ax.set_ylim(bottom=0.0, top=1.0)
    ax.tick_params(direction='in', length=5)

    x_features = ['Train-R2', 'Test-R2', 'Extra-R2', 'Avg-Train-R2']
    x_labels = ['Train', 'Test', 'Extra', 'Avg']
    group.boxplot(column=x_features, ax=ax)
    ax.set_xticklabels(x_labels, rotation=0)


def plot_group(problem, problem_group):
    """Create one figure of box plots for ``problem``.

    ``problem_group`` is grouped by ('Data-Config', 'Algo-Config'); every
    subgroup gets one column of two stacked panels drawn by ``plot_subgroup``.

    Parameters
    ----------
    problem : str
        Text used as the figure title.
    problem_group : pandas.DataFrame
        Rows to plot; must contain the 'Data-Config' and 'Algo-Config' columns
        plus the columns read by ``plot_subgroup``.
    """
    subgroups = problem_group.groupby(['Data-Config', 'Algo-Config'])
    ncols = subgroups.ngroups

    # squeeze=False keeps ``axs`` two-dimensional even when there is a single
    # subgroup; plot_subgroup always indexes it as axs[row][column].
    fig, axs = plt.subplots(
        2, ncols, figsize=((15/6)*ncols, 4), sharey=True, squeeze=False
    )
    fig.suptitle(problem, weight='bold', y=0.0)

    for i, (config, group) in enumerate(subgroups):
        plot_subgroup(i, config, group, axs)

In [None]:
# Preview the first five rows of the cleaned results table.
df.head()

### Confidence Interval

In [None]:
# Result columns compared in the figure (A vs B).
A, B = 'Test-NMSE', 'Test-Fea-Ratio'

# Captions written under the panels of measure A and measure B.
A_measure_name, B_measure_name = 'NMSE Test', 'Fea. Test'

# Algorithm names used as panel titles.
Algo1, Algo2 = 'KBP-GP', 'GP-L'


from scipy import stats
import numpy as np
import matplotlib

# One row of four panels sharing the y axis. Panels 0-1 show measure A and
# panels 2-3 show measure B; within each pair the first panel belongs to Algo1
# and the second to every other algorithm configuration.
fig, axs = plt.subplots(1, 4, figsize=(11, 12), sharey=True)
nproblems = 0
problem_labels = []

cmap = matplotlib.colormaps.get_cmap('magma')
bar_colors = {'nonoise': cmap(0.3), 'noisy': cmap(0.4), 'dataset': cmap(0.0)}
# Text appended to the problem name for each data configuration.
problem_prefix = {'nonoise': '', 'noisy': '-N', 'dataset': '-D'}
fmt = {'nonoise': 'o', 'noisy': '^', 'dataset': 's'}
i = 0  # y position of the next row to draw

for data_conf in ['nonoise', 'noisy', 'dataset']:

    problem_bars = []

    for problem, problem_group in df.loc[df['Data-Config'] == data_conf].groupby('Problem'):

        # panel index -> (median, lower, upper, data_conf). Only panels whose
        # (algorithm, measure) pair occurs in the data get an entry.
        bars = {}

        for algo_config, algo_config_group in problem_group.groupby('Algo-Config'):

            for measure in [A, B]:
                offset = 0 if measure == A else 2
                ax_idx = offset + (0 if algo_config == Algo1 else 1)

                # Work on an explicit copy: the array returned by to_numpy()
                # may be a read-only view of the frame's data.
                values = algo_config_group[measure].to_numpy(copy=True)
                values[values < 0.0] = 0.0
                median = np.median(values)
                lower, upper = values.min(), values.max()
                if lower != upper:
                    # 95% Student-t interval (n-1 degrees of freedom) centred
                    # on the median and scaled by the standard error of the
                    # mean. Constant samples keep the degenerate [min, max].
                    lower, upper = stats.t.interval(
                        0.95, values.size-1, loc=median, scale=stats.sem(values)
                    )
                lower = max(0.0, lower)
                upper = min(1.0, upper)

                bars[ax_idx] = (median, lower, upper, data_conf)

        nproblems += 1
        if problem.startswith('feynman-'):
            problem = problem.removeprefix('feynman-').upper()
        problem_label = problem + problem_prefix[data_conf]
        problem_bars.append((problem_label, bars))

    # Within a data configuration, rows are ordered by the median shown in
    # panel 0; problems without a panel-0 entry are placed last.
    problem_bars = sorted(
        problem_bars, reverse=False, key=lambda b: b[1].get(0, (np.inf,))[0]
    )
    for problem_lbl, bars in problem_bars:
        problem_labels.append(problem_lbl)
        for ax_idx, (median, lower, upper, bar_conf) in bars.items():
            # Clamping the interval to [0, 1] can put a bound on the wrong
            # side of the median; error-bar lengths must not be negative.
            xerr = [[max(0.0, median - lower)], [max(0.0, upper - median)]]
            axs[ax_idx].errorbar(
                median, i, xerr=xerr, fmt=fmt[bar_conf], color=bar_colors[bar_conf],
                elinewidth=1.5, capsize=6, label=bar_conf, clip_on=False
            )
        i += 1

# Cosmetics applied to every panel: one y tick per counted problem, zero-length
# tick marks, a thin grid, a small margin and zero-width spines.
row_positions = np.arange(nproblems)
for ax in axs:
    ax.set_yticks(row_positions)
    ax.tick_params(direction='in', length=0)
    ax.grid(linewidth=0.3)
    ax.margins(0.025)
    for edge in ('top', 'bottom', 'left', 'right'):
        ax.spines[edge].set_linewidth(0)

# The problem names are written as y tick labels of the first panel.
axs[0].set_yticklabels(problem_labels)
axs[0].tick_params(axis='y', pad=10)

# Panel order: (Algo1, A), (Algo2, A), (Algo1, B), (Algo2, B).
panel_titles = (Algo1, Algo2, Algo1, Algo2)
panel_xlabels = (A_measure_name, A_measure_name, B_measure_name, B_measure_name)
for ax, panel_title, panel_xlabel in zip(axs, panel_titles, panel_xlabels):
    ax.set_xlim([0.0, 1.0])
    ax.set_title(panel_title)
    ax.set_xlabel(panel_xlabel)

#fig.suptitle('Data and Knowledge Results (95% CI)', weight='bold', y=0.0)
plt.gca().invert_yaxis()

def legend_without_duplicate_labels(figure, loc='lower center', ncol=3):
    """Add a figure-level legend with one entry per distinct label.

    Handles and labels are collected from every axes in ``figure.axes`` rather
    than from pyplot's current axes, so the result does not depend on which
    axes happens to be current. When a label occurs more than once, its first
    occurrence fixes the entry's position and its last handle is the one shown.

    Parameters
    ----------
    figure
        Figure whose axes are scanned and which receives the legend.
    loc : str, optional
        Legend location passed to ``figure.legend``.
    ncol : int, optional
        Number of legend columns passed to ``figure.legend``.
    """
    by_label = {}
    for ax in figure.axes:
        handles, labels = ax.get_legend_handles_labels()
        by_label.update(zip(labels, handles))
    figure.legend(by_label.values(), by_label.keys(), loc=loc, ncol=ncol)

# Attach the de-duplicated legend to the figure.
legend_without_duplicate_labels(fig)

# Set the rasterization z-order threshold of the current axes to 0, then
# export the current figure as a tightly cropped PDF and display it.
current_ax = plt.gca()
current_ax.set_rasterization_zorder(0)

plt.savefig('../figs/thesis/KBP-GP_vs_GP-L_TestAcc.pdf', bbox_inches='tight')
plt.show()

### Statistical Test

In [None]:
from scipy.stats import ttest_ind

# Welch's t-test (unequal variances) on 'Test-R2' between the 'KBP-GP' runs
# (constrained) and the runs of the other algorithm configuration
# (unconstrained), computed separately for every problem and data
# configuration.
problem_name_prefix = {'nonoise': '', 'noisy': '-N', 'dataset': '-D'}
header = ['Problem', 'p-value (constr. > unconstr.)', 'p-value (constr. ≠ unconstr.)']
table = []

for data_conf in ['nonoise', 'noisy', 'dataset']:
    for problem, problem_group in df.loc[df['Data-Config'] == data_conf].groupby('Problem'):

        constr_group = None
        unconstr_group = None

        # The loop variable has its own name so it cannot overwrite the
        # collected samples, whatever order the groups are visited in.
        for algo_config, algo_group in problem_group.groupby('Algo-Config'):
            values = algo_group['Test-R2'].to_numpy()
            if algo_config == 'KBP-GP':
                constr_group = values
            else:
                unconstr_group = values

        problem_name = problem + problem_name_prefix[data_conf]

        if constr_group is None or unconstr_group is None:
            # One side has no runs for this problem: keep the row visible but
            # report that no test could be made.
            pval_gr = pval_neq = float('nan')
        else:
            stat_gr, pval_gr = ttest_ind(constr_group, unconstr_group, equal_var=False, alternative='greater')
            stat_neq, pval_neq = ttest_ind(constr_group, unconstr_group, equal_var=False, alternative='two-sided')

        table.append([problem_name, pval_gr, pval_neq])

# Sorting by an empty column name raises KeyError; order the rows by problem
# name instead.
pd.DataFrame(table, columns=header).sort_values(by='Problem')

### Box Plots

In [None]:
#plot_group('All', df)
#for problem, problem_group in df.groupby('Problem'):
#    plot_group(problem, problem_group)