In [None]:
import pickle
from pathlib import Path
from scipy.stats import ttest_ind
import numpy as np
import pandas as pd

In [None]:
# "A" side: experiment directories to evaluate; each one is expected to
# contain a profit_table.pkl file.
exp_dir = Path("../..") / "experiments"
dir_A = [exp_dir / "saas_hc"]

# "B" side: the reference directory. Gains are reported relative to B,
# i.e. (A - B) / B.
dir_B = Path("..") / "acopp"

# p-value thresholds used to keep only instances where A and B differ
# significantly.
target_p_values = [0.05]

In [None]:
# Load one profit table per experiment directory listed in dir_A.
df_A = []
for _dir in dir_A:
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # tables produced by trusted runs.
    with open(_dir / "profit_table.pkl", "rb") as f:
        df = pickle.load(f)
    # Sort the columns by label directly (axis=1). This avoids a double
    # transpose, which can change column dtypes when they are not uniform.
    df = df.sort_index(axis=1)
    df_A.append(df)

# The column labels of the first table define the set and order of columns
# used for every later comparison.
columns = list(df_A[0].columns)

In [None]:
# Load the B table, then keep exactly the columns of the first A table, in
# the same order (a label missing from B raises KeyError).
with (dir_B / "profit_table.pkl").open("rb") as f:
    df_B = pickle.load(f)
df_B = df_B[columns]

In [None]:
def print_row(temp_df_A, temp_df_B, name):
    """Print one tab-separated summary row comparing A against B.

    Args:
        temp_df_A: Values for the A side (callers in this notebook pass
            pandas Series).
        temp_df_B: Values for the B side, combined element-wise with
            ``temp_df_A``; used as the denominator of the relative gain.
        name: Label printed in the first field of the row.

    Prints ``name``, the percentage of entries whose gain is >= 0 (a gain of
    exactly zero counts as a victory) and the mean gain in percent, the two
    numbers formatted with four decimals.
    """
    relative_gain = (temp_df_A - temp_df_B) / temp_df_B * 100
    wins = relative_gain.ge(0).sum()
    victory_percent = wins / len(relative_gain) * 100
    average_gain_percent = relative_gain.mean()
    print(
        f"{name}",
        f"{victory_percent:.4f}",
        f"{average_gain_percent:.4f}",
        sep="\t\t",
    )

In [None]:
def make_temp(df):
    """Collapse a table into one row per column ("instance") of ``df``.

    Args:
        df: DataFrame in which each column holds the values of one instance
            (values are assumed numeric so that a mean can be taken).

    Returns:
        DataFrame sorted by ``instance`` with a fresh 0..n-1 index and three
        columns: ``instance`` (the column label from ``df``),
        ``profit_array`` (that column's values as a NumPy array) and ``mean``
        (the mean of ``profit_array``). ``instance`` stays a regular column;
        it is not made the index.
    """
    # Rows of the transpose are the columns of df: one NumPy array each.
    temp_df = df.T.apply(np.array, axis=1).reset_index()
    temp_df.columns = ["instance", "profit_array"]
    temp_df["mean"] = temp_df["profit_array"].map(lambda values: values.mean())
    # Renumber after sorting so that two results over the same instances share
    # index labels. Without this, index-aligned arithmetic between two results
    # pairs rows by their original column position, not by instance.
    return temp_df.sort_values(by=["instance"]).reset_index(drop=True)

In [None]:
# Summary table: one tab-separated row per (experiment, statistic).
print("name\t\tvictory_percent\t\taverage_gain_percent")
operators = ['min', 'mean', 'max']

# describe() does not depend on the statistic being reported, so compute it
# once per table rather than once per operator.
stats_B = df_B.describe()
stats_A = [frame.describe() for frame in df_A]

for _op in operators:
    temp_df_B = stats_B.loc[_op]
    for idx, _dir in enumerate(dir_A):
        temp_df_A = stats_A[idx].loc[_op]
        name = f"{_dir.name}_{_op}"
        print_row(temp_df_A, temp_df_B, name)

# Significance-filtered rows: keep only the instances whose A and B samples
# differ with a two-sample t-test p-value at or below each threshold, then
# compare the per-instance means of those instances.
temp_df_B = make_temp(df_B)
for idx, _dir in enumerate(dir_A):
    temp_df_A = make_temp(df_A[idx])
    # The t-test pairs rows of A and B by position, so both tables must list
    # the same instances in the same order; fail loudly instead of silently
    # comparing different instances.
    if list(temp_df_A["instance"]) != list(temp_df_B["instance"]):
        raise ValueError(
            f"{_dir.name}: instances do not match those of the B table"
        )
    p_values = np.array(
        [
            ttest_ind(a, b).pvalue
            for a, b in zip(temp_df_A["profit_array"], temp_df_B["profit_array"])
        ],
        dtype=float,
    )
    for _target_pvalue in target_p_values:
        # A NaN p-value compares False and is therefore excluded.
        mask = (p_values <= _target_pvalue)
        name = f"{_dir.name} (pvalue = {_target_pvalue})"
        print_row(temp_df_A.loc[mask, "mean"], temp_df_B.loc[mask, "mean"], name)


# The remaining cells build the per-instance gain_percent table and export it to CSV.

In [None]:
# Gain of the first A table over B on the per-instance mean, in percent,
# reshaped into a two-column frame (instance, gain_percent).
temp_df_A = df_A[0].describe().loc['mean']
temp_df_B = df_B.describe().loc['mean']
mean_gain = ((temp_df_A - temp_df_B) / temp_df_B * 100).reset_index()
mean_gain.columns = ["instance", "gain_percent"]
mean_gain.info()

In [None]:
# Read the two-column CSV and give its columns fixed names; the "instance"
# name matches the key column of mean_gain.
acopp_mean_path = "../acopp/acopp_mean.csv"
acopp_mean_df = pd.read_csv(acopp_mean_path)
acopp_mean_df.columns = ["instance", "acopp_profit"]
acopp_mean_df.info()

In [None]:
# Inner-join the gain table with the acopp table on their only shared
# column, then order the rows from the smallest to the largest gain.
new_df = mean_gain.merge(acopp_mean_df, how="inner", on="instance")
new_df = new_df.sort_values(by=["gain_percent"])

In [None]:
# Write the merged table next to the notebook; the row index is not exported.
output_path = "./gain_percent.csv"
new_df.to_csv(output_path, index=False)