In [1]:
import pandas as pd
import pingouin as pg

In [2]:
# Axes of the phase-comparison result table.
indicators = ["RMSE", "PCC"]
models = ["self", "conv", "self+conv", "self_conv"]
phases = ["phase 1", "phase 2", "phase 3"]
sets = ["train", "test"]
waveforms = ["electric", "pv", "wind", "mpv"]
cols = ["pval", "diff"]

# Rows: every (indicator, model, phase, phase) combination.
# Columns: every (waveform, set, statistic) combination.
multi_index = pd.MultiIndex.from_product((indicators, models, phases, phases))
multi_columns = pd.MultiIndex.from_product((waveforms, sets, cols))

# Start from an all-NaN frame whose rows are sorted; only the row index is
# sorted here, the column order stays as produced above.
df_result = pd.DataFrame(columns=multi_columns, index=multi_index).sort_index()

In [3]:
# Phase comparison.
# For every waveform / indicator / model / data split, compare the groups
# defined by index level 1 of the features file (the phase labels used as the
# last two row-index levels of df_result) with Welch's ANOVA followed by
# Games-Howell pairwise tests, and store each pair's p-value and difference.

# Orientation rule for a pair (A, B): the label that appears earlier in this
# list is written first; labels missing from the list rank last. When the pair
# is swapped relative to the Games-Howell output, the sign of `diff` is flipped.
phase_priority = ["phase 3", "phase 2"]
phase_rank = {label: position for position, label in enumerate(phase_priority)}
unranked = float("inf")

for waveform in waveforms:
    df = pd.read_csv(
        f"../results/002_all/backup/features_{waveform}.csv",
        header=[0, 1],
        index_col=[0, 1, 2],
    )
    for indicator in indicators:
        for model in models:
            # The loop variable is named `subset`, not `set`: binding `set` at
            # notebook level would shadow the builtin for every later cell.
            for subset in sets:
                # Rows: index level 0 == indicator and level 2 == model.
                # Columns: column level 1 == train/test split.
                row_mask = (df.index.get_level_values(0) == indicator) & (
                    df.index.get_level_values(2) == model
                )
                col_mask = df.columns.get_level_values(1) == subset
                filtered_df = df[row_mask].loc[:, col_mask]

                # Long format; reset_index exposes the unnamed index levels as
                # the columns level_0 / level_1 / level_2.
                melted_df = filtered_df.melt(var_name='category', value_name='value', ignore_index=False)
                melted_df = melted_df.reset_index()

                # Omnibus test; reported the same way as in the model-comparison
                # cell instead of being silently discarded.
                welch_anova_results = pg.welch_anova(data=melted_df, dv='value', between='level_1')
                if welch_anova_results.loc[0, "p-unc"] < 0.05:
                    print(f"{indicator}, {waveform}, {model}, {subset}, welch_anova significant")

                games_howell_results = pg.pairwise_gameshowell(
                    data=melted_df, dv='value', between='level_1', effsize='hedges'
                )
                for pair in games_howell_results[["A", "B", "diff", "pval"]].itertuples(index=False):
                    if phase_rank.get(pair.A, unranked) <= phase_rank.get(pair.B, unranked):
                        first, second, diff_value = pair.A, pair.B, pair.diff
                    else:
                        first, second, diff_value = pair.B, pair.A, -pair.diff

                    row_key = (indicator, model, first, second)
                    df_result.loc[row_key, (waveform, subset, "pval")] = pair.pval
                    df_result.loc[row_key, (waveform, subset, "diff")] = diff_value

# Drop the rows that never received a value (e.g. a phase paired with itself);
# note that a row with any missing entry is dropped as a whole.
df_result = df_result.dropna()
df_result.to_excel("../results/002_all/backup/phase_pval.xlsx")

In [4]:
# Axes of the model-comparison result table ("linear" is included as a model).
indicators = ["RMSE", "PCC"]
models = ["self", "conv", "self+conv", "self_conv", "linear"]
phases = ["phase 2", "phase 3"]
sets = ["train", "test"]
waveforms = ["electric", "pv", "wind", "mpv"]
cols = ["pval", "diff"]

# Rows: every (indicator, phase, model, model) combination.
# Columns: every (waveform, set, statistic) combination.
multi_index = pd.MultiIndex.from_product((indicators, phases, models, models))
multi_columns = pd.MultiIndex.from_product((waveforms, sets, cols))

# Start from an all-NaN frame whose rows are sorted; only the row index is
# sorted here, the column order stays as produced above.
df_result = pd.DataFrame(columns=multi_columns, index=multi_index).sort_index()

In [5]:
# Model (structure) comparison.
# For every waveform / indicator / phase / data split, compare the groups
# defined by index level 2 of the features file (the model labels used as the
# last two row-index levels of df_result) with Welch's ANOVA followed by
# Games-Howell pairwise tests. The ("static", "linear") rows are relabelled to
# the current phase and appended so that they take part in every comparison.

# Orientation rule for a pair (A, B): the label that appears earlier in this
# list is written first; labels missing from the list rank last. When the pair
# is swapped relative to the Games-Howell output, the sign of `diff` is flipped.
model_priority = ["self_conv", "self+conv", "conv"]
model_rank = {label: position for position, label in enumerate(model_priority)}
unranked = float("inf")

for waveform in waveforms:
    df = pd.read_csv(
        f"../results/002_all/backup/features_{waveform}.csv",
        header=[0, 1],
        index_col=[0, 1, 2],
    )
    for indicator in indicators:
        for phase in phases:
            # The loop variable is named `subset`, not `set`: binding `set` at
            # notebook level would shadow the builtin for every later cell.
            for subset in sets:
                level_0 = df.index.get_level_values(0)
                level_1 = df.index.get_level_values(1)
                level_2 = df.index.get_level_values(2)
                col_mask = df.columns.get_level_values(1) == subset

                # Rows of the current indicator and phase, columns of the split.
                filtered_df = df[(level_0 == indicator) & (level_1 == phase)].loc[:, col_mask]

                # Baseline rows: same indicator, index level 1 == "static" and
                # level 2 == "linear".
                static_df = df[
                    (level_0 == indicator) & (level_1 == "static") & (level_2 == "linear")
                ].loc[:, col_mask]
                baseline_index = static_df.index
                # Overwrite level 1 with the current phase label;
                # keep the original level names.
                static_df.index = pd.MultiIndex.from_arrays(
                    [
                        baseline_index.get_level_values(0),
                        [phase] * len(baseline_index),
                        baseline_index.get_level_values(2),
                    ],
                    names=baseline_index.names,
                )
                filtered_df = pd.concat([filtered_df, static_df], axis=0)

                # Long format; reset_index exposes the unnamed index levels as
                # the columns level_0 / level_1 / level_2.
                melted_df = filtered_df.melt(var_name='category', value_name='value', ignore_index=False)
                melted_df = melted_df.reset_index()

                welch_anova_results = pg.welch_anova(data=melted_df, dv='value', between='level_2')
                if welch_anova_results.loc[0, "p-unc"] < 0.05:
                    print(f"{indicator}, {waveform}, {phase}, {subset}, welch_anova significant")

                games_howell_results = pg.pairwise_gameshowell(
                    data=melted_df, dv='value', between='level_2', effsize='hedges'
                )
                for pair in games_howell_results[["A", "B", "diff", "pval"]].itertuples(index=False):
                    if model_rank.get(pair.A, unranked) <= model_rank.get(pair.B, unranked):
                        first, second, diff_value = pair.A, pair.B, pair.diff
                    else:
                        first, second, diff_value = pair.B, pair.A, -pair.diff

                    row_key = (indicator, phase, first, second)
                    df_result.loc[row_key, (waveform, subset, "pval")] = pair.pval
                    df_result.loc[row_key, (waveform, subset, "diff")] = diff_value

# Drop the rows that never received a value (e.g. a model paired with itself);
# note that a row with any missing entry is dropped as a whole.
df_result = df_result.dropna()
df_result.to_excel("../results/002_all/backup/structure_pval.xlsx")

  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
  adj_grandmean = (weights * grp.mean(nu