From f28c1ff1a99cfb27c2762a2baf23c21b18177bce Mon Sep 17 00:00:00 2001 From: Paulo Augusto Date: Tue, 6 Jun 2023 21:37:09 -0300 Subject: [PATCH] #32 multiple improvements to plots --- ica_benchmark/visualization/multiple_runs.py | 472 +++++++++++++------ 1 file changed, 329 insertions(+), 143 deletions(-) diff --git a/ica_benchmark/visualization/multiple_runs.py b/ica_benchmark/visualization/multiple_runs.py index d01c86d..56c4b23 100644 --- a/ica_benchmark/visualization/multiple_runs.py +++ b/ica_benchmark/visualization/multiple_runs.py @@ -10,51 +10,84 @@ def annotate_bars(ax, labels): - for i, (bar, err_line, label) in enumerate(zip(ax.patches, ax.get_lines(), labels)): + bottom_lim, top_lim = ax.get_ylim() + mid = (top_lim - bottom_lim) / 2 + for bar, err_line, label in zip(ax.patches, ax.get_lines(), labels): _x = bar.get_x() + bar.get_width() / 2 - # _y = (p.get_y() + p.get_height() - len(label) / 50) / 2 - _y = bar.get_y() + err_line.get_ydata()[1] + (err_line.get_ydata()[1] - bar.get_height()) + bar_height = bar.get_height() + if bar_height > mid: + _y = (bar.get_y() + err_line.get_ydata()[0]) / 2 + else: + _y = (top_lim + err_line.get_ydata()[1]) / 2 + # err_line.get_ydata() -> (bottom error, top error) ax.text(_x, _y, label, ha="center", rotation=90) -def ranked_barplot(results_df, grouping_col="algorithm", x_col="uid", val_col="kappa", figsize=None, x_label=None, save_filepath=None): +def best_per_group_barplot( + results_df, + grouping_cols="algorithm", + x_col="uid", + val_col="kappa", + figsize=None, + x_label=None, + save_filepath=None, + ylim=None, + title=None, +): x_label = x_label if x_label is not None else x_col - + ylim = ylim if ylim is not None else (0, 1) + title = title if title is not None else f"Best {val_col} per {x_label}" + grouping_cols = ( + grouping_cols if isinstance(grouping_cols, list) else [grouping_cols] + ) # Select best algorithm per subject + merge_cols = [x_col] + grouping_cols highest_df = ( - results_df - .groupby([x_col, grouping_col], as_index=False) + results_df.groupby(merge_cols, as_index=False) .mean() .sort_values(by=val_col, ascending=False) .drop_duplicates(subset=x_col) ) - # Filter original results_df to only include best algorithm per subject - df = ( - results_df - .merge(highest_df[[x_col, grouping_col]], on=[x_col, grouping_col], how="inner") - .sort_values(by=x_col, ascending=True) + # Filter original results_df to only include best algorithm per subject + df = results_df.merge(highest_df[merge_cols], how="inner").sort_values( + by=x_col, ascending=True ) - label_df = highest_df[[x_col, grouping_col]].sort_values(by=x_col, ascending=True) + label_df = highest_df[merge_cols].sort_values(by=x_col, ascending=True) fig = plt.figure(figsize=figsize, dpi=150) ax = plt.gca() ax.grid() - sns.barplot(x=x_col, y=val_col, data=df, ax=ax, n_boot=N_BOOT, order=label_df[x_col]) + sns.barplot( + x=x_col, y=val_col, data=df, ax=ax, n_boot=N_BOOT, order=label_df[x_col] + ) - annotate_bars(ax, label_df[grouping_col]) - ax.tick_params(labelrotation=45) - ax.set_ylim((0, 1)) + bar_labels = label_df[grouping_cols].apply(lambda x: "\n".join(map(str, x)), axis=1) + annotate_bars(ax, bar_labels) + ax.tick_params(labelrotation=60) + ax.set_ylim(ylim) ax.set_xlabel(x_label) - ax.set_title(f"Best {val_col} per {x_label}", fontsize=12) + ax.set_title(title, fontsize=12) + fig.tight_layout() if save_filepath is not None: fig.savefig(save_filepath) - -def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kappa", order_col="run", save_filepath=None, w=5, cmap="nipy_spectral", x_label=None): - if x_label is None: - x_label = x_col + +def detailed_barplot( + results_df, + x_col="uid", + hue_col="algorithm", + val_col="Kappa", + key_cols="run", + save_filepath=None, + w=5, + cmap="nipy_spectral", + x_label=None, + title=None, +): + x_label = x_label if x_label is not None else x_col + title = title if title is not None else f"{val_col} per {x_label}, per {hue_col}" fig = plt.figure(figsize=(20, 6), dpi=120) ax = plt.gca() @@ -64,18 +97,9 @@ def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kapp hues = results_df[hue_col].unique() n_hues = len(hues) - hue_color_dict = { - hue: cmap(i / n_hues) - for i, hue - in enumerate(hues) - } + hue_color_dict = {hue: cmap(i / n_hues) for i, hue in enumerate(hues)} legends = [ - Patch( - facecolor=hue_color_dict[hue], - edgecolor=None, - label=hue - ) - for hue in hues + Patch(facecolor=hue_color_dict[hue], edgecolor=None, label=hue) for hue in hues ] x_values = results_df[x_col].unique() @@ -85,8 +109,7 @@ def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kapp x_list = list() # Sort algorithms by mean Kappa value ordered_hue = ( - x_df - .groupby(hue_col, as_index=False) + x_df.groupby(hue_col, as_index=False) .mean() .sort_values(by=val_col)[hue_col] .to_numpy() @@ -97,7 +120,7 @@ def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kapp for hue in ordered_hue: hue_df = x_df[x_df[hue_col] == hue] if hue_df[val_col].nunique() > 1: - res = bootstrap((hue_df[val_col], ), np.mean, n_resamples=100) + res = bootstrap((hue_df[val_col],), np.mean, n_resamples=100) low = res.confidence_interval.low high = res.confidence_interval.high else: @@ -109,27 +132,47 @@ def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kapp x_list.append(x_c) ax.bar(x_c, avg, width=w, color=hue_color_dict[hue], yerr=([low], [high])) - if (hue != best_hue): - - pvalue = wilcoxon(hue_df[val_col], best_hue_df[val_col], alternative="less", zero_method="zsplit").pvalue - if (pvalue < 0.05): - ax.text(x_c, -0.03 if avg > 0 else 0.03, "*", ha="center", va="center", fontsize=20, color="r") + if hue != best_hue: + pvalue = wilcoxon( + hue_df[val_col], + best_hue_df[val_col], + alternative="less", + zero_method="zsplit", + ).pvalue + if pvalue < 0.05: + ax.text( + x_c, + -0.03 if avg > 0 else 0.03, + "*", + ha="center", + va="center", + fontsize=20, + color="r", + ) x_c += w x_c += w * 2 mid = np.mean(x_list) - ax.text(mid, -0.1, x, horizontalalignment="center", va="center_baseline", fontsize=15, rotation=-45) + ax.text( + mid, + -0.1, + x, + horizontalalignment="center", + verticalalignment="top", + fontsize=15, + rotation=-60, + ) ax.set_xlabel(x_label, fontsize=20) for loc in ["right", "left", "top", "bottom"]: ax.spines[loc].set_visible(False) ax.set_xticks([]) - ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=12) + ax.set_xticklabels(ax.get_xticklabels(), rotation=60, fontsize=12) ax.set_yticks(np.arange(0, 1.01, 0.1)) ax.grid() - ax.legend(handles=legends, loc=(1, .2), fontsize=15) + ax.legend(handles=legends, loc=(1, 0.2), fontsize=15) ax.set_ylabel(val_col, fontsize=20) - ax.set_title(f"{val_col} per {x_col}, per {hue_col}", fontsize=20) + ax.set_title(title, fontsize=20) # Ensure figure doesnt get cropped during save fig.tight_layout() @@ -137,86 +180,139 @@ def detailed_barplot(results_df, x_col="uid", hue_col="algorithm", val_col="Kapp fig.savefig(save_filepath) -def average_barplot(results_df, x_col="algorithm", grouping_col="uid", val_col="Kappa", order_col="run", save_filepath=None, w=5, cmap="nipy_spectral", x_label=None, n_boots=N_BOOT): - if x_label is None: - x_label = x_col +def average_barplot( + results_df, + x_col="algorithm", + grouping_cols="uid", + val_col="Kappa", + key_cols="run", + save_filepath=None, + w=5, + cmap="nipy_spectral", + x_label=None, + n_boots=N_BOOT, + title=None, +): + x_label = x_col if x_label is None else x_label + key_cols = [key_cols] if isinstance(key_cols, str) else key_cols + title = title if title is not None else f"Average {val_col} for each {x_col}" + grouping_cols = [grouping_cols] if isinstance(grouping_cols, str) else grouping_cols fig = plt.figure(figsize=(20, 6), dpi=120) ax = plt.gca() x_c = w / 2 cmap = plt.get_cmap(cmap) - + hues = results_df[x_col].unique() n_hues = len(hues) - hue_color_dict = { - hue: cmap(i / n_hues) - for i, hue - in enumerate(hues) - } + hue_color_dict = {hue: cmap(i / n_hues) for i, hue in enumerate(hues)} legends = [ - Patch( - facecolor=hue_color_dict[hue], - edgecolor=None, - label=hue - ) - for hue in hues + Patch(facecolor=hue_color_dict[hue], edgecolor=None, label=hue) for hue in hues ] - ordered_x_df = results_df.groupby([x_col, grouping_col]).mean().groupby(x_col).mean().sort_values(by=val_col).reset_index() + ordered_x_df = ( + results_df.groupby([x_col, *grouping_cols]) + .mean() + .groupby(x_col) + .mean() + .sort_values(by=val_col) + .reset_index() + ) best_x = ordered_x_df[x_col].to_numpy()[-1] best_x_df = results_df[results_df[x_col] == best_x] x_c = 0 for x in ordered_x_df[x_col]: x_c += 1.5 * w - + x_df = results_df[results_df[x_col] == x] - res = bootstrap((x_df[val_col], ), np.mean, n_resamples=n_boots) + res = bootstrap((x_df[val_col],), np.mean, n_resamples=n_boots) avg = x_df[val_col].mean() - ax.bar(x_c, avg, w, yerr=([avg - res.confidence_interval.low], [res.confidence_interval.high - avg]), color=hue_color_dict[x]) + ax.bar( + x_c, + avg, + w, + yerr=( + [avg - res.confidence_interval.low], + [res.confidence_interval.high - avg], + ), + color=hue_color_dict[x], + ) ax.set_xticks([]) - ax.text(x_c, -0.025, x, horizontalalignment="center", fontsize=15) - ax.text(x_c, -0.05, r"$\bar\rho={:.3f}$".format(avg), horizontalalignment="center", fontsize=15, usetex=True) - if (x != best_x): - pvalue = wilcoxon( - x_df.sort_values(by=[grouping_col])[val_col], - best_x_df.sort_values(by=[grouping_col])[val_col], - alternative="less", - zero_method="zsplit" - ).pvalue - - if (pvalue < 0.05): - ax.text(x_c, res.confidence_interval.high, "*", horizontalalignment="center", fontsize=25, color="r") - #ax.legend(handles=legends, loc=(1, .2), fontsize=15) + ax.text( + x_c, + -0.025, + "{}\nrho={:.3f}".format(x, avg), + horizontalalignment="center", + verticalalignment="top", + fontsize=15, + rotation=90, + ) + # ax.text(x_c, -0.05, r"$\bar\rho={:.3f}$".format(avg), horizontalalignment="center", fontsize=15, usetex=True, rotation=60) + if x != best_x: + merge_cols = [*grouping_cols, *key_cols] + best_metric_arr = best_x_df.merge(x_df[merge_cols], on=merge_cols)[val_col] + try: + pvalue = wilcoxon( + x_df[val_col], + best_metric_arr, + alternative="less", + zero_method="zsplit", + ).pvalue + except ValueError as value_exception: + raise ValueError( + f"Could not calculate pvalue for by mering on {merge_cols}. Most likely you have not specified all key_cols.\n" + + str(value_exception) + ) from value_exception + + if pvalue < 0.05: + ax.text( + x_c, + res.confidence_interval.high, + "*", + horizontalalignment="center", + fontsize=25, + color="r", + ) + + ax.set_xlabel(x_label, fontsize=20) ax.set_ylabel(val_col, fontsize=20) ax.grid() - ax.set_title(f"Average {val_col} for each {x_col}", fontsize=20) + ax.set_title(title, fontsize=20) + fig.tight_layout() if save_filepath is not None: fig.savefig(save_filepath) def boxplot_algorithms(results_df, metric="kappa", save_filepath=None): - algorithms = results_df.groupby(["uid", "algorithm"]).mean().groupby("algorithm").mean().sort_values(by=metric).reset_index().algorithm + algorithms = ( + results_df.groupby(["uid", "algorithm"]) + .mean() + .groupby("algorithm") + .mean() + .sort_values(by=metric) + .reset_index() + .algorithm + ) fig = plt.figure(figsize=(12, 6)) ax = plt.gca() sns.boxplot( x="Algorithm", y=metric, - data=results_df.rename( - columns=dict(algorithm="Algorithm") - ), + data=results_df.rename(columns=dict(algorithm="Algorithm")), order=algorithms, - ax=ax + ax=ax, ) - ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=12) + ax.set_xticklabels(ax.get_xticklabels(), rotation=60, fontsize=12) for loc in ["right", "left", "top", "bottom"]: ax.spines[loc].set_visible(False) ax.set_xlabel(ax.xaxis.get_label().get_text(), fontsize=15) ax.set_ylabel(ax.yaxis.get_label().get_text(), fontsize=15) + fig.tight_layout() if save_filepath is not None: fig.savefig(save_filepath) @@ -228,12 +324,10 @@ def boxplot_subjects(results_df, metric="kappa", save_filepath=None): x="uid", y=metric, hue="Algorithm", - data=results_df.rename( - columns=dict(algorithm="Algorithm") - ), - ax=ax + data=results_df.rename(columns=dict(algorithm="Algorithm")), + ax=ax, ) - ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=12) + ax.set_xticklabels(ax.get_xticklabels(), rotation=60, fontsize=12) for loc in ["right", "left", "top", "bottom"]: ax.spines[loc].set_visible(False) @@ -246,6 +340,7 @@ def boxplot_subjects(results_df, metric="kappa", save_filepath=None): if __name__ == "__main__": import argparse + parser = argparse.ArgumentParser() parser.add_argument( "-path", @@ -253,7 +348,7 @@ def boxplot_subjects(results_df, metric="kappa", save_filepath=None): dest="path", default="/home/paulo/Documents/GIT/BCI_MsC/experiments/issue_12/results.csv", type=Path, - help="Path to results.csv" + help="Path to results.csv", ) parser.add_argument( "-save_folder", @@ -261,7 +356,7 @@ def boxplot_subjects(results_df, metric="kappa", save_filepath=None): dest="save_folder", default="./plots", type=Path, - help="folder_to_save" + help="folder_to_save", ) args = parser.parse_args() @@ -272,59 +367,150 @@ def boxplot_subjects(results_df, metric="kappa", save_filepath=None): results_folder = args.save_folder results_folder.mkdir(exist_ok=True) - for classifier in classifiers: - - ranked_barplot( - results_df.query("classifier == @classifier"), - x_col="uid", - val_col="Kappa", - grouping_col="algorithm", - save_filepath=results_folder / f"{classifier}.png" - ) + # for classifier in classifiers: + # classifier_df = results_df[results_df.classifier == classifier] + # best_per_group_barplot( + # classifier_df, + # x_col="uid", + # val_col="Kappa", + # grouping_cols="algorithm", + # save_filepath=results_folder + # / f"best_algorithm_per_subject_for_{classifier}.png", + # x_label="Subject", + # ) + + # detailed_barplot( + # classifier_df, + # x_col="uid", + # hue_col="algorithm", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"detailed_{classifier}.png", + # x_label="Subject", + # ) + # detailed_barplot( + # classifier_df, + # x_col="algorithm", + # hue_col="uid", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"algorithm_comparison_for_{classifier}.png", + # x_label="Subject", + # ) + # average_barplot( + # classifier_df, + # x_col="algorithm", + # grouping_cols="uid", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder + # / f"average_per_algorithm_for_{classifier}.png", + # n_boots=N_BOOT, + # ) + # average_barplot( + # classifier_df, + # x_col="uid", + # grouping_cols="algorithm", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"average_per_subject_for_{classifier}.png", + # n_boots=N_BOOT, + # ) + # for algorithm in algorithms: + # algorithm_df = results_df[results_df.algorithm == algorithm] + # best_per_group_barplot( + # algorithm_df, + # x_col="uid", + # val_col="Kappa", + # grouping_cols="classifier", + # save_filepath=results_folder + # / f"best_classifer_per_subject_for_{algorithm}.png", + # x_label="Subject", + # ) + # detailed_barplot( + # algorithm_df, + # x_col="uid", + # hue_col="classifier", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"detailed_{algorithm}.png", + # x_label="Subject", + # ) + # detailed_barplot( + # algorithm_df, + # x_col="classifier", + # hue_col="uid", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"classifier_comparison_for_{algorithm}.png", + # x_label="Subject", + # ) + # average_barplot( + # algorithm_df, + # x_col="classifier", + # grouping_cols="uid", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder + # / f"average_per_classifier_for_{algorithm}.png", + # n_boots=N_BOOT, + # ) + # average_barplot( + # algorithm_df, + # x_col="uid", + # grouping_cols="classifier", + # val_col="Kappa", + # key_cols="run", + # save_filepath=results_folder / f"average_per_subject_for_{algorithm}.png", + # n_boots=N_BOOT, + # ) + + best_per_group_barplot( + results_df, + grouping_cols=["algorithm"], + x_col="classifier", + val_col="Kappa", + figsize=None, + x_label="Classifier", + save_filepath=results_folder / "classifier_scores.png", + ) - detailed_barplot( - results_df.query("classifier == @classifier"), - x_col="uid", - hue_col="algorithm", - val_col="Kappa", - order_col="run", - save_filepath=results_folder / f"detailed_{classifier}.png", - w=5, - cmap="nipy_spectral", - x_label=None - ) + best_per_group_barplot( + results_df, + grouping_cols=["classifier"], + x_col="algorithm", + val_col="Kappa", + figsize=None, + x_label="Algorithm", + save_filepath=results_folder / "algorithm_scores.png", + ) - for algorithm in algorithms: - ranked_barplot( - results_df.query("algorithm == @algorithm"), - x_col="uid", - val_col="Kappa", - grouping_col="classifier", - save_filepath=results_folder / f"{algorithm}.png" - ) + average_barplot( + results_df, + x_col="uid", + grouping_cols=["algorithm", "classifier"], + val_col="Kappa", + key_cols="run", + save_filepath=results_folder / "average_per_subject.png", + n_boots=N_BOOT, + ) - detailed_barplot( - results_df.query("algorithm == @algorithm"), - x_col="uid", - hue_col="classifier", - val_col="Kappa", - order_col="run", - save_filepath=results_folder / f"detailed_{algorithm}.png", - w=5, - cmap="nipy_spectral", - x_label=None - ) + average_barplot( + results_df, + x_col="algorithm", + grouping_cols=["uid", "classifier"], + val_col="Kappa", + key_cols="run", + save_filepath=results_folder / "average_per_algorithm.png", + n_boots=N_BOOT, + ) - for algorithm in algorithms: - - detailed_barplot( - results_df.query("algorithm == @algorithm"), - x_col="classifier", - hue_col="uid", - val_col="Kappa", - order_col="run", - save_filepath=results_folder / f"classifier_comparison_{algorithm}.png", - w=5, - cmap="nipy_spectral", - x_label=None - ) + average_barplot( + results_df, + x_col="classifier", + grouping_cols=["uid", "algorithm"], + val_col="Kappa", + key_cols="run", + save_filepath=results_folder / "average_per_classifier.png", + n_boots=N_BOOT, + )