In [None]:
import pickle
from matplotlib import pyplot as plt
import seaborn as sns
import json
import pandas as pd
import os
import numpy as np

In [None]:
# from IPython.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Pickled benchmark result files; they are merged into `results` by
# load_pickle_result_file() further down.
pickle_files = ["./reference_data/hh.pickle", "./reference_data/memory_bound.pickle", "./reference_data/compute_bound.pickle"]

In [None]:
results = {}
def load_pickle_result_file(pickle_files, results):
    """Load every pickle in ``pickle_files`` and deep-merge it into ``results``.

    Nested dictionaries are merged key by key. A key present on both sides must
    either hold dictionaries on both sides (merged recursively) or compare
    equal; anything else raises ``Exception`` naming the dotted path of the key.

    ``results`` is updated in place and is also the return value.

    NOTE: unpickling can execute arbitrary code, so only trusted files should
    be passed in.
    """
    def _merge(target, incoming, path=None):
        # `path` lists the (stringified) keys leading to the current nesting
        # level; it is only used to build the conflict message.
        trail = [] if path is None else path
        for key in incoming:
            if key not in target:
                target[key] = incoming[key]
                continue
            if isinstance(target[key], dict) and isinstance(incoming[key], dict):
                _merge(target[key], incoming[key], trail + [str(key)])
                continue
            if target[key] == incoming[key]:
                continue  # identical leaf on both sides, nothing to do
            raise Exception('Conflict at %s' % '.'.join(trail + [str(key)]))
        return target

    for pickle_file in pickle_files:
        with open(pickle_file, 'rb') as handle:
            loaded = pickle.load(handle)
        results = _merge(results, loaded)
    return results

results = load_pickle_result_file(pickle_files, results)

In [None]:
def _get_flags_string(flags):
    return flags.replace(" ", "_").replace('-','').replace('=','_')

In [None]:
# colors = ['#f0f9e8','#bae4bc','#7bccc4','#43a2ca','#0868ac'] # print friendly colors
# sns.set_palette(sns.color_palette(colors))
# sns.set_palette("Set3") # NEURON paper palette
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order below, which is meant to match the order of the compiler series in the plots:
# intel #6baed6
# intel svml #0570b0
# gcc #66c2a4
# gcc_svml #238b45
# clang #fdd49e
# clang_svml #fc8d59
# mod2ir #9ebcda
# mod2ir_svml #8c96c6
# mod2ir_jit_svml #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
def generate_graph_pandas(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False):
    """Plot CPU kernel runtimes per mechanism, grouped by target architecture.

    For every mechanism (top-level key of ``results``) one figure with two bar
    charts is drawn: ``nrn_state`` on the left and ``nrn_cur`` on the right.
    Bars are grouped by architecture and colored by compiler. Each figure is
    saved as ``<output_dir>/<modname>_benchmark_<graph_suffix>.pdf`` and shown.

    Args:
        results: Nested dict indexed as
            ``results[modname][architecture][compiler][flags_key][kernel_name]``;
            element ``[0]`` of the innermost value is plotted as the runtime.
        compilers_comparison_config: JSON string mapping
            ``compiler -> architecture -> [flags, ...]`` that selects which
            configurations are plotted.
        graph_suffix: Suffix used in the output file names.
        output_dir: Directory that receives the PDFs; created if missing.
        print_values: When true, every bar is annotated with its value.

    Configurations listed in the config but missing from ``results`` are kept
    as bars of height 0.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    # Axis labels for the known architectures; any other name (e.g.
    # skylake-avx512) is shown unchanged.
    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }
    columns = ["architecture", "compiler", "runtime"]
    for modname in results:
        state_data = {column: [] for column in columns}
        cur_data = {column: [] for column in columns}
        for architecture in results[modname]:
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler in compiler_flags:
                if compiler not in results[modname][architecture] or architecture not in compiler_flags[compiler]:
                    continue
                measurements = results[modname][architecture][compiler]
                if compiler == "nmodl_jit":
                    # JIT kernels carry the mechanism name, with '-' replaced by '_'.
                    state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                    cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
                else:
                    state_kernel_name = "nrn_state_ext"
                    cur_kernel_name = "nrn_cur_ext"
                for flags in compiler_flags[compiler][architecture]:
                    # Series name shown in the legend: clang runs with "jit" in
                    # the flags and nmodl_jit runs are relabelled, then the math
                    # library (if any) is appended.
                    if compiler == "clang" and "jit" in flags:
                        compiler_name = "mod2ir"
                    elif compiler == "nmodl_jit":
                        compiler_name = "mod2ir_jit"
                    else:
                        compiler_name = compiler
                    if "svml" in flags or "SVML" in flags:
                        compiler_name += "_svml"
                    elif "sleef" in flags or "SLEEF" in flags:
                        compiler_name += "_sleef"
                    flags_key = _get_flags_string(flags)
                    if flags_key in measurements:
                        state_runtime = measurements[flags_key][state_kernel_name][0]
                        cur_runtime = measurements[flags_key][cur_kernel_name][0]
                    else:
                        # No measurement for this configuration: keep the bar
                        # slot but draw it with height 0.
                        state_runtime = 0
                        cur_runtime = 0
                    for data, runtime in ((state_data, state_runtime), (cur_data, cur_runtime)):
                        data["architecture"].append(architecture_label)
                        data["compiler"].append(compiler_name)
                        data["runtime"].append(runtime)
        fig, axes = plt.subplots(1, 2, squeeze=False, figsize=(48,24))
        panels = (("nrn_state", state_data, axes[0,0]), ("nrn_cur", cur_data, axes[0,1]))
        for kernel_label, data, ax in panels:
            frame = pd.DataFrame(data, columns=columns)
            sns.barplot(x='architecture', y='runtime', hue='compiler', data=frame, ax=ax)
            # Per-axes labels are hidden; shared labels are added with fig.text below.
            ax.xaxis.label.set_visible(False)
            ax.yaxis.label.set_visible(False)
            ax.set_title("{} runtime for {}".format(kernel_label, modname))
            if print_values:
                for container in ax.containers:
                    ax.bar_label(container)
        # Only the right-hand panel keeps a legend. get_legend() returns None
        # when no legend was created (e.g. nothing matched the config), so guard
        # the removal instead of failing with AttributeError.
        left_legend = axes[0,0].get_legend()
        if left_legend is not None:
            left_legend.remove()
        fig.text(0.5, 0.04, 'Target Microarchitecture-Instruction Set', ha='center', va='center')
        fig.text(0.06, 0.5, 'Runtime (s)', ha='center', va='center', rotation='vertical')
        axes[0,1].legend(bbox_to_anchor=(1.04,1), loc="upper left")
        fig.savefig("{}/{}_benchmark_{}.pdf".format(output_dir, modname, graph_suffix), format="pdf", bbox_inches="tight")
        plt.show()
        plt.close(fig)

compilers_comparison_config = """
{
  "intel": {
    "default": [
      "-O2 -prec-div"
    ],
    "nehalem": [
      "-O2 -msse2 -prec-div -fimf-use-svml"
    ],
    "broadwell": [
      "-O2 -march=broadwell -mtune=broadwell -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml"
    ]
  },
  "gcc": {
    "default": [
      "-O3 -ffast-math -ftree-vectorize",
      "-O3 -ffast-math -ftree-vectorize -mveclibabi=svml"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "clang": {
    "default": [
      "-O3 -ffast-math",
      "-O3 -ffast-math -fveclib=SVML",
      "-O3 -ffast-math jit SVML",
      "-O3 -ffast-math jit SLEEF"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SLEEF"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "default": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "nehalem": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "broadwell": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""

generate_graph_pandas(results, compilers_comparison_config, "cpu_all_compilers", "graphs_output_pandas")

In [None]:
compilers_comparison_config = """
{
  "intel": {
    "nehalem": [
      "-O2 -msse2 -prec-div -fimf-use-svml"
    ],
    "broadwell": [
      "-O2 -march=broadwell -mtune=broadwell -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml"
    ]
  },
  "nmodl_jit": {
    "nehalem": [
      "SVML_nnancontractafn"
    ],
    "broadwell": [
      "SVML_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn"
    ]
  }
}
"""
colors = ['#0570b0','#969696']
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas(results, compilers_comparison_config, "cpu_intel_vs_nmodl", "graphs_output_pandas")

In [None]:
def generate_graph_pandas_gpu(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False):
    """Plot GPU (``nvptx64``) kernel runtimes, one figure per mechanism.

    Each figure shows the ``nrn_state`` and ``nrn_current`` kernels on the x
    axis with one bar per compiler, and is saved as
    ``<output_dir>/<modname>_benchmark_<graph_suffix>.pdf`` and shown.

    Args:
        results: Nested dict indexed as
            ``results[modname]["nvptx64"][compiler][flags_key][kernel_name]``;
            element ``[0]`` of the innermost value is plotted as the runtime.
        compilers_comparison_config: JSON string mapping
            ``compiler -> architecture -> [flags, ...]``; only entries for
            ``nvptx64`` are used.
        graph_suffix: Suffix used in the output file names.
        output_dir: Directory that receives the PDFs; created if missing.
        print_values: When true, every bar is annotated with its value.

    Raises:
        KeyError: if a mechanism in ``results`` has no ``nvptx64`` entry.
    """
    # Create the target directory up front (generate_graph_pandas does the
    # same) so saving works even when this is the first function writing there.
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    architecture = "nvptx64"
    columns = ["kernel", "compiler", "runtime"]
    for modname in results:
        bar_data = {column: [] for column in columns}
        for compiler in results[modname][architecture]:
            if compiler not in compiler_flags or architecture not in compiler_flags[compiler]:
                continue
            measurements = results[modname][architecture][compiler]
            if compiler == "nmodl_jit":
                # JIT kernels carry the mechanism name, with '-' replaced by '_'.
                state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
            else:
                state_kernel_name = "nrn_state_ext"
                cur_kernel_name = "nrn_cur_ext"
            for flags in compiler_flags[compiler][architecture]:
                if compiler == "clang" and "jit" in flags:
                    compiler_name = "mod2ir"
                elif compiler == "nmodl_jit":
                    compiler_name = "mod2ir_jit"
                else:
                    compiler_name = compiler
                flags_key = _get_flags_string(flags)
                if flags_key in measurements:
                    state_runtime = measurements[flags_key][state_kernel_name][0]
                    cur_runtime = measurements[flags_key][cur_kernel_name][0]
                else:
                    # No measurement for this configuration: plot a zero bar.
                    state_runtime = 0
                    cur_runtime = 0
                for kernel_label, runtime in (("nrn_state", state_runtime), ("nrn_current", cur_runtime)):
                    bar_data["kernel"].append(kernel_label)
                    bar_data["compiler"].append(compiler_name)
                    bar_data["runtime"].append(runtime)
        frame = pd.DataFrame(bar_data, columns=columns)
        fig, ax = plt.subplots(figsize=(6,6))
        sns.barplot(x='kernel', y='runtime', hue='compiler', data=frame, ax=ax)
        if print_values:
            for container in ax.containers:
                ax.bar_label(container)
        ax.set_xlabel("Kernel Name")
        ax.set_ylabel("Runtime (s)")
        ax.set_title("OpenACC and MOD2IR comparison for {}".format(modname))
        ax.legend(bbox_to_anchor=(1.04,1), loc="upper left")
        fig.savefig("{}/{}_benchmark_{}.pdf".format(output_dir, modname, graph_suffix), format="pdf", bbox_inches="tight")
        plt.show()
        plt.close(fig)

compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
colors = ['#b2df8a','#bdbdbd']
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_gpu(results, compilers_comparison_config, "hh_openacc_vs_nmodl", "graphs_output_pandas")

In [None]:
colors = ['#0570b0','#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order below, which is meant to match the order of the compiler series in the plots:
# intel #0570b0
# gcc #66c2a4
# gcc_svml #238b45
# clang #fdd49e
# clang_svml #fc8d59
# mod2ir #9ebcda
# mod2ir_svml #8c96c6
# mod2ir_jit_svml #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
compilers_comparison_config = """
{
  "intel": {
    "broadwell": [
      "-O2 -march=broadwell -mtune=broadwell -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml"
    ]
  },
  "clang": {
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "broadwell": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""

generate_graph_pandas(results, compilers_comparison_config, "cpu_intel_clang_mod2ir_compilers", "graphs_output_pandas")

In [None]:
results_expsyn = {}
results_expsyn = load_pickle_result_file(["./reference_data/expsyn_icc_clang.pickle"], results_expsyn)

In [None]:
compilers_comparison_config = """
{
  "intel": {
    "default": [
      "-O2 -prec-div"
    ],
    "nehalem": [
      "-O2 -msse2 -prec-div -fimf-use-svml"
    ],
    "broadwell": [
      "-O2 -march=broadwell -mtune=broadwell -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml"
    ]
  },
  "clang": {
    "default": [
      "-O3 -ffast-math",
      "-O3 -ffast-math -fveclib=SVML",
      "-O3 -ffast-math jit SVML",
      "-O3 -ffast-math jit SLEEF"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SLEEF"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "gcc": {
    "default": [
      "-O3 -ffast-math -ftree-vectorize",
      "-O3 -ffast-math -ftree-vectorize -mveclibabi=svml"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "nmodl_jit": {
    "default": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "nehalem": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "broadwell": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
generate_graph_pandas(results_expsyn, compilers_comparison_config, "cpu_intel_clang_mod2ir_compilers_expsyn", "graphs_output_pandas")

In [None]:
results_hh = {}
results_hh = load_pickle_result_file(["./reference_data/hh_ic_clang_gcc_w_wout_svml.pickle"], results_hh)
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order below, which is meant to match the order of the compiler series in the plots:
# intel #6baed6
# intel svml #0570b0
# gcc #66c2a4
# gcc_svml #238b45
# clang #fdd49e
# clang_svml #fc8d59
# mod2ir #9ebcda
# mod2ir_svml #8c96c6
# mod2ir_jit_svml #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
compilers_comparison_config = """
{
  "intel": {
    "default": [
      "-O2 -prec-div",
      "-O2 -prec-div -fimf-use-svml"
    ],
    "nehalem": [
      "-O2 -msse2 -prec-div",
      "-O2 -msse2 -prec-div -fimf-use-svml"
    ],
    "broadwell": [
      "-O2 -march=broadwell -mtune=broadwell -prec-div",
      "-O2 -march=broadwell -mtune=broadwell -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fopenmp",
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "default": [
      "-O3 -ffast-math -ftree-vectorize",
      "-O3 -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "clang": {
    "default": [
      "-O3 -ffast-math",
      "-O3 -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -ffast-math jit SVML",
      "-O3 -ffast-math jit SLEEF"
    ],
    "nehalem": [
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SVML",
      "-O3 -march=nehalem -mtune=nehalem -ffast-math -fopenmp jit SLEEF"
    ],
    "broadwell": [
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SVML",
      "-O3 -march=broadwell -mtune=broadwell -ffast-math -fopenmp jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "default": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "nehalem": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "broadwell": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
generate_graph_pandas(results_hh, compilers_comparison_config, "hh_icc_clang_gcc_w_wout_svml", "graphs_output_pandas")

In [None]:
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order below, which is meant to match the order of the compiler series in the plots:
# intel #6baed6
# intel svml #0570b0
# gcc #66c2a4
# gcc_svml #238b45
# clang #fdd49e
# clang_svml #fc8d59
# mod2ir #9ebcda
# mod2ir_svml #8c96c6
# mod2ir_jit_svml #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
compilers_comparison_config = """
{
  "intel": {
    "default": [
      "-O2 -prec-div",
      "-O2 -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fopenmp",
      "-O2 -march=skylake-avx512 -mtune=skylake -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "default": [
      "-O3 -ffast-math -ftree-vectorize",
      "-O3 -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "clang": {
    "default": [
      "-O3 -ffast-math",
      "-O3 -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -ffast-math jit SVML",
      "-O3 -ffast-math jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "default": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
# NOTE(review): this passes the same results dict, graph_suffix and output_dir
# as the generate_graph_pandas call that ends the previous cell, so the PDFs
# written there are overwritten here -- confirm this is intended or use a
# distinct suffix.
generate_graph_pandas(results_hh, compilers_comparison_config, "hh_icc_clang_gcc_w_wout_svml", "graphs_output_pandas")

In [None]:
expsyn_cpu_results = {}
expsyn_cpu_results= load_pickle_result_file(["./reference_data/expsyn_cpu_results.pickle"], expsyn_cpu_results)

In [None]:
# Relies on compilers_comparison_config still holding the value assigned in an
# earlier cell; it is not redefined in this cell.
generate_graph_pandas(expsyn_cpu_results, compilers_comparison_config, "expsyn_icc_clang_gcc_w_wout_svml", "graphs_output_pandas")

In [None]:
expsyn_gpu_results = {}
expsyn_gpu_results = load_pickle_result_file(["./reference_data/expsyn_gpu.pickle"], expsyn_gpu_results)

In [None]:
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
colors = ['#b2df8a','#bdbdbd']
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_gpu(expsyn_gpu_results, compilers_comparison_config, "exp_openacc_vs_nmodl", "graphs_output_pandas")

In [None]:
gpu_results = {}
gpu_results = load_pickle_result_file(["./reference_data/hh_gpu.pickle", "./reference_data/expsyn_gpu.pickle"], gpu_results)

In [None]:
def generate_graph_pandas_gpu_combined(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False):
    """Plot GPU (``nvptx64``) kernel runtimes of all mechanisms in one figure.

    Every mechanism in ``results`` gets its own panel, laid out side by side,
    showing the ``nrn_state`` and ``nrn_current`` kernels with one bar per
    compiler. The figure is saved as
    ``<output_dir>/gpu_combined_benchmark_<graph_suffix>.pdf`` and shown.

    Args:
        results: Nested dict indexed as
            ``results[modname]["nvptx64"][compiler][flags_key][kernel_name]``;
            element ``[0]`` of the innermost value is plotted as the runtime.
        compilers_comparison_config: JSON string mapping
            ``compiler -> architecture -> [flags, ...]``; only entries for
            ``nvptx64`` are used.
        graph_suffix: Suffix used in the output file name.
        output_dir: Directory that receives the PDF; created if missing.
        print_values: When true, every bar is annotated with its value.

    Raises:
        KeyError: if a mechanism in ``results`` has no ``nvptx64`` entry.
    """
    # Create the target directory up front (generate_graph_pandas does the
    # same) so saving works even when this is the first function writing there.
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    architecture = "nvptx64"
    columns = ["kernel", "compiler", "runtime"]
    fig = plt.figure(figsize=(12, 6))
    # At least two columns keeps the layout unchanged for one or two
    # mechanisms; with more, each mechanism still gets a panel instead of
    # add_subplot rejecting the index.
    panel_count = max(2, len(results))
    for panel_index, modname in enumerate(results):
        bar_data = {column: [] for column in columns}
        for compiler in results[modname][architecture]:
            if compiler not in compiler_flags or architecture not in compiler_flags[compiler]:
                continue
            measurements = results[modname][architecture][compiler]
            if compiler == "nmodl_jit":
                # JIT kernels carry the mechanism name, with '-' replaced by '_'.
                state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
            else:
                state_kernel_name = "nrn_state_ext"
                cur_kernel_name = "nrn_cur_ext"
            for flags in compiler_flags[compiler][architecture]:
                if compiler == "clang" and "jit" in flags:
                    compiler_name = "mod2ir"
                elif compiler == "nmodl_jit":
                    compiler_name = "mod2ir_jit"
                else:
                    compiler_name = compiler
                flags_key = _get_flags_string(flags)
                if flags_key in measurements:
                    state_runtime = measurements[flags_key][state_kernel_name][0]
                    cur_runtime = measurements[flags_key][cur_kernel_name][0]
                else:
                    # No measurement for this configuration: plot a zero bar.
                    state_runtime = 0
                    cur_runtime = 0
                for kernel_label, runtime in (("nrn_state", state_runtime), ("nrn_current", cur_runtime)):
                    bar_data["kernel"].append(kernel_label)
                    bar_data["compiler"].append(compiler_name)
                    bar_data["runtime"].append(runtime)
        frame = pd.DataFrame(bar_data, columns=columns)
        ax = fig.add_subplot(1, panel_count, panel_index + 1)
        sns.barplot(x='kernel', y='runtime', hue='compiler', data=frame, ax=ax)
        if print_values:
            # A dedicated loop variable keeps panel_index intact for the
            # y-label decision below.
            for container in ax.containers:
                ax.bar_label(container)
        ax.set_xlabel("Kernel Name")
        # Only the first panel carries the y-axis label.
        if panel_index == 0:
            ax.set_ylabel("Runtime (s)")
        else:
            ax.set(ylabel=None)
        ax.set_title("OpenACC and MOD2IR comparison for {}".format(modname))
    fig.savefig("{}/gpu_combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)

compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
colors = ['#b2df8a','#bdbdbd']
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_gpu_combined(gpu_results, compilers_comparison_config, "hh_expsyn_gpu", "graphs_output_pandas")


In [None]:
hh_expsyn_avx512f_results = {}
hh_expsyn_avx512f_results = load_pickle_result_file(["./reference_data/hh_expsyn_mavx512f.pickle"], hh_expsyn_avx512f_results)
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order below, which is meant to match the order of the compiler series in the plots:
# intel #6baed6
# intel svml #0570b0
# gcc #66c2a4
# gcc_svml #238b45
# clang #fdd49e
# clang_svml #fc8d59
# mod2ir #9ebcda
# mod2ir_svml #8c96c6
# mod2ir_jit_svml #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
compilers_comparison_config = """
{
  "intel": {
    "default": [
      "-O2 -prec-div",
      "-O2 -prec-div -fimf-use-svml"
    ],
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "default": [
      "-O3 -ffast-math -ftree-vectorize",
      "-O3 -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "clang": {
    "default": [
      "-O3 -ffast-math",
      "-O3 -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -ffast-math jit SVML",
      "-O3 -ffast-math jit SLEEF"
    ],
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "default": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ],
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
# NOTE(review): the suffix "mavfx512f" may be a typo for "mavx512f" (the flag
# used in the configuration above); it ends up in the output file names, so
# confirm before renaming. The final True enables value labels on the bars.
generate_graph_pandas(hh_expsyn_avx512f_results, compilers_comparison_config, "mavfx512f", "graphs_output_pandas", True)

In [None]:
def compare_average_diff_percentage(original_vector, comparison_vector):
    """Return the mean relative difference, in percent, between two vectors.

    For every position ``i`` of ``original_vector`` the term
    ``100 * (original - comparison) / original`` is computed, and the terms are
    averaged. A positive result means the comparison values are smaller than
    the original ones on average.

    ``comparison_vector`` is indexed by position, so it must be at least as
    long as ``original_vector``; extra trailing entries are ignored.
    """
    percentages = [
        100 * (reference - comparison_vector[position]) / reference
        for position, reference in enumerate(original_vector)
    ]
    return np.sum(percentages) / len(percentages)

def return_results_from_config(results, compilers_comparison_config, modnames):
    """Collect the runtimes selected by a compiler configuration into a flat list.

    Walks ``results[modname][architecture][compiler][flags_key]`` for every
    mechanism contained in ``modnames`` and every compiler/architecture/flags
    combination listed in the JSON string ``compilers_comparison_config``. For
    each combination present in ``results`` the state-kernel value is appended
    first, followed by the cur-kernel value (element ``[0]`` of each entry).
    Combinations without an entry in ``results`` are skipped.
    """
    compiler_flags = json.loads(compilers_comparison_config)
    collected = []
    for modname in results:
        if modname not in modnames:
            continue
        for architecture in results[modname]:
            per_compiler = results[modname][architecture]
            for compiler in compiler_flags:
                if compiler not in per_compiler or architecture not in compiler_flags[compiler]:
                    continue
                if compiler == "nmodl_jit":
                    # JIT kernels carry the mechanism name, with '-' replaced by '_'.
                    kernel_names = (
                        "nrn_state_{}".format(modname.replace("-", "_")),
                        "nrn_cur_{}".format(modname.replace("-", "_")),
                    )
                else:
                    kernel_names = ("nrn_state_ext", "nrn_cur_ext")
                for flags in compiler_flags[compiler][architecture]:
                    flags_key = _get_flags_string(flags)
                    if flags_key not in per_compiler[compiler]:
                        continue
                    for kernel_name in kernel_names:
                        collected.append(per_compiler[compiler][flags_key][kernel_name][0])
    return collected

def calculate_overall_averages(results, modnames):
    """Print average runtime differences between compiler selections.

    Three lines are printed for the mechanisms in ``modnames``:

    * ``intel`` (SVML flags) against ``nmodl_jit`` with SVML,
    * ``intel`` (SVML flags) against ``nmodl_jit`` with SLEEF,
    * ``clang`` with ``jit SVML`` / ``jit SLEEF`` flags against ``nmodl_jit``.

    Each value comes from ``compare_average_diff_percentage`` applied to the
    vectors returned by ``return_results_from_config``.

    Parameters
    ----------
    results : dict
        Nested benchmark results (mechanism -> architecture -> compiler ->
        flags key -> kernel name). This argument is the data source for every
        lookup; the function no longer reads the notebook-level
        ``hh_expsyn_avx512f_results`` variable.
    modnames : list of str
        Mechanisms to include in the averages.

    Returns
    -------
    None
    """

    def collect(config):
        # All selections are taken from the caller-supplied results.
        return return_results_from_config(results, config, modnames)

    intel_compiler_comparison_config = """
    {
      "intel": {
        "skylake-avx512": [
          "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
        ]
      }
    }
    """
    intel_results = collect(intel_compiler_comparison_config)

    mod2ir_jit_svml_compiler_comparison_config = """
    {
      "nmodl_jit": {
        "skylake-avx512": [
          "SVML_nnancontractafn"
        ]
      }
    }
    """
    mod2ir_jit_svml_results = collect(mod2ir_jit_svml_compiler_comparison_config)

    mod2ir_jit_sleef_compiler_comparison_config = """
    {
      "nmodl_jit": {
        "skylake-avx512": [
          "SLEEF_nnancontractafn"
        ]
      }
    }
    """
    mod2ir_jit_sleef_results = collect(mod2ir_jit_sleef_compiler_comparison_config)

    print("Intel vs MOD2IR SVML diff (%) {} : {}".format(modnames, compare_average_diff_percentage(intel_results, mod2ir_jit_svml_results)))
    print("Intel vs MOD2IR SLEEF diff (%) {} : {}".format(modnames, compare_average_diff_percentage(intel_results, mod2ir_jit_sleef_results)))

    mod2ir_svml_sleef_compiler_comparison_config = """
    {
      "clang": {
        "default": [
          "-O3 -ffast-math jit SVML",
          "-O3 -ffast-math jit SLEEF"
        ],
        "skylake-avx512": [
          "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
          "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
        ]
      }
    }
    """
    mod2ir_svml_sleef_results = collect(mod2ir_svml_sleef_compiler_comparison_config)

    mod2ir_jit_svml_sleef_compiler_comparison_config = """
    {
      "nmodl_jit": {
        "default": [
          "SVML_nnancontractafn",
          "SLEEF_nnancontractafn"
        ],
        "skylake-avx512": [
          "SVML_nnancontractafn",
          "SLEEF_nnancontractafn"
        ]
      }
    }
    """
    mod2ir_jit_svml_sleef_results = collect(mod2ir_jit_svml_sleef_compiler_comparison_config)

    print("MOD2IR vs MOD2IR JIT diff (%) {} : {}".format(modnames, compare_average_diff_percentage(mod2ir_svml_sleef_results, mod2ir_jit_svml_sleef_results)))

# Report the averages for each mechanism on its own, then for both together.
for selected_modnames in (["hh"], ["expsyn"], ["hh", "expsyn"]):
    calculate_overall_averages(hh_expsyn_avx512f_results, selected_modnames)

In [None]:
def generate_graph_pandas_cpu_combined(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), show_xlabels=False):
    """Plot absolute kernel runtimes for every mechanism in one figure and save it as PDF.

    Panels are filled left to right in the iteration order of ``results``:
    ``nrn_state_<mod>`` (skipped for the ``expsyn`` mechanism) followed by
    ``nrn_cur_<mod>``. The figure is created with exactly three panels.

    Parameters
    ----------
    results : dict
        Nested results: mechanism -> architecture -> compiler -> flags key ->
        kernel name; the first element of each kernel entry is plotted.
    compilers_comparison_config : str
        JSON text mapping compiler -> architecture -> list of flag strings.
    graph_suffix : str
        Suffix of the output file ``combined_benchmark_<suffix>.pdf``.
    output_dir : str
        Directory for the PDF; created if it does not exist.
    print_values : bool
        Annotate every bar with its value.
    xaxis_label : str or None
        Optional shared label drawn below the panels.
    plot_size : tuple
        Figure size passed to ``plt.subplots``.
    show_xlabels : bool
        Keep the per-panel x axis visible; hidden when False.

    Flag strings missing from ``results`` are plotted as a runtime of 0.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    fig, axes = plt.subplots(1, 3, squeeze=False, figsize=plot_size)

    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }
    column_names = ["architecture", "compiler", "runtime"]

    def draw_panel(panel, rows, title):
        # Render one grouped bar chart and apply the shared panel styling.
        frame = pd.DataFrame(rows, columns=column_names)
        sns.barplot(x='architecture', y='runtime', hue='compiler', data=frame, ax=panel)
        panel.xaxis.label.set_visible(False)
        panel.yaxis.label.set_visible(False)
        panel.set_title(title)
        panel.get_legend().remove()
        if not show_xlabels:
            panel.get_xaxis().set_visible(False)
        if print_values:
            for container in panel.containers:
                panel.bar_label(container,)

    next_panel = 0
    for modname, per_architecture in results.items():
        has_state_panel = modname != "expsyn"
        state_rows = {name: [] for name in column_names}
        cur_rows = {name: [] for name in column_names}
        for architecture, measured in per_architecture.items():
            # Unknown architectures (e.g. skylake-avx512) keep their own name.
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler, flags_by_architecture in compiler_flags.items():
                if compiler not in measured or architecture not in flags_by_architecture:
                    continue
                for flags in flags_by_architecture[architecture]:
                    if compiler == "nmodl_jit":
                        kernel_suffix = modname.replace("-", "_")
                        compiler_name = "mod2ir_jit"
                    else:
                        kernel_suffix = "ext"
                        compiler_name = "mod2ir" if (compiler == "clang" and "jit" in flags) else compiler
                    if any(tag in flags for tag in ("svml", "SVML")):
                        compiler_name += "_svml"
                    elif any(tag in flags for tag in ("sleef", "SLEEF")):
                        compiler_name += "_sleef"

                    flags_key = _get_flags_string(flags)
                    timings = measured[compiler]
                    if has_state_panel:
                        state_rows["architecture"].append(architecture_label)
                        state_rows["compiler"].append(compiler_name)
                        if flags_key in timings:
                            state_rows["runtime"].append(timings[flags_key]["nrn_state_" + kernel_suffix][0])
                        else:
                            state_rows["runtime"].append(0)
                    cur_rows["architecture"].append(architecture_label)
                    cur_rows["compiler"].append(compiler_name)
                    if flags_key in timings:
                        cur_rows["runtime"].append(timings[flags_key]["nrn_cur_" + kernel_suffix][0])
                    else:
                        cur_rows["runtime"].append(0)

        if has_state_panel:
            draw_panel(axes[0, next_panel], state_rows, "nrn_state_{}".format(modname))
            next_panel += 1
        draw_panel(axes[0, next_panel], cur_rows, "nrn_cur_{}".format(modname))
        next_panel += 1

    if xaxis_label is not None:
        fig.text(0.5, 0.04, xaxis_label, ha='center', va='center')
    fig.text(0.06, 0.5, 'Runtime (s)', ha='center', va='center', rotation='vertical')
    plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Palette for the skylake-avx512 CPU comparison plotted at the end of this cell.
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order in which the config below produces compiler
# labels (presumably seaborn assigns hue colors in order of first appearance
# -- TODO confirm):
# intel            #6baed6
# intel_svml       #0570b0
# gcc              #66c2a4
# gcc_svml         #238b45
# clang            #fdd49e
# clang_svml       #fc8d59
# mod2ir_svml      #9ebcda
# mod2ir_sleef     #8c96c6
# mod2ir_jit_svml  #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
# Compiler -> architecture -> flag strings to plot. The clang entries containing
# "jit" are labelled mod2ir and the nmodl_jit entries mod2ir_jit by the plot function.
compilers_comparison_config = """
{
  "intel": {
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "clang": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
# Absolute runtimes; hh_expsyn_avx512f_results must have been loaded by an earlier cell.
generate_graph_pandas_cpu_combined(hh_expsyn_avx512f_results, compilers_comparison_config, "hh_expsyn_cpu", "graphs_output_pandas", False, xaxis_label="Target Microarchitecture-Instruction Set", show_xlabels=True)

In [None]:
# Load the hh and expsyn GPU result pickles into a fresh dict
# (load_pickle_result_file merges into the dict it is given).
hh_expsyn_gpu_1024x128 = {}
hh_expsyn_gpu_1024x128 = load_pickle_result_file(["./reference_data/hh_gpu_20mil_1024x128.pickle", "./reference_data/expsyn_gpu_100mil_1024x128.pickle"], hh_expsyn_gpu_1024x128)
# Compare nvhpc against the nmodl_jit (labelled mod2ir_jit) results on nvptx64.
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
# Two-color palette, one color per compiler in the config above.
colors = ['#b2df8a','#bdbdbd']
sns.set_palette(sns.color_palette(colors))

# Absolute GPU runtimes; the CPU plotting helper is reused with the GPU config.
generate_graph_pandas_cpu_combined(hh_expsyn_gpu_1024x128, compilers_comparison_config, "hh_expsyn_gpu", "graphs_output_pandas", print_values=False, plot_size=(13,5))

In [None]:
def generate_graph_pandas_combined_relative(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), baseline_name="intel_svml"):
    """Plot kernel runtimes relative to a baseline compiler and save the figure as PDF.

    For every mechanism the runtimes are divided by the baseline runtime of
    the same kernel, so the baseline bar has height 1 (marked by a dashed
    line). Panels are filled left to right in the iteration order of
    ``results``: ``nrn_state_<mod>`` (skipped for ``expsyn``) followed by
    ``nrn_cur_<mod>``. The figure is created with exactly three panels.

    The baseline is taken from the config entries themselves:

    * on ``nvptx64``: any ``nvhpc`` entry,
    * on every other architecture: an ``intel`` entry whose flags contain
      ``svml`` / ``SVML``.

    If several entries qualify, the last one wins.

    Parameters
    ----------
    results : dict
        Nested results: mechanism -> architecture -> compiler -> flags key ->
        kernel name; the first element of each kernel entry is used.
    compilers_comparison_config : str
        JSON text mapping compiler -> architecture -> list of flag strings.
    graph_suffix : str
        Suffix of the output file ``combined_benchmark_<suffix>.pdf``.
    output_dir : str
        Directory for the PDF; created if it does not exist.
    print_values : bool
        Annotate every bar with its value.
    xaxis_label : str or None
        When not None, the x axis of every panel is hidden. The label text
        itself is not drawn by this function.
    plot_size : tuple
        Figure size passed to ``plt.subplots``.
    baseline_name : str
        Name shown in the y-axis caption; it does not select the baseline.

    Raises
    ------
    ValueError
        If a mechanism has runtimes to plot but no non-zero baseline was found
        for it. The baseline is reset for every mechanism, so a value from a
        previous mechanism is never reused.
    KeyError
        If a baseline entry's flags key or kernel is missing from ``results``.

    Notes
    -----
    Sets ``pd.options.display.float_format`` globally and prints every
    plotted DataFrame. Flag strings missing from ``results`` are plotted as 0.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    fig, axes = plt.subplots(1, 3, squeeze=False, figsize=plot_size)
    columns = ["architecture", "compiler", "runtime"]
    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }

    def _normalize(runtimes, baseline, kernel_kind, modname):
        # Divide the runtimes in place; refuse to divide by a missing/zero baseline.
        if not runtimes:
            return
        if not baseline:
            raise ValueError(
                "No non-zero baseline runtime found for {}_{}; the config must select "
                "the baseline compiler ({}) for this mechanism".format(kernel_kind, modname, baseline_name)
            )
        for i, runtime in enumerate(runtimes):
            runtimes[i] = runtime / baseline

    def _draw_panel(panel, bar_data, title):
        # Print and render one relative-runtime panel with the shared styling.
        frame = pd.DataFrame(bar_data, columns=columns)
        print(frame, type(frame))
        sns.barplot(x='architecture', y='runtime', hue='compiler', data=frame, ax=panel)
        panel.axhline(1., ls='--', color ="black")
        panel.xaxis.label.set_visible(False)
        panel.yaxis.label.set_visible(False)
        panel.set_title(title)
        panel.get_legend().remove()
        if xaxis_label is not None:
            panel.get_xaxis().set_visible(False)
        if print_values:
            for container in panel.containers:
                panel.bar_label(container,)

    ax_index = 0
    for modname in results:
        bar_data_state = {"architecture": [], "compiler": [], "runtime": []}
        bar_data_cur = {"architecture": [], "compiler": [], "runtime": []}
        # Reset per mechanism so a baseline never leaks from the previous one.
        baseline_state = None
        baseline_cur = None
        for architecture in results[modname]:
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler in compiler_flags:
                if compiler in results[modname][architecture] and architecture in compiler_flags[compiler]:
                    measured = results[modname][architecture][compiler]
                    for flags in compiler_flags[compiler][architecture]:
                        if compiler == "nmodl_jit":
                            state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                            cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
                        else:
                            state_kernel_name = "nrn_state_ext"
                            cur_kernel_name = "nrn_cur_ext"
                        if compiler == "clang" and "jit" in flags:
                            compiler_name = "mod2ir"
                        elif compiler == "nmodl_jit":
                            compiler_name = "mod2ir_jit"
                        else:
                            compiler_name = compiler
                        has_svml = "svml" in flags or "SVML" in flags
                        if has_svml:
                            compiler_name = compiler_name + "_svml"
                        elif "sleef" in flags or "SLEEF" in flags:
                            compiler_name = compiler_name + "_sleef"

                        flags_key = _get_flags_string(flags)
                        if architecture == "nvptx64":
                            is_baseline = compiler == "nvhpc"
                        else:
                            is_baseline = has_svml and compiler == "intel"
                        if is_baseline:
                            baseline_state = measured[flags_key][state_kernel_name][0]
                            baseline_cur = measured[flags_key][cur_kernel_name][0]

                        if modname != "expsyn":
                            bar_data_state["architecture"].append(architecture_label)
                            bar_data_state["compiler"].append(compiler_name)
                            if flags_key not in measured:
                                bar_data_state["runtime"].append(0)
                            else:
                                bar_data_state["runtime"].append(measured[flags_key][state_kernel_name][0])
                        bar_data_cur["architecture"].append(architecture_label)
                        bar_data_cur["compiler"].append(compiler_name)
                        if flags_key not in measured:
                            bar_data_cur["runtime"].append(0)
                        else:
                            bar_data_cur["runtime"].append(measured[flags_key][cur_kernel_name][0])

        _normalize(bar_data_state["runtime"], baseline_state, "nrn_state", modname)
        _normalize(bar_data_cur["runtime"], baseline_cur, "nrn_cur", modname)
        pd.options.display.float_format = "{:,.2f}".format
        if modname != "expsyn":
            _draw_panel(axes[0, ax_index], bar_data_state, "nrn_state_{}".format(modname))
            ax_index += 1
        _draw_panel(axes[0, ax_index], bar_data_cur, "nrn_cur_{}".format(modname))
        ax_index += 1

    fig.text(0.06, 0.5, 'Relative Performance ({} = 1)'.format(baseline_name), ha='center', va='center', rotation='vertical')
    plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Palette for the relative skylake-avx512 CPU comparison (adds nvhpc to the CPU palette).
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#b2df8a','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
# Colors are listed in the order in which the config below produces compiler
# labels (presumably seaborn assigns hue colors in order of first appearance
# -- TODO confirm):
# intel            #6baed6
# intel_svml       #0570b0
# gcc              #66c2a4
# gcc_svml         #238b45
# nvhpc            #b2df8a
# clang            #fdd49e
# clang_svml       #fc8d59
# mod2ir_svml      #9ebcda
# mod2ir_sleef     #8c96c6
# mod2ir_jit_svml  #969696
# mod2ir_jit_sleef #525252
sns.set_palette(sns.color_palette(colors))
# Compiler -> architecture -> flag strings to plot. The intel entry with
# -fimf-use-svml is the one the relative plot function picks as baseline.
compilers_comparison_config = """
{
  "intel": {
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "nvhpc": {
    "skylake-avx512": [
      "-fast -O3 -mp=autopar -tp=skylake -Msafeptr=all -Minfo -Mvect=simd:512,gather -mavx512vbmi -mavx512vbmi2 -mavx512vl"
    ]
  },
  "clang": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""
# Merge the avx512 results with the nvhpc CPU results into a fresh dict.
hh_expsyn_cpu_results = {}
hh_expsyn_cpu_results = load_pickle_result_file(["./reference_data/hh_expsyn_mavx512f.pickle", "./reference_data/hh_expsyn_nvhpc_cpu.pickle"], hh_expsyn_cpu_results)
# NOTE(review): json_object is not used anywhere in the visible cells -- looks
# like a leftover for inspecting the merged results; confirm before removing.
json_object = json.dumps(hh_expsyn_cpu_results, indent = 4) 
# Passing xaxis_label hides the per-panel x axes; the label text itself is not drawn.
generate_graph_pandas_combined_relative(hh_expsyn_cpu_results, compilers_comparison_config, "hh_expsyn_cpu_relative", "graphs_output_pandas", False, xaxis_label="skylake-avx512 Target Microarchitecture", plot_size=(10,3.5))

In [None]:
# Reload the hh and expsyn GPU result pickles into a fresh dict
# (load_pickle_result_file merges into the dict it is given).
hh_expsyn_gpu_1024x128 = {}
hh_expsyn_gpu_1024x128 = load_pickle_result_file(["./reference_data/hh_gpu_20mil_1024x128.pickle", "./reference_data/expsyn_gpu_100mil_1024x128.pickle"], hh_expsyn_gpu_1024x128)
# nvhpc on nvptx64 is the baseline the relative plot function normalizes against.
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
# Two-color palette, one color per compiler in the config above.
colors = ['#b2df8a','#969696']
sns.set_palette(sns.color_palette(colors))

# Passing xaxis_label hides the per-panel x axes; baseline_name only sets the y-axis caption.
generate_graph_pandas_combined_relative(hh_expsyn_gpu_1024x128, compilers_comparison_config, "hh_expsyn_gpu_relative", "graphs_output_pandas", xaxis_label="NVPTX64 Architecture", print_values=False, plot_size=(8,8), baseline_name="nvhpc")

In [None]:
def generate_graph_pandas_combined_relative_gpu(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), baseline_name="intel_svml"):
    """Plot all kernels in a single panel, each normalized to its nvhpc runtime.

    Bars are grouped by kernel (``nrn_state_<mod>`` / ``nrn_cur_<mod>``; the
    state kernel is skipped for ``expsyn``) and colored by compiler. Every
    runtime is divided by the runtime recorded for the same kernel by the
    ``nvhpc`` compiler on the ``nvptx64`` architecture; a dashed line marks 1.

    Parameters
    ----------
    results : dict
        Nested results: mechanism -> architecture -> compiler -> flags key ->
        kernel name; the first element of each kernel entry is used.
    compilers_comparison_config : str
        JSON text mapping compiler -> architecture -> list of flag strings.
    graph_suffix : str
        Suffix of the output file ``combined_benchmark_<suffix>.pdf``.
    output_dir : str
        Directory for the PDF; created if it does not exist.
    print_values : bool
        Annotate every bar with its value.
    xaxis_label : str or None
        Accepted for signature compatibility; not used by this function.
    plot_size : tuple
        Figure size passed to ``plt.subplots``.
    baseline_name : str
        Name shown in the y-axis caption; it does not select the baseline.

    Raises
    ------
    KeyError
        If a plotted kernel has no nvhpc/nvptx64 baseline, or a baseline
        entry's flags key or kernel is missing from ``results``.

    Notes
    -----
    Sets ``pd.options.display.float_format`` globally and prints progress
    lines, the collected data and the DataFrame. Flag strings missing from
    ``results`` are plotted as 0.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    _figure, axes = plt.subplots(1, 1, squeeze=False, figsize=plot_size)
    ax = axes[0,0]
    bar_data_gpu_panda = {"kernel": [], "compiler": [], "runtime": []}
    baseline_kernel = {}

    for modname, per_architecture in results.items():
        for architecture, measured in per_architecture.items():
            for compiler, flags_by_architecture in compiler_flags.items():
                if compiler not in measured or architecture not in flags_by_architecture:
                    continue
                for flags in flags_by_architecture[architecture]:
                    mod_suffix = modname.replace("-", "_")
                    label_state_name = "nrn_state_" + mod_suffix
                    label_cur_name = "nrn_cur_" + mod_suffix
                    if compiler == "nmodl_jit":
                        # JIT results are stored under the mechanism-specific kernel names.
                        state_kernel_name, cur_kernel_name = label_state_name, label_cur_name
                        compiler_name = "mod2ir_jit"
                    else:
                        state_kernel_name, cur_kernel_name = "nrn_state_ext", "nrn_cur_ext"
                        compiler_name = "mod2ir" if (compiler == "clang" and "jit" in flags) else compiler

                    flags_key = _get_flags_string(flags)
                    timings = measured[compiler]
                    if architecture == "nvptx64" and compiler == "nvhpc":
                        baseline_kernel[label_state_name] = timings[flags_key][state_kernel_name][0]
                        baseline_kernel[label_cur_name] = timings[flags_key][cur_kernel_name][0]

                    if modname != "expsyn":
                        bar_data_gpu_panda["kernel"].append(label_state_name)
                        bar_data_gpu_panda["compiler"].append(compiler_name)
                        if flags_key in timings:
                            bar_data_gpu_panda["runtime"].append(timings[flags_key][state_kernel_name][0])
                        else:
                            bar_data_gpu_panda["runtime"].append(0)
                    bar_data_gpu_panda["kernel"].append(label_cur_name)
                    bar_data_gpu_panda["compiler"].append(compiler_name)
                    if flags_key in timings:
                        bar_data_gpu_panda["runtime"].append(timings[flags_key][cur_kernel_name][0])
                    else:
                        bar_data_gpu_panda["runtime"].append(0)

    # Normalize every row by the baseline recorded for its kernel.
    for index, kernel in enumerate(bar_data_gpu_panda["kernel"]):
        print("Scaling kernel {} arch {}".format(kernel, bar_data_gpu_panda["compiler"][index]))
        bar_data_gpu_panda["runtime"][index] = bar_data_gpu_panda["runtime"][index]/baseline_kernel[kernel]

    pd.options.display.float_format = "{:,.2f}".format
    print(bar_data_gpu_panda)
    df_kernels = pd.DataFrame(bar_data_gpu_panda, columns=["kernel", "compiler", "runtime"])
    print(df_kernels, type(df_kernels))
    sns.barplot(x='kernel', y='runtime', hue='compiler', data=df_kernels, ax=ax)
    ax.axhline(1., ls='--', color ="black")
    ax.xaxis.label.set_visible(False)
    plt.ylabel('Relative Performance ({} = 1)'.format(baseline_name))
    plt.legend(loc="lower left")
    if print_values:
        for container in ax.containers:
            ax.bar_label(container,)
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Reload the hh and expsyn GPU result pickles into a fresh dict
# (load_pickle_result_file merges into the dict it is given).
hh_expsyn_gpu_1024x128 = {}
hh_expsyn_gpu_1024x128 = load_pickle_result_file(["./reference_data/hh_gpu_20mil_1024x128.pickle", "./reference_data/expsyn_gpu_100mil_1024x128.pickle"], hh_expsyn_gpu_1024x128)
# nvhpc on nvptx64 provides the per-kernel baseline used by the single-panel plot.
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""
# Two-color palette, one color per compiler in the config above.
colors = ['#b2df8a','#969696']
sns.set_palette(sns.color_palette(colors))

# Single-panel variant; xaxis_label is accepted but not used by this plot function.
generate_graph_pandas_combined_relative_gpu(hh_expsyn_gpu_1024x128, compilers_comparison_config, "hh_expsyn_gpu_relative_one_plot", "graphs_output_pandas", xaxis_label="NVPTX64 Architecture", print_values=False, plot_size=(4,3), baseline_name="nvhpc")

In [None]:
def _relative_log_speedups(baseline, runtimes):
    """Return ``baseline / runtime`` for every entry of ``runtimes``.

    A runtime of 0 is the placeholder stored for a configuration that has no
    measurement; it is reported as a speedup of 0.0 instead of being used as
    a divisor.
    """
    return [baseline / runtime if runtime else 0.0 for runtime in runtimes]


def _draw_relative_log_panel(ax, rows, title, hide_xaxis, print_values):
    """Draw one grouped bar panel of speedups on a base-2 symlog y axis.

    ``rows`` is a dict with the "architecture", "compiler" and "runtime"
    columns; the resulting DataFrame is printed before it is plotted.
    """
    df = pd.DataFrame(rows, columns=["architecture", "compiler", "runtime"])
    print(df, type(df))
    sns.barplot(x='architecture', y='runtime', hue='compiler', data=df, ax=ax)
    ax.set_yscale('symlog', base=2, linthresh=0.015)
    ax.set_ylim(0.125, 2)
    ax.set_yticks([0.125, 0.25, 0.5, 1, 2])
    # Dashed reference line at the baseline (speedup == 1).
    ax.axhline(1., ls='--', color="black")
    ax.xaxis.label.set_visible(False)
    ax.yaxis.label.set_visible(False)
    ax.set_title(title)
    # The per-panel legend is dropped; one shared legend is added by the caller.
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    if hide_xaxis:
        ax.get_xaxis().set_visible(False)
    if print_values:
        for container in ax.containers:
            ax.bar_label(container)


def generate_graph_pandas_combined_relative_log(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), baseline_name="intel_svml"):
    """Plot per-kernel speedups over a baseline compiler on a log2-style axis.

    One panel is drawn per kernel: ``nrn_state_<mod>`` (skipped for the
    "expsyn" mechanism) and ``nrn_cur_<mod>``. Bars show
    ``baseline_runtime / runtime``. The baseline runtime is taken from the
    data: the "intel" entry whose flags mention SVML on non-"nvptx64"
    architectures, or the "nvhpc" entry on "nvptx64". If no such entry is
    configured the baseline stays 0.0 and every bar of that mechanism is 0.

    Parameters:
        results: nested dict indexed as
            ``results[modname][architecture][compiler][flags_key][kernel][0]``,
            where ``flags_key`` is ``_get_flags_string(flags)``; element 0 is
            used as the runtime.
        compilers_comparison_config: JSON text mapping compiler ->
            architecture -> list of flag strings to plot.
        graph_suffix: suffix of the PDF name
            ``<output_dir>/combined_benchmark_<graph_suffix>.pdf``.
        output_dir: directory for the PDF; created if missing.
        print_values: when true, annotate every bar with its value.
        xaxis_label: only tested against None; when given, the x axis of
            every panel is hidden (the text itself is not drawn).
        plot_size: figure size passed to ``plt.subplots``.
        baseline_name: name used in the y-axis caption only.

    Raises:
        KeyError: if a baseline entry or a kernel name is absent from
            ``results``.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    # One nrn_cur panel per mechanism plus one nrn_state panel for every
    # mechanism except expsyn (hh + expsyn gives the usual 3 panels).
    n_panels = sum(1 if modname == "expsyn" else 2 for modname in results)
    fig, axes = plt.subplots(1, max(n_panels, 1), squeeze=False, figsize=plot_size)
    pd.options.display.float_format = "{:,.2f}".format
    hide_xaxis = xaxis_label is not None
    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }
    ax_index = 0
    for modname in results:
        plot_state = modname != "expsyn"
        kernel_suffix = modname.replace("-", "_")
        state_rows = {"architecture": [], "compiler": [], "runtime": []}
        cur_rows = {"architecture": [], "compiler": [], "runtime": []}
        # Reset both baselines per mechanism so nothing leaks from the
        # previous one and a missing baseline cannot raise a NameError.
        baseline_state = 0.0
        baseline_cur = 0.0
        for architecture in results[modname]:
            arch_results = results[modname][architecture]
            # Other architectures (nvptx64, skylake-avx512, ...) keep their name.
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler in compiler_flags:
                if compiler not in arch_results or architecture not in compiler_flags[compiler]:
                    continue
                # JIT results are keyed by the mechanism-specific kernel name,
                # all other compilers by the generic "_ext" kernels.
                if compiler == "nmodl_jit":
                    state_kernel_name = "nrn_state_{}".format(kernel_suffix)
                    cur_kernel_name = "nrn_cur_{}".format(kernel_suffix)
                else:
                    state_kernel_name = "nrn_state_ext"
                    cur_kernel_name = "nrn_cur_ext"
                for flags in compiler_flags[compiler][architecture]:
                    flags_key = _get_flags_string(flags)
                    uses_svml = "svml" in flags or "SVML" in flags
                    if compiler == "clang" and "jit" in flags:
                        compiler_name = "mod2ir"
                    elif compiler == "nmodl_jit":
                        compiler_name = "mod2ir_jit"
                    else:
                        compiler_name = compiler
                    if uses_svml:
                        compiler_name = compiler_name + "_svml"
                    elif "sleef" in flags or "SLEEF" in flags:
                        compiler_name = compiler_name + "_sleef"
                    if architecture == "nvptx64":
                        is_baseline = compiler == "nvhpc"
                    else:
                        is_baseline = compiler == "intel" and uses_svml
                    if is_baseline:
                        baseline_entry = arch_results[compiler][flags_key]
                        baseline_state = baseline_entry[state_kernel_name][0]
                        baseline_cur = baseline_entry[cur_kernel_name][0]
                    # Configured but unmeasured flag sets get a 0 placeholder.
                    measured = flags_key in arch_results[compiler]
                    if plot_state:
                        state_rows["architecture"].append(architecture_label)
                        state_rows["compiler"].append(compiler_name)
                        state_rows["runtime"].append(arch_results[compiler][flags_key][state_kernel_name][0] if measured else 0)
                    cur_rows["architecture"].append(architecture_label)
                    cur_rows["compiler"].append(compiler_name)
                    cur_rows["runtime"].append(arch_results[compiler][flags_key][cur_kernel_name][0] if measured else 0)
        if plot_state:
            state_rows["runtime"] = _relative_log_speedups(baseline_state, state_rows["runtime"])
            _draw_relative_log_panel(axes[0, ax_index], state_rows, "nrn_state_{}".format(modname), hide_xaxis, print_values)
            ax_index += 1
        cur_rows["runtime"] = _relative_log_speedups(baseline_cur, cur_rows["runtime"])
        _draw_relative_log_panel(axes[0, ax_index], cur_rows, "nrn_cur_{}".format(modname), hide_xaxis, print_values)
        ax_index += 1

    fig.text(0.06, 0.5, 'Speedup relative to {}'.format(baseline_name), ha='center', va='center', rotation='vertical')
    # pyplot's current axes is the last panel created, so the shared legend
    # is anchored to the right of it.
    plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Bar colours for the CPU comparison, one per compiler/flag combination, in
# the order the bars are generated from the configuration below (labels as
# built by the plotting function):
#   intel            #6baed6
#   intel_svml       #0570b0
#   gcc              #66c2a4
#   gcc_svml         #238b45
#   nvhpc            #b2df8a
#   clang            #fdd49e
#   clang_svml       #fc8d59
#   mod2ir_svml      #9ebcda
#   mod2ir_sleef     #8c96c6
#   mod2ir_jit_svml  #969696
#   mod2ir_jit_sleef #525252
colors = ['#6baed6', '#0570b0', '#66c2a4','#238b45','#b2df8a','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']
sns.set_palette(sns.color_palette(colors))

# Compiler -> architecture -> list of flag strings to include in the plot.
compilers_comparison_config = """
{
  "intel": {
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "nvhpc": {
    "skylake-avx512": [
      "-fast -O3 -mp=autopar -tp=skylake -Msafeptr=all -Minfo -Mvect=simd:512,gather -mavx512vbmi -mavx512vbmi2 -mavx512vl"
    ]
  },
  "clang": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""

# Merge the AVX-512 measurements and the nvhpc CPU measurements into one
# freshly created results dictionary.
hh_expsyn_cpu_results = load_pickle_result_file(
    [
        "./reference_data/hh_expsyn_mavx512f.pickle",
        "./reference_data/hh_expsyn_nvhpc_cpu.pickle",
    ],
    {},
)
# JSON dump of the merged results, kept in a variable for inspection.
json_object = json.dumps(hh_expsyn_cpu_results, indent = 4)

generate_graph_pandas_combined_relative_log(
    hh_expsyn_cpu_results,
    compilers_comparison_config,
    "hh_expsyn_cpu_relative_log",
    "graphs_output_pandas",
    print_values=False,
    xaxis_label="skylake-avx512 Target Microarchitecture",
    plot_size=(10,3.5),
)

In [None]:
def generate_graph_pandas_combined_relative_gpu_log(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), baseline_name="intel_svml"):
    """Plot all kernels in one panel as speedups over nvhpc on a log2-style axis.

    Every kernel (``nrn_state_<mod>``, skipped for "expsyn", and
    ``nrn_cur_<mod>``) becomes one group on the x axis with one bar per
    compiler. Bars show ``baseline_runtime / runtime`` where the baseline is
    the "nvhpc" measurement on the "nvptx64" architecture. A configured flag
    set without a measurement is stored as 0 and plotted as a speedup of 0.0.

    Parameters:
        results: nested dict indexed as
            ``results[modname][architecture][compiler][flags_key][kernel][0]``,
            where ``flags_key`` is ``_get_flags_string(flags)``; element 0 is
            used as the runtime.
        compilers_comparison_config: JSON text mapping compiler ->
            architecture -> list of flag strings to plot.
        graph_suffix: suffix of the PDF name
            ``<output_dir>/combined_benchmark_<graph_suffix>.pdf``.
        output_dir: directory for the PDF; created if missing.
        print_values: when true, annotate every bar with its value.
        xaxis_label: accepted for signature compatibility; not used here.
        plot_size: figure size passed to ``plt.subplots``.
        baseline_name: name used in the y-axis label only.

    Raises:
        KeyError: if a measured kernel has no nvhpc/nvptx64 baseline, or a
            kernel name is absent from ``results``.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    fig, axes = plt.subplots(1, 1, squeeze=False, figsize=plot_size)
    ax = axes[0,0]
    bar_data_gpu_panda = {"kernel": [], "compiler": [], "runtime": []}
    baseline_kernel = {}
    for modname in results:
        kernel_suffix = modname.replace("-", "_")
        label_state_name = "nrn_state_{}".format(kernel_suffix)
        label_cur_name = "nrn_cur_{}".format(kernel_suffix)
        for architecture in results[modname]:
            arch_results = results[modname][architecture]
            for compiler in compiler_flags:
                if compiler not in arch_results or architecture not in compiler_flags[compiler]:
                    continue
                # JIT results are keyed by the mechanism-specific kernel name,
                # all other compilers by the generic "_ext" kernels.
                if compiler == "nmodl_jit":
                    state_kernel_name = label_state_name
                    cur_kernel_name = label_cur_name
                else:
                    state_kernel_name = "nrn_state_ext"
                    cur_kernel_name = "nrn_cur_ext"
                for flags in compiler_flags[compiler][architecture]:
                    flags_key = _get_flags_string(flags)
                    if compiler == "clang" and "jit" in flags:
                        compiler_name = "mod2ir"
                    elif compiler == "nmodl_jit":
                        compiler_name = "mod2ir_jit"
                    else:
                        compiler_name = compiler
                    if architecture == "nvptx64" and compiler == "nvhpc":
                        baseline_entry = arch_results["nvhpc"][flags_key]
                        baseline_kernel[label_state_name] = baseline_entry[state_kernel_name][0]
                        baseline_kernel[label_cur_name] = baseline_entry[cur_kernel_name][0]
                    # Configured but unmeasured flag sets get a 0 placeholder.
                    measured = flags_key in arch_results[compiler]
                    if modname != "expsyn":
                        bar_data_gpu_panda["kernel"].append(label_state_name)
                        bar_data_gpu_panda["compiler"].append(compiler_name)
                        bar_data_gpu_panda["runtime"].append(arch_results[compiler][flags_key][state_kernel_name][0] if measured else 0)
                    bar_data_gpu_panda["kernel"].append(label_cur_name)
                    bar_data_gpu_panda["compiler"].append(compiler_name)
                    bar_data_gpu_panda["runtime"].append(arch_results[compiler][flags_key][cur_kernel_name][0] if measured else 0)
    # Turn absolute runtimes into speedups over the per-kernel baseline.
    for i, runtime in enumerate(bar_data_gpu_panda["runtime"]):
        kernel = bar_data_gpu_panda["kernel"][i]
        print("Scaling kernel {} arch {}".format(kernel, bar_data_gpu_panda["compiler"][i]))
        # The 0 placeholder of a missing measurement must not be a divisor.
        bar_data_gpu_panda["runtime"][i] = baseline_kernel[kernel]/runtime if runtime else 0.0
    pd.options.display.float_format = "{:,.2f}".format
    print(bar_data_gpu_panda)
    df_kernels = pd.DataFrame(bar_data_gpu_panda, columns=["kernel", "compiler", "runtime"])
    print(df_kernels, type(df_kernels))
    sns.barplot(x='kernel', y='runtime', hue='compiler', data=df_kernels, ax=ax)
    # Dashed reference line at the baseline (speedup == 1).
    ax.axhline(1., ls='--', color ="black")
    ax.xaxis.label.set_visible(False)
    ax.set_yscale('symlog', base=2, linthresh=0.015)
    ax.set_ylim(0.5, 2)
    ax.set_yticks([0.5, 1, 2])
    # `ax` is the only axes of the figure, so these explicit calls target the
    # same axes the pyplot state functions would.
    ax.set_ylabel('Speedup relative to {}'.format(baseline_name))
    ax.legend(loc="upper right")
    if print_values:
        for container in ax.containers:
            ax.bar_label(container)
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# GPU comparison on a log2-style y axis: merge the hh and expsyn GPU
# measurements into one freshly created results dictionary.
hh_expsyn_gpu_1024x128 = load_pickle_result_file(
    [
        "./reference_data/hh_gpu_20mil_1024x128.pickle",
        "./reference_data/expsyn_gpu_100mil_1024x128.pickle",
    ],
    {},
)

# Compiler -> architecture -> list of flag strings to include in the plot.
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""

# Two-entry palette, one colour per compiler entry of the configuration above.
colors = ['#b2df8a','#969696']
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_combined_relative_gpu_log(
    hh_expsyn_gpu_1024x128,
    compilers_comparison_config,
    "hh_expsyn_gpu_relative_one_plot_log",
    "graphs_output_pandas",
    print_values=False,
    xaxis_label="NVPTX64 Architecture",
    plot_size=(4,3),
    baseline_name="nvhpc",
)

In [None]:
def _hatched_log_speedups(baseline, runtimes):
    """Return ``baseline / runtime`` for every entry of ``runtimes``.

    A runtime of 0 is the placeholder stored for a configuration that has no
    measurement; it is reported as a speedup of 0.0 instead of being used as
    a divisor.
    """
    return [baseline / runtime if runtime else 0.0 for runtime in runtimes]


def _apply_bar_hatches(ax, hatches, hatches_colors):
    """Assign hatch patterns (and optionally edge colours) to the patches of ``ax``.

    Patterns and colours are handed out in patch order and wrap around when
    there are more patches than entries. A fresh cycle is started on every
    call, so the caller's lists are neither consumed nor replaced.
    """
    import itertools

    for bar, hatch in zip(ax.patches, itertools.cycle(hatches)):
        bar.set_hatch(hatch)
    if hatches_colors is not None:
        for bar, hatch_color in zip(ax.patches, itertools.cycle(hatches_colors)):
            bar.set_edgecolor(hatch_color)


def _draw_hatched_log_panel(ax, rows, title, fontsize, hide_xaxis, print_values, hatches, hatches_colors):
    """Draw one hatched, grouped bar panel of speedups on a base-2 symlog y axis."""
    df = pd.DataFrame(rows, columns=["architecture", "compiler", "runtime"])
    sns.barplot(x='architecture', y='runtime', hue='compiler', data=df, ax=ax)
    # Dashed reference line at the baseline (speedup == 1).
    ax.axhline(1., ls='--', color="black")
    ax.set_yscale('symlog', base=2, linthresh=0.015)
    ax.set_ylim(0.125, 2)
    ax.set_yticks([0.125, 0.25, 0.5, 1, 2])
    ax.xaxis.label.set_visible(False)
    ax.yaxis.label.set_visible(False)
    ax.set_title(title, fontsize=fontsize)
    # The per-panel legend is dropped; one shared legend is added by the caller.
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    if hide_xaxis:
        ax.get_xaxis().set_visible(False)
    if print_values:
        for container in ax.containers:
            ax.bar_label(container)
    _apply_bar_hatches(ax, hatches, hatches_colors)


def generate_graph_pandas_combined_relative_log_hatches(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), fontsize=14, baseline_name="intel_svml", hatches=['/', '//', '+', '++', 'X', '-', '--', '|', '||', '.', 'o'], hatches_colors=None, colors=['#6baed6', '#0570b0', '#66c2a4','#238b45','#b2df8a','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252']):
    """Plot per-kernel speedups with hatched bars on a log2-style axis.

    One panel is drawn per kernel: ``nrn_state_<mod>`` (skipped for the
    "expsyn" mechanism) and ``nrn_cur_<mod>``. Bars show
    ``baseline_runtime / runtime``. The baseline runtime is taken from the
    data: the "intel" entry whose flags mention SVML on non-"nvptx64"
    architectures, or the "nvhpc" entry on "nvptx64". If no such entry is
    configured the baseline stays 0.0 and every bar of that mechanism is 0.

    Parameters:
        results: nested dict indexed as
            ``results[modname][architecture][compiler][flags_key][kernel][0]``,
            where ``flags_key`` is ``_get_flags_string(flags)``; element 0 is
            used as the runtime.
        compilers_comparison_config: JSON text mapping compiler ->
            architecture -> list of flag strings to plot.
        graph_suffix: suffix of the PDF name
            ``<output_dir>/combined_benchmark_<graph_suffix>.pdf``.
        output_dir: directory for the PDF; created if missing.
        print_values: when true, annotate every bar with its value.
        xaxis_label: only tested against None; when given, the x axis of
            every panel is hidden (the text itself is not drawn).
        plot_size: figure size passed to ``plt.subplots``.
        fontsize: font size of panel titles, y caption and legend.
        baseline_name: name used in the y-axis caption only.
        hatches: hatch patterns, one per bar of a panel, in bar order. The
            list is only read, never modified.
        hatches_colors: optional edge colours, one per bar in bar order.
        colors: fill colours; installed as the global seaborn palette.

    Raises:
        AssertionError: if a state panel's bar count differs from
            ``len(hatches)``.
        KeyError: if a baseline entry or a kernel name is absent from
            ``results``.
    """
    # Fill colours follow the order in which the compiler/flag bars are
    # generated from the configuration.
    sns.set_palette(sns.color_palette(colors))
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    # One nrn_cur panel per mechanism plus one nrn_state panel for every
    # mechanism except expsyn (hh + expsyn gives the usual 3 panels).
    n_panels = sum(1 if modname == "expsyn" else 2 for modname in results)
    fig, axes = plt.subplots(1, max(n_panels, 1), squeeze=False, figsize=plot_size)
    pd.options.display.float_format = "{:,.2f}".format
    hide_xaxis = xaxis_label is not None
    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }
    ax_index = 0
    for modname in results:
        plot_state = modname != "expsyn"
        kernel_suffix = modname.replace("-", "_")
        state_rows = {"architecture": [], "compiler": [], "runtime": []}
        cur_rows = {"architecture": [], "compiler": [], "runtime": []}
        # Reset both baselines per mechanism so nothing leaks from the
        # previous one and a missing baseline cannot raise a NameError.
        baseline_state = 0.0
        baseline_cur = 0.0
        for architecture in results[modname]:
            arch_results = results[modname][architecture]
            # Other architectures (nvptx64, skylake-avx512, ...) keep their name.
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler in compiler_flags:
                if compiler not in arch_results or architecture not in compiler_flags[compiler]:
                    continue
                # JIT results are keyed by the mechanism-specific kernel name,
                # all other compilers by the generic "_ext" kernels.
                if compiler == "nmodl_jit":
                    state_kernel_name = "nrn_state_{}".format(kernel_suffix)
                    cur_kernel_name = "nrn_cur_{}".format(kernel_suffix)
                else:
                    state_kernel_name = "nrn_state_ext"
                    cur_kernel_name = "nrn_cur_ext"
                for flags in compiler_flags[compiler][architecture]:
                    flags_key = _get_flags_string(flags)
                    uses_svml = "svml" in flags or "SVML" in flags
                    if compiler == "clang" and "jit" in flags:
                        compiler_name = "mod2ir"
                    elif compiler == "nmodl_jit":
                        compiler_name = "mod2ir_jit"
                    else:
                        compiler_name = compiler
                    if uses_svml:
                        compiler_name = compiler_name + "_svml"
                    elif "sleef" in flags or "SLEEF" in flags:
                        compiler_name = compiler_name + "_sleef"
                    if architecture == "nvptx64":
                        is_baseline = compiler == "nvhpc"
                    else:
                        is_baseline = compiler == "intel" and uses_svml
                    if is_baseline:
                        baseline_entry = arch_results[compiler][flags_key]
                        baseline_state = baseline_entry[state_kernel_name][0]
                        baseline_cur = baseline_entry[cur_kernel_name][0]
                    # Configured but unmeasured flag sets get a 0 placeholder.
                    measured = flags_key in arch_results[compiler]
                    if plot_state:
                        state_rows["architecture"].append(architecture_label)
                        state_rows["compiler"].append(compiler_name)
                        state_rows["runtime"].append(arch_results[compiler][flags_key][state_kernel_name][0] if measured else 0)
                    cur_rows["architecture"].append(architecture_label)
                    cur_rows["compiler"].append(compiler_name)
                    cur_rows["runtime"].append(arch_results[compiler][flags_key][cur_kernel_name][0] if measured else 0)
        if plot_state:
            num_locations = len(state_rows["runtime"])
            assert num_locations == len(hatches), "Number of bars should be equal to length of hatches"
            state_rows["runtime"] = _hatched_log_speedups(baseline_state, state_rows["runtime"])
            _draw_hatched_log_panel(axes[0, ax_index], state_rows, "nrn_state_{}".format(modname), fontsize, hide_xaxis, print_values, hatches, hatches_colors)
            ax_index += 1
        cur_rows["runtime"] = _hatched_log_speedups(baseline_cur, cur_rows["runtime"])
        _draw_hatched_log_panel(axes[0, ax_index], cur_rows, "nrn_cur_{}".format(modname), fontsize, hide_xaxis, print_values, hatches, hatches_colors)
        ax_index += 1

    fig.text(0.06, 0.5, 'Speedup over {}'.format(baseline_name), ha='center', va='center', rotation='vertical', fontsize=fontsize)
    # pyplot's current axes is the last panel created, so the shared legend
    # is anchored to the right of it.
    plt.legend(bbox_to_anchor=(1,1), loc="upper left", fontsize=fontsize)
    plt.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close()

In [None]:
# Compiler -> architecture -> list of flag strings to include in the plots.
compilers_comparison_config = """
{
  "intel": {
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "nvhpc": {
    "skylake-avx512": [
      "-fast -O3 -mp=autopar -tp=skylake -Msafeptr=all -Minfo -Mvect=simd:512,gather -mavx512vbmi -mavx512vbmi2 -mavx512vl"
    ]
  },
  "clang": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""

# Merge the AVX-512 measurements and the nvhpc CPU measurements into one
# freshly created results dictionary.
hh_expsyn_cpu_results = load_pickle_result_file(
    [
        "./reference_data/hh_expsyn_mavx512f.pickle",
        "./reference_data/hh_expsyn_nvhpc_cpu.pickle",
    ],
    {},
)
# JSON dump of the merged results, kept in a variable for inspection.
json_object = json.dumps(hh_expsyn_cpu_results, indent = 4)

# Building blocks shared by the hatched variants (11 entries each, one per bar).
hatch_patterns = ['/', '\\', '//', '\\\\', '--', '+', '++', '/|', '-\\', '', 'X']
white_fill = ['w'] * 11
dark_edges = ['#252525'] * 11
light_fill = ['#deebf7', '#9ecae1', '#c7e9c0','#a1d99b','#74c476','#fee6ce','#fdd0a2','#dadaeb','#bcbddc','#d9d9d9','#bdbdbd']

# (graph suffix, styling keyword arguments) for every variant, in plot order.
# An omitted key means the plotting function's default is used.
# NOTE(review): the first suffix equals the one used by the non-hatched
# relative-log plot, so that PDF is overwritten -- confirm this is intended.
hatched_variants = [
    (
        "hh_expsyn_cpu_relative_log",
        {
            "hatches_colors": ['#6baed6', '#0570b0', '#66c2a4','#238b45','#b2df8a','#fdd49e','#fc8d59','#9ebcda','#8c96c6','#969696','#525252'],
            "colors": white_fill,
        },
    ),
    (
        "hh_expsyn_cpu_relative_log_darker_hatchgroupped",
        {
            "hatches_colors": ['#4292c6', '#2171b5', '#41ab5d','#238b45','#006d2c','#fd8d3c','#a63603','#6a51a3','#54278f','#737373','#252525'],
            "colors": white_fill,
        },
    ),
    (
        "hh_expsyn_cpu_relative_log_filled",
        {"hatches_colors": dark_edges},
    ),
    (
        "hh_expsyn_cpu_relative_log_filled_light",
        {"colors": light_fill},
    ),
    (
        "hh_expsyn_cpu_relative_log_filled_light_hatchesgroupped",
        {"hatches_colors": dark_edges, "colors": light_fill},
    ),
]

for variant_suffix, variant_style in hatched_variants:
    generate_graph_pandas_combined_relative_log_hatches(
        hh_expsyn_cpu_results,
        compilers_comparison_config,
        variant_suffix,
        "graphs_output_pandas",
        False,
        xaxis_label="skylake-avx512 Target Microarchitecture",
        plot_size=(13,3.5),
        fontsize=11,
        hatches=list(hatch_patterns),
        **variant_style,
    )

In [None]:
def generate_graph_pandas_combined_relative_gpu_log_hatches(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), fontsize=14, baseline_name="intel_svml", hatches=['X', '-']):
    """Plot per-kernel speedups relative to the nvhpc/nvptx64 build as hatched bars.

    For every mechanism in ``results`` the state and current kernel runtimes
    (element ``[0]`` of each measurement) are collected for each compiler /
    flag combination listed in the configuration, divided into the matching
    nvhpc runtime, and drawn in one grouped bar plot on a base-2 symlog axis.
    The figure is written to ``<output_dir>/combined_benchmark_<graph_suffix>.pdf``.

    Parameters:
        results: nested mapping indexed as
            ``results[modname][architecture][compiler][flags_string][kernel_name]``.
        compilers_comparison_config: JSON text mapping
            compiler -> architecture -> list of flag strings.
        graph_suffix: suffix of the generated PDF file name.
        output_dir: directory for the PDF; created when missing.
        print_values: when true, annotate every bar with its value.
        xaxis_label: accepted for signature compatibility; not used here.
        plot_size: figure size passed to ``plt.subplots``.
        fontsize: font size for tick labels, y label and legend.
        baseline_name: text used in the y-axis label only. The numeric
            baseline is always the "nvhpc" result on "nvptx64".
        hatches: two hatch patterns; the first is applied to the first
            ``len(unique kernels)`` patches, the second to all remaining ones.

    Raises:
        KeyError: if a plotted kernel has no nvhpc/nvptx64 baseline runtime.

    Side effects:
        Sets ``pd.options.display.float_format`` globally and prints the
        collected data (diagnostic output retained from the original).
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    fig, axes = plt.subplots(1, 1, squeeze=False, figsize=plot_size)
    ax = axes[0,0]
    bar_data_gpu_panda = {"kernel": [], "compiler": [], "runtime": []}
    baseline_kernel = {}
    for modname in results:
        mod_suffix = modname.replace("-", "_")
        label_state_name = "nrn_state_{}".format(mod_suffix)
        label_cur_name = "nrn_cur_{}".format(mod_suffix)
        for architecture in results[modname]:
            arch_results = results[modname][architecture]
            for compiler in compiler_flags:
                if compiler not in arch_results or architecture not in compiler_flags[compiler]:
                    continue
                # nmodl_jit results are keyed by mechanism-specific kernel names,
                # every other compiler by the generic "*_ext" names.
                if compiler == "nmodl_jit":
                    state_kernel_name, cur_kernel_name = label_state_name, label_cur_name
                else:
                    state_kernel_name, cur_kernel_name = "nrn_state_ext", "nrn_cur_ext"
                # "expsyn" only contributes its current kernel to the plot.
                plotted_kernels = []
                if modname != "expsyn":
                    plotted_kernels.append((label_state_name, state_kernel_name))
                plotted_kernels.append((label_cur_name, cur_kernel_name))
                is_baseline = architecture == "nvptx64" and compiler == "nvhpc"
                compiler_results = arch_results[compiler]
                for flags in compiler_flags[compiler][architecture]:
                    if compiler == "clang" and "jit" in flags:
                        compiler_name = "mod2ir"
                    elif compiler == "nmodl_jit":
                        compiler_name = "mod2ir_jit"
                    else:
                        compiler_name = compiler
                    flags_key = _get_flags_string(flags)
                    measured = compiler_results[flags_key] if flags_key in compiler_results else None
                    for kernel_label, kernel_name in plotted_kernels:
                        # 0 marks "no measurement for this flag set".
                        runtime = 0 if measured is None else measured[kernel_name][0]
                        if is_baseline and measured is not None:
                            baseline_kernel[kernel_label] = runtime
                        bar_data_gpu_panda["kernel"].append(kernel_label)
                        bar_data_gpu_panda["compiler"].append(compiler_name)
                        bar_data_gpu_panda["runtime"].append(runtime)
    # Fail with a descriptive message instead of a bare KeyError during scaling.
    missing_baselines = sorted(set(bar_data_gpu_panda["kernel"]) - set(baseline_kernel))
    if missing_baselines:
        plt.close(fig)
        raise KeyError("No nvhpc/nvptx64 baseline runtime available for: {}".format(", ".join(missing_baselines)))
    for i, runtime in enumerate(bar_data_gpu_panda["runtime"]):
        kernel = bar_data_gpu_panda["kernel"][i]
        print("Scaling kernel {} arch {}".format(kernel, bar_data_gpu_panda["compiler"][i]))
        # A missing measurement (runtime 0) has no defined speedup; NaN avoids
        # dividing by zero. NOTE(review): depending on the seaborn version a NaN
        # entry may be drawn as an empty bar or dropped, which shifts the
        # index-based hatch assignment below.
        bar_data_gpu_panda["runtime"][i] = baseline_kernel[kernel]/runtime if runtime else float("nan")
    pd.options.display.float_format = "{:,.2f}".format
    print(bar_data_gpu_panda)
    df_kernels = pd.DataFrame(bar_data_gpu_panda, columns=["kernel", "compiler", "runtime"])
    print(df_kernels, type(df_kernels))
    sns.barplot(x='kernel', y='runtime', hue='compiler', data=df_kernels, ax=ax)
    ax.axhline(1., ls='--', color ="black")  # reference line: parity with the baseline
    ax.xaxis.label.set_visible(False)
    ax.xaxis.set_tick_params(labelsize=fontsize)
    ax.set_yscale('symlog', base=2, linthresh=0.015)
    ax.set_ylim(0.5, 2)
    ax.set_yticks([0.5, 1, 2])
    hatches_colors = ['#b2df8a','#969696']
    kernel_count = len(set(bar_data_gpu_panda["kernel"]))
    for i, bar in enumerate(ax.patches):
        print("i: {} bar:{}".format(i, bar))
        # The first `kernel_count` patches get the first hatch style, all later
        # patches the second one.
        hatch_index = 0 if i < kernel_count else 1
        bar.set_hatch(hatches[hatch_index])
        bar.set_edgecolor(hatches_colors[hatch_index])
    ax.set_ylabel('Speedup over {}'.format(baseline_name), fontsize=fontsize)
    ax.legend(loc="upper right", fontsize=fontsize)
    if print_values:
        for container in ax.containers:
            ax.bar_label(container,)
    fig.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)

In [None]:
# Merge the two GPU benchmark pickles into one fresh results dictionary.
hh_expsyn_gpu_1024x128 = {}
hh_expsyn_gpu_1024x128 = load_pickle_result_file(
    [
        "./reference_data/hh_gpu_20mil_1024x128.pickle",
        "./reference_data/expsyn_gpu_100mil_1024x128.pickle",
    ],
    hh_expsyn_gpu_1024x128,
)

# Compiler -> architecture -> flag sets to compare (JSON text, parsed by the plotting function).
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""

# Two-entry all-'w' palette; the plotting function sets hatch and edge colour on the bars.
colors = ['w'] * 2
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_combined_relative_gpu_log_hatches(
    hh_expsyn_gpu_1024x128,
    compilers_comparison_config,
    "hh_expsyn_gpu_relative_one_plot_log",
    "graphs_output_pandas",
    print_values=False,
    xaxis_label="NVPTX64 Architecture",
    plot_size=(7, 4.5),
    baseline_name="nvhpc",
)

In [None]:
def generate_graph_pandas_cpu_combined_variance(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False, xaxis_label=None, plot_size=(12,6), show_xlabels=False):
    """Plot the squared element ``[1]`` of every CPU kernel measurement, one panel per kernel.

    Each mechanism in ``results`` yields a state panel and a current panel,
    except "expsyn", which yields only a current panel. Bars are grouped by
    architecture and coloured by compiler / math-library combination. The
    figure is saved to ``<output_dir>/combined_benchmark_<graph_suffix>.pdf``.

    Parameters:
        results: nested mapping indexed as
            ``results[modname][architecture][compiler][flags_string][kernel_name]``;
            element ``[1]`` of each measurement is squared before plotting
            (presumably the standard deviation of the runtime -- TODO confirm).
        compilers_comparison_config: JSON text mapping
            compiler -> architecture -> list of flag strings.
        graph_suffix: suffix of the generated PDF file name.
        output_dir: directory for the PDF; created when missing.
        print_values: when true, annotate every bar with its value.
        xaxis_label: optional text placed below the panels.
        plot_size: figure size passed to ``plt.subplots``.
        show_xlabels: when false, the x axis of every panel is hidden.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    architecture_labels = {
        "default": "auto-scalar",
        "nehalem": "nehalem-sse2",
        "broadwell": "broadwell-avx2",
    }
    # Size the grid from the data instead of assuming exactly three panels.
    panel_count = sum(1 if modname == "expsyn" else 2 for modname in results)
    fig, axes = plt.subplots(1, max(panel_count, 1), squeeze=False, figsize=plot_size)

    def _series_name(compiler, flags):
        # Legend label: compiler family plus the vector math library named in the flags.
        if compiler == "clang" and "jit" in flags:
            name = "mod2ir"
        elif compiler == "nmodl_jit":
            name = "mod2ir_jit"
        else:
            name = compiler
        if "svml" in flags or "SVML" in flags:
            name = name + "_svml"
        elif "sleef" in flags or "SLEEF" in flags:
            name = name + "_sleef"
        return name

    def _squared_spread(measured, kernel_name):
        # 0 marks "no measurement for this flag set".
        if measured is None:
            return 0
        spread = measured[kernel_name][1]
        return spread*spread

    def _draw_panel(ax, rows, title):
        # One grouped bar chart; the shared legend is added once after all panels.
        frame = pd.DataFrame(rows, columns=["architecture", "compiler", "variance"])
        sns.barplot(x='architecture', y='variance', hue='compiler', data=frame, ax=ax)
        ax.xaxis.label.set_visible(False)
        ax.yaxis.label.set_visible(False)
        ax.set_title(title)
        panel_legend = ax.get_legend()
        if panel_legend is not None:  # no legend exists when nothing was drawn
            panel_legend.remove()
        if not show_xlabels:
            ax.get_xaxis().set_visible(False)
        if print_values:
            for container in ax.containers:
                ax.bar_label(container,)

    ax_index = 0
    for modname in results:
        has_state_panel = modname != "expsyn"
        state_rows = []
        cur_rows = []
        for architecture in results[modname]:
            arch_results = results[modname][architecture]
            architecture_label = architecture_labels.get(architecture, architecture)
            for compiler in compiler_flags:
                if compiler not in arch_results or architecture not in compiler_flags[compiler]:
                    continue
                # nmodl_jit results are keyed by mechanism-specific kernel names,
                # every other compiler by the generic "*_ext" names.
                if compiler == "nmodl_jit":
                    state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                    cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
                else:
                    state_kernel_name = "nrn_state_ext"
                    cur_kernel_name = "nrn_cur_ext"
                compiler_results = arch_results[compiler]
                for flags in compiler_flags[compiler][architecture]:
                    compiler_name = _series_name(compiler, flags)
                    flags_key = _get_flags_string(flags)
                    measured = compiler_results[flags_key] if flags_key in compiler_results else None
                    if has_state_panel:
                        state_rows.append((architecture_label, compiler_name, _squared_spread(measured, state_kernel_name)))
                    cur_rows.append((architecture_label, compiler_name, _squared_spread(measured, cur_kernel_name)))
        if has_state_panel:
            _draw_panel(axes[0, ax_index], state_rows, "nrn_state_{}".format(modname))
            ax_index += 1
        _draw_panel(axes[0, ax_index], cur_rows, "nrn_cur_{}".format(modname))
        ax_index += 1
    if xaxis_label is not None:
        fig.text(0.5, 0.04, xaxis_label, ha='center', va='center')
    # The plotted quantity is a squared value, so it is not a runtime in seconds.
    # NOTE(review): unit assumes element [1] is a standard deviation in seconds.
    fig.text(0.06, 0.5, 'Runtime variance (s$^2$)', ha='center', va='center', rotation='vertical')
    # Shared legend, anchored outside the right-most panel.
    axes[0, -1].legend(bbox_to_anchor=(1.04,1), loc="upper left")
    fig.savefig("{}/combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)

In [None]:
# Merge the AVX-512 CPU benchmark pickles into one fresh results dictionary.
hh_expsyn_avx512f_results = {}
hh_expsyn_avx512f_results = load_pickle_result_file(
    [
        "./reference_data/hh_expsyn_mavx512f.pickle",
        "./reference_data/hh_expsyn_nvhpc_cpu.pickle",
    ],
    hh_expsyn_avx512f_results,
)

# One palette entry per plotted series. The trailing comments give the intended
# series for each colour, in the order of the configuration below.
colors = [
    '#6baed6',  # intel
    '#0570b0',  # intel svml
    '#66c2a4',  # gcc
    '#238b45',  # gcc_svml
    '#b2df8a',  # nvhpc
    '#fdd49e',  # clang
    '#fc8d59',  # clang_svml
    '#9ebcda',  # mod2ir
    '#8c96c6',  # mod2ir_svml
    '#969696',  # mod2ir_jit_svml
    '#525252',  # mod2ir_jit_sleef
]
sns.set_palette(sns.color_palette(colors))

# Compiler -> architecture -> flag sets to compare (JSON text, parsed by the plotting function).
compilers_comparison_config = """
{
  "intel": {
    "skylake-avx512": [
      "-O2 -mavx512f -prec-div -fopenmp",
      "-O2 -mavx512f -prec-div -fimf-use-svml -fopenmp"
    ]
  },
  "gcc": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -mavx512f -ffast-math -ftree-vectorize -mveclibabi=svml -fopenmp"
    ]
  },
  "nvhpc": {
    "skylake-avx512": [
      "-fast -O3 -mp=autopar -tp=skylake -Msafeptr=all -Minfo -Mvect=simd:512,gather -mavx512vbmi -mavx512vbmi2 -mavx512vl"
    ]
  },
  "clang": {
    "skylake-avx512": [
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp -fveclib=SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SVML",
      "-O3 -march=skylake-avx512 -mtune=skylake -ffast-math -fopenmp jit SLEEF"
    ]
  },
  "nmodl_jit": {
    "skylake-avx512": [
      "SVML_nnancontractafn",
      "SLEEF_nnancontractafn"
    ]
  }
}
"""

generate_graph_pandas_cpu_combined_variance(
    hh_expsyn_avx512f_results,
    compilers_comparison_config,
    "variance",
    "graphs_output_pandas",
    print_values=True,
)

In [None]:
def generate_graph_pandas_gpu_combined_variance(results, compilers_comparison_config, graph_suffix, output_dir, print_values=False):
    """Plot the squared element ``[1]`` of the GPU kernel measurements, one panel per mechanism.

    For every mechanism in ``results`` the "nvptx64" measurements of each
    configured compiler / flag set are collected for the state and current
    kernels and drawn as grouped bars. The figure is saved to
    ``<output_dir>/gpu_combined_benchmark_<graph_suffix>.pdf``.

    Parameters:
        results: nested mapping indexed as
            ``results[modname]["nvptx64"][compiler][flags_string][kernel_name]``;
            element ``[1]`` of each measurement is squared before plotting
            (presumably the standard deviation of the runtime -- TODO confirm).
        compilers_comparison_config: JSON text mapping
            compiler -> architecture -> list of flag strings.
        graph_suffix: suffix of the generated PDF file name.
        output_dir: directory for the PDF; created when missing.
        print_values: when true, annotate every bar with its value.
    """
    os.makedirs(output_dir, exist_ok=True)
    compiler_flags = json.loads(compilers_comparison_config)
    fig = plt.figure(figsize=(12, 6))
    architecture = "nvptx64"
    # Keep the two-column layout for up to two mechanisms, grow beyond that.
    column_count = max(len(results), 2)

    def _squared_spread(measured, kernel_name):
        # 0 marks "no measurement for this flag set".
        if measured is None:
            return 0
        spread = measured[kernel_name][1]
        return spread*spread

    for panel_index, modname in enumerate(results):
        rows = []
        arch_results = results[modname][architecture]
        for compiler in arch_results:
            if compiler not in compiler_flags or architecture not in compiler_flags[compiler]:
                continue
            # nmodl_jit results are keyed by mechanism-specific kernel names,
            # every other compiler by the generic "*_ext" names.
            if compiler == "nmodl_jit":
                state_kernel_name = "nrn_state_{}".format(modname.replace("-", "_"))
                cur_kernel_name = "nrn_cur_{}".format(modname.replace("-", "_"))
            else:
                state_kernel_name = "nrn_state_ext"
                cur_kernel_name = "nrn_cur_ext"
            compiler_results = arch_results[compiler]
            for flags in compiler_flags[compiler][architecture]:
                if compiler == "clang" and "jit" in flags:
                    compiler_name = "mod2ir"
                elif compiler == "nmodl_jit":
                    compiler_name = "mod2ir_jit"
                else:
                    compiler_name = compiler
                flags_key = _get_flags_string(flags)
                measured = compiler_results[flags_key] if flags_key in compiler_results else None
                rows.append(("nrn_state", compiler_name, _squared_spread(measured, state_kernel_name)))
                rows.append(("nrn_current", compiler_name, _squared_spread(measured, cur_kernel_name)))
        df_kernels = pd.DataFrame(rows, columns=["kernel", "compiler", "runtime"])
        ax = fig.add_subplot(1, column_count, panel_index+1)
        sns.barplot(x='kernel', y='runtime', hue='compiler', data=df_kernels, ax=ax)
        if print_values:
            # Separate loop variable: reusing the panel index here used to
            # overwrite it, so the first-panel check below never matched.
            for container in ax.containers:
                ax.bar_label(container,)
        ax.set_xlabel("Kernel Name")
        if panel_index == 0:
            # The plotted quantity is a squared value, not a runtime in seconds.
            # NOTE(review): unit assumes element [1] is a standard deviation in seconds.
            ax.set_ylabel("Runtime variance (s$^2$)")
        else:
            ax.set(ylabel=None)
        ax.set_title("OpenACC and MOD2IR comparison for {}".format(modname))
    fig.savefig("{}/gpu_combined_benchmark_{}.pdf".format(output_dir, graph_suffix), format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)

In [None]:
# Merge the two GPU benchmark pickles into one fresh results dictionary.
hh_expsyn_gpu_1024x128 = {}
hh_expsyn_gpu_1024x128 = load_pickle_result_file(
    [
        "./reference_data/hh_gpu_20mil_1024x128.pickle",
        "./reference_data/expsyn_gpu_100mil_1024x128.pickle",
    ],
    hh_expsyn_gpu_1024x128,
)

# Compiler -> architecture -> flag sets to compare (JSON text, parsed by the plotting function).
compilers_comparison_config = """
{
  "nvhpc": {
    "nvptx64": [
      "-O3 -gpu=nordc,fastmath"
    ]
  },
  "nmodl_jit": {
    "nvptx64": [
      "libdevice_nnancontractafn"
    ]
  }
}
"""

# Two-colour palette for the two configured compilers.
colors = [
    '#b2df8a',
    '#bdbdbd',
]
sns.set_palette(sns.color_palette(colors))

generate_graph_pandas_gpu_combined_variance(
    hh_expsyn_gpu_1024x128,
    compilers_comparison_config,
    "hh_expsyn_gpu",
    "graphs_output_pandas",
    print_values=False,
)