In [None]:
%config InlineBackend.figure_format = "svg"
import html
from collections import defaultdict
from pathlib import Path
import json
from pprint import pformat
from tempfile import NamedTemporaryFile

from IPython.display import display, HTML
from ipywidgets import widgets
import pandas as pd
import matplotlib.pyplot as plt
import compress_pickle
from tabulate import tabulate

from machinestate.html_export import to_html as ms_to_html
from kerncraft.machinemodel import MachineModel
from kerncraft.kernel import KernelCode

from hpc_inspect.inspector import *

In [None]:
# Build the machine model first; the kernel object needs it to be constructed.
machine = MachineModel(path_to_yaml='machine.yml')
kernel_source = Path('kernel.c').read_text()
kernel = KernelCode(kernel_source, machine)

In [None]:
# Load the results table this report is built from.
# NOTE(review): compress_pickle unpickles arbitrary Python objects -- only load
# files from a trusted source.
data = compress_pickle.load('dataframe.pickle.lzma')
#data.set_index(['pmodel', 'compiler', 'incore_model', 'cores', 'define'], inplace=True)

# Distinct, non-missing values of each categorical column. dropna() removes
# both None and NaN, and unique() keeps the order of first appearance.
compilers = list(data.compiler.dropna().unique())
incore_models = list(data.incore_model.dropna().unique())
# Previously this list held one entry per row (duplicates and missing values
# included); it is now built the same way as the two lists above.
cache_predictors = list(data.cache_predictor.dropna().unique())

## Stencil Properties

In [None]:
# TODO present origin and property of kernel code
# Take the first non-missing entry of the column as the iteration count.
iterations_per_cacheline = int(data.get('iterations per cacheline').dropna().iloc[0])

## Kernel Source Code

In [None]:
# Print the kernel code attached to the job of the first row in the table.
first_job = data.iloc[0].job
print(first_job.workload.kernel.get_code())

### In-Core Analysis

In [None]:
# Get analysis alongside with assembly for each compiler and in-core model.
# Layout: one outer tab per compiler, each holding one inner tab per in-core model.
cc_tab_children = []
for cc in compilers:
    icm_tab_children = []
    for icm in incore_models:
        model_outputs = data.query(
            "compiler == @cc and incore_model == @icm"
        )['in-core model output'].dropna().unique()
        # Check the length explicitly: testing the array itself for truth
        # raises ValueError as soon as it holds more than one distinct output.
        model_output = model_outputs[0] if len(model_outputs) else ''
        icm_tab_children.append(
            widgets.HTML(value='<pre style="line-height: 1;">{}</pre>'.format(html.escape(model_output))))
    icm_tab = widgets.Tab(children=icm_tab_children)
    # Titles are assigned only after the children exist: ipywidgets 8 sizes its
    # title list from the children, so titling an empty Tab should fail there.
    for j, icm in enumerate(incore_models):
        icm_tab.set_title(j, icm)
    cc_tab_children.append(icm_tab)

cc_tab = widgets.Tab(children=cc_tab_children)
for i, cc in enumerate(compilers):
    cc_tab.set_title(i, cc)

cc_tab

## Layer Conditions

In [None]:
# Collect the layer conditions of every "<cache> LCs" column into one table,
# indexed by cache level and tail.
lc_data = []
for col in data.columns:
    if not col.endswith(' LCs'):
        continue
    cache = col.split(' ')[0]
    # .iloc[0] takes the first non-missing entry by position. A plain [0] is a
    # label lookup and raises KeyError when the row labelled 0 has no entry
    # for this column.
    for lc in data.get(col).dropna().iloc[0]:
        # Work on a copy so the dicts stored inside `data` stay untouched and
        # the cell can be re-run without side effects.
        lc_data.append(dict(lc, cache=cache))
lc_df = pd.DataFrame(lc_data)
idx = pd.MultiIndex.from_frame(lc_df[['cache', 'tail']])
lc_df = pd.DataFrame(lc_data, columns=['condition', 'hits', 'misses', 'evicts'], index=idx)
lc_df
# TODO present size of N for condition to be fulfilled

## Single Core Grid Scaling

### ECM Prediction vs Performance

In [None]:
# One panel per (compiler, in-core model): stacked ECM contributions, the
# measured single-core runtime and the Roofline prediction over the grid size.
data_defines = data.sort_values(by=['define'])
fig, axs = plt.subplots(len(compilers), len(incore_models),
                        squeeze=False,
                        figsize=(4*len(incore_models),3*len(compilers)),
                        sharey=True,
                        sharex=True)


def T_to_P(T):
    """Convert a runtime in cycles per cacheline into iterations per second."""
    return float(machine['clock']) / T * iterations_per_cacheline


for i_cc, cc in enumerate(compilers):
    for i_icm, icm in enumerate(incore_models):
        ax = axs[i_cc, i_icm]
        ax.set_title("{} {}".format(cc, icm))
        ax.set_xscale("log")
        ax.xaxis.set_tick_params(labelbottom=True)
        #ax.yaxis.set_tick_params(labelleft=True)
        ax.grid()
        ax.set_axisbelow(True)  # places gridlines behind everything else

        # ECM
        ecm_data = data_defines.query('pmodel=="ECM" and cores==1 and incore_model == @icm and compiler == @cc')
        ax.stackplot(
            ecm_data['define'],
            ecm_data['T_RegL1'], ecm_data['T_L1L2'], ecm_data['T_L2L3'], ecm_data['T_L3MEM'],
            labels=('T_RegL1', 'T_L1L2', 'T_L2L3', 'T_L3MEM'))
        ax.plot(ecm_data['define'], ecm_data.T_comp, label='T_comp')

        # Benchmark
        bench_data = data_defines.query('pmodel=="Benchmark" and cores==1 and compiler == @cc')
        ax.plot(bench_data.define, bench_data['performance [cy/CL]'], '+', label='Measured')

        # RooflineIACA -- restricted to one core like the two queries above,
        # otherwise multi-core predictions end up in this single-core plot.
        roof_data = data_defines.query(
            'pmodel=="RooflineIACA" and cores==1 and incore_model == @icm and compiler == @cc')
        ax.plot(roof_data.define, roof_data['performance [cy/CL]'], label='RL pred.')

        if i_icm == 0 and i_cc == 0:
            ax.legend(loc='upper left')
        if i_icm == 0:
            ax.set_ylabel("cycle per {} iterations".format(iterations_per_cacheline))
        if i_cc == len(compilers) - 1:
            ax.set_xlabel("dimension length")

# The y-axis is shared, so the lower limit is pinned once, after all panels are
# drawn. Doing it inside the loop appears to freeze the shared limits on the
# first panel's data and leave earlier right-hand axes stale.
axs[0, 0].set_ylim(0)

# Right-hand axis on the last column: same ticks as the left axis, labelled
# with the equivalent performance.
for ax in axs[:, -1]:
    ax_right = ax.twinx()
    ymin, ymax = ax.get_ylim()
    ticks = [t for t in ax.get_yticks() if ymin <= t <= ymax]
    # Fix the tick positions before labelling them so labels and ticks cannot
    # drift apart.
    ax_right.set_yticks(ticks)
    ax_right.set_yticklabels(
        ["{:.1f}".format(T_to_P(t)/1e9) if t > 0 else "inf" for t in ticks])
    ax_right.set_ylim(ymin, ymax)
    ax_right.set_ylabel("giga iterations per second")
# TODO move legend somewhere more useful
# TODO name columns and rows
# TODO find good metric (or name) for x-axis
# TODO cache-analysis distinction (tabs?)

### Data Transfers

In [None]:
# Placeholder: the per-predictor selection is evaluated but not used yet.
for predictor in cache_predictors:
    data.query('cache_predictor==@predictor')

# TODO include predicted information into pandas
# TODO include measured information into pandas (inspector.py:574)

## Multi Core Thread Scaling

In [None]:
# One panel per (compiler, in-core model): measured and predicted performance
# over the number of cores, for the largest grid size in the table.
# The y-axis is shared because only the first column shows y tick labels and
# only the last column carries the converted right-hand axis.
fig, axs = plt.subplots(len(compilers), len(incore_models), squeeze=False,
                        figsize=(4*len(incore_models),3*len(compilers)),
                        sharey=True)
max_define = data.define.max()
core_ticks = range(int(data.cores.min()), int(data.cores.max() + 1))


def P_to_T(P):
    """Convert a performance in iterations per second into cycles per cacheline."""
    return iterations_per_cacheline*float(machine['clock'])/P


for i_cc, cc in enumerate(compilers):
    for i_icm, icm in enumerate(incore_models):
        ax = axs[i_cc, i_icm]
        ax.set_title("{} {}".format(cc, icm))
        ax.xaxis.set_tick_params(labelbottom=True)
        ax.grid()
        ax.set_axisbelow(True)  # places gridlines behind everything else

        bench_data = data.query(
            'pmodel=="Benchmark" and define==@max_define and compiler == @cc'
        ).sort_values(by=['cores'])
        ax.plot(bench_data.cores, bench_data['performance [It/s]']/1e9, label='Measured')
        ecm_data = data.query(
            'pmodel=="ECM" and define==@max_define and incore_model==@icm and compiler == @cc'
        ).sort_values(by=['cores'])
        ax.plot(ecm_data.cores, ecm_data['performance [It/s]']/1e9, label='ECM pred.')
        roof_data = data.query(
            'pmodel=="RooflineIACA" and define==@max_define and incore_model==@icm and compiler == @cc'
        ).sort_values(by=['cores'])
        ax.plot(roof_data.cores, roof_data['performance [It/s]']/1e9, label='RL pred.')

        ax.set_xticks(core_ticks)
        if i_icm == 0 and i_cc == 0:
            ax.legend()
        if i_icm == 0:
            ax.set_ylabel("giga iterations per second")
        if i_cc == len(compilers) - 1:
            ax.set_xlabel("cores")

# Pin the lower limit once, after all panels are drawn, so the shared upper
# limit is taken from the data of every panel.
axs[0, 0].set_ylim(0)

# Right-hand axis on the last column: same ticks as the left axis, labelled
# with the equivalent runtime.
for ax in axs[:, -1]:
    ax_right = ax.twinx()
    ymin, ymax = ax.get_ylim()
    ticks = [t for t in ax.get_yticks() if ymin <= t <= ymax]
    # Fix the tick positions before labelling them so labels and ticks cannot
    # drift apart.
    ax_right.set_yticks(ticks)
    ax_right.set_yticklabels(
        ["{:.1f}".format(P_to_T(t*1e9)) if t > 0 else "inf" for t in ticks])
    ax_right.set_ylim(ymin, ymax)
    ax_right.set_ylabel("cycle per {} iterations".format(iterations_per_cacheline))

## System Information

In [None]:
# Render the recorded machine state as a self-contained HTML report.
with open('machinestate.json', encoding='utf-8') as f:
    machinestate = json.load(f)
# The report is handed to the iframe through its `srcdoc` attribute, so no
# script is needed. The earlier version injected it with jQuery (`$`), which
# this notebook never loads itself, and always wrote into the first matching
# iframe on the page.
# html.escape() also escapes quotes, so the markup is safe inside the attribute.
display(HTML('''
    <iframe class="machinestatewrapper" width="100%" height="1000" srcdoc="'''
             + html.escape(ms_to_html(machinestate))
             + '''"></iframe>'''))

# Notebook things to look at:
 * https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Styling.html
