In [None]:
%matplotlib inline

In [None]:
import copy
import datetime
import os
import time

import matplotlib
import numpy
import pandas
import scipy.stats
import seaborn

import tokio
import abcutils
# Use a 16 pt base font size for every figure drawn after this point.
matplotlib.rcParams['font.size'] = 16

## Load and Synthesize Data from CSV

In [None]:
# Load the dataset through the abcutils sc18paper helper.  Later cells use the
# result as a pandas DataFrame (.copy(), column .isin(), .rename(), .itertuples()).
filtered_df = abcutils.sc18paper.load_dataset()

In [None]:
# Platforms to include; rows are kept only if their '_test_platform' value
# appears in this list.  The commented-out entry is excluded from the plots.
TEST_PLATFORMS = [
#   'scratch1@edison',
    'cscratch@cori-knl',
    'mira-fs1@mira'
]
# Benchmarks to include; rows are kept only if their '_benchmark_id' value
# appears in this list.
BENCHMARK_IDS = [
    'ior_shared_write',
    'ior_fpp_write',
    'hacc_io_write_fpp_write',
    'vpicio_uni_shared_write',
    'ior_shared_read',
    'ior_fpp_read',
    'hacc_io_read_fpp_read',
    'dbscan_read_shared_read'
]

# Name of the DataFrame column whose values are inserted into the time series
# and drawn as the heatmap color.
plot_metric = 'darshan_normalized_perf_by_max'

# NOTE(review): not referenced by any cell visible in this notebook excerpt --
# confirm whether it is still needed.
group_by = ['_test_platform', '_benchmark_id']

# Echo the settings in effect: the metric's label (falling back to the raw
# column name when no label is configured) and the start/end dates that bound
# the time series built below.
print("plot_metric =", abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start =", abcutils.sc18paper.DATE_START.isoformat())
print("date_end =", abcutils.sc18paper.DATE_END.isoformat())

## Matplotlib Heatmap

In [None]:
# Define regions of the heatmap to zoom in on, highlight in
# the overview, and draw accompanying graphics.  Slices are
# expressed in indices of the numpy.array used to draw the
# z dimension in pcolormesh:
#   "x" slices the first dimension of ts.dataset (and ts.timestamps)
#   "y" slices the second dimension of ts.dataset (and ts.columns)
zoom_areas = [
    {
        "x": slice(212, 232),
        "y": slice(8, 16),
    },
    {
        "x": slice(100, 120),
        "y": slice(0, 8),
    }
]

In [None]:
def value2label(test_platform, benchmark_id):
    """Return the "<test_platform>, <benchmark_id>" label for one column."""
    return "{}, {}".format(test_platform, benchmark_id)

In [None]:
# One label per (benchmark, platform) pair, benchmarks varying slowest.
columns = [
    value2label(platform, benchmark)
    for benchmark in BENCHMARK_IDS
    for platform in TEST_PLATFORMS
]

# Empty time series spanning the study period with one column per label.
# Columns are registered in sorted order; timestep=86400 is one day if the
# unit is seconds (TODO confirm against tokio.timeseries.TimeSeries).
ts = tokio.timeseries.TimeSeries(
    dataset_name='blah',
    start=abcutils.sc18paper.DATE_START,
    end=abcutils.sc18paper.DATE_END,
    timestep=86400,
    num_columns=len(columns),
    column_names=sorted(columns),
    sort_hex=False,
)

In [None]:
# Work on a copy so nothing done to example_df can alter filtered_df.
example_df = filtered_df.copy()

# Keep only the rows for the benchmarks and platforms being plotted.
test_filter = example_df['_benchmark_id'].isin(BENCHMARK_IDS)
test_filter &= example_df['_test_platform'].isin(TEST_PLATFORMS)

# itertuples() replaces column names that start with an underscore with
# positional names, so give the columns we read attribute-friendly names.
rename_filter = {
    '_datetime_start': 'datetime_start',
    '_benchmark_id': 'benchmark_id',
    '_test_platform': 'test_platform',
}

# Only carry the columns that are actually read.  This keeps each row tuple
# small and avoids building a dict of every column for every row just to look
# up a single value.
selected_rows = example_df.loc[test_filter, list(rename_filter) + [plot_metric]]
for row in selected_rows.rename(columns=rename_filter).itertuples(index=False):
    ts.insert_element(row.datetime_start.to_pydatetime(),
                      value2label(row.test_platform, row.benchmark_id),
                      getattr(row, plot_metric))

In [None]:
def draw_perf_summary(x, y, z, ax=None):
    """Draw a heatmap of ``z`` with one row per label in ``y``.

    Parameters
    ----------
    x : sequence
        Horizontal coordinates.  Either one per row of ``z`` or one more than
        that (cell edges).  When only one coordinate per row is given, a
        trailing edge is extrapolated from the last two coordinates so that
        the final row of ``z`` is drawn as well.
    y : sequence of str
        Labels for the columns of ``z``.  Benchmark and platform identifiers
        found in each label are replaced by their short display names before
        the labels are used as y tick labels.
    z : 2-D array
        Values indexed as ``z[x_index, y_index]``.  Entries equal to 0.0 are
        masked and drawn in the colormap's "bad" color.
    ax : matplotlib Axes, optional
        Axes to draw into.  When omitted, a new figure is created holding the
        heatmap and a colorbar labelled with the plotted metric; the figure
        height scales with ``len(TEST_PLATFORMS)``.

    Returns
    -------
    matplotlib Axes
        The axes containing the heatmap.
    """
    if ax is None:
        fig = matplotlib.pyplot.figure()
        fig.set_size_inches(16, (2.5 * len(TEST_PLATFORMS)))
        fig.subplots_adjust(hspace=0.0, wspace=0.01)

        # Wide panel for the heatmap, narrow panel for the colorbar
        gridspec = matplotlib.gridspec.GridSpec(nrows=1, ncols=2, width_ratios=[16, 1])

        heatmap_ax = fig.add_subplot(gridspec[0])
    else:
        gridspec = None
        heatmap_ax = ax
        fig = heatmap_ax.get_figure()

    # Copy the colormap before customizing it: set_bad() on the registered
    # instance would change gist_heat for every other plot in the session.
    cmap = copy.copy(matplotlib.cm.gist_heat)
    cmap.set_bad(color='#666666', alpha=0.85)

    # pcolormesh with flat shading needs N+1 edges for N cells.  The y edges
    # below already satisfy that; make x consistent so the last time step is
    # neither dropped (older matplotlib) nor rejected (newer matplotlib).
    x_edges = numpy.asarray(x)
    if len(x_edges) == z.shape[0] and len(x_edges) > 1:
        x_edges = numpy.append(x_edges, x_edges[-1] + (x_edges[-1] - x_edges[-2]))

    plotface = heatmap_ax.pcolormesh(x_edges,
                                     numpy.arange(z.shape[1] + 1),
                                     numpy.ma.masked_where(z.T == 0.0, z.T),
                                     cmap=cmap,
                                     linewidth=0,
                                     rasterized=True)

    # Set y ticks: one label per column, centered on its row of cells
    column_labels = []
    for label in y:
        for key, val in abcutils.CONFIG['benchmark_labels_short'].items():
            label = label.replace(key, val)
        label = label.replace('scratch1@edison', 'Edison')
        label = label.replace('cscratch@cori-knl', 'Cori')
        label = label.replace('mira-fs1@mira', 'Mira')
        column_labels.append(label)
    heatmap_ax.set_yticks(numpy.arange(len(column_labels)) + 0.5)
    heatmap_ax.set_yticklabels(column_labels, ha='right')

    # Presumably formats the x ticks as dates -- see abcutils.plot
    abcutils.plot.fix_xticks_timeseries(heatmap_ax)

    if gridspec is not None:
        # Set colorbar (only when this function created the figure layout)
        cbar_ax = fig.add_subplot(gridspec[1])
        fig.colorbar(plotface, cax=cbar_ax)
        cbar_ax.set_ylabel(abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))

    # Return the axes that was drawn on, which is not necessarily the first
    # axes of the figure when the caller supplied `ax`.
    return heatmap_ax

In [None]:
x = ts.timestamps
y = ts.columns
z = ts.dataset

ax = draw_perf_summary(x, y, z)

XPAD = 2 # expand the box in x so the thick lines don't cover data we want to highlight
last_index = len(x) - 1
for zoom in zoom_areas:
    # Pad both sides by XPAD time steps.  Indices are clamped so a zoom area
    # near either end of the series cannot wrap around or run off the array.
    left = max(zoom['x'].start - XPAD, 0)
    right = min(zoom['x'].stop + XPAD, last_index)
    xy = (x[left], zoom['y'].start)
    width = x[right] - x[left]
    height = zoom['y'].stop - zoom['y'].start

    # Draw a thick black line and a thinner green line over it
    # to make the zoomed-in region highlights stand out amidst
    # the noisy heatmap
    for edgecolor, linewidth in (('#000000FF', 8), ('C2', 4)):
        ax.add_patch(matplotlib.patches.Rectangle(
                     xy=xy,
                     width=width,
                     height=height,
                     facecolor="#00000000",
                     edgecolor=edgecolor,
                     linewidth=linewidth))

In [None]:
# savefig() does not create missing directories, so make sure figs/ exists
# before writing the overview heatmap into it.
os.makedirs('figs', exist_ok=True)
ax.get_figure().savefig('figs/summary-heatmap.pdf', bbox_inches='tight')

## Draw zoomed-in regions of interest

In [None]:
for zoom in zoom_areas:
    # Cut the same window out of the timestamps, the column labels and the data
    x = ts.timestamps[zoom['x']]
    y = ts.columns[zoom['y']]
    z = ts.dataset[zoom['x'], zoom['y']]

    # One stand-alone figure per zoom area
    ax = draw_perf_summary(x, y, z)
    fig = ax.get_figure()

    # 4 inches wide; 2.5 inches tall for every 8 rows shown
    fig.set_size_inches(4, 2.5 * len(y) / 8)

    # Only tick dates whose weekday() is 6 (Sunday for datetime objects)
    abcutils.plot.fix_xticks_timeseries(
        ax,
        format="%b %d",
        criteria=(lambda when: when.weekday() == 6))

    # The second axes of the figure holds the colorbar; hide it
    colorbar_ax = fig.axes[1]
    colorbar_ax.set_visible(False)

### Create a single diagram for the paper

In [None]:
# squeeze=False keeps the result indexable even when there is only one zoom
# area; take the single row so `axes` is a 1-D array of Axes.
fig, axes = matplotlib.pyplot.subplots(nrows=1,
                                       ncols=len(zoom_areas),
                                       figsize=(4*len(zoom_areas), 3),
                                       squeeze=False)
axes = axes[0]

# Draw heatmaps, one panel per zoom area
for index, zoom in enumerate(zoom_areas):
    ax = axes[index]
    x = ts.timestamps[zoom['x']]
    y = ts.columns[zoom['y']]
    z = ts.dataset[zoom['x'], zoom['y']]
    draw_perf_summary(x, y, z, ax=ax)

# Add window dressing
alphabet = 'abcdefghijklmnopqrstuvwxyz'
for index, ax in enumerate(axes):
    # Tick labels have the form "<machine>, <benchmark>".  Keep only the
    # benchmark on the axis and move the machine name into the panel title.
    machine = ''  # reset per panel so a name never carries over from another panel
    yticklabels = []
    for label in ax.get_yticklabels():
        machine, benchmark = label.get_text().split(', ', 1)
        yticklabels.append(benchmark)
    ax.set_yticklabels(yticklabels)
    ax.set_title("(%s) %s" % (alphabet[index], machine), fontsize=16)
    # Only tick dates whose weekday() is 6 (Sunday for datetime objects)
    abcutils.plot.fix_xticks_timeseries(ax, format="%b %d", criteria=(lambda when: when.weekday() == 6))

# Only the leftmost panel keeps its y tick labels.
# NOTE(review): this assumes every panel lists the same benchmarks in the same
# order -- confirm if zoom_areas changes.
for ax in axes[1:]:
    ax.set_yticklabels([])
fig.subplots_adjust(hspace=0.0, wspace=0.05)

# savefig() does not create missing directories, so make sure figs/ exists
os.makedirs('figs', exist_ok=True)
fig.savefig('figs/regions-heatmap.pdf', bbox_inches='tight')