In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension so it can be configured by a later cell.
%load_ext autoreload

In [None]:
# Pick up edits to imported modules without restarting the kernel.
%autoreload 2

In [None]:
import copy
import datetime
import os
import time

import matplotlib
import matplotlib.cm
import matplotlib.gridspec
import matplotlib.pyplot
import numpy
import pandas
import scipy.stats
import seaborn

import abcutils
import tokio
matplotlib.rcParams.update({'font.size': 16})

## Load and Synthesize Data from CSV

In [None]:
# Load the dataset that every later cell filters and plots from.
filtered_df = abcutils.sc18paper.load_dataset()

In [None]:
# Platform identifiers to plot; matched against the `_test_platform` column.
TEST_PLATFORMS = [
#   'scratch1@edison',
    'cscratch@cori-knl',
    'mira-fs1@mira'
]
# Benchmark identifiers to plot; matched against the `_benchmark_id` column.
BENCHMARK_IDS = [
    'ior_shared_write',
    'ior_fpp_write',
    'hacc_io_write_fpp_write',
    'vpicio_uni_shared_write',
    'ior_shared_read',
    'ior_fpp_read',
    'hacc_io_read_fpp_read',
    'dbscan_read_shared_read'
]

# Column whose values fill the heatmaps below.
plot_metric = 'darshan_normalized_perf_by_max'

# NOTE(review): not referenced by any other cell visible in this notebook.
group_by = ['_test_platform', '_benchmark_id']

# Single-argument print(...) emits the same text under Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print("plot_metric = %s" % abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start = %s" % abcutils.sc18paper.DATE_START.isoformat())
print("date_end = %s" % abcutils.sc18paper.DATE_END.isoformat())

## Seaborn Heatmap

In [None]:
# For every platform, build a (job id x benchmark) table of `plot_metric` and
# record the start time of each job; both dicts are keyed by platform id.
job_times = {}
heatmap_dat = {}
platform_groups = filtered_df.groupby('_test_platform')
for platform_id in TEST_PLATFORMS:
    platform_df = platform_groups.get_group(platform_id)
    job_ids = sorted(set(platform_df['_jobid'].values))

    # Start time taken from the first row of each job, listed in job_ids order.
    # One drop_duplicates pass replaces re-filtering the whole frame per job.
    first_rows = platform_df.drop_duplicates(subset='_jobid', keep='first')
    job_times[platform_id] = list(
        first_rows.set_index('_jobid')['_datetime_start'].loc[job_ids].values)

    heatmap_dat[platform_id] = pandas.DataFrame(0.0, index=job_ids, columns=BENCHMARK_IDS)
    benchmark_groups = platform_df.groupby(by='_benchmark_id')
    for benchmark_id in BENCHMARK_IDS:
        benchmark_df = benchmark_groups.get_group(benchmark_id).set_index('_jobid')
        # Assignment aligns on job id, so jobs lacking this benchmark become NaN
        heatmap_dat[platform_id][benchmark_id] = benchmark_df[plot_metric]

In [None]:
# One seaborn heatmap per platform: benchmarks down the y axis, one column per job.
# Sorting the keys fixes the panel order instead of relying on dict ordering.
platform_ids = sorted(heatmap_dat)

# squeeze=False keeps `axes` two-dimensional even when a single platform is plotted,
# so the same indexing works for any number of panels.
fig, axes = matplotlib.pyplot.subplots(nrows=len(platform_ids), ncols=1, squeeze=False)
fig.set_size_inches(20, 5*len(platform_ids))
fig.subplots_adjust(hspace=.2)

for ax, platform_id in zip(axes[:, 0], platform_ids):
    seaborn.heatmap(heatmap_dat[platform_id].transpose(), ax=ax)
    ax.set_title(platform_id)
    # Hide the x axis label and ticks (the columns are individual job ids)
    ax.xaxis.label.set_visible(False)
    ax.set_xticks([])

## Matplotlib Heatmap

In [None]:
def value2label(test_platform, benchmark_id):
    """Return the "<platform>, <benchmark>" label used to name one heatmap row."""
    label_parts = (test_platform, benchmark_id)
    return "%s, %s" % label_parts

In [None]:
# One label per (benchmark, platform) pairing, grouped by benchmark
columns = [
    value2label(test_platform, benchmark_id)
    for benchmark_id in BENCHMARK_IDS
    for test_platform in TEST_PLATFORMS
]

# Time series spanning DATE_START..DATE_END whose columns are the sorted labels
column_names = sorted(columns)
ts = tokio.timeseries.TimeSeries(
    dataset_name='blah',
    start=abcutils.sc18paper.DATE_START,
    end=abcutils.sc18paper.DATE_END,
    timestep=86400,  # presumably seconds, i.e. one day per row -- TODO confirm
    num_columns=len(column_names),
    column_names=column_names,
    sort_hex=False)

In [None]:
# Insert one `plot_metric` value into the time series per selected benchmark run.
example_df = filtered_df.copy()

# Keep only the benchmarks and platforms chosen in the configuration cell
test_filter = example_df['_benchmark_id'].isin(BENCHMARK_IDS)
test_filter &= example_df['_test_platform'].isin(TEST_PLATFORMS)
selected_df = example_df[test_filter]

# Walk just the four columns that are needed.  This avoids renaming the
# underscore-prefixed columns for itertuples() and building a dict of every
# column for every row merely to read a single value.
for datetime_start, test_platform, benchmark_id, metric_value in zip(
        selected_df['_datetime_start'],
        selected_df['_test_platform'],
        selected_df['_benchmark_id'],
        selected_df[plot_metric]):
    ts.insert_element(datetime_start.to_pydatetime(),
                      value2label(test_platform, benchmark_id),
                      metric_value)

In [None]:
def draw_perf_summary(x, y, z, ax=None):
    """Draw ``z`` as a heatmap with one labelled row per entry of ``y``.

    Args:
        x: Horizontal coordinates passed straight to ``pcolormesh``; callers in
            this notebook pass ``ts.timestamps`` or a slice of it.
        y: Iterable of row labels; callers in this notebook pass
            ``ts.columns`` or a slice of it.  Benchmark and platform
            identifiers inside each label are replaced by shorter display
            names before being used as y tick labels.
        z: Two-dimensional array whose second axis matches ``y``.  It is
            transposed for plotting, and cells equal to 0.0 are masked and
            drawn in gray.
        ax: Axes to draw into.  When omitted, a new figure is created holding
            the heatmap plus a narrow colorbar axes to its right; no colorbar
            is drawn when an axes is supplied.

    Returns:
        The axes containing the heatmap.
    """
    if ax is None:
        fig = matplotlib.pyplot.figure()
        fig.set_size_inches(16, (2.5 * len(TEST_PLATFORMS)))
        fig.subplots_adjust(hspace=0.0, wspace=0.01)

        # Wide heatmap panel plus a thin strip reserved for the colorbar
        gridspec = matplotlib.gridspec.GridSpec(nrows=1, ncols=2, width_ratios=[16, 1])
        heatmap_ax = fig.add_subplot(gridspec[0])
    else:
        gridspec = None
        heatmap_ax = ax
        fig = heatmap_ax.get_figure()

    # Work on a private copy: calling set_bad() on the registered colormap
    # would alter gist_heat for every other plot in the session.
    cmap = copy.copy(matplotlib.cm.gist_heat)
    cmap.set_bad(color='#666666', alpha=0.85)
    # NOTE(review): the y edges have one more entry than z has columns, but x is
    # passed through as given; confirm how the installed matplotlib treats an x
    # whose length equals the number of rows in z.
    plotface = heatmap_ax.pcolormesh(x,
                                     numpy.arange(z.shape[1]+1),
                                     numpy.ma.masked_where(z.T == 0.0, z.T),
                                     cmap=cmap,
                                     linewidth=0,
                                     rasterized=True)

    # Set y ticks: one shortened label centered on each heatmap row
    column_labels = []
    for label in y:
        # .items() behaves the same on Python 2 and 3 (.iteritems() is Python 2 only)
        for key, val in abcutils.CONFIG['benchmark_labels_short'].items():
            label = label.replace(key, val)
        label = label.replace('scratch1@edison', 'Edison')
        label = label.replace('cscratch@cori-knl', 'Cori')
        label = label.replace('mira-fs1@mira', 'Mira')
        column_labels.append(label)
    heatmap_ax.set_yticks(numpy.arange(len(column_labels)) + 0.5)
    heatmap_ax.set_yticklabels(column_labels, ha='right')

    abcutils.plot.fix_xticks_timeseries(heatmap_ax)

    if gridspec is not None:
        # Set colorbar in the strip reserved next to the heatmap
        colorbar_ax = fig.add_subplot(gridspec[1])
        fig.colorbar(plotface, cax=colorbar_ax)
        colorbar_ax.set_ylabel(abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))

    return heatmap_ax

In [None]:
# Plot the complete time series: every time step and every labelled column.
x = ts.timestamps
y = ts.columns
z = ts.dataset

ax = draw_perf_summary(x, y, z)

# savefig does not create missing directories, so make sure the target exists
# (os.makedirs(..., exist_ok=True) is unavailable on Python 2).
output_path = 'figs/summary_heatmap.pdf'
output_dir = os.path.dirname(output_path)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
ax.get_figure().savefig(output_path, bbox_inches='tight')

## Highlight regions of interest

In [None]:
# Zoom in on 20 consecutive time steps of columns 8-15.
# The timestamps and the data are cut with the same slice so that the dates on
# the x axis belong to the values shown; previously the timestamps came from
# 212:232 while the data came from 210:230.
# NOTE(review): the displayed data is unchanged by this fix -- confirm that
# 210:230 (rather than 212:232) is the window that was meant to be highlighted.
time_window = slice(210, 230)
column_window = slice(8, 16)
x = ts.timestamps[time_window]
y = ts.columns[column_window]
z = ts.dataset[time_window, column_window]
ax = draw_perf_summary(x, y, z)
fig = ax.get_figure()
# Scale the height by the number of rows actually shown
fig.set_size_inches(4, (2.5 / 8 * len(y)))
# Label only the positions whose weekday() is 6, formatted like "Jan 01"
abcutils.plot.fix_xticks_timeseries(ax, format="%b %d", criteria=(lambda x: x.weekday() == 6))

# Hide the second axes of the figure (the colorbar created by draw_perf_summary)
fig.axes[1].set_visible(False)

In [None]:
# Zoom in on time steps 100-119 of the first eight columns.
time_steps = slice(100, 120)
label_columns = slice(0, 8)
x, y, z = (ts.timestamps[time_steps],
           ts.columns[label_columns],
           ts.dataset[time_steps, label_columns])

ax = draw_perf_summary(x, y, z)
fig = ax.get_figure()

# Scale the height by the number of rows actually shown
height_per_row = 2.5 / 8
fig.set_size_inches(4, (height_per_row * len(y)))

# Label only the positions whose weekday() is 6, formatted like "Jan 01"
abcutils.plot.fix_xticks_timeseries(
    ax,
    format="%b %d",
    criteria=(lambda x: x.weekday() == 6))

# Hide the second axes of the figure (the colorbar created by draw_perf_summary)
colorbar_ax = fig.axes[1]
colorbar_ax.set_visible(False)

In [None]:
# Draw both regions of interest side by side and save them as a single figure.
fig, axes = matplotlib.pyplot.subplots(nrows=1, ncols=2, figsize=(8, 3))

# (time step slice, column slice) for each panel, left to right.  Each region
# uses ONE time slice for both the timestamps and the data so that the dates on
# the x axis belong to the values shown; the first panel previously took its
# timestamps from 212:232 while its data came from 210:230.
# NOTE(review): confirm that 210:230 is the intended window for the first panel.
regions = [
    (slice(210, 230), slice(8, 16)),
    (slice(100, 120), slice(0, 8)),
]
for ax, (time_window, column_window) in zip(axes, regions):
    x = ts.timestamps[time_window]
    y = ts.columns[column_window]
    z = ts.dataset[time_window, column_window]
    draw_perf_summary(x, y, z, ax=ax)

alphabet = 'abcdefg'
for index, ax in enumerate(axes):
    # Every tick label reads "<machine>, <benchmark>": keep the benchmark on the
    # axis and lift the machine name into the panel title.
    machine = ''  # reset per panel so a title never reuses another panel's name
    yticklabels = []
    for label in ax.get_yticklabels():
        machine, benchmark = label.get_text().split(', ', 1)
        yticklabels.append(benchmark)
    ax.set_yticklabels(yticklabels)
    ax.set_title("(%s) %s" % (alphabet[index], machine), fontsize=16)
    # Label only the positions whose weekday() is 6, formatted like "Jan 01"
    abcutils.plot.fix_xticks_timeseries(ax, format="%b %d", criteria=(lambda x: x.weekday() == 6))

# Drop the right-hand panel's y tick labels
axes[1].set_yticklabels([])
fig.subplots_adjust(hspace=0.0, wspace=0.05)

# savefig does not create missing directories, so make sure the target exists
# (os.makedirs(..., exist_ok=True) is unavailable on Python 2).
output_path = 'figs/regions-heatmap.pdf'
output_dir = os.path.dirname(output_path)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
fig.savefig(output_path, bbox_inches='tight')