In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; its mode is selected in the next cell.
%load_ext autoreload

In [None]:
# Mode 2: reload all imported modules before each cell executes, so edits to
# modules on disk are picked up without restarting the kernel.
%autoreload 2

In [None]:
import os
import time
import datetime
import warnings
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
import pandas
import numpy
import scipy.stats
import abcutils

# Seed numpy's global RNG from the wall clock so that randomly generated
# values differ from run to run.  The seed is kept in a named constant and
# printed so that any particular run can be reproduced later by passing the
# printed value to numpy.random.seed() instead.
RANDOM_SEED = int(time.time())
numpy.random.seed(RANDOM_SEED)
print("numpy random seed = %d" % RANDOM_SEED)

## Load and Synthesize Data from CSV

This process loads each summary CSV file, creates a few derived metrics, and then merges each system's CSV into a single global dataset that can be sliced and diced by system, benchmark, or any other way.  We are now caching the processed CSV in HDF5 format to speed up initial data ingest at the beginning of each analysis.  Delete the `CACHE_FILE` to re-generate this cache (e.g., when the contents of the CSV are updated).

In [None]:
# Load the dataset through abcutils' sc18paper helper.  The cells below add a
# column to filtered_df and group it by platform and benchmark, so it is
# presumably a pandas DataFrame with one row per benchmark run -- TODO confirm.
filtered_df = abcutils.sc18paper.load_dataset()

## Demonstrate a Single Test Platform

Look at one combination of (compute system, file system, benchmark) to show what this UMAMI analysis can do.

### Define Input Parameters

In [None]:
# Test platform to analyze.  Other values that have been used here:
#   'scratch2@edison', 'cscratch@cori-knl', 'cscratch@cori-haswell'
TEST_PLATFORM = 'mira-fs1@mira'

# Benchmark to analyze.  Other values that have been used here:
#   'ior_fpp_write', 'vpicio_uni_shared_write', 'ior_shared_write',
#   'hacc_io_read_fpp_read'
BENCHMARK_ID = 'dbscan_read_shared_read'

# Column that is plotted and from which loci are derived
plot_metric = 'darshan_normalized_perf_by_max'
# Length of one day, in seconds
delta = datetime.timedelta(days=1).total_seconds()

# Columns whose value pair selects a single (platform, benchmark) series
group_by = ['_test_platform', '_benchmark_id']

# Attach a column of uniform random numbers in [0, 1) to every row.  Note that
# this writes into filtered_df itself and is regenerated each time this cell
# is run.
filtered_df['random'] = numpy.random.random((len(filtered_df), ))

# Independent copy of just the rows for the selected platform and benchmark
example_df = filtered_df.groupby(by=group_by).get_group((TEST_PLATFORM, BENCHMARK_ID)).copy()

# Single-argument print(...) calls produce the same output under Python 2 and
# Python 3.  Label lookups fall back to the raw identifier when no
# human-readable label is configured.
print("test_platform = %s" % TEST_PLATFORM)
print("benchmark_id = %s" % abcutils.CONFIG['benchmark_labels'].get(BENCHMARK_ID, BENCHMARK_ID))
print("plot_metric = %s" % abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start = %s" % abcutils.sc18paper.DATE_START.isoformat())
print("date_end = %s" % abcutils.sc18paper.DATE_END.isoformat())

In [None]:
# Width of simple moving average (SMA) short/long windows, expressed as spans
# of time.  These are used in this notebook in place of the package-level
# abcutils.features.SHORT_WINDOW and abcutils.features.LONG_WINDOW values.
short_window = pandas.Timedelta(days=14)
long_window = pandas.Timedelta(days=49)

# Minimum number of measurements a region must contain before a UMAMI diagram
# is rendered for it
min_measures = abcutils.features.MIN_REGION

# The windows are time spans (they print as e.g. "14 days 00:00:00"), so the
# messages describe a duration rather than a count of measurements.
print("Short window will average over %s at a time" % short_window)
print("Long window will average over %s at a time" % long_window)
print("Ignoring regions with fewer than %d measurements" % min_measures)

In [None]:
# Metrics to include in UMAMI renderings and analysis.  Anything that
# _might_ affect performance should be included here.
#
# Maps a metric name (presumably a column of the dataset -- TODO confirm) to a
# human-readable label.  Only the keys are consumed in this notebook: they are
# passed to abcutils.plot.generate_umami via umami_rows.keys().  Entries that
# are commented out below are metrics currently excluded from the diagrams.
umami_rows = {
    'darshan_normalized_perf_by_max': "Application Performance",
    'coverage_factor_bw': "Bandwidth CF",
#   'coverage_factor_nodehrs',
    'coverage_factor_opens': "open(2) CF",
    'coverage_factor_stats': "stat(2) CF",
    'coverage_factor_ops': "IOPS CF",
    'fs_ave_mds_cpu': "Metadata Server Load",
#   'fs_tot_metadata_ops',
    'fs_ave_oss_cpu': "Data Server Load",
#   'fs_tot_open_ops',
    'fshealth_ost_most_full_pct': "File System Fullness",
    'fshealth_ost_overloaded_oss_count': "LUN Failover Count",
#   'jobsdb_concurrent_nodes',
    'topology_job_max_radius': "Max Job Radius",
    # Column of random numbers added to the dataset in an earlier cell
    'random': "Random Variable",
}

## Calculate Simple Moving Averages (SMAs)

Compare a short-window SMA and a long-window SMA and use the places where they cross over to divide the entire year into _regions_ of interesting benchmark behavior.

For each region defined above, find the _minimum_ performance observed and denote that measurement (and its associated job) as a _locus_.  We then collate all _loci_ into a set of poorly performing benchmarks that are worth contextualizing with UMAMI.

We also plot the raw performance data (light blue bars), the short SMA (orange line), the long SMA (green line), and all loci (red bars) to visually verify that the loci we've identified are indeed poorly performing jobs.

In [None]:
# Both feature-extraction calls share the same SMA window settings
sma_windows = dict(short_window=short_window, long_window=long_window)

# Crossover points between the short and long SMAs
intercepts = abcutils.features.sma_intercepts(example_df, plot_metric, **sma_windows)

# Loci taken from region minima only
loci = abcutils.features.generate_loci_sma(
    example_df,
    plot_metric,
    mins=True,
    maxes=False,
    **sma_windows)

ax = abcutils.plot.locus_summary(example_df, plot_metric, loci, align="edge")

# Title the figure with the benchmark's label (or its raw ID if unlabeled)
benchmark_label = abcutils.CONFIG['benchmark_labels'].get(BENCHMARK_ID, BENCHMARK_ID)
ax.get_figure().suptitle("%s on %s" % (benchmark_label, TEST_PLATFORM))

# Add boundaries: one dashed vertical line per SMA intercept, spanning the
# fixed 0..1 y range
ymin, ymax = ax.set_ylim(0, 1)
for boundary_time in intercepts['_datetime_start']:
    boundary_epoch = abcutils.core.pd2epoch(boundary_time)
    ax.plot([boundary_epoch, boundary_epoch],
            [ymin, ymax],
            color='black',
            linestyle='--',
            linewidth=1)

In [None]:
# Display the SMA intercepts computed in the previous cell
intercepts

In [None]:
# Report how many loci were found, then display them.  print(...) with a
# single argument behaves the same under Python 2 and Python 3.
print(len(loci))
loci

## Generate UMAMI Diagrams Around Loci

Generate UMAMI diagrams that _end_ at each locus and have `long_window` days of benchmark data preceding them.  Don't bother creating UMAMI diagrams for loci whose region contains fewer than `min_measures` benchmark measurements.

Note that this process mixes up the semantic meaning of `long_window`.  When defining loci, `long_window` refers to a number of benchmark measurements, not days.  Ideally, one benchmark runs each day, so this semantic difference is trivial.  However, in reality there are days when no benchmarks are run, meaning loci are defined using a series of `long_window` benchmark measurements that often span _more than_ `long_window` days.

Practically speaking, this does not change very much as long as the ratio of `long_window` in days to `long_window` in benchmark measurements is close to 1.0.

In [None]:
# Upper bound on the number of UMAMI diagrams rendered by this cell
max_renders = 1

print("Rendering a maximum of %d UMAMI diagrams" % max_renders)

rendered = 0
for locus in loci.itertuples():
    if rendered == max_renders:
        break

    # Positional bounds of this locus's region within example_df
    region_idx0 = example_df.index.get_loc(locus.region_start)
    region_idxf = example_df.index.get_loc(locus.region_end)
    # When the locus is the very first row of its region, pull in one extra
    # preceding row (if there is one)
    if locus.region_start == locus.Index and region_idx0 > 0:
        region_idx0 -= 1

    print("%s %s %s" % (example_df.iloc[region_idx0]['_datetime_start'],
                        example_df.index[region_idx0],
                        example_df.index[region_idxf]))

    # NOTE(review): iloc slicing excludes the row at region_idxf, so a locus
    # located exactly at region_end would be missing from umami_region and the
    # get_loc() lookup below would raise KeyError -- confirm that loci never
    # fall on region_end.
    umami_region = example_df.iloc[region_idx0:region_idxf]
    if len(umami_region) >= min_measures:
        abcutils.plot.generate_umami(umami_region,
                                     umami_rows.keys(),
                                     highlight_index=umami_region.index.get_loc(locus.Index))
        rendered += 1
    else:
        # Report the locus's own timestamp and the threshold actually tested
        # above (min_measures); the previous message formatted an entire
        # column with %s and a Timedelta with %d.
        print("Skipping locus at %s because it has only %d data points (%d required)" % (
            example_df.loc[locus.Index, '_datetime_start'],
            len(umami_region),
            min_measures))

In [None]:
# Crossover points between the short and long SMAs for the selected series
sma_intercepts = abcutils.features.sma_intercepts(
    example_df, plot_metric, short_window, long_window)

# Loci taken from region minima only, using the same window settings
loci = abcutils.features.generate_loci_sma(
    example_df, plot_metric,
    mins=True, maxes=False,
    short_window=short_window, long_window=long_window)

ax = abcutils.plot.locus_summary(example_df,
                                 plot_metric,
                                 loci,
                                 sma_intercepts=sma_intercepts,
                                 align='edge',
                                 regioncolors=['#0000000A', '#FFFFFF00'])

# Time window shown on the x axis; this corresponds to a pretty region to
# graph.  An alternative window, corresponding to one of the UMAMIs identified
# above, is 2017-09-21 through 2017-10-01.
xlim_start = pandas.Timestamp(datetime.datetime(2017, 8, 10))
xlim_end = pandas.Timestamp(datetime.datetime(2017, 10, 3))
ax.set_xlim(abcutils.core.pd2epoch(xlim_start), abcutils.core.pd2epoch(xlim_end))

# Fix the y range *before* drawing the boundaries so the dashed lines span the
# final plot height rather than whatever limits the axes had beforehand
ax.grid(False)
ymin, ymax = ax.set_ylim(0, 1)

# Add boundaries: one dashed vertical line per SMA intercept
for boundary_time in sma_intercepts['_datetime_start']:
    boundary_epoch = abcutils.core.pd2epoch(boundary_time)
    ax.plot([boundary_epoch, boundary_epoch],
            [ymin, ymax],
            color='black',
            linestyle='--',
            linewidth=1)

ax.get_figure().set_size_inches(8, 3)
# Collapse a multi-line y label onto a single line
ax.set_ylabel(ax.get_ylabel().replace("\n", " "))
# Keep only tick labels whose date ordinal is a multiple of 10
abcutils.plot.fix_xticks_timeseries(ax,
                                    format="%b %d",
                                    ha="center",
                                    rotation=0,
                                    criteria=(lambda x: x.toordinal() % 10 == 0))

# Label the first two lines on the axes for the legend.
# NOTE(review): assumes locus_summary draws the short SMA first and the long
# SMA second -- confirm against abcutils.plot.locus_summary.
ax.get_lines()[0].set_label("$SMA_{%d}$" % short_window.days)
ax.get_lines()[1].set_label("$SMA_{%d}$" % long_window.days)
ax.legend(loc='lower right',
          bbox_to_anchor=(1.01, -0.04),
          ncol=2,
          handletextpad=0.1,
          columnspacing=0.5)

# Single-argument print(...) calls behave the same under Python 2 and Python 3
print("%s on %s" % (abcutils.CONFIG['benchmark_labels'].get(BENCHMARK_ID, BENCHMARK_ID), TEST_PLATFORM))

# File name is built from the part of TEST_PLATFORM before '@' (with dashes
# removed) and the part of BENCHMARK_ID before its first underscore
output_file = "figs/shortterm-%s-%s.pdf" % (TEST_PLATFORM.split('@', 1)[0].replace('-', ''), BENCHMARK_ID.split('_', 1)[0])

# Create the output directory if needed so savefig does not fail on a fresh
# checkout
output_dir = os.path.dirname(output_file)
if output_dir and not os.path.isdir(output_dir):
    os.makedirs(output_dir)

print("Saving to %s" % output_file)
ax.get_figure().savefig(output_file, bbox_inches='tight')