In [None]:
%matplotlib inline

In [None]:
import os
import time
import datetime
import warnings
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
import pandas
import numpy
import scipy.stats
import abcutils

## Load and Synthesize Data from CSV

This process loads each summary CSV file, creates a few derived metrics, and then merges each system's CSV into a single global dataset that can be sliced and diced by system, benchmark, or any other way.  The processed dataset is cached in HDF5 format to speed up the initial data ingest at the beginning of each analysis.  Delete the `CACHE_FILE` to regenerate this cache (e.g., when the contents of the CSV files are updated).

In [None]:
# Load the dataset used throughout this notebook via abcutils' SC18-paper
# helper.
# NOTE(review): the name `filtered_df` suggests load_dataset() already applies
# row filters -- confirm which ones in abcutils.sc18paper.
filtered_df = abcutils.sc18paper.load_dataset()

## Demonstrate a Single Test Platform

Look at one combination of (compute system, file system, benchmark) to show what this UMAMI analysis can do.

### Define Input Parameters

In [None]:
# Name of the metric that is passed to the SMA feature detection and plotting
# calls later in this notebook.
plot_metric = 'darshan_normalized_perf_by_max'
# One day expressed in seconds (a float, as returned by total_seconds()).
delta = datetime.timedelta(days=1).total_seconds()

# Echo the configuration.  The metric is shown using its entry in
# abcutils.CONFIG['metric_labels'] when one exists, otherwise the raw name.
print("plot_metric =", abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start =", abcutils.sc18paper.DATE_START.isoformat())
print("date_end =", abcutils.sc18paper.DATE_END.isoformat())

## Region-defined Correlation

In [None]:
# Simple moving average (SMA) window widths: the short window spans 14 days
# and the long window spans 49 days.
SHORT_WINDOW = pandas.Timedelta('14 days')
LONG_WINDOW = pandas.Timedelta('49 days')

# Report the chosen window widths.
print("Short window will average over %s at a time" % (SHORT_WINDOW,))
print("Long window will average over %s at a time" % (LONG_WINDOW,))

## Build diagrams illustrating how the time series is partitioned

In [None]:
# Test platform used for the example figures.  The commented-out value is an
# alternative platform that can be swapped in.
# test_platform = 'cscratch@cori-knl'
test_platform = 'mira-fs1@mira'

# Select only the rows belonging to the chosen platform.  The grouping key is
# given as a scalar column name (not a one-element list) so that get_group()
# accepts the plain string key on every pandas version; grouping on a
# length-1 list requires a length-1 tuple key in newer pandas releases.
# A KeyError is raised if the platform is not present in the dataset.
example_df = filtered_df.groupby(by='_test_platform').get_group(test_platform)

# Compute the SMA intercepts for this platform's metric using the short and
# long window widths; the result is handed to the plotting calls.
sma_intercepts = abcutils.features.sma_intercepts(example_df,
                                                  plot_metric,
                                                  short_window=SHORT_WINDOW,
                                                  long_window=LONG_WINDOW)

In [None]:
def sma_overlaps(**kwargs):
    """Draw an SMA-overlap plot and apply this notebook's figure formatting.

    All keyword arguments are forwarded unchanged to
    ``abcutils.plot.sma_overlaps``.  The axes object it returns is then
    reformatted in place (ticks, axis limits, label, figure size).

    Nothing is returned, so using this call as the last statement of a cell
    displays only the figure.
    """
    ax = abcutils.plot.sma_overlaps(**kwargs)

    # Formatting that plot.sma_overlaps does not apply itself.
    ax.set_yticks(numpy.arange(0, 1.2, 0.25))

    # Zoom the x axis to the fixed window 2017-08-01 .. 2018-01-01.  The
    # limits are given as seconds since the epoch (local time, via
    # time.mktime).  This fixed window is used instead of the full
    # abcutils.sc18paper.DATE_START / DATE_END range.
    zoom_start = time.mktime(datetime.datetime(2017, 8, 1).timetuple())
    zoom_end = time.mktime(datetime.datetime(2018, 1, 1).timetuple())
    ax.set_xlim(zoom_start, zoom_end)

    # Tick fix-up must run after the limits are set, as in the original order.
    abcutils.plot.fix_xticks_timeseries(ax)

    # Collapse a multi-line y label onto a single line.
    ax.set_ylabel(ax.get_ylabel().replace("\n", " "))
    ax.set_ylim(0.5, 1.0)
    ax.get_figure().set_size_inches((8, 4))

In [None]:
# First figure: pass sma_intercepts=None, i.e. no precomputed intercepts.
# NOTE(review): presumably this draws the metric and its two moving averages
# without any region markers -- confirm in abcutils.plot.sma_overlaps.
sma_overlaps(dataframe=example_df,
             plot_metric=plot_metric,
             short_window=SHORT_WINDOW,
             long_window=LONG_WINDOW,
             sma_intercepts=None)

In [None]:
# Second figure: same data, now passing the precomputed SMA intercepts with
# thin dashed lines.  Both region colors are '#00000000', which in
# matplotlib's #RRGGBBAA notation has alpha 00 (fully transparent).
# NOTE(review): presumably this shows the intercept boundaries without
# shading the regions between them -- confirm in abcutils.plot.sma_overlaps.
sma_overlaps(dataframe=example_df,
             plot_metric=plot_metric,
             short_window=SHORT_WINDOW,
             long_window=LONG_WINDOW,
             sma_intercepts=sma_intercepts,
             linestyle='--',
             linewidth=1,
             regioncolors=['#00000000', '#00000000'])

In [None]:
# Third figure: same as the previous call except for the region colors.
# In #RRGGBBAA notation '#0A00000A' is a near-black color at alpha 0x0A
# (about 4% opaque) and '#FFFFFF00' is fully transparent.
# NOTE(review): presumably the two colors are applied to alternating regions
# so that every other region is faintly shaded -- confirm in
# abcutils.plot.sma_overlaps.
sma_overlaps(dataframe=example_df,
             plot_metric=plot_metric,
             short_window=SHORT_WINDOW,
             long_window=LONG_WINDOW,
             sma_intercepts=sma_intercepts,
             linestyle='--',
             linewidth=1,
             regioncolors=['#0A00000A', '#FFFFFF00'])