In [None]:
%matplotlib inline
%load_ext autoreload

In [None]:
%autoreload 2

In [None]:
import os
import time
import datetime
import warnings
import matplotlib
import matplotlib.pyplot
import matplotlib.ticker
matplotlib.rcParams.update({'font.size': 16})
import pandas
import numpy
import scipy.stats
import abcutils

## Load and Synthesize Data from CSV

This process loads each summary CSV file, creates a few derived metrics, and then merges each system's CSV into a single global dataset that can be sliced and diced by system, benchmark, or any other way.  We are now caching the processed CSV in HDF5 format to speed up initial data ingest at the beginning of each analysis.  Delete the `CACHE_FILE` to re-generate this cache (e.g., when the contents of the CSV are updated).

In [None]:
CACHE_FILE = 'cache.hdf5'
if not (CACHE_FILE and os.path.isfile(CACHE_FILE)):
    # No usable cache on disk: synthesize each system's summary CSV and stack
    # the results row-wise into one global dataframe.
    df = pandas.concat(
        [
            abcutils.load_and_synthesize_csv(
                'summaries/edison-summaries_2017-02-14-2018-02-28.csv', system='edison'),
            abcutils.load_and_synthesize_csv(
                'summaries/cori-summaries_2017-02-14-2018-02-28.csv', system='cori'),
            abcutils.load_and_synthesize_csv(
                'summaries/alcf-tokio-results-2_14_17-2_15_18.csv', system='mira'),
        ],
        axis='rows')
    # Setting CACHE_FILE to a false value disables caching entirely
    if CACHE_FILE:
        df.to_hdf(CACHE_FILE, key='summary', mode='w', format='fixed', complevel=9, complib='zlib')
        print("Cached synthesized CSV to %s" % CACHE_FILE)
else:
    print("Loading from cache %s" % CACHE_FILE)
    df = pandas.read_hdf(CACHE_FILE, 'summary')

# Renumber rows 0..N-1 so that the concatenated frame has no repeated
# (degenerate) index labels
df.index = pandas.Index(data=numpy.arange(len(df)), dtype='int64')

In [None]:
def pd2epoch(x):
    """Convert a pandas Timestamp into seconds since the epoch.

    The timestamp is first turned into a plain `datetime.datetime` and then
    passed through `time.mktime`, so it is interpreted in the local time zone
    and any sub-second component is discarded.
    """
    return time.mktime(x.to_pydatetime().timetuple())

In [None]:
# Columns whose value pair selects one (test platform, benchmark) series
# when the global dataframe is grouped
group_by = [
    '_test_platform',
    '_benchmark_id',
]

## Demonstrate a Single Test Platform

Look at one combination of (compute system, file system, benchmark) to show what this UMAMI analysis can do.

### Define Input Parameters

In [None]:
# Test platform to demonstrate; the commented-out values are alternatives
# test_platform = 'scratch2@edison'
test_platform = 'cscratch@cori-knl'
#test_platform = 'cscratch@cori-haswell'
# test_platform = 'mira-fs1@mira'

# Benchmark to demonstrate; the commented-out values are alternatives
benchmark_id = 'ior_fpp_write'
# benchmark_id = 'dbscan_read_shared_read'
# benchmark_id = 'vpicio_uni_shared_write'
# benchmark_id = 'ior_shared_write'
# benchmark_id = 'hacc_io_read_fpp_read'

# Performance metric to plot, and the half-open date range [date_start, date_end)
plot_metric = 'darshan_agg_perf_by_slowest_posix_gibs'
date_start = datetime.datetime(2017, 2, 14)
date_end = datetime.datetime(2018, 3, 1)

group_by = ['_test_platform', '_benchmark_id']
# One day expressed in seconds; used as the bar width on the epoch-seconds x axis
delta = datetime.timedelta(days=1).total_seconds()

filtered_df = df.groupby(by=group_by).get_group((test_platform, benchmark_id))
filtered_df = filtered_df[filtered_df['darshan_total_gibs_posix'] > 1.0]
filtered_df = filtered_df[filtered_df['_datetime_start'] < date_end]
filtered_df = filtered_df[filtered_df['_datetime_start'] >= date_start]
# Drops every row if benchmark_id is 'hacc_io_write_shared_write'; a no-op otherwise
filtered_df = filtered_df[filtered_df['_benchmark_id'] != 'hacc_io_write_shared_write']

# Single-argument print(...) produces the same output under Python 2 and 3
print("test_platform = %s" % test_platform)
print("benchmark_id = %s" % abcutils.CONFIG['benchmark_labels'].get(benchmark_id, benchmark_id))
print("plot_metric = %s" % abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start = %s" % date_start.isoformat())
print("date_end = %s" % date_end.isoformat())

In [None]:
# Width of simple moving average (SMA) short/long windows, counted in
# benchmark measurements
short_window = 7
long_window = 28

# Single-argument print(...) produces the same output under Python 2 and 3
print("Short window will average over %d measurements at a time" % short_window)
print("Long window will average over %d measurements at a time" % long_window)

In [None]:
# Metrics to include in UMAMI renderings and analysis.  Anything that
# _might_ affect performance should be included here.
#
# Each entry is a column name of the summary dataframe.  The first entry is
# the primary performance metric (the same column used as `plot_metric`); the
# remaining entries are the secondary metrics examined as possible
# contributors.  Display labels and "big is good" polarity for these names are
# looked up in abcutils.CONFIG elsewhere in this notebook.
umami_rows = [
    'darshan_agg_perf_by_slowest_posix_gibs',
    'coverage_factor_bw',
    'coverage_factor_nodehrs',
    'fs_ave_mds_cpu',
    'fs_tot_metadata_ops',
    'fs_ave_oss_cpu',
    'fs_tot_open_ops',
    'fshealth_ost_most_full_pct',
    'fshealth_ost_overloaded_oss_count',
    'jobsdb_concurrent_nodes',
    'topology_job_max_radius',
]

## Calculate Simple Moving Averages (SMAs)

Compare a short-window SMA and a long-window SMA and use the places where they cross over to divide the entire year into _regions_ of interesting benchmark behavior.

For each region defined above, find the _minimum_ performance observed and denote that measurement (and its associated job) as a _locus_.  We then collate all _loci_ into a set of poorly performing benchmarks that are worth contextualizing with UMAMI.

We also plot the raw performance data (light blue bars), the short SMA (orange line), the long SMA (green line), and all loci (red bars) to visually verify that the loci we've identified are indeed poorly performing jobs.

In [None]:
# Express every timestamp as epoch seconds so that bars can be given a fixed
# one-day width (`delta`); use the same converter as for the loci and SMAs.
x_raw = filtered_df['_datetime_start'].apply(pd2epoch)
y_raw = filtered_df[plot_metric]

# Loci: the measurements flagged as minima by the SMA-based feature detector
loci = abcutils.features.generate_loci_sma(filtered_df, plot_metric, mins=True, maxes=False)
y_low = filtered_df.loc[loci.index][plot_metric]
x_low = [pd2epoch(x) for x in loci['_datetime_start']]
print("Found %d loci across %s" % (len(loci), date_end - date_start))

### plot the raw data
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(16, 4)
ax.grid()
ax.bar(x_raw, y_raw, width=delta, alpha=0.5)
ax.bar(x_low, y_low, width=delta, color='red', alpha=0.5)
ax.set_ylabel(abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric).replace(" ", "\n"))
fig.suptitle("%s on %s" % (abcutils.CONFIG['benchmark_labels'].get(benchmark_id, benchmark_id),
                           test_platform))

### also calculate and plot the SMAs
sma_short = abcutils.features.calculate_sma(filtered_df, '_datetime_start', plot_metric, short_window)
sma_long = abcutils.features.calculate_sma(filtered_df, '_datetime_start', plot_metric, long_window)

### plot the intercept points demarcating different regions
# sma_intercepts = abcutils.features.sma_intercepts(filtered_df, plot_metric, short_window, long_window)
#intercepts = numpy.array([(filtered_df['_datetime_start'].loc[x], filtered_df[plot_metric].loc[x]) for x in sma_intercepts.index])
#x_intercept = [(x - numpy.datetime64('1970-01-01T00:00:00Z')) / numpy.timedelta64(1, 's') for x in intercepts[:, 0]]
#y_intercept = intercepts[:, 1]
#ax.scatter(x_intercept, y_intercept, color='red', marker='.')

x_sma_short = [pd2epoch(x) for x in sma_short.index]
y_sma_short = sma_short.values

x_sma_long = [pd2epoch(x) for x in sma_long.index]
y_sma_long = sma_long.values

ax.plot(x_sma_short, y_sma_short, color='C1', linewidth=2)
ax.plot(x_sma_long, y_sma_long, color='C2', linewidth=2)

# Render tick labels through a formatter instead of freezing a list of label
# strings with set_xticklabels(): frozen labels are only correct as long as
# the tick positions never change, whereas a formatter is re-evaluated for
# whatever ticks the axis ends up with.
ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(
    lambda x, pos: datetime.datetime.fromtimestamp(x).strftime("%b %d")))

pass

## Generate UMAMI Diagrams Around Loci

Generate UMAMI diagrams that _end_ at each locus and have `long_window` days of benchmark data preceding them.  Don't bother creating UMAMI diagrams for loci with fewer than `short_window` benchmark measurements in the preceding `long_window` days.

Note that this process mixes up the semantic meaning of `long_window`.  When defining loci, `long_window` refers to a number of benchmark measurements, not days.  Ideally, one benchmark runs each day, so this semantic difference is trivial.  In reality, however, there are days when no benchmarks are run, which means that loci are defined using a series of `long_window` benchmark measurements that often span _more than_ `long_window` days.

Practically speaking, this does not change very much as long as the ratio of `long_window` in days to `long_window` in benchmark measurements is close to 1.0.

In [None]:
# Upper bound on the number of UMAMI diagrams drawn by this cell
max_renders = 1

print("Rendering a maximum of %d UMAMI diagrams" % max_renders)

rendered = 0
for locus in loci.itertuples():
    # Check the quota before doing any work so that max_renders = 0 renders
    # nothing (an equality test after rendering would never trigger for 0).
    if rendered >= max_renders:
        break

    region_idx0 = filtered_df.index.get_loc(locus.region_start)
    region_idxf = filtered_df.index.get_loc(locus.region_end)
    # NOTE(review): the half-open slice excludes the row at region_end itself;
    # confirm this matches how generate_loci_sma defines a region.
    umami_region = filtered_df.iloc[region_idx0:region_idxf]
    print(locus)

    if len(umami_region) >= short_window:
        abcutils.plot.generate_umami(umami_region, umami_rows, highlight_index=umami_region.index.get_loc(locus.Index))
        rendered += 1
    else:
        # Report the locus's own timestamp rather than dumping the whole
        # '_datetime_start' column of the region into the message.
        print("Skipping locus at %s because it has only %d data points (%d required)" % (
            loci.loc[locus.Index, '_datetime_start'], len(umami_region), short_window))

## Tabulate frequency of different problems

Because we define loci to be local minima (i.e., the worst benchmark measured in a temporally local region), all of the UMAMIs we generated above _should_ end on an extremely bad day.  To automatically identify the possible causes for bad performance at each locus, we look at all of the UMAMI metrics and flag those that also ended on extremely poor (e.g., worst quartile) values.  This is exactly the same process we used in the PDSW'17 paper's case studies, but now we have automated the process.

With this method of flagging, we keep a running total of metrics that were flagged as possible culprits as we examine each locus.  Note that multiple metrics can be flagged for a single locus (e.g., low coverage factor _and_ high MDS load can both be flagged for a single benchmark run), so the sum of flags over all metrics will usually add up to more than the total number of loci.

In [None]:
def identify_contributors(dataframe, dependent_column, expected_minima=-1, want_good=False):
    """Identify secondary metrics that coincide with a good/bad primary metric

    For every column, a cutoff is taken at the 25th or 75th percentile of the
    column (NaNs ignored) and the value at `expected_minima` is compared
    against it.  Which percentile is used depends on `want_good` and on
    whether larger values of that metric are considered good according to
    ``abcutils.CONFIG['metric_big_is_good']`` (metrics not listed there are
    treated as "big is good").

    Args:
        dataframe (DataFrame): dataframe containing one metric per column
            over a series of measurements
        dependent_column (str): name of column in `dataframe` corresponding
            to the metric to which contributors will be identified
        expected_minima (int): iloc of `dataframe` that is the expected local
            extreme; default of -1 selects the final value in the dataframe
        want_good (bool): are we identifying metrics that are unusually good
            (True) or unusually bad (False)?

    Returns:
        List of dicts, one per metric other than `dependent_column` whose
        value at `expected_minima` lies beyond its cutoff.  Each dict has the
        keys 'metric', 'value', 'comparator' ("<" or ">"), and 'cutoff'.

        If `dependent_column` itself does not lie in its most extreme
        quartile, a warning is issued and ``[{"error": True}]`` is returned
        instead; contributors found up to that point are discarded.
    """
    contributors = []
    for column in dataframe.columns:
        big_is_good = abcutils.CONFIG['metric_big_is_good'].get(column, True)

        # We want the value to be LOWER than the cutoff when either
        # (a) we're looking for bad, and big IS good, or
        # (b) we're looking for good, and big IS NOT good;
        # in the two remaining cases we want it to be HIGHER.
        want_low = bool(want_good) != bool(big_is_good)
        if want_low:
            percentile, comparator = 25, "<"
        else:
            percentile, comparator = 75, ">"

        try:
            # NOTE(review): the cutoff always excludes the final row, even
            # when expected_minima points elsewhere -- confirm this is the
            # intended reference population.
            cutoff = numpy.nanpercentile(dataframe[column].iloc[0:-1], percentile)
        except TypeError: # if passed non-numeric columns, just skip them
            continue

        # Always report the value that was actually compared against the cutoff
        value = dataframe[column].iloc[expected_minima]
        is_extreme = (value < cutoff) if want_low else (value > cutoff)

        result = None
        if is_extreme:
            result = {
                'metric': column,
                'value': value,
                'comparator': comparator,
                'cutoff': cutoff,
            }

        if column == dependent_column:
            if result is None:
                # The locus is not actually an extreme value of the primary
                # metric; warn and let the caller count this as an error.
                warnings.warn("%s=%s (index %s) not in the %s quartile (%s) of %d values" %
                              (column,
                               value,
                               dataframe[column].index[expected_minima],
                               "best" if want_good else "worst",
                               cutoff,
                               len(dataframe)))
                return [{"error": True}]
        elif result:
            contributors.append(result)

    return contributors

In [None]:
def count_contributors(dataframe, plot_metric, loci, window_days, min_points, want_good=False):
    """Count the secondary metrics that may have contributed to good/bad primary metric

    Side effect: registers the display label "Indeterminate" for the
    `_loci_unclassified` key in ``abcutils.CONFIG['metric_labels']``.

    Args:
        dataframe (DataFrame): dataframe containing one metric per column
            over a series of measurements
        plot_metric (str): name of column corresponding to the primary metric
            of performance
        loci (DataFrame): one row per locus.  The row's index label must be a
            label of `dataframe`, and its `region_start` / `region_end`
            columns must hold labels of `dataframe` bounding the region that
            is examined for that locus (the row at `region_end` itself is
            excluded).
        window_days (int): currently unused; retained so existing callers
            keep working
        min_points (int): the minimum number of measurements a locus's region
            must contain for its contributors to be counted
        want_good (bool): are we identifying metrics that are unusually good
            (True) or unusually bad (False)?

    Returns:
        Dict keyed by the columns of `dataframe` and whose values are the number
        of times each key was identified as a contributor to extreme performance.
        Columns that were never flagged are absent.
        Also includes the following special keys:
            * `_loci_ignored`: number of loci not examined due to the region
              containing fewer than `min_points` benchmark measurements
            * `_loci_unclassified`: number of loci which had no contributors
            * `_loci_classified`: number of loci for which contributors were found
            * `_loci_errors`: number of loci for which `identify_contributors`
              reported that the primary metric was not in its extreme quartile
    """
    results = {
        '_loci_ignored': 0,
        '_loci_unclassified': 0,
        '_loci_classified': 0,
        '_loci_errors': 0,
    }
    abcutils.CONFIG['metric_labels']['_loci_unclassified'] = "Indeterminate"
    for locus in loci.itertuples():
        region_idx0 = dataframe.index.get_loc(locus.region_start)
        region_idxf = dataframe.index.get_loc(locus.region_end)
        region_df = dataframe.iloc[region_idx0:region_idxf]
        if len(region_df) < min_points:
            results['_loci_ignored'] += 1
            continue

        # Only locate the locus inside its region once we know the region will
        # be analyzed; a too-short region must be counted as ignored rather
        # than fail on this lookup.
        expected_minima = region_df.index.get_loc(locus.Index)
        contributors = identify_contributors(region_df,
                                             plot_metric,
                                             want_good=want_good,
                                             expected_minima=expected_minima)
        if len(contributors) == 0:
            results['_loci_unclassified'] += 1
        elif len(contributors) == 1 and contributors[0].get('error', False):
            results['_loci_errors'] += 1
        else:
            results['_loci_classified'] += 1
            for contributor in contributors:
                results[contributor['metric']] = results.get(contributor['metric'], 0) + 1
    return results

In [None]:
# Loci (local minima) for the single (test_platform, benchmark_id) selected above
loci = abcutils.features.generate_loci_sma(filtered_df, plot_metric, mins=True, maxes=False)

results = count_contributors(dataframe=filtered_df[['_datetime_start'] + umami_rows],
                             plot_metric=plot_metric,
                             loci=loci,
                             window_days=long_window,
                             min_points=short_window,
                             want_good=False)

# Remove the bookkeeping counters so that only per-metric counts remain in `results`
num_classified = results.pop('_loci_classified')
num_unclassified = results.pop('_loci_unclassified')
num_ignored = results.pop('_loci_ignored')
num_errors = results.pop('_loci_errors')

# Single-argument print(...) produces the same output under Python 2 and 3
print("Classified: %d" % num_classified)
print("Unclassified: %d" % num_unclassified)
print("Ignored: %d" % num_ignored)
print("Errors: %d" % num_errors)
print("")
print("Number of times each metric was flagged across %d loci:" % (len(loci)))
# Most frequently flagged metric first
for key in reversed(sorted(results.keys(), key=lambda x: results[x])):
    print("%3d %s" % (results[key], abcutils.CONFIG['metric_labels'].get(key, key)))

## Tabulate contributors to bad performance over all tests

We now apply the above analysis to the entirety of the benchmark data across all systems.

Note that warnings about certain loci not being in the worst quartile indicate that the SMA-based method we use to identify local minima is not perfect.  There are a variety of other methods (including some canned algorithms) that we can swap in to improve our classification of loci.

In [None]:
# Global plot parameters
plot_metric = 'darshan_agg_perf_by_slowest_posix_gibs'
# date_start and date_end are inherited from the "Define Input Parameters" cell
# date_start = datetime.datetime(2017, 2, 14)
# date_end = datetime.datetime(2018, 3, 1)
group_by = ['_test_platform', '_benchmark_id']

# Determine which plots to generate
test_platforms = sorted(df['_test_platform'].unique())
benchmark_ids = sorted(df[df['_benchmark_id'] != 'hacc_io_write_shared_write']['_benchmark_id'].unique())

# test_platforms = ['cscratch@cori-knl']
# benchmark_ids = ['dbscan_read_shared_read', 'vpicio_uni_shared_write']

# Single-argument print(...) produces the same output under Python 2 and 3
print("plot_metric = %s" % abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start = %s" % date_start.isoformat())
print("date_end = %s" % date_end.isoformat())

grouped_df = df.groupby(by=group_by)

# results: one dict of counts per (test platform, benchmark) combination
# results_flat: the same counts summed over all combinations
results_flat = {}
results = []
for test_platform in test_platforms:
    for benchmark_id in benchmark_ids:
        try:
            filtered_df = grouped_df.get_group((test_platform, benchmark_id))
        except KeyError:
            # this benchmark was never run on this platform
            continue
        filtered_df = filtered_df[filtered_df['darshan_total_gibs_posix'] > 1.0]
        filtered_df = filtered_df[filtered_df['_datetime_start'] < date_end]
        filtered_df = filtered_df[filtered_df['_datetime_start'] >= date_start]

        loci = abcutils.features.generate_loci_sma(filtered_df, plot_metric, mins=True, maxes=False)
        result = count_contributors(dataframe=filtered_df[['_datetime_start'] + umami_rows],
                                    plot_metric=plot_metric,
                                    loci=loci,
                                    window_days=long_window,
                                    min_points=short_window,
                                    want_good=False)
        # dict.items() exists in both Python 2 and 3 (iteritems() is Python 2 only)
        for key, value in result.items():
            results_flat[key] = results_flat.get(key, 0) + value
        result['_test_platform'] = test_platform
        result['_benchmark_id'] = benchmark_id
        results.append(result)

# '_loci_unclassified' is read with get() rather than pop() so that it stays in
# results_flat and is plotted alongside the per-metric counts
num_classified = results_flat.pop('_loci_classified')
num_unclassified = results_flat.get('_loci_unclassified')
num_ignored = results_flat.pop('_loci_ignored')
num_errors = results_flat.pop('_loci_errors')
num_loci = num_classified + num_unclassified + num_ignored + num_errors

print("Classified: %d" % num_classified)
print("Unclassified: %d" % num_unclassified)
print("Ignored: %d" % num_ignored)
print("Errors: %d" % num_errors)

The following bar graph shows the total number of times each metric has been flagged as a possible source of performance loss as defined above: its value was "bad" coincident with a locus, where a locus is a job whose performance was abnormally poor and "bad" is defined as falling within the worst quartile of that metric (below the 25th percentile for metrics where bigger is better, above the 75th percentile otherwise).

In [None]:
# Convert results into a DataFrame that we can slice and dice.  The groupby is
# given its own name so that the global `grouped_df` (the per-test grouping of
# `df` that other cells rely on) is not overwritten.
results_df = pandas.DataFrame.from_dict(results, orient='columns')
results_by_benchmark = results_df.groupby(by='_benchmark_id')

# Sort metric order by its impact (largest total count first).  Bookkeeping
# columns start with '_' and are skipped, except for the unclassified count.
x_labels = [x for x in results_df.columns if (not x.startswith('_') or x == '_loci_unclassified')]
x_labels = sorted(x_labels, key=lambda x: results_df[x].sum(), reverse=True)

# Create plot canvas
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(8,4)

# One bar position per metric; size this from x_labels, which is the only
# sequence guaranteed to exist (and to have the right length) at this point.
x_values = numpy.arange(len(x_labels))
y_bottom = numpy.zeros(len(x_labels))
for _benchmark_id in benchmark_ids:
    try:
        benchmark_results = results_by_benchmark.get_group(_benchmark_id)
    except KeyError:
        continue
    # Stack this benchmark's counts on top of those already drawn
    y_values = numpy.array([benchmark_results[x].sum() for x in x_labels])
    ax.bar(x=x_values, height=y_values, bottom=y_bottom, width=0.9, label=abcutils.CONFIG['benchmark_labels_short'].get(_benchmark_id, _benchmark_id))
    y_bottom += y_values

ax.yaxis.grid(True)
ax.set_xticks(x_values)
ax.set_xticklabels([abcutils.CONFIG['metric_labels'].get(x, x) for x in x_labels], rotation=30, ha='right')
ax.set_ylabel("Number of occurrences")
# This figure summarizes the want_good=False analysis, i.e. bad performance
ax.set_title("Candidate Contributors to Bad Performance (%d Jobs Total)" % num_loci)
ax.legend(bbox_to_anchor=(1.0, 1.00))

# Annotate each stack with its total height
for index, x_value in enumerate(x_values):
    ax.annotate("%d" % y_bottom[index], xy=(x_value, y_bottom[index]), ha='center')

In [None]:
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(16,4)

# Order the flagged metrics from most to least frequently flagged
x_labels = sorted(results_flat.keys(), key=lambda x: results_flat[x])[::-1]
y_values = [results_flat[x] for x in x_labels]
x_values = numpy.arange(len(y_values))

ax.set_title("Candidate Contributors to Bad Performance (%d Jobs Total)" % num_loci)
ax.set_ylabel("Number of occurrences")
ax.yaxis.grid(True)
ax.bar(x_values, y_values)
ax.set_xticks(x_values)
ax.set_xticklabels(
    [abcutils.CONFIG['metric_labels'].get(x, x) for x in x_labels],
    rotation=45,
    ha='right')

# Write each bar's count at the top of the bar
for index, x_value in enumerate(x_values):
    ax.annotate("%d" % y_values[index], xy=(x_value, y_values[index]), ha='center')

# Trailing no-op keeps the cell from echoing a return value
pass

There are many caveats with the above plot; notably, the majority of jobs were run on Lustre since this data includes all Edison, Cori+KNL, and Cori+Haswell jobs.  In addition, the Mira data does not currently contain file system health data (although it is available), so the "Number of Overloaded OSSes" may be underreported.

The most appropriate way to present this data is to produce one bar graph per test platform (compute system + file system combination) so that metrics that are only available on one test platform aren't being directly compared with others that are.

## Tabulate contributors to good performance over all tests

We can also perform the above analysis and look for the metrics that coincided with good performance.

In [None]:
# Re-select the benchmark data explicitly instead of relying on whatever
# `filtered_df` happens to hold: earlier cells reassign it inside loops (at one
# point to a slice of the results table, which has no '_datetime_start'
# column).  Selecting by the current test_platform/benchmark_id also guarantees
# that the data plotted here matches the figure title below.
# Raises KeyError if that combination does not exist in `df`.
filtered_df = df.groupby(by=group_by).get_group((test_platform, benchmark_id))
filtered_df = filtered_df[filtered_df['darshan_total_gibs_posix'] > 1.0]
filtered_df = filtered_df[filtered_df['_datetime_start'] < date_end]
filtered_df = filtered_df[filtered_df['_datetime_start'] >= date_start]

# Express every timestamp as epoch seconds so that bars can be given a fixed
# one-day width (`delta`); use the same converter as for the loci and SMAs.
x_raw = filtered_df['_datetime_start'].apply(pd2epoch)
y_raw = filtered_df[plot_metric]

# Loci: the measurements flagged as maxima by the SMA-based feature detector.
# (The x_low/y_low names are kept from the minima plot; here they hold maxima.)
loci = abcutils.features.generate_loci_sma(filtered_df, plot_metric, mins=False, maxes=True)
y_low = filtered_df.loc[loci.index][plot_metric]
x_low = [pd2epoch(x) for x in loci['_datetime_start']]
print("Found %d loci across %s" % (len(loci), date_end - date_start))

### plot the raw data
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(16, 4)
ax.grid()
ax.bar(x_raw, y_raw, width=delta, alpha=0.5)
ax.bar(x_low, y_low, width=delta, color='red')
ax.set_ylabel(abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric).replace(" ", "\n"))
fig.suptitle("%s on %s" % (abcutils.CONFIG['benchmark_labels'].get(benchmark_id, benchmark_id),
                           test_platform))

### also calculate and plot the SMAs
sma_short = abcutils.features.calculate_sma(filtered_df, '_datetime_start', plot_metric, short_window)
sma_long = abcutils.features.calculate_sma(filtered_df, '_datetime_start', plot_metric, long_window)

### plot the intercept points demarcating different regions
#sma_intercepts = abcutils.features.sma_intercepts(filtered_df, plot_metric, short_window, long_window)
#intercepts = numpy.array([(filtered_df['_datetime_start'].loc[x], filtered_df[plot_metric].loc[x]) for x in sma_intercepts.index])
#x_intercept = [(x - numpy.datetime64('1970-01-01T00:00:00Z')) / numpy.timedelta64(1, 's') for x in intercepts[:, 0]]
#y_intercept = intercepts[:, 1]
#ax.scatter(x_intercept, y_intercept, color='red', marker='.')

x_sma_short = [pd2epoch(x) for x in sma_short.index]
y_sma_short = sma_short.values

x_sma_long = [pd2epoch(x) for x in sma_long.index]
y_sma_long = sma_long.values

ax.plot(x_sma_short, y_sma_short, color='C1', linewidth=2)
ax.plot(x_sma_long, y_sma_long, color='C2', linewidth=2)

# Render tick labels through a formatter instead of freezing a list of label
# strings with set_xticklabels(): frozen labels are only correct as long as
# the tick positions never change, whereas a formatter is re-evaluated for
# whatever ticks the axis ends up with.
ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(
    lambda x, pos: datetime.datetime.fromtimestamp(x).strftime("%b %d")))

pass

In [None]:
# Single-argument print(...) produces the same output under Python 2 and 3
print("plot_metric = %s" % abcutils.CONFIG['metric_labels'].get(plot_metric, plot_metric))
print("date_start = %s" % date_start.isoformat())
print("date_end = %s" % date_end.isoformat())

# Regroup the benchmark data here rather than reusing the global `grouped_df`:
# the preceding plotting cell rebinds that name to a groupby of the results
# table, for which every (test_platform, benchmark_id) lookup fails and this
# loop would silently produce no results.
grouped_df = df.groupby(by=group_by)

# results: one dict of counts per (test platform, benchmark) combination
# results_flat: the same counts summed over all combinations
results_flat = {}
results = []
for test_platform in test_platforms:
    for benchmark_id in benchmark_ids:
        try:
            filtered_df = grouped_df.get_group((test_platform, benchmark_id))
        except KeyError:
            # this benchmark was never run on this platform
            continue
        filtered_df = filtered_df[filtered_df['darshan_total_gibs_posix'] > 1.0]
        filtered_df = filtered_df[filtered_df['_datetime_start'] < date_end]
        filtered_df = filtered_df[filtered_df['_datetime_start'] >= date_start]

        loci = abcutils.features.generate_loci_sma(filtered_df, plot_metric, mins=False, maxes=True)
        result = count_contributors(dataframe=filtered_df[['_datetime_start'] + umami_rows],
                                    plot_metric=plot_metric,
                                    loci=loci,
                                    window_days=long_window,
                                    min_points=short_window,
                                    want_good=True)
        # dict.items() exists in both Python 2 and 3 (iteritems() is Python 2 only)
        for key, value in result.items():
            results_flat[key] = results_flat.get(key, 0) + value
        result['_test_platform'] = test_platform
        result['_benchmark_id'] = benchmark_id
        results.append(result)

# '_loci_unclassified' is read with get() rather than pop() so that it stays in
# results_flat and is plotted alongside the per-metric counts
num_classified = results_flat.pop('_loci_classified')
num_unclassified = results_flat.get('_loci_unclassified')
num_ignored = results_flat.pop('_loci_ignored')
num_errors = results_flat.pop('_loci_errors')
num_loci = num_classified + num_unclassified + num_ignored + num_errors

print("Classified: %d" % num_classified)
print("Unclassified: %d" % num_unclassified)
print("Ignored: %d" % num_ignored)
print("Errors: %d" % num_errors)

In [None]:
# Convert results into a DataFrame that we can slice and dice.  The groupby is
# given its own name so that the global `grouped_df` (the per-test grouping of
# `df` that other cells rely on) is not overwritten.
results_df = pandas.DataFrame.from_dict(results, orient='columns')
results_by_benchmark = results_df.groupby(by='_benchmark_id')

# Sort metric order by its impact (largest total count first).  Bookkeeping
# columns start with '_' and are skipped, except for the unclassified count.
x_labels = [x for x in results_df.columns if (not x.startswith('_') or x == '_loci_unclassified')]
x_labels = sorted(x_labels, key=lambda x: results_df[x].sum(), reverse=True)

# Create plot canvas
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(8,4)

# One bar position per metric; size this from x_labels rather than from a
# `y_values` left over from an earlier cell, whose length need not match.
x_values = numpy.arange(len(x_labels))
y_bottom = numpy.zeros(len(x_labels))
for _benchmark_id in benchmark_ids:
    try:
        benchmark_results = results_by_benchmark.get_group(_benchmark_id)
    except KeyError:
        continue
    # Stack this benchmark's counts on top of those already drawn
    y_values = numpy.array([benchmark_results[x].sum() for x in x_labels])
    ax.bar(x=x_values, height=y_values, bottom=y_bottom, width=0.9, label=abcutils.CONFIG['benchmark_labels_short'].get(_benchmark_id, _benchmark_id))
    y_bottom += y_values

ax.yaxis.grid(True)
ax.set_xticks(x_values)
ax.set_xticklabels([abcutils.CONFIG['metric_labels'].get(x, x) for x in x_labels], rotation=30, ha='right')
ax.set_ylabel("Number of occurrences")
ax.set_title("Candidate Contributors to Good Performance (%d Jobs Total)" % num_loci)
ax.legend(bbox_to_anchor=(1.0, 1.00))

# Annotate each stack with its total height
for index, x_value in enumerate(x_values):
    ax.annotate("%d" % y_bottom[index], xy=(x_value, y_bottom[index]), ha='center')

In [None]:
fig, ax = matplotlib.pyplot.subplots()
fig.set_size_inches(16,4)

# Order the flagged metrics from most to least frequently flagged
x_labels = sorted(results_flat.keys(), key=lambda x: results_flat[x])[::-1]
y_values = [results_flat[x] for x in x_labels]
x_values = numpy.arange(len(y_values))

ax.set_title("Candidate Contributors to Good Performance (%d Jobs Total)" % num_loci)
ax.set_ylabel("Number of occurrences")
ax.yaxis.grid(True)
ax.bar(x_values, y_values)
ax.set_xticks(x_values)
ax.set_xticklabels(
    [abcutils.CONFIG['metric_labels'].get(x, x) for x in x_labels],
    rotation=45,
    ha='right')

# Write each bar's count at the top of the bar
for index, x_value in enumerate(x_values):
    ax.annotate("%d" % y_values[index], xy=(x_value, y_values[index]), ha='center')

# Trailing no-op keeps the cell from echoing a return value
pass