In [None]:
%matplotlib inline
%load_ext autoreload

In [None]:
import datetime
import os
import time

import matplotlib
import matplotlib.pyplot
import numpy
import pandas
import scipy.stats

import abcutils
# Set matplotlib's default font size to 16 for figures drawn after this point.
matplotlib.rcParams.update({'font.size': 16})

## Global Analysis Constants

## Load and Synthesize Data from CSV

In [None]:
# Load the dataset through abcutils.sc18paper.load_dataset(). The result is used
# as a DataFrame by the cells below (describe(), groupby(), column indexing).
filtered_df = abcutils.sc18paper.load_dataset()

In [None]:
# Display summary statistics of the dataset, transposed (.T) so that each
# described column is shown as a row.
filtered_df.describe().T

## Basic Statistics

In [None]:
# Enumerate the distinct platforms and benchmark IDs present in the dataset.
# print() is called with a single pre-formatted string, which produces the same
# output under Python 2 and Python 3 (the bare print statement is a syntax
# error on Python 3).
TEST_PLATFORMS = sorted(filtered_df['_test_platform'].unique())
print("Test Platforms:\n  " + "\n  ".join(TEST_PLATFORMS))

BENCHMARK_IDS = sorted(filtered_df['_benchmark_id'].unique())
print("\nBenchmarks:\n  " + "\n  ".join(BENCHMARK_IDS))

In [None]:
# Length of the study window in fractional days (86400 seconds per day).
total_days = (abcutils.sc18paper.DATE_END - abcutils.sc18paper.DATE_START).total_seconds() / 86400.0
print("Total days: %.1f" % total_days)

# count() tallies only the non-null entries of the column.
total_logs = filtered_df['darshan_agg_perf_by_slowest_posix'].count()
print("Total darshan logs: %d" % total_logs)

# The expected number of logs is one per day for every (platform, benchmark) pair.
# NOTE: TEST_PLATFORMS is reassigned to a hand-picked list in a later cell, so
# re-running this cell after that one changes expected_logs.
expected_logs = total_days * len(TEST_PLATFORMS) * len(BENCHMARK_IDS)
percent_found = 100.0 * total_logs / expected_logs
print("Percent benchmarks run: %.1f%%" % percent_found)
print("Percent benchmarks missing: %.1f%%" % (100 - percent_found))

# Cell output: logs found per day of the study window, for each
# (platform, benchmark) group.
grouped_df = filtered_df.groupby(['_test_platform', '_benchmark_id'])
grouped_df['darshan_agg_perf_by_slowest_posix'].describe()['count'] / total_days

In [None]:
# Summarize the '_datetime_start' column within each group of grouped_df
# (grouped by platform and benchmark ID when the cells are run top to bottom).
grouped_df['_datetime_start'].describe()

## Boxplots

In [None]:
# Hand-picked platforms for the boxplot grid, listed in panel order.
# This replaces the TEST_PLATFORMS list derived from the dataset earlier in the
# notebook; 'scratch2@edison' is left out of the selection.
TEST_PLATFORMS = [
    'scratch1@edison', 'scratch3@edison',   # 'scratch2@edison' disabled
    'cscratch@cori-knl', 'mira-fs1@mira',
]

In [None]:
# Plot settings: a global font size plus one sub-dictionary per plotted metric.
boxplot_settings = dict(fontsize=16)
boxplot_settings['darshan_normalized_perf_by_max'] = dict(
    output_file="perf-boxplots.pdf",
    ylabel="Fraction Peak Performance",
    # One set of title keyword arguments per subplot row (looked up by row index
    # when the panel titles are drawn).
    title_pos=[
        dict(x=0.04, y=0.02, horizontalalignment='left', fontsize=16),
        dict(x=0.04, y=0.02, horizontalalignment='left', fontsize=16),
    ],
)

In [None]:
# Lay the per-platform boxplots out on a NUM_ROWS x NUM_COLS grid.
NUM_ROWS = 2
# Floor division keeps NUM_COLS an integer under Python 3 as well as Python 2.
NUM_COLS = len(TEST_PLATFORMS) // NUM_ROWS

# Every grid cell must hold exactly one platform (no empty or missing panels).
assert NUM_ROWS * NUM_COLS == len(TEST_PLATFORMS)

grouped_df = filtered_df.groupby('_test_platform')

plot_metric = 'darshan_normalized_perf_by_max'
plot_settings = boxplot_settings[plot_metric]

fig, axes = matplotlib.pyplot.subplots(nrows=NUM_ROWS,
                                       ncols=NUM_COLS,
                                       # sharex causes problems if not all axes contain data
                                       #sharex=True,
                                       sharey=True,
                                       # always get a 2-D array of axes, even when
                                       # the grid has a single column
                                       squeeze=False)
fig.set_size_inches(4 * NUM_COLS, 3 * NUM_ROWS)

### Draw subplots that contain data
for index, fs in enumerate(TEST_PLATFORMS):
    # Platforms fill the grid row by row.
    irow, icol = divmod(index, NUM_COLS)
    ax = axes[irow, icol]
    abcutils.plot.grouped_boxplot(grouped_df.get_group(fs),
                                  plot_metric,
                                  ax=ax,
                                  fontsize=boxplot_settings['fontsize'])
    # Use the configured platform label when there is one, otherwise the raw
    # platform name; title placement is chosen per row.
    title = ax.set_title(
        abcutils.CONFIG['platform_labels'].get(fs, fs),
        **(plot_settings['title_pos'][irow]))
    # Semi-transparent white box behind the title text.
    title.set_bbox({'color': 'white', 'alpha': 0.5})
#   ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

### Set global figure labels
fig.suptitle("")
# A single y-axis label for the whole figure, centred vertically on the left edge.
fig.text(0.02, 0.5,
         plot_settings['ylabel'],
         verticalalignment='center',
         horizontalalignment='center',
         rotation='vertical',
         fontsize=boxplot_settings['fontsize'])
fig.subplots_adjust(hspace=0.05, wspace=0.05)

# savefig() does not create missing directories, so make sure the output
# directory exists before writing the PDF.
output_path = 'figs/summary-boxplots.pdf'
output_dir = os.path.dirname(output_path)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
fig.savefig(output_path, bbox_inches='tight')