In [1]:
import os

import numpy as np
import pandas as pd

import performance
from report_util import *
from run_data import extract


def create_row(data, y, tool, benchmark, baseline='galette'):
    """Summarise metric ``y`` for one (benchmark, tool) pair and compare it to the baseline tool.

    Args:
        data: Run results; passed unchanged to ``select``.
        y: Key used to index the rows returned by ``select`` (the metric to summarise).
        tool: Tool whose measurements are summarised.
        benchmark: Benchmark whose measurements are summarised.
        baseline: Tool the measurements are compared against. Defaults to
            ``'galette'``, the value that was previously hard-coded.

    Returns:
        dict with the keys ``n``, ``tool``, ``benchmark``, ``s``, ``p``, ``sig``,
        ``med`` and ``a12``. ``med``/``s`` stay NaN when ``tool`` has no
        measurements; ``p``/``a12`` stay NaN when either sample is empty.
    """
    baseline_values = select(data, benchmark=benchmark, tool=baseline)[y]
    tool_values = select(data, benchmark=benchmark, tool=tool)[y]
    # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
    # 'a12' is part of the defaults so every row has the same keys, even when
    # no comparison can be made; it is listed last to keep the column order.
    # NOTE(review): 'sig' is never updated in this function, so it is always False.
    row = dict(n=len(tool_values), tool=tool, benchmark=benchmark,
               s=np.nan, p=np.nan, sig=False, med=np.nan, a12=np.nan)
    if len(tool_values) > 0:
        row['med'] = np.median(tool_values)
        row['s'] = np.std(tool_values)
        if len(baseline_values) > 0:
            # presumably a Mann-Whitney U p-value and the Vargha-Delaney A12
            # effect size -- confirm against report_util
            row['p'] = mann_whitney(tool_values, baseline_values)
            row['a12'] = a12(tool_values, baseline_values)
    return row


def create_table(data, y):
    """Build a DataFrame with one ``create_row`` summary per (benchmark, tool) pair.

    Benchmarks come from ``performance.BENCHMARKS`` (outer loop) and tools from
    ``performance.TOOLS`` (inner loop); ``data`` and ``y`` are forwarded to
    ``create_row`` unchanged.
    """
    summaries = []
    for benchmark in performance.BENCHMARKS:
        for tool in performance.TOOLS:
            summaries.append(create_row(data, y, tool, benchmark))
    return pd.DataFrame(summaries)


def mark_significant(p, sig_level):
    """Format a p-value with three decimals, prefixing ``*`` when it is below ``sig_level``.

    Missing values (anything ``pd.isna`` reports as missing) are returned
    unchanged rather than formatted.
    """
    if pd.isna(p):
        return p
    marker = '*' if p < sig_level else ''
    return f'{marker}{p:.3f}'


def format_table(table, sig_level=0.05 / 3):
    """Return a display-ready copy of a table produced by ``create_table``.

    Args:
        table: Frame (or DataFrame-compatible data) with at least the columns
            ``p``, ``sig``, ``med`` and ``s``.
        sig_level: Threshold handed to ``mark_significant``. Defaults to
            ``0.05 / 3``, the value that was previously hard-coded
            (presumably a Bonferroni correction for three comparisons -- confirm).

    Returns:
        Whatever ``format_tool_names`` returns for the formatted copy.
    """
    # pd.DataFrame(table) does not copy DataFrame input by default, so without
    # an explicit copy the assignments below can leak into the caller's frame
    # (depending on pandas version / copy-on-write mode).
    result = pd.DataFrame(table).copy()
    result['p'] = result['p'].apply(lambda p: mark_significant(p, sig_level=sig_level))
    result['sig'] = result['sig'].apply(lambda s: 'color: red;' if s else '')
    # Nullable Int64 keeps missing statistics as <NA> instead of failing the cast.
    result['med'] = result['med'].round().astype(pd.Int64Dtype())
    result['s'] = result['s'].round().astype(pd.Int64Dtype())
    return format_tool_names(result)


def style_table(table, title):
    """Pivot the per-(benchmark, tool) statistics into a wide, captioned Styler.

    Rows are benchmarks; columns are a two-level index of tool over statistic
    (``med``, ``s``, ``p``, ``a12``). The tool labels used below are expected
    to be the ones present after ``format_table`` -- presumably produced by
    ``format_tool_names``; confirm against report_util.
    """
    tool_order = ['Galette', 'Base', 'MirrorTaint', 'Phosphor']
    stat_order = ['med', 's', 'p', 'a12']

    wide = format_table(table).pivot(index=['benchmark'], values=['med', 's', 'p', 'a12'], columns=['tool'])
    # Put the tool level on top of the statistic level.
    wide = wide.reorder_levels(axis=1, order=['tool', None])
    wide = wide.sort_index(axis=1)
    wide = wide.sort_index(axis=0)
    wide = wide.reindex(tool_order, axis=1, level=0)
    wide = wide.reindex(stat_order, axis=1, level=1)
    # Remove comparison of Galette against itself
    wide = wide.drop(columns=[('Galette', 'p'), ('Galette', 'a12')])
    # Blank out the axis names so they do not show up as extra header cells.
    wide.index.names = [None] * len(wide.index.names)
    wide.columns.names = [None] * len(wide.columns.names)

    # Thousands separators, no decimals, for the median and deviation columns.
    formats = {}
    for column in wide.columns:
        if 'med' in column or 's' in column:
            formats[column] = "{:,.0f}"
    styler = wide.style.format(formats, precision=3, na_rep='---')
    return styler.set_caption(title)


# Result directories of the two Slurm runs. The defaults are the original,
# machine-specific locations; set GALETTE_REPORTS_DIR / GALETTE_NEW_REPORTS_DIR
# to run this notebook elsewhere without editing the cell.
reports_dir = os.environ.get('GALETTE_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1191447/')
new_reports_dir = os.environ.get('GALETTE_NEW_REPORTS_DIR', '/home/katie/Downloads/galette/slurm-1194149/')
data = extract(reports_dir, os.path.join(reports_dir, 'performance.csv'))
data2 = extract(new_reports_dir, os.path.join(new_reports_dir, 'performance.csv'))
# Replace the older run's galette rows with the newer run's results.
# NOTE(review): every row of data2 is kept, not only tool == 'galette' --
# confirm the newer run holds galette results only, otherwise the other tools
# are counted twice.
data = pd.concat([data2, data[data['tool'] != 'galette']])

Searching for runs in /home/katie/Downloads/galette/slurm-1191447/.
Found 1680 runs.
Checking runs.
	Failed run 557 --- {'benchmark': 'luindex', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 900 --- {'benchmark': 'tradesoap', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 1666 --- {'benchmark': 'h2o', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 313 --- {'benchmark': 'xalan', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 227 --- {'benchmark': 'tradebeans', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 664 --- {'benchmark': 'pmd', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 805 --- {'benchmark': 'h2o', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 305 --- {'benchmark': 'luindex', 'tool': 'mirror-taint', 'status': 'RUN_FAILURE'}
	Failed run 335 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'RUN_FAILURE'}
	Failed run 167 --- {'benchmark': 'zxing', 'tool': 'phosphor', 'status': 'RU

In [2]:
# Number of non-null 'iteration' entries per (benchmark, tool) pair.
# The former leading `data.head()` was removed: it was not the cell's last
# expression, so its result was never displayed.
data.groupby(by=['benchmark', 'tool'])['iteration'] \
    .count() \
    .reset_index()

Unnamed: 0,benchmark,tool,iteration
0,avrora,galette,100
1,avrora,mirror-taint,100
2,avrora,none,100
3,avrora,phosphor,100
4,batik,galette,100
...,...,...,...
56,xalan,none,100
57,xalan,phosphor,100
58,zxing,galette,100
59,zxing,mirror-taint,100


In [3]:
# Summary table for the 'rss' metric, captioned as memory usage in kB.
memory_table = style_table(
    table=create_table(data=data, y='rss'),
    title='Memory Usage (kB).',
)
memory_table

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,217550,30464,120864,24626,*0.000,0.999,2284304,238627,*0.000,1.000,326262,6972,*0.000,1.000
batik,574220,43920,222608,17281,*0.000,1.000,2431990,250269,*0.000,1.000,---,---,---,---
biojava,551244,30103,172694,3552,*0.000,1.000,853112,49305,*0.000,1.000,394496,20668,*0.000,1.000
eclipse,603374,152922,301030,56095,*0.000,0.996,---,---,---,---,---,---,---,---
fop,303958,6726,142594,12253,*0.000,1.000,---,---,---,---,466488,12380,*0.000,1.000
graphchi,893472,263270,412470,34086,*0.000,1.000,---,---,---,---,542210,17985,*0.000,1.000
h2,717370,47603,332658,8441,*0.000,1.000,829460,107817,*0.000,0.888,827916,34592,*0.000,0.976
h2o,664464,51474,393566,33857,*0.000,1.000,---,---,---,---,---,---,---,---
jme,716900,53289,261894,33856,*0.000,1.000,2514882,130129,*0.000,1.000,615138,46196,*0.000,0.913
jython,3234062,403359,421112,15975,*0.000,1.000,---,---,---,---,---,---,---,---


In [4]:
# Emit the memory table as LaTeX source.
print(memory_table.to_latex(
    hrules=True,
    convert_css=True,
    multicol_align='c',
    multirow_align='t',
))

\begin{table}
\caption{Memory Usage (kB).}
\begin{tabular}{lllllllllllllll}
\toprule
 & \multicolumn{2}{c}{Galette} & \multicolumn{4}{c}{Base} & \multicolumn{4}{c}{MirrorTaint} & \multicolumn{4}{c}{Phosphor} \\
 & med & s & med & s & p & a12 & med & s & p & a12 & med & s & p & a12 \\
\midrule
avrora & 217,550 & 30,464 & 120,864 & 24,626 & *0.000 & 0.999 & 2,284,304 & 238,627 & *0.000 & 1.000 & 326,262 & 6,972 & *0.000 & 1.000 \\
batik & 574,220 & 43,920 & 222,608 & 17,281 & *0.000 & 1.000 & 2,431,990 & 250,269 & *0.000 & 1.000 & --- & --- & --- & --- \\
biojava & 551,244 & 30,103 & 172,694 & 3,552 & *0.000 & 1.000 & 853,112 & 49,305 & *0.000 & 1.000 & 394,496 & 20,668 & *0.000 & 1.000 \\
eclipse & 603,374 & 152,922 & 301,030 & 56,095 & *0.000 & 0.996 & --- & --- & --- & --- & --- & --- & --- & --- \\
fop & 303,958 & 6,726 & 142,594 & 12,253 & *0.000 & 1.000 & --- & --- & --- & --- & 466,488 & 12,380 & *0.000 & 1.000 \\
graphchi & 893,472 & 263,270 & 412,470 & 34,086 & *0.000 & 1.000 & 

In [5]:
# Summary table for the 'elapsed_time' metric, captioned as execution time in ms.
time_table = style_table(
    table=create_table(data=data, y='elapsed_time'),
    title='Execution Time (ms).',
)
time_table

Unnamed: 0_level_0,Galette,Galette,Base,Base,Base,Base,MirrorTaint,MirrorTaint,MirrorTaint,MirrorTaint,Phosphor,Phosphor,Phosphor,Phosphor
Unnamed: 0_level_1,med,s,med,s,p,a12,med,s,p,a12,med,s,p,a12
avrora,4570,602,2432,359,*0.000,1.000,1410370,128023,*0.000,1.000,6370,622,*0.000,0.969
batik,3334,389,265,105,*0.000,1.000,812568,36961,*0.000,1.000,---,---,---,---
biojava,4184,433,154,25,*0.000,1.000,363983,21838,*0.000,1.000,2330,287,*0.000,0.997
eclipse,1378,6961,4283,18926,0.109,0.566,---,---,---,---,---,---,---,---
fop,525,312,118,108,*0.000,0.977,---,---,---,---,792,199,*0.000,0.853
graphchi,3748,941,534,93,*0.000,1.000,---,---,---,---,7694,772,*0.000,0.983
h2,742,150,148,79,*0.000,0.993,118315,5069,*0.000,1.000,1298,223,*0.000,0.974
h2o,9976,315,604,98,*0.000,1.000,---,---,---,---,---,---,---,---
jme,4170,735,1029,312,*0.000,1.000,3989700,29022,*0.000,1.000,5034,812,*0.000,0.764
jython,4066,431,392,39,*0.000,1.000,---,---,---,---,---,---,---,---


In [6]:
# Emit the execution-time table as LaTeX source.
print(time_table.to_latex(
    hrules=True,
    convert_css=True,
    multicol_align='c',
    multirow_align='t',
))

\begin{table}
\caption{Execution Time (ms).}
\begin{tabular}{lllllllllllllll}
\toprule
 & \multicolumn{2}{c}{Galette} & \multicolumn{4}{c}{Base} & \multicolumn{4}{c}{MirrorTaint} & \multicolumn{4}{c}{Phosphor} \\
 & med & s & med & s & p & a12 & med & s & p & a12 & med & s & p & a12 \\
\midrule
avrora & 4,570 & 602 & 2,432 & 359 & *0.000 & 1.000 & 1,410,370 & 128,023 & *0.000 & 1.000 & 6,370 & 622 & *0.000 & 0.969 \\
batik & 3,334 & 389 & 265 & 105 & *0.000 & 1.000 & 812,568 & 36,961 & *0.000 & 1.000 & --- & --- & --- & --- \\
biojava & 4,184 & 433 & 154 & 25 & *0.000 & 1.000 & 363,983 & 21,838 & *0.000 & 1.000 & 2,330 & 287 & *0.000 & 0.997 \\
eclipse & 1,378 & 6,961 & 4,283 & 18,926 & 0.109 & 0.566 & --- & --- & --- & --- & --- & --- & --- & --- \\
fop & 525 & 312 & 118 & 108 & *0.000 & 0.977 & --- & --- & --- & --- & 792 & 199 & *0.000 & 0.853 \\
graphchi & 3,748 & 941 & 534 & 93 & *0.000 & 1.000 & --- & --- & --- & --- & 7,694 & 772 & *0.000 & 0.983 \\
h2 & 742 & 150 & 148 & 79 & *