In [20]:
import numpy as np
from scipy import stats as scipy_stats
from IPython.display import display
import ipywidgets as widgets
from IPython.display import clear_output
import pprint

pp = pprint.PrettyPrinter(indent=4)

# test result setup
# primary_score is the performance metric to compare
# valid_list is a list of metrics we need to all be present to be confident that this result is OK
# we also must have all 4 energy scores to be valid
#big_energy = 'BOARD_ENERGY_BIG'
#little_energy = 'BOARD_ENERGY_LITTLE'
#sys_energy = 'BOARD_ENERGY_SYS'
#gpu_energy = 'BOARD_ENERGY_GPU'
# First naming scheme for the four energy counters.
big_energy = 'a57_cenr'
little_energy = 'a53_cenr'
sys_energy = 'sys_cenr'
gpu_energy = 'gpu_cenr'
cpus_energy = (big_energy, little_energy)
total_energy = cpus_energy + (sys_energy, gpu_energy)

# Second naming scheme for the same four counters ('scpi_sensors ...' metrics).
big_energy2 = 'scpi_sensors BOARD_ENERGY_BIG'
little_energy2 = 'scpi_sensors BOARD_ENERGY_LITTLE'
sys_energy2 = 'scpi_sensors BOARD_ENERGY_SYS'
gpu_energy2 = 'scpi_sensors BOARD_ENERGY_GPU'
cpus_energy2 = (big_energy2, little_energy2)
total_energy2 = cpus_energy2 + (sys_energy2, gpu_energy2)

# Two-way translation between the schemes: each name maps to its counterpart
# in the other scheme (big<->big, little<->little, sys<->sys, gpu<->gpu).
energy_map = dict(zip(total_energy, total_energy2))
energy_map.update(zip(total_energy2, total_energy))

# Per-workload reporting configuration. Each entry is a dict with:
#   'name'          - workload name; get_valid_metric_list and get_workload_report
#                     match it against the workload being reported
#   'energy_test'   - boolean flag; not read by any code visible in this file
#   'primary_score' - metric used as the primary score of the workload
#   'interesting'   - additional metrics to include in the report; empty strings
#                     are placeholders and are skipped by get_workload_report
#   'valid_list'    - metrics that must all be present for an iteration to be
#                     considered valid (see is_iteration_valid)
interesting_results = (
  { 'name': 'nenamark2', 'energy_test':False, 'primary_score':'nenamark score',
    'interesting':('',), 'valid_list':('nenamark score', ) },
  { 'name': 'egypt_2.5.1_HD_off', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 'egypt_2.5.1_HD_on', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 't-rex_2.7.0_HD_off', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 't-rex_2.7.0_HD_on', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 'manhattan_on', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 'manhattan_off', 'energy_test':False, 'primary_score':'FPS_0',
    'interesting':('Frames_0',), 'valid_list':('FPS_0', ) },
  { 'name': 'seemore', 'energy_test':False, 'primary_score':'FPS',
    'interesting':('frame_count', 'janks', 'not_at_vsync',), 'valid_list':('FPS',) },
  { 'name': 'andebench', 'energy_test':False, 'primary_score': 'AndEMark Native',
    'interesting':('AndEMark Java',), 'valid_list': ('AndEMark Native', 'AndEMark Java',) },
  { 'name': 'andebenchst', 'energy_test':False, 'primary_score': 'AndEMark Native',
    'interesting':('AndEMark Java',), 'valid_list': ('AndEMark Native', 'AndEMark Java',) },
  { 'name': 'caffeinemark', 'energy_test':False, 'primary_score': 'OverallScore',
    'interesting':('Sieve', 'Loop', 'Logic', 'String', 'Float', 'Method',), 'valid_list': ('OverallScore','Sieve', 'Loop', 'Logic', 'String', 'Float', 'Method') },
  { 'name': 'cfbench', 'energy_test':False, 'primary_score': 'overall_score',
    'interesting':('java_score', 'native_score',), 'valid_list': ('overall_score',) },
  { 'name': 'linpack', 'energy_test':False, 'primary_score': 'Linpack MT',
    'interesting':('Linpack ST',), 'valid_list': ('Linpack MT', 'Linpack ST',) },
  { 'name': 'quadrant', 'energy_test':False, 'primary_score': 'benchmark_score',
    'interesting':('benchmark_cpu_score', 'benchmark_memory_score', 'benchmark_io_score', 'benchmark_g2d_score', 'benchmark_g3d_score', ), 'valid_list': ('benchmark_score',) },
  { 'name': 'smartbench', 'energy_test':False, 'primary_score': 'Smartbench: valueGame',
    'interesting':('Smartbench: valueProd',), 'valid_list': ('Smartbench: valueGame', 'Smartbench: valueProd',) },
  { 'name': 'sqlitebm', 'energy_test':False, 'primary_score': 'execution_time',
    'interesting':('',), 'valid_list': ('execution_time',) },
  # NOTE(review): the primary score here ('execution_time') is not part of
  # valid_list, unlike most other entries - confirm this is intentional.
  { 'name': 'bbench_with_audio', 'energy_test':True, 'primary_score': 'execution_time',
    'interesting':('Mean Latency',), 'valid_list': ('Mean Latency',) },
  { 'name': 'audio', 'energy_test':True, 'primary_score': 'execution_time',
    'interesting':('',), 'valid_list': ('execution_time',) },
  { 'name': 'browserlaunchx8', 'energy_test':True, 'primary_score': 'time',
    'interesting':('time sd',), 'valid_list': ('time',) },
    
)
# functions to deal with interesting_results list
def get_valid_metric_list(workload):
    """Return the 'valid_list' of the first interesting_results entry named `workload`.

    Returns None when no entry has that name.
    """
    matching_lists = (
        spec['valid_list']
        for spec in interesting_results
        if spec['name'] == workload
    )
    return next(matching_lists, None)

# Single-entry mapping from 'baseline' to a spec_id-like string.
# NOTE(review): not referenced by any other code visible in this file - confirm it is still needed.
spec_id_metadata = { 'baseline': 'mp_a53bc' }

# database lookup functions

# count the iterations recorded for one (workload, spec_id, run_uuid) combination
def number_iterations_present(db_conn, workload, spec_id, run_uuid):
    """Return how many 'execution_time' rows the results table holds for the combination.

    db_conn is an open database connection exposing execute(); the remaining
    arguments are bound as query parameters. The count is returned as a number.
    """
    query = (
        "SELECT count(iteration) FROM results "
        "WHERE workload=? AND spec_id=? AND run_uuid=? AND metric='execution_time'"
    )
    (iteration_count,) = db_conn.execute(query, (workload, spec_id, run_uuid)).fetchone()
    return iteration_count

# does the given iteration have the correct metrics to be considered at least valid?
# returns True or False
def is_iteration_valid(db_conn, workload, spec_id, run_uuid, iteration):
    """Tell whether one iteration recorded every metric needed to report on it.

    An iteration is valid when
      * the workload has a non-empty valid_list (see get_valid_metric_list),
      * every metric named in that valid_list is present for the iteration, and
      * all four energy counters of at least one naming scheme
        (total_energy or total_energy2) are present.

    Presence is checked on the set of distinct metric names rather than by
    counting rows, so a duplicated row cannot stand in for a missing metric
    and four energy rows split across the two naming schemes do not count as
    a complete set.

    Returns True or False.
    """
    valid_list = get_valid_metric_list(workload)
    if not valid_list:
        # Unknown workload (or nothing to validate against): never valid.
        return False
    get_iteration = "SELECT metric, iteration FROM results WHERE workload=? AND spec_id=? AND run_uuid=? AND iteration=?"
    seen_metrics = set(
        row[0]
        for row in db_conn.execute(get_iteration, (workload, spec_id, run_uuid, iteration))
    )
    if not seen_metrics.issuperset(valid_list):
        return False
    # get_iterations sums the four counters of a single scheme, so one scheme
    # has to be complete on its own.
    return seen_metrics.issuperset(total_energy) or seen_metrics.issuperset(total_energy2)

def get_workload_list_from_database(db_conn):
    """Return the `label` column of every workload_specs row as a list."""
    label_rows = db_conn.execute("SELECT label FROM workload_specs")
    return [label for (label,) in label_rows]

def get_iteration_list_by_test_run(db_conn, workload, spec_id, run_uuid):
    """Return the iteration numbers of the combination that pass is_iteration_valid.

    Candidate iterations are those with an 'execution_time' row in the results
    table; each one is kept only if is_iteration_valid accepts it.
    """
    query = "SELECT iteration FROM results WHERE workload=? AND spec_id=? AND run_uuid=? AND metric='execution_time'"
    candidates = db_conn.execute(query, (workload, spec_id, run_uuid))
    return [
        number
        for (number,) in candidates
        if is_iteration_valid(db_conn, workload, spec_id, run_uuid, number)
    ]

def get_metric_by_iteration_and_run(db_conn, workload, spec_id, run_uuid, iteration, metric):
    """Return the stored `value` of one metric for one iteration of a test run.

    Only the first matching row is used. If no row matches, fetchone() yields
    None and indexing it raises TypeError.
    """
    lookup = 'SELECT value FROM results WHERE workload=? AND spec_id=? AND run_uuid=? AND iteration=? AND metric=?'
    parameters = (workload, spec_id, run_uuid, iteration, metric)
    first_row = db_conn.execute(lookup, parameters).fetchone()
    return first_row[0]

# each distinct (workload, spec_id, run_uuid) triple in the results table identifies
# the set of test runs made for a single configuration
def get_unique_combo_list_from_database(db_conn):
    """Return all distinct (workload, spec_id, run_uuid) rows of the results table as a list."""
    return db_conn.execute("SELECT DISTINCT workload,spec_id,run_uuid FROM results").fetchall()

def get_energy_type(db_conn):
    """Return 1 if the results table contains the big_energy metric, otherwise 2.

    Each query row is a one-element tuple, so `big_energy in row` is an
    exact-match membership test against the metric name, not a substring test.
    """
    metric_rows = db_conn.execute("SELECT DISTINCT metric FROM results")
    uses_first_scheme = any(big_energy in row for row in metric_rows)
    return 1 if uses_first_scheme else 2
    
# functions handling results
    
# values holds the results of one metric across a set of test runs
# the summary returned is (mean, geometric mean, standard deviation)
def make_stats(values):
    """Summarise `values` as a (mean, geometric mean, standard deviation) tuple.

    Every element is converted with float() first, so numeric strings are
    accepted. The standard deviation is numpy's default std().
    """
    samples = np.array(list(map(float, values)))
    arithmetic_mean = samples.mean()
    spread = samples.std()
    geometric_mean = scipy_stats.mstats.gmean(samples)
    return (arithmetic_mean, geometric_mean, spread)

def get_iterations(db_vars, metrics, energy_metrics, primary, iteration_list, iteration_skip_list):
    """Collect the reported metrics of every iteration in `iteration_list`.

    db_vars is the (db_conn, workload, spec_id, run_uuid) tuple identifying the
    test run. For each iteration a dict is built holding 'iteration', the
    primary metric, every metric in `metrics` and in `energy_metrics` (values as
    returned by the database), plus two derived floats: 'total_energy' (sum of
    the four counters) and 'cpu_energy' (sum of the big and little counters).
    The naming scheme used for the sums is chosen by whether big_energy is in
    `energy_metrics`. iteration_skip_list is accepted but not used.

    Returns the list of per-iteration dicts, in iteration_list order.
    """
    conn, workload, spec_id, run_uuid = db_vars
    first_scheme = big_energy in energy_metrics
    total_names = total_energy if first_scheme else total_energy2
    cpu_names = cpus_energy if first_scheme else cpus_energy2

    def fetch(iteration, metric):
        return get_metric_by_iteration_and_run(conn, workload, spec_id, run_uuid, iteration, metric)

    def summed(record, names):
        accumulator = 0
        for name in names:
            accumulator += float(record[name])
        return accumulator

    collected = []
    for iteration in iteration_list:
        record = dict()
        record['iteration'] = iteration
        record[primary] = fetch(iteration, primary)
        for group in (metrics, energy_metrics):
            for name in group:
                record[name] = fetch(iteration, name)
        record['total_energy'] = summed(record, total_names)
        record['cpu_energy'] = summed(record, cpu_names)
        collected.append(record)
    return collected
    
# each workload reports the following structure:
# result is a dictionary, 'label', 'name', 'spec_id' and 'run_uuid' refer to the workload identifier
#   'runs' is an array where each element contains another dictionary containing the valid
#     results from the database for the chosen workload although it only contains the fields
#     marked as primary score, interesting or one of the energy counters
#   'primary' is the name of the metric used for primary score
#   'stats' is a dictionary of tuples containing mean, geomean and stdev for each metric measured, where metric name is the key
def get_workload_report(db_conn, label, workload, spec_id, run_uuid):
    """Build the report dictionary for one (workload, spec_id, run_uuid) combination.

    db_conn is the database connection to read from and label is a free-form
    tag copied into the report. The workload is looked up in
    interesting_results to find its primary metric and extra metrics.

    A workload with no interesting_results entry yields a report with
    'primary' set to None and empty 'runs' and 'stats', instead of failing on
    an unbound `primary`. A known workload with no valid iterations also
    yields empty 'runs' and 'stats'.
    """
    result = { 'label': label, 'name': workload, 'spec_id': spec_id, 'run_uuid': run_uuid }
    if get_energy_type(db_conn) == 1:
        energy_metrics = total_energy
    else:
        energy_metrics = total_energy2

    primary = None
    metrics = ()
    for entry in interesting_results:
        if entry['name'] == workload:
            primary = entry['primary_score']
            # empty strings are placeholders for "no extra metrics"
            metrics = tuple(item for item in entry['interesting'] if len(item) > 0)
            break

    if primary is None:
        # Unknown workload: is_iteration_valid rejects every iteration of it,
        # so there is nothing to fetch.
        runs = []
    else:
        iteration_list = get_iteration_list_by_test_run(db_conn, workload, spec_id, run_uuid)
        iteration_skip_list = []
        runs = get_iterations((db_conn, workload, spec_id, run_uuid), metrics, energy_metrics, primary, iteration_list, iteration_skip_list)

    stats = dict()
    skip_members = ( 'iteration', 'primary_name' )
    if runs:
        # every run dict has the same keys, so the first one lists the members
        for member in runs[0]:
            if member in skip_members:
                continue
            stats[member] = make_stats([run.get(member) for run in runs])

    result['runs'] = runs
    result['stats'] = stats
    result['primary'] = primary
    return result

def print_workload_report(report):
    """Pretty-print `report` with the module-level PrettyPrinter `pp`."""
    pp.pprint(report)

# perform a kruskal-wallis one-way analysis of variance
# we are assuming that the populations are the same shape and any difference is from the spec used
# this tells us if we are looking at a real difference or not.
def is_significant(member, a, b):
    """Run a Kruskal-Wallis H-test on metric `member` of two lists of run dicts.

    a and b are lists of per-iteration dicts (the 'runs' of a workload report).
    The values are converted to float before ranking: values read from the
    database are text, and ranking text is lexicographic ('100' sorts before
    '99'), which gives wrong ranks whenever the digit counts differ.

    Returns the (H statistic, p-value) pair from scipy's kruskal().
    Raises TypeError if `member` is missing from a run (float(None)).
    """
    sample_a = [float(run.get(member)) for run in a]
    sample_b = [float(run.get(member)) for run in b]
    kr_H, kr_p = scipy_stats.kruskal(sample_a, sample_b)
    return kr_H, kr_p
   
def compare_one_energy_metric(metric_name, reference, comparison):
    """Print which report averaged less of one energy metric, and how reliable that is.

    reference and comparison are workload reports (see get_workload_report).
    Each report may store the metric under metric_name or under its
    counterpart in the other naming scheme (energy_map); the key is resolved
    per report. When the two keys differ the metric is displayed as
    'ref_key(comp_key)'.

    The significance test reads each side's samples by that side's own key.
    The combined display name is not a key in either report's runs, so it
    cannot be used for the lookup.

    Raises KeyError when neither name of the metric is present in a report's
    stats. print() is called with a single argument throughout, which behaves
    the same as the Python 2 print statement.
    """
    def stats_key(report):
        # prefer the name as given, fall back to the other naming scheme
        if report['stats'].get(metric_name):
            return metric_name
        return energy_map[metric_name]

    ref_metric_name = stats_key(reference)
    ref_mean, ref_geomean, ref_std = reference['stats'][ref_metric_name]
    comp_metric_name = stats_key(comparison)
    comp_mean, comp_geomean, comp_std = comparison['stats'][comp_metric_name]

    lowest = comparison if ref_mean > comp_mean else reference
    if ref_metric_name == comp_metric_name:
        display_name = ref_metric_name
    else:
        display_name = '{}({})'.format(ref_metric_name, comp_metric_name)
    print("{}: {},'{}' averaged less energy ({:.4f}J vs {:.4f}J) overall".format(display_name, lowest['spec_id'], lowest['label'], ref_mean, comp_mean))

    # Re-key each side's samples under the display name so that one member
    # name works for both lists even when the underlying keys differ.
    ref_samples = [{display_name: run.get(ref_metric_name)} for run in reference['runs']]
    comp_samples = [{display_name: run.get(comp_metric_name)} for run in comparison['runs']]
    kr_H, kr_p = is_significant(display_name, ref_samples, comp_samples)
    padding = ''.rjust(len(display_name))
    if kr_p <= 0.05:
        print('{}  This result is LIKELY reliable (p={:.4f})'.format(padding, kr_p))
    else:
        print('{}  This result is NOT LIKELY reliable (p={:.4f})'.format(padding, kr_p))
    print('')

def compare_one_metric(metric_name, reference, comparison):
    """Print which report averaged lower on `metric_name`, and how reliable that is.

    reference and comparison are workload reports; both must hold the metric
    under the same name in 'stats' and in each run. print() is called with a
    single argument throughout, which behaves the same as the Python 2 print
    statement.
    """
    ref_mean, ref_geomean, ref_std = reference['stats'][metric_name]
    comp_mean, comp_geomean, comp_std = comparison['stats'][metric_name]
    lowest = comparison if ref_mean > comp_mean else reference
    print("{}: {},'{}' averaged lower ({:.4f} vs {:.4f}) overall".format(metric_name, lowest['spec_id'], lowest['label'], ref_mean, comp_mean))
    kr_H, kr_p = is_significant(metric_name, reference['runs'], comparison['runs'])
    # indent the verdict so it lines up under the text following the metric name
    padding = ''.rjust(len(metric_name))
    if kr_p > 0.05:
        print('{}  This result is NOT LIKELY reliable (p={:.4f})'.format(padding, kr_p))
    else:
        print('{}  This result is LIKELY reliable (p={:.4f})'.format(padding, kr_p))
    print('')

def _energy_stats_key(item, report):
    """Return the key under which report['stats'] holds energy metric `item`."""
    if report['stats'].get(item):
        return item
    # not stored under this name: use the counterpart in the other naming scheme
    return energy_map[item]


def _is_energy_metric(item):
    """Tell whether a metric name looks like an energy counter."""
    return 'ENERGY' in item.upper() or 'cenr' in item


def _joined_name(ref_key, com_key):
    """Return the display name for a metric whose key may differ per report."""
    if ref_key == com_key:
        return ref_key
    return '{}({})'.format(ref_key, com_key)


def _print_stats_header(title, reference, comparison):
    """Print the table header row: a title and one column per report."""
    print('{0:44s} {1:24s} {2:24s}'.format(
        title,
        '{}({})'.format(reference['spec_id'], reference['label']).rjust(24),
        '{}({})'.format(comparison['spec_id'], comparison['label']).rjust(24)))


def _print_stats_rows(name, ref_stats, com_stats):
    """Print the mean and stdev rows of one metric for both reports."""
    ref_mean, _ref_geomean, ref_std = ref_stats
    com_mean, _com_geomean, com_std = com_stats
    print('{0:44s} {1:24.4f} {2:24.4f}'.format(name + '(mean)', ref_mean, com_mean))
    print('{0:44s} {1:24.4f} {2:24.4f}'.format(name + '(stdev)', ref_std, com_std))


# produce a comparison report of two workloads - reference and comparison. They should be the
# result from calling get_workload_report, and should be the same workload or this won't work
# and might not tell you
def compare_workload_reports(reference, comparison, lower_is_better):
    """Print a comparison of two workload reports.

    The report has three parts: the primary metric, the energy metrics
    (total, cpu, little, big), and every remaining metric found in the first
    run of `reference`. lower_is_better is accepted but not used.

    The big and little counters are excluded from the "other" scores under
    both naming schemes, so a report using the second scheme does not list
    them a second time.

    Raises KeyError when a metric is missing from a report's stats (for
    example when a report has no valid runs). print() is called with a single
    argument throughout, which behaves the same as the Python 2 print
    statement.
    """
    primary = reference['primary']
    # look both up before printing anything so a missing primary fails early
    ref_mean, ref_geomean, ref_std = reference['stats'][primary]
    comp_mean, comp_geomean, comp_std = comparison['stats'][comparison['primary']]
    print('%s %s' % ("Comparing workload reports for", reference['name']))
    print("")
    print("Reference is {} {} {}".format(reference['label'], reference['spec_id'], reference['run_uuid']))
    print("Comparison is {} {} {}".format(comparison['label'], comparison['spec_id'], comparison['run_uuid']))
    print("")
    print("Primary metric for comparison is \"{}\"".format(primary))
    compare_one_metric(primary, reference, comparison)
    print("")
    _print_stats_header(primary, reference, comparison)
    print('{0:44s} {1:24.4f} {2:24.4f}'.format('mean', ref_mean, comp_mean))
    print('{0:44s} {1:24.4f} {2:24.4f}'.format('stdev', ref_std, comp_std))
    kr_H, kr_p = is_significant(primary, reference['runs'], comparison['runs'])
    if kr_p <= 0.05:
        print('This result is LIKELY reliable (p={0:.4f})'.format(kr_p))
    else:
        print('This result is NOT LIKELY reliable (p={0:.4f})'.format(kr_p))

    print("\nEnergy Comparison\n")
    energy_list = ['total_energy', 'cpu_energy', little_energy, big_energy]
    for item in energy_list:
        compare_one_energy_metric(item, reference, comparison)
    print("")
    _print_stats_header('metric', reference, comparison)
    for item in energy_list:
        ref_key = _energy_stats_key(item, reference)
        com_key = _energy_stats_key(item, comparison)
        _print_stats_rows(_joined_name(ref_key, com_key),
                          reference['stats'][ref_key],
                          comparison['stats'][com_key])

    already_seen = ['iteration']
    already_seen.extend(energy_list)
    # also hide the other-scheme names of the counters reported above
    already_seen.extend(energy_map[name] for name in energy_list if name in energy_map)
    already_seen.append(primary)
    others = [item for item in reference['runs'][0].keys() if item not in already_seen]
    print('\n\nThere are {} other Interesting Scores:'.format(len(others)))
    if not others:
        print("End of report.")
        return
    for item in others:
        if _is_energy_metric(item):
            compare_one_energy_metric(item, reference, comparison)
        else:
            compare_one_metric(item, reference, comparison)
    print("")
    _print_stats_header('metric', reference, comparison)
    for item in others:
        if _is_energy_metric(item):
            ref_key = _energy_stats_key(item, reference)
            com_key = _energy_stats_key(item, comparison)
        else:
            ref_key = item
            com_key = item
        _print_stats_rows(_joined_name(ref_key, com_key),
                          reference['stats'][ref_key],
                          comparison['stats'][com_key])

    return


In [21]:
# Each database gets a short label; the label is appended to the dropdown
# entries below and is used to decide which connection an entry belongs to.
label_a='hmp'
file_location_a='run1/results.sqlite'
label_b='old'
file_location_b='../WA_Reporting_WIP/results.missing_cpusets.sqlite'
# get the database loaded
import sqlite3
# to skip a database, make the connection a None
# NOTE(review): sqlite3.connect creates an empty database file when the path
# does not exist, so a wrong path only shows up later as a failing query.
conA = sqlite3.connect(file_location_a)
conB = sqlite3.connect(file_location_b)
#conB = None

In [22]:
# Gather every (workload, spec_id, run_uuid) combination from each connected
# database, together with a 'workload, spec_id, run_uuid, label' display string.
combos = []
selector_combos = []
for connection, connection_label in ((conA, label_a), (conB, label_b)):
    if not connection:
        # this database was skipped
        continue
    result = get_unique_combo_list_from_database(connection)
    for res in result:
        combos.append(res)
        selector_combos.append(', '.join((res[0], res[1], res[2], connection_label)))

# Dropdown offering one entry per combination.
w = widgets.Dropdown(
    description='Combination',
    options=selector_combos
)
def show_report():
    """Print the workload report for the combination selected in dropdown `w`.

    The selected entry is split on commas into workload, spec_id, run_uuid and
    label; the label decides whether conA or conB is queried. print() is
    called with a single argument, which behaves the same as the Python 2
    print statement.
    """
    clear_output()
    fields = [part.strip() for part in w.value.split(",")]
    workload, spec_id, run_uuid, label = [part for part in fields if part]
    db = conB if label == label_b else conA
    print("get_workload_report db={} label={} workload={} spec_id={} run_uuid={}".format(db, label, workload, spec_id, run_uuid))
    print_workload_report(get_workload_report(db, label, workload, spec_id, run_uuid))
# Re-run the report whenever the dropdown selection changes, show the widget,
# and print the report for the initial selection.
# NOTE(review): on_trait_change is deprecated in newer traitlets in favour of
# observe(); observe passes a change argument, so show_report would need one too.
w.on_trait_change(show_report, 'value')
display(w)
show_report()


get_workload_report db=<sqlite3.Connection object at 0x7f85bf99a3d0> label=hmp workload=andebench spec_id=mp_option_default_b01 run_uuid=3251d69c-baad-423a-8ce0-2b058869ce78
{   'label': u'hmp',
    'name': u'andebench',
    'primary': 'AndEMark Native',
    'run_uuid': u'3251d69c-baad-423a-8ce0-2b058869ce78',
    'runs': [   {   'AndEMark Java': u'1260',
                    'AndEMark Native': u'9206',
                    'cpu_energy': 108.45849999999999,
                    'iteration': 1,
                    'scpi_sensors BOARD_ENERGY_BIG': u'85.37006',
                    'scpi_sensors BOARD_ENERGY_GPU': u'10.756525',
                    'scpi_sensors BOARD_ENERGY_LITTLE': u'23.08844',
                    'scpi_sensors BOARD_ENERGY_SYS': u'75.565603',
                    'total_energy': 194.78062799999998},
                {   'AndEMark Java': u'1221',
                    'AndEMark Native': u'8711',
                    'cpu_energy': 112.807338,
                    'iteration': 2,
  

In [23]:
# Dropdowns for the two sides of the comparison; on_selector_change fills them.
left = widgets.Dropdown(
    description='Left Workload',
    options=[]
)
right = widgets.Dropdown(
    description='Right Workload',
    options=[]
)

# Combination lists per database; None marks a skipped database.
resultA = get_unique_combo_list_from_database(conA) if conA else None
resultB = get_unique_combo_list_from_database(conB) if conB else None

# Workload names across both databases, without duplicates, in first-seen order.
workloads = []
for combo_list in (resultA, resultB):
    for res in combo_list or ():
        if res[0] not in workloads:
            workloads.append(res[0])

selector = widgets.Dropdown(
    description='Select Workload:',
    options=workloads
)
def on_selector_change():
    """Refill the left/right dropdowns with the combinations of the selected workload.

    Each option is a 'spec_id, run_uuid, label' string built from resultA and
    resultB. The left dropdown is preselected to the first option and the
    right one to the second. A workload with only one combination preselects
    that one on both sides, and a workload with none leaves both dropdowns
    empty, instead of raising IndexError.
    """
    clear_output()
    spec_uuids = []
    for combo_list, combo_label in ((resultA, label_a), (resultB, label_b)):
        # a skipped database leaves its combination list as None
        for res in combo_list or ():
            if res[0] == selector.value:
                spec_uuids.append(res[1] + ', ' + res[2] + ', ' + combo_label)
    left.options = spec_uuids
    right.options = spec_uuids
    if not spec_uuids:
        return
    first_choice = spec_uuids[0]
    second_choice = spec_uuids[1] if len(spec_uuids) > 1 else first_choice
    left.value = first_choice
    left.value_selected = first_choice
    right.value = second_choice
    right.value_selected = second_choice
    
# Refill the left/right dropdowns whenever another workload is selected.
selector.on_trait_change(on_selector_change, 'value')

# Button that triggers the comparison of the two selected combinations.
button2 = widgets.Button(description="Show Results")
def on_button_clicked(b):
    """Compare the combinations chosen in the left and right dropdowns.

    b is the clicked button (unused). Each dropdown value is split on commas
    into spec_id, run_uuid and label; the label decides whether conA or conB
    is queried. The left report is the reference and the right one the
    comparison.
    """
    clear_output()
    workload = selector.value

    def build_report(selection):
        spec_id, run_uuid, label = [field.strip() for field in selection.split(",") if field.strip()]
        db = conB if label == label_b else conA
        return get_workload_report(db, label, workload, spec_id, run_uuid)

    report1 = build_report(left.value)
    report2 = build_report(right.value)
    compare_workload_reports(report1, report2, False)

# Hook up the button, then show the widgets. on_selector_change is called once
# by hand so the left/right dropdowns are filled for the initial selection.
button2.on_click(on_button_clicked)
display(selector)
on_selector_change()
display(left)
display(right)
display(button2)

Comparing workload reports for bbench_with_audio

Reference is hmp mp_option_default_w01 3251d69c-baad-423a-8ce0-2b058869ce78
Comparison is old mp_a57only_w01 cdce8e1d-880c-417b-a148-e72f32f5e0d4

Primary metric for comparison is "execution_time"
execution_time: mp_a57only_w01,'old' averaged lower (146.7513 vs 145.4267) overall
                This result is NOT LIKELY reliable (p=0.0758)


execution_time                               mp_option_default_w01(hmp)      mp_a57only_w01(old)
mean                                                         146.7513                 145.4267
stdev                                                          1.0596                   1.1120
This result is NOT LIKELY reliable (p=0.0758)

Energy Comparison

total_energy: mp_option_default_w01,'hmp' averaged less energy (161.0305J vs 167.2457J) overall
              This result is LIKELY reliable (p=0.0090)

cpu_energy: mp_option_default_w01,'hmp' averaged less energy (21.5425J vs 26.6172J) overall
        

In [15]:
# Batch comparison: for every workload present in both databases, compare the
# first combination whose spec_id contains sideA (database A) against the first
# combination whose spec_id contains sideB (database B).
sideA='mp_a53bc'
sideB='mp_eas_nocgroups_interactive'

workloadsA=[]
workloadsB=[]

# A skipped database leaves its result list as None; treat that as "no
# combinations" so the cell produces no comparisons instead of raising TypeError.
filtered_resultA=[]
for res in resultA or []:
    workload,spec_id,run_uuid=res
    if workload not in workloadsA:
        workloadsA.append(workload)
    if sideA in spec_id:
        filtered_resultA.append(res)

filtered_resultB=[]
for res in resultB or []:
    workload,spec_id,run_uuid=res
    if workload not in workloadsB and workload in workloadsA:
        workloadsB.append(workload)
    if sideB in spec_id:
        filtered_resultB.append(res)

common_workloads=[]
for workload in workloadsA:
    if workload in workloadsB:
        common_workloads.append(workload)

for workload in common_workloads:
    report1 = None
    report2 = None
    for work, spec_id,run_uuid in filtered_resultA:
        if work == workload:
            report1 = get_workload_report(conA, label_a, workload, spec_id, run_uuid)
            break
    if report1:
        for work, spec_id,run_uuid in filtered_resultB:
            if work == workload:
                report2 = get_workload_report(conB, label_b, workload, spec_id, run_uuid)
                break
    if report1 and report2:
        try:
            compare_workload_reports(report1, report2, False)
        except KeyError:
            # a metric was missing from one of the reports; dump both for inspection
            print('Error: KeyError raised for these reports: {}'.format(workload))
            pp.pprint(report1)
            pp.pprint(report2)
        print('\n\n')















































Comparing workload reports for nenamark2

Reference is hmp mp_a53bc_b11 37ba5db4-c06c-484b-90db-cecad1017820
Comparison is eas mp_eas_nocgroups_interactive_b11 bc4a8556-fe81-4d2f-994b-c402c3d6ffd2

Primary metric for comparison is "nenamark score"
nenamark score: mp_a53bc_b11,'hmp' averaged lower (59.6333 vs 59.7400) overall
                This result is NOT LIKELY reliable (p=0.4033)


nenamark score                                      mp_a53bc_b11(hmp) mp_eas_nocgroups_interactive_b11(eas)
mean                                                          59.6333                  59.7400
stdev                                                          0.2261                   0.1200
This result is NOT LIKELY reliable (p=0.4033)

Energy Comparison

total_energy: mp_eas_nocgroups_interactive_b11,'eas' averaged less energy (306.0720J vs 205.3532J) overall
              This result is LIKELY reliable (p=0.0027)

cpu_energy: mp_eas_nocgroups_interactive_b11,'eas' averaged less energy (25.7688J