In [1]:
import os
import pandas as pd

In [2]:
# Root of the results tree; the path is relative to the notebook's working directory.
folder = "../../results"

# One sub-directory of `folder` per project.
# NOTE(review): "pgdbc" may be intended as "pgjdbc" -- confirm against the
# actual directory name on disk before changing it.
projects = [
    "log4j2",
    "netty",
    "okio",
    "druid",
    "pgdbc",
    "h2o-3",
    "gs-collections",
]

# Experiment codes; each one maps to a directory named 'exp-<code>-out'.
experiments = [
    "FINA",
    "LOOP",
    "RETU",
    "SETUP",
    "INVO",
    "FORK",
]

# `versions` and `files` are consumed pairwise (zipped) by the loader:
# the i-th version directory contains the i-th file name.
versions = [
    "original",
    "fixed_full",
]

files = [
    "original-all.csv",
    "fixed-all.csv",
]


In [3]:
# Read every raw result file and stack them into a single frame.
#
# For each (project, experiment, version) combination the file
#   <folder>/<project>/exp-<experiment>-out/<version>/<file>
# is loaded if it exists and tagged with the combination it came from.
# Missing files are reported and skipped.
#
# The per-file frames are collected in a list and concatenated once at the
# end: `DataFrame.append` was removed in pandas 2.0, and calling it inside
# the loop re-copied the accumulated frame on every iteration.

raw_frames = []
for p in projects:
    for exp in experiments:
        for v, f in zip(versions, files):

            fname = os.path.join(folder, p, 'exp-%s-out' % exp, v, f)
            if not os.path.isfile(fname):
                print('Skipping the file %s' % fname)
                continue

            temp = pd.read_csv(fname)

            # Record where each row came from.
            temp['Project'] = p
            temp['Experiment'] = exp
            temp['Version'] = v

            raw_frames.append(temp)

# `pd.concat` raises on an empty list, so fall back to an empty frame when no
# file was found. `sort=False` keeps columns in first-seen order instead of
# sorting them when the files do not share identical columns.
df = pd.concat(raw_frames, sort=False) if raw_frames else pd.DataFrame()

len(df)


Skipping the file ../../results\log4j2\exp-SETUP-out\original\original-all.csv
Skipping the file ../../results\log4j2\exp-SETUP-out\fixed_full\fixed-all.csv
Skipping the file ../../results\log4j2\exp-INVO-out\original\original-all.csv
Skipping the file ../../results\log4j2\exp-INVO-out\fixed_full\fixed-all.csv
Skipping the file ../../results\log4j2\exp-FORK-out\original\original-all.csv
Skipping the file ../../results\log4j2\exp-FORK-out\fixed_full\fixed-all.csv
Skipping the file ../../results\netty\exp-FORK-out\original\original-all.csv
Skipping the file ../../results\netty\exp-FORK-out\fixed_full\fixed-all.csv
Skipping the file ../../results\okio\exp-FINA-out\original\original-all.csv
Skipping the file ../../results\okio\exp-FINA-out\fixed_full\fixed-all.csv
Skipping the file ../../results\okio\exp-LOOP-out\original\original-all.csv
Skipping the file ../../results\okio\exp-LOOP-out\fixed_full\fixed-all.csv
Skipping the file ../../results\okio\exp-RETU-out\original\original-all.csv
Sk

235110

In [4]:
# Persist the combined raw rows next to the per-project result folders.
df.to_csv(path_or_buf=os.path.join(folder, 'raw-results.csv'))

In [4]:
# Build a grouping of the raw rows keyed by project and experiment.
# NOTE(review): this expression only creates the GroupBy object and does not
# aggregate anything; the recorded cell output looks like a row preview, so
# confirm what was meant to be displayed here.
df.groupby(by=['Project', 'Experiment'])

Unnamed: 0.1,Unnamed: 0,Benchmark Mode,Class,Fork,Full Bench,Full params,Iteration,Iteration Type,Measurement Unit,Method,Package,Score,Threads,Total Fork,Trial,Project,Experiment,Version
0,0,"Throughput, ops/time",LoggerConfigBenchmark,1.0,org.apache.logging.log4j.perf.jmh.LoggerConfi...,,1,warmup,ops/s,logWithCountersAndLock,org.apache.logging.log4j.perf.jmh,17613650.0,1,10.0,1,log4j2,FINA,original
1,1,"Throughput, ops/time",LoggerConfigBenchmark,1.0,org.apache.logging.log4j.perf.jmh.LoggerConfi...,,2,warmup,ops/s,logWithCountersAndLock,org.apache.logging.log4j.perf.jmh,19651650.0,1,10.0,1,log4j2,FINA,original
2,2,"Throughput, ops/time",LoggerConfigBenchmark,1.0,org.apache.logging.log4j.perf.jmh.LoggerConfi...,,3,warmup,ops/s,logWithCountersAndLock,org.apache.logging.log4j.perf.jmh,21773100.0,1,10.0,1,log4j2,FINA,original
3,3,"Throughput, ops/time",LoggerConfigBenchmark,1.0,org.apache.logging.log4j.perf.jmh.LoggerConfi...,,4,warmup,ops/s,logWithCountersAndLock,org.apache.logging.log4j.perf.jmh,21774640.0,1,10.0,1,log4j2,FINA,original
4,4,"Throughput, ops/time",LoggerConfigBenchmark,1.0,org.apache.logging.log4j.perf.jmh.LoggerConfi...,,5,warmup,ops/s,logWithCountersAndLock,org.apache.logging.log4j.perf.jmh,21761280.0,1,10.0,1,log4j2,FINA,original


In [5]:
# Read every per-experiment summary file and stack them into a single frame.
#
# For each (project, experiment) combination the file
#   <folder>/<project>/exp-<experiment>-out/analysis/summary.csv
# is loaded if it exists and tagged with its project and experiment.
# Missing files are reported and skipped.
#
# The per-file frames are collected in a list and concatenated once at the
# end: `DataFrame.append` was removed in pandas 2.0, and calling it inside
# the loop re-copied the accumulated frame on every iteration.

analyzed = 'analysis'

summary_frames = []
for p in projects:
    for exp in experiments:
        fname = os.path.join(folder, p, 'exp-%s-out' % exp, analyzed, 'summary.csv')
        if not os.path.isfile(fname):
            print('Skipping the file %s' % fname)
            continue

        temp = pd.read_csv(fname)

        # Record where each row came from.
        temp['Project'] = p
        temp['Experiment'] = exp

        summary_frames.append(temp)

# `pd.concat` raises on an empty list, so fall back to an empty frame when no
# file was found. The summary files do not all share the same columns (older
# pandas emitted a column-sorting FutureWarning here); `sort=False` makes the
# column order explicit: first-seen order, no implicit sorting.
df_summarized = (
    pd.concat(summary_frames, sort=False) if summary_frames else pd.DataFrame()
)

len(df_summarized)


Skipping the file ../../results\log4j2\exp-SETUP-out\analysis\summary.csv
Skipping the file ../../results\log4j2\exp-INVO-out\analysis\summary.csv
Skipping the file ../../results\log4j2\exp-FORK-out\analysis\summary.csv
Skipping the file ../../results\netty\exp-FORK-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-FINA-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-LOOP-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-RETU-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-SETUP-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-INVO-out\analysis\summary.csv
Skipping the file ../../results\okio\exp-FORK-out\analysis\summary.csv
Skipping the file ../../results\druid\exp-FINA-out\analysis\summary.csv
Skipping the file ../../results\druid\exp-RETU-out\analysis\summary.csv
Skipping the file ../../results\druid\exp-SETUP-out\analysis\summary.csv
Skipping the file ../../results\druid\exp-FORK-out\analysis\summ

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


356

In [6]:
def normalize_factor(df):
    """Return a signed ratio between the ``Original`` and ``Fixed`` values of a row.

    Parameters
    ----------
    df : row-like object
        Must expose ``Original``, ``Fixed`` and ``Factor`` as attributes and
        ``'Benchmark Mode'`` via item access (e.g. a row passed in by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    float
        * ``0.0`` when ``Factor`` is zero.
        * Otherwise the two values are compared, with their roles exchanged
          when the benchmark mode is ``'Throughput, ops/time'``. If the
          (possibly exchanged) fixed value is strictly greater, the result is
          ``fixed / original``; in every other case, including equality, it
          is ``-original / fixed``.
    """
    original_value, fixed_value = df.Original, df.Fixed

    # A zero factor short-circuits everything else.
    if df.Factor == 0:
        return 0.0

    if df['Benchmark Mode'] == 'Throughput, ops/time':
        # Throughput rows are evaluated with the two roles exchanged.
        reference, compared = fixed_value, original_value
    else:
        reference, compared = original_value, fixed_value

    ratio = compared / reference if compared > reference else -reference / compared
    return float(ratio)

# Evaluate normalize_factor once per row and store the result as a new column.
df_summarized['New Factor'] = df_summarized.apply(normalize_factor, axis='columns')

In [7]:
# Persist the combined summaries, including the 'New Factor' column if it was added.
df_summarized.to_csv(path_or_buf=os.path.join(folder, 'summarized-results.csv'))