In [10]:
import pandas as pd
import csv
import os
from collections import defaultdict

def get_files(folder, parse_name_fn):
    files = os.listdir(folder)
    result = defaultdict(list)
    for file in files:
        if ".tsv" not in file:
            continue
        allocator, run = parse_name_fn(file)
        result[allocator].append(os.path.join(folder, file))
    return result
        
# Returns (allocator, run)
def parse_filename(filename):
    return reversed(filename.split(".")[0].split("-"))
def parse_firefox_filename(filename):
    return filename.split(".")[1:3]

programs = {
    "binarytree": ("results_compiled/memory/binarytree", parse_filename),
    "larson": ("results_compiled/memory/larson", parse_filename),
    "redis": ("results_compiled/memory/redis", parse_filename),
    "firefox": ("results_compiled/memory/firefox", parse_firefox_filename),
    "mysql": ("results_compiled/memory/mysql", parse_filename),
}

In [11]:
data = {}

for program, (filename, parse_fn) in programs.items():
    data[program] = get_files(filename, parse_fn)
    
data

{'binarytree': defaultdict(list,
             {'jemalloc': ['results_compiled/memory/binarytree/2-jemalloc.tsv',
               'results_compiled/memory/binarytree/15-jemalloc.tsv',
               'results_compiled/memory/binarytree/11-jemalloc.tsv',
               'results_compiled/memory/binarytree/4-jemalloc.tsv',
               'results_compiled/memory/binarytree/1-jemalloc.tsv',
               'results_compiled/memory/binarytree/6-jemalloc.tsv',
               'results_compiled/memory/binarytree/10-jemalloc.tsv',
               'results_compiled/memory/binarytree/9-jemalloc.tsv',
               'results_compiled/memory/binarytree/18-jemalloc.tsv',
               'results_compiled/memory/binarytree/16-jemalloc.tsv',
               'results_compiled/memory/binarytree/5-jemalloc.tsv',
               'results_compiled/memory/binarytree/19-jemalloc.tsv',
               'results_compiled/memory/binarytree/13-jemalloc.tsv',
               'results_compiled/memory/binarytree/12-jemalloc.t

In [26]:
import numpy as np
from scipy.stats import sem, t

def compute_stats(samples):
    max_value = max(samples)
    ninetieth_percentile = np.percentile(samples, 90)
    mean = np.mean(samples)
    std_err = sem(samples)
    return {
        "max": max_value,
        "90th_percentile": ninetieth_percentile,
        "mean": mean,
        "std_err": std_err,
    }

def parse_file(filename):
    with open(filename, "r") as f:
        reader = csv.DictReader(f, delimiter="\t")
        return [int(row["rss"]) + int(row["kernel"]) for row in reader]

confidence = 0.95
    
def compute_stats_across_runs(files):
    stats = defaultdict(list)
    for file in files:
        run_stats = compute_stats(parse_file(file))
        for key in run_stats:
            stats[key].append(run_stats[key])
    summary_stats = {}
    for stat in stats:
        mean = np.mean(stats[stat])
        std_err = sem(stats[stat])
        ci_term = std_err*t.ppf((1+confidence)/2, len(stats[stat])-1)
        summary_stats[stat] = (mean-ci_term, mean+ci_term)
    return summary_stats
    

In [27]:
for program in data:
    for allocator in data[program]:
        print(f"Stats for {program} ({allocator}):")
        for stat, value in compute_stats_across_runs(data[program][allocator]).items():
            print(f"\t{stat}:\t\t{value}")
    print()

Stats for binarytree (jemalloc):
	max:		(207753744.01628643, 207906287.98371357)
	90th_percentile:		(133303939.18847826, 134870970.25152174)
	mean:		(88390769.19503106, 88662491.50577323)
	std_err:		(450028.651085527, 456692.91536644375)
Stats for binarytree (mesh):
	max:		(208037993.3746195, 208210633.82538047)
	90th_percentile:		(154285632.5038758, 155828057.09612423)
	mean:		(96559085.24153914, 96946699.36401667)
	std_err:		(544815.1932084257, 554712.7500135851)
Stats for binarytree (malloc):
	max:		(203243836.51576847, 203386563.48423153)
	90th_percentile:		(203243836.51576847, 203386563.48423153)
	mean:		(144601162.59121028, 144844984.61192164)
	std_err:		(400458.4739449383, 405454.3016543577)
Stats for binarytree (libc):

Stats for larson (jemalloc):
	max:		(5080253.116285665, 5287132.483714335)
	90th_percentile:		(4910373.83322069, 5088371.76677931)
	mean:		(4305644.441617002, 4429319.51734369)
	std_err:		(15943.741775650358, 17623.387724537573)
Stats for larson (mesh):
	max:		(