In [18]:
import pandas as pd
import csv
from collections import defaultdict

def parse_time(filename):
    """Collect the ``real`` timings of a benchmark CSV, grouped by allocator.

    The file is read with ``csv.DictReader``, so its first line must be a
    header naming at least the ``allocator`` and ``real`` columns. Each row
    is assigned to a group by substring match on its ``allocator`` value:

    * contains ``"libmesh"``     -> ``"mesh"``
    * contains ``"libjemalloc"`` -> ``"jemalloc"``
    * anything else              -> ``"libc"``

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``defaultdict(list)`` mapping group name to the list of ``real``
        values (as floats) in file order. Groups with no rows are absent.

    Raises:
        KeyError: If the header lacks an ``allocator`` or ``real`` column.
        ValueError: If a ``real`` value cannot be converted to ``float``.
    """
    results = defaultdict(list)
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines inside quoted fields may be
    # misread on some platforms.
    with open(filename, newline="") as f:
        # Stream the rows instead of materialising them in a list first.
        for row in csv.DictReader(f):
            allocator = row["allocator"]
            if "libmesh" in allocator:
                key = "mesh"
            elif "libjemalloc" in allocator:
                key = "jemalloc"
            else:
                # Fallback bucket: every allocator string that names neither
                # of the two libraries above.
                key = "libc"
            results[key].append(float(row["real"]))
    return results


def parse_firefox_time(filename):
    """Sum the Firefox benchmark timings per run, grouped by allocator.

    The file is tab-separated with no header. Every non-blank row must hold
    exactly three fields: ``allocator``, ``run`` and ``ms``. The integer
    ``ms`` values of all rows sharing the same allocator and run are added
    together, and each total is then divided by 1000 (the field is named
    ``ms`` in the code, so this presumably converts milliseconds to seconds).

    Args:
        filename: Path of the TSV file to read.

    Returns:
        A plain ``dict`` mapping allocator name to a list with one summed,
        scaled value per run, in order of each run's first appearance.

    Raises:
        ValueError: If a non-blank row does not have exactly three fields or
            its third field is not an integer.
    """
    totals = defaultdict(lambda: defaultdict(int))
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires.
    with open(filename, newline="") as f:
        for row in csv.reader(f, delimiter="\t"):
            if not row:
                # csv.reader yields [] for a blank line (e.g. a trailing
                # one); unpacking it below would raise ValueError.
                continue
            allocator, run, ms = row
            totals[allocator][run] += int(ms)

    return {
        allocator: [total / 1000 for total in runs.values()]
        for allocator, runs in totals.items()
    }

# Registry of benchmarks: program name -> (results file, parser to call on it).
# The three CSV files go through parse_time; the Firefox TSV has its own parser.
programs = dict(
    binarytree=("results_compiled/speed/binarytree_results.csv", parse_time),
    larson=("results_compiled/speed/larson_results.csv", parse_time),
    redis=("results_compiled/speed/redis_results.csv", parse_time),
    firefox=("results_compiled/speed/firefox_results.tsv", parse_firefox_time),
)

In [19]:
# Run each registered parser on its results file; the keys of `data` are the
# program names from `programs`, the values are whatever the parser returned.
data = {
    program: parse_fn(filename)
    for program, (filename, parse_fn) in programs.items()
}

# Bare expression so the notebook displays the parsed samples.
data

{'binarytree': defaultdict(list,
             {'jemalloc': [53.28,
               52.79,
               53.05,
               53.17,
               53.35,
               53.33,
               53.29,
               53.51,
               53.46,
               53.16,
               52.89,
               54.03,
               53.0,
               52.75,
               52.99,
               52.91,
               52.82,
               53.25,
               53.36,
               52.88],
              'libc': [35.05,
               35.04,
               35.04,
               35.05,
               35.01,
               35.14,
               36.74,
               35.1,
               35.04,
               35.04,
               35.07,
               35.05,
               35.13,
               35.0,
               35.11,
               35.13,
               36.5,
               35.02,
               35.04,
               40.99],
              'mesh': [47.93,
               47.75,
               47

In [21]:
import numpy as np
from scipy.stats import sem, t

# Default two-sided confidence level used by compute_stats when the caller
# does not pass an explicit `level`.
confidence = 0.95

def compute_stats(samples, level=None):
    """Summarise a sequence of numeric samples.

    Reports the extremes, the mean, the standard error of the mean
    (``scipy.stats.sem``) and a two-sided confidence interval for the mean
    built from the Student t quantile with ``len(samples) - 1`` degrees of
    freedom.

    Args:
        samples: Non-empty sized sequence of numbers.
        level: Confidence level strictly between 0 and 1. When ``None``
            (the default) the module-level ``confidence`` value is used,
            read at call time.

    Returns:
        A dict with the keys ``min``, ``max``, ``std_err``, ``ci_min``,
        ``mean`` and ``ci_max`` (in that order). With a single sample the
        spread cannot be estimated, so ``std_err``, ``ci_min`` and
        ``ci_max`` are NaN.

    Raises:
        ValueError: If ``samples`` is empty or the confidence level is not
            strictly between 0 and 1.
    """
    n = len(samples)
    if n == 0:
        raise ValueError("compute_stats() requires at least one sample")
    if level is None:
        level = confidence
    if not 0 < level < 1:
        raise ValueError(f"confidence level must be in (0, 1), got {level!r}")

    mean = np.mean(samples)
    if n > 1:
        std_err = sem(samples)
        # Half-width of the interval: t quantile for the two-sided level,
        # scaled by the standard error.
        ci_term = std_err * t.ppf((1 + level) / 2, n - 1)
    else:
        # One sample leaves zero degrees of freedom; report NaN explicitly
        # instead of sending a degenerate input through sem()/t.ppf().
        std_err = ci_term = np.nan

    return {
        "min": min(samples),
        "max": max(samples),
        "std_err": std_err,
        "ci_min": mean - ci_term,
        "mean": mean,
        "ci_max": mean + ci_term,
    }

In [27]:
# Text report: one header line per (program, allocator) pair followed by its
# statistics, with a blank line after each program's group.
for program, per_allocator in data.items():
    for allocator, samples in per_allocator.items():
        print(f"Stats for {program} ({allocator}):")
        stats = compute_stats(samples)
        for stat, value in stats.items():
            print(f"\t{stat}:\t\t{value}")
    print()

Stats for binarytree (jemalloc):
	min:		52.75
	max:		54.03
	std_err:		0.06910432616513126
	ci_min:		53.01886298307272
	mean:		53.16350000000001
	ci_max:		53.308137016927304
Stats for binarytree (libc):
	min:		35.0
	max:		40.99
	std_err:		0.30762201259269306
	ci_min:		34.87063972797801
	mean:		35.5145
	ci_max:		36.15836027202199
Stats for binarytree (mesh):
	min:		47.56
	max:		48.97
	std_err:		0.07617362438949293
	ci_min:		47.76856677184133
	mean:		47.928
	ci_max:		48.087433228158666

Stats for larson (jemalloc):
	min:		15.08
	max:		15.08
	std_err:		4.07524207927e-16
	ci_min:		15.079999999999998
	mean:		15.079999999999998
	ci_max:		15.079999999999998
Stats for larson (libc):
	min:		15.08
	max:		15.08
	std_err:		4.07524207927e-16
	ci_min:		15.079999999999998
	mean:		15.079999999999998
	ci_max:		15.079999999999998
Stats for larson (mesh):
	min:		15.08
	max:		15.08
	std_err:		4.07524207927e-16
	ci_min:		15.079999999999998
	mean:		15.079999999999998
	ci_max:		15.079999999999998

Stats for r