In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec

import mpld3
#mpld3.enable_notebook()

In [None]:
def normalise(means, errors, baseline, keep=False):
    """Express every version's measurements relative to a baseline version.

    Parameters
    ----------
    means : dict
        Maps version name -> list of mean values, one entry per test.
    errors : dict
        Maps version name -> list of errors, same structure as ``means``.
    baseline : hashable
        Key in ``means`` of the version every value is divided by.
    keep : bool, optional, default: False
        If True the baseline version is included in the result as well;
        otherwise it is left out.

    Returns
    -------
    (dict, dict)
        The normalised means and the normalised errors. Both are divided by
        the baseline *mean* of the corresponding test.

    Notes
    -----
    A baseline mean of 0 (the value this notebook stores for a missing
    measurement) would make the ratio undefined, so those entries are
    returned as NaN instead of raising ZeroDivisionError.
    """
    reference = means[baseline]
    num_tests = len(reference)
    undefined = float("nan")

    def relative(values):
        # Index-based on purpose: a version with fewer entries than the
        # baseline still raises IndexError rather than being truncated.
        return [values[i] / reference[i] if reference[i] != 0 else undefined
                for i in range(num_tests)]

    new_means = {}
    new_errors = {}
    for version in means:
        if version == baseline and not keep:
            continue
        new_means[version] = relative(means[version])
        new_errors[version] = relative(errors[version])
    return new_means, new_errors

In [None]:
def bar_plot(ax, data, errors=None, colors=None, total_width=0.6, single_width=1, legend=True, capsize=3):
    """Draws a bar plot with multiple bars per data point.

    Parameters
    ----------
    ax : matplotlib.pyplot.axis
        The axis we want to draw our plot on.

    data: dictionary
        A dictionary containing the data we want to plot. Keys are the names of the
        data, the items is a list of the values.

        Example:
        data = {
            "x":[1,2,3],
            "y":[1,2,3],
            "z":[1,2,3],
        }

    errors: dictionary, optional
        Dictionary of standard deviations, corresponding structure to data.
        When given, every bar is drawn with an error bar.

    colors : array-like, optional
        A list of colors which are used for the bars. If None, the colors
        will be the standard matplotlib color cycle. (default: None)

    total_width : float, optional, default: 0.6
        The width of a bar group. 0.6 means that 60% of the x-axis is covered
        by bars and 40% will be spaces between the bars.

    single_width: float, optional, default: 1
        The relative width of a single bar within a group. 1 means the bars
        will touch each other within a group, values less than 1 will make
        these bars thinner.

    legend: bool, optional, default: True
        If this is set to true, the handles and labels needed to build a
        legend are returned. The legend itself is NOT drawn here, so the
        caller can add further entries before calling ``legend``.

    capsize: float, optional, default: 3
        Passed to matplotlib as the ``capsize`` of the error bars. Only used
        when ``errors`` is given.

    Returns
    -------
    (list, list) or None
        If ``legend`` is true: one handle per series that had at least one
        bar drawn, and the matching list of series names. Otherwise None.
    """

    # Check if colors were provided, otherwise use the default color cycle
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group
    n_bars = len(data)

    # Handles of the drawn bars and the names they belong to, for the legend
    bars = []
    names = []

    # Nothing to draw; also avoids dividing by zero below
    if n_bars == 0:
        return (bars, names) if legend else None

    # The width of a single bar
    bar_width = total_width / n_bars

    # Iterate over all data
    for i, (name, values) in enumerate(data.items()):
        # The offset in x direction of that bar
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
        style = dict(width=bar_width * single_width, color=colors[i % len(colors)])

        # Draw a bar for every value of that type, remembering the last one
        handle = None
        for x, y in enumerate(values):
            if errors is None:
                drawn = ax.bar(x + x_offset, y, **style)
            else:
                drawn = ax.bar(x + x_offset, y, yerr=errors[name][x],
                               error_kw=dict(capsize=capsize), **style)
            handle = drawn[0]

        # A series without values has nothing to show in the legend. Skipping
        # it keeps handles and names aligned instead of reusing the previous
        # series' handle (or failing on the very first series).
        if handle is not None:
            bars.append(handle)
            names.append(name)

    # return the handlers/labels for a legend
    if legend:
        return bars, names
    return None

    
# Transpose (test -> per-version list) into (version -> per-test list)
def reverseDict(data, keys):
    """Regroup per-test rows into per-version columns.

    ``data`` maps each test name to a list with one value per version, in the
    same order as ``keys``. The result maps each entry of ``keys`` to the list
    of its values across all tests, in the iteration order of ``data``.

    Returns the regrouped dictionary together with the list of test names
    that defines the order of the value lists.
    """
    tests = list(data)
    result = {}
    for position, key in enumerate(keys):
        result[key] = [data[test][position] for test in tests]
    return result, tests

# Loading in datasets

### OCaml

In [None]:
# Result tables, one per implementation, each mapping test name -> measurements.
# All of them are created here; this cell fills the two OCaml ones.
data = {}
ocaml, ocaml_nogc = {}, {}
js, c = {}, {}
grain, grain_nogc = {}, {}
versions = ["OCaml", "OCaml (no GC)", "JS", "C", "Grain", "Grain (no GC)"]

# check files to see formatting of each
with open("../../wasm-of-ocaml/benchmarks/evaluation/ocaml_results.txt") as f:
    # Two tables are read back to back: first the default build (after
    # skipping 3 lines), then the one with GC disabled (after skipping 2 more).
    for lines_to_skip, table in ((3, ocaml), (2, ocaml_nogc)):
        for _ in range(lines_to_skip):
            f.readline()

        # Rows are whitespace separated; a blank line (or end of file) ends the table.
        for row in iter(lambda: f.readline().strip().split(), []):
            # some cases are marked as resulting in errors i.e. shadow stack overflow
            if len(row) != 5:
                continue
            table[row[0]] = {
                "time": float(row[1]),
                "error": float(row[2]),
                "heap": float(row[3]),
                "filesize": float(row[4]),
            }
    
#tests = data.keys()

### JS, C and Grain

In [None]:
# Fills js, c, grain and grain_nogc (created in the previous cell) from the
# sections of alternatives.txt, which are read strictly in file order.
with open("../../wasm-of-ocaml/benchmarks/evaluation/alternatives.txt") as f:

    def _next_row():
        """Read one line and split it on whitespace ([] for a blank line or EOF)."""
        return f.readline().strip().split()

    # JS - Heap usage isn't exact, hence heapError
    f.readline()
    for row in iter(_next_row, []):
        js[row[0]] = {
            "time": float(row[1]), "error": float(row[2]),
            "heap": float(row[3]), "heapError": float(row[4]), "filesize": float(row[5]),
        }

    # C
    f.readline()
    for row in iter(_next_row, []):
        c[row[0]] = {
            "time": float(row[1]), "error": float(row[2]),
            "heap": float(row[3]), "filesize": float(row[4]),
        }

    # Grain
    f.readline()
    for row in iter(_next_row, []):
        grain[row[0]] = {"time": float(row[1]), "error": float(row[2]), "filesize": float(row[3])}
    # heap usage calculated separately
    for row in iter(_next_row, []):
        if len(row) != 2:  # some cases are marked as resulting in errors i.e. doesn't terminate
            continue
        grain[row[0]]["heap"] = float(row[1])

    # Grain no-gc: skip ahead to the "-----" separator line.
    # readline() returns "" forever at end of file, so stop there explicitly
    # instead of spinning when the separator is missing.
    for raw_line in iter(f.readline, ""):
        if raw_line.startswith("-----"):
            break
    else:
        raise ValueError("alternatives.txt: no '-----' separator found before the Grain no-GC results")
    f.readline()
    for row in iter(_next_row, []):
        if len(row) != 4:  # some cases are marked as resulting in errors i.e. library function only works with GC on
            continue
        grain_nogc[row[0]] = {"time": float(row[1]), "error": float(row[2]), "filesize": float(row[3])}
    # heap usage calculated separately
    for row in iter(_next_row, []):
        if len(row) != 2:  # some cases are marked as resulting in errors i.e. library function only works with GC on
            continue
        grain_nogc[row[0]]["heap"] = float(row[1])

# Plotting data for each version separately
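Test names marked with an asterisk and coloured red have incomplete data, because at least one configuration failed with an error.  
These per-version plots use a log scale; for the figures that are actually reported, a relative linear scale will be preferable, as it is much easier to interpret.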

### OCaml

In [None]:
# Stacked bars per test: the no-GC measurement plus the difference to the
# GC-enabled build. Tests missing from the GC-enabled results get a "*" label.
tests = list(ocaml_nogc.keys())
labels = [test if test in ocaml else "*" + test for test in tests]

nogc_times = [ocaml_nogc[test]["time"] for test in tests]
gc_times = [ocaml[test]["time"]-ocaml_nogc[test]["time"] if test in ocaml else 0 for test in tests]

# Standard deviations of the *timing* measurements only
nogc_std = [ocaml_nogc[test]["error"] for test in tests]
gc_std = [ocaml[test]["error"] if test in ocaml else 0 for test in tests]

# For heap the GC build is the lower bar and the extra no-GC usage sits on top
gc_heap = [ocaml[test]["heap"] if test in ocaml else 0 for test in tests]
nogc_heap = [ocaml_nogc[test]["heap"] - used for test, used in zip(tests, gc_heap)]

nogc_size = [ocaml_nogc[test]["filesize"] for test in tests]
gc_size = [ocaml[test]["filesize"]-ocaml_nogc[test]["filesize"] if test in ocaml else 0 for test in tests]

width = 0.35       # the width of the bars: can also be len(x) sequence

fig, axs = plt.subplots(1,3, figsize=(12,4))

# times
ax = axs[0]
ax.bar(tests, nogc_times, width, yerr=nogc_std, label='No GC')
ax.bar(tests, gc_times, width, yerr=gc_std, bottom=nogc_times, label='GC')
ax.set_ylabel('Time (ms)')
ax.set_title('OCaml execution time')
ax.set_yscale('log')

# memory
ax = axs[1]
ax.bar(tests, nogc_heap, width, bottom=gc_heap, label='No GC')
# No error bars here: the only error recorded for OCaml is the timing error,
# which is in ms and says nothing about heap usage in bytes.
ax.bar(tests, gc_heap, width, label='GC')
ax.set_ylabel('Heap usage (Bytes)')
ax.set_title('OCaml heap usage')
ax.set_yscale('log')

# file size
ax = axs[2]
ax.bar(tests, nogc_size, width, label='No GC')
ax.bar(tests, gc_size, width, bottom=nogc_size, label='GC')
ax.set_ylabel("File size (Bytes)")
ax.set_title('OCaml output size')

# Shared x-axis treatment: vertical test names, incomplete tests in red
for ax in axs:
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(labels, rotation='vertical')
    for tick_label in ax.get_xticklabels():
        if tick_label.get_text().startswith("*"):
            tick_label.set_color("red")
    ax.legend()

plt.suptitle("OCaml data", fontsize=16)
plt.tight_layout();

### JS

In [None]:
# One figure with three panels (time, heap, file size) for the JS results.
tests = js.keys()

# Pull one list per recorded field, in test order
times, times_std, heap, heap_std, size = (
    [js[test][field] for test in tests]
    for field in ("time", "error", "heap", "heapError", "filesize")
)

width = 0.35       # the width of the bars: can also be len(x) sequence

fig, axs = plt.subplots(1,3, figsize=(12,4))

# (values, error bars or None, y label, title, use log scale?)
panels = [
    (times, times_std, 'Time (ms)', 'JS execution time', True),          # times
    (heap, heap_std, 'Heap usage (Bytes)', 'JS heap usage', True),       # memory
    (size, None, "File size (Bytes)", 'JS output size', False),          # file size
]

for ax, (values, spread, ylabel, title, log_scale) in zip(axs, panels):
    ax.bar(tests, values, width, yerr=spread)

    ax.set_ylabel(ylabel)

    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(tests, rotation='vertical')

    ax.set_title(title)
    if log_scale:
        ax.set_yscale('log')

plt.suptitle("JS data", fontsize=16)
plt.tight_layout();

### C

In [None]:
# One figure with three panels (time, heap, file size) for the C results.
tests = c.keys()

# Pull one list per recorded field, in test order
times, times_std, heap, size = (
    [c[test][field] for test in tests]
    for field in ("time", "error", "heap", "filesize")
)

width = 0.35       # the width of the bars: can also be len(x) sequence

fig, axs = plt.subplots(1,3, figsize=(12,4))

# (values, error bars or None, y label, title, use log scale?)
panels = [
    (times, times_std, 'Time (ms)', 'C execution time', True),       # times
    (heap, None, 'Heap usage (Bytes)', 'C heap usage', True),        # memory
    (size, None, "File size (Bytes)", 'C output size', False),       # file size
]

for ax, (values, spread, ylabel, title, log_scale) in zip(axs, panels):
    ax.bar(tests, values, width, yerr=spread)

    ax.set_ylabel(ylabel)

    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(tests, rotation='vertical')

    ax.set_title(title)
    if log_scale:
        ax.set_yscale('log')

plt.suptitle("C data", fontsize=16)
plt.tight_layout();

### Grain

In [None]:
# Union of the tests seen with and without GC (GC-enabled ones first).
# A real list is needed here: a dict keys view cannot be appended to.
tests = list(grain.keys())
tests += [key for key in grain_nogc if key not in grain]
# "*" marks tests that are missing from either of the two result sets
labels = [test if (test in grain) and (test in grain_nogc) else "*" + test for test in tests]

# Missing measurements count as 0; differences are clamped at 0 so that the
# stacked part of a bar never points downwards.
nogc_times = [grain_nogc[test]["time"] if test in grain_nogc else 0 for test in tests]
gc_times = [grain[test]["time"] if test in grain else 0 for test in tests]
gc_times = [max(0, gc_times[i] - nogc_times[i]) for i in range(len(tests))]

# Standard deviations of the *timing* measurements only
nogc_std = [grain_nogc[test]["error"] if test in grain_nogc else 0 for test in tests]
gc_std = [grain[test]["error"] if test in grain else 0 for test in tests]

nogc_heap = [grain_nogc[test]["heap"] if test in grain_nogc and "heap" in grain_nogc[test] else 0 for test in tests]
gc_heap = [grain[test]["heap"] if test in grain and "heap" in grain[test] else 0 for test in tests]
nogc_heap = [max(0,nogc_heap[i]-gc_heap[i]) for i in range(len(tests))]

nogc_size = [grain_nogc[test]["filesize"] if test in grain_nogc else 0 for test in tests]
gc_size = [grain[test]["filesize"] if test in grain else 0 for test in tests]
gc_size = [max(0,gc_size[i]-nogc_size[i]) for i in range(len(tests))]

width = 0.35       # the width of the bars: can also be len(x) sequence

fig, axs = plt.subplots(1,3, figsize=(12,4))

# times
ax = axs[0]
ax.bar(tests, nogc_times, width, yerr=nogc_std, label='No GC')
ax.bar(tests, gc_times, width, yerr=gc_std, bottom=nogc_times, label='GC')
ax.set_ylabel('Time (ms)')
ax.set_title('Grain execution time')
ax.set_yscale('log')

# memory
ax = axs[1]
ax.bar(tests, nogc_heap, width, bottom=gc_heap, label='No GC')
# No error bars here: the only error recorded for Grain is the timing error,
# which is in ms and says nothing about heap usage in bytes.
ax.bar(tests, gc_heap, width, label='GC')
ax.set_ylabel('Heap usage (Bytes)')
ax.set_title('Grain heap usage')
ax.set_yscale('log')

# file size
ax = axs[2]
ax.bar(tests, nogc_size, width, label='No GC')
ax.bar(tests, gc_size, width, bottom=nogc_size, label='GC')
ax.set_ylabel("File size (Bytes)")
ax.set_title('Grain output size')

# Shared x-axis treatment: vertical test names, incomplete tests in red
for ax in axs:
    ax.set_xticks(ax.get_xticks())
    ax.set_xticklabels(labels, rotation='vertical')
    for tick_label in ax.get_xticklabels():
        if tick_label.get_text().startswith("*"):
            tick_label.set_color("red")
    ax.legend()

plt.suptitle("Grain data", fontsize=16)
plt.tight_layout();


# Direct Comparisons

## Time

In [None]:
# Grouped bars of execution time per test for C, OCaml (no GC), JS and Grain,
# with the additional time of the GC-enabled OCaml build stacked on top.
tests = ocaml.keys()
# The grouped bars use the non-GC OCaml build; GC is added as an overlay below
versions = [c, ocaml_nogc, js, grain]
labels = ["C", "OCaml", "JS", "Grain"]

# test -> [value per version], with 0 standing in for a missing measurement
means, errors = {}, {}
for test in tests:
    means[test] = [version[test]["time"] if test in version else 0 for version in versions]
    errors[test] = [version[test]["error"] if test in version else 0 for version in versions]

# Extra time of the GC build over the no-GC build (index 1 in `versions`)
ocaml_gc_times, ocaml_gc_error = [], []
for test in tests:
    if test in ocaml:
        ocaml_gc_times.append(ocaml[test]["time"] - means[test][1])
        ocaml_gc_error.append(ocaml[test]["error"])
    else:
        ocaml_gc_times.append(0)
        ocaml_gc_error.append(0)

meanData, tests = reverseDict(means, labels)
errorData = reverseDict(errors, labels)[0]

fig, ax = plt.subplots(figsize=(14,7))
handlers, legend_labels = bar_plot(ax, meanData, errors=errorData)


# Now add extra bar on top for OCaml with GC.
# The geometry repeats what bar_plot computes for series index 1 ("OCaml")
# with its default total_width of 0.6, so the overlay lines up with that bar.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
single_width = 1
n_bars = len(meanData)
bar_width = 0.6 / n_bars
x_offset = (1 - n_bars / 2) * bar_width + bar_width / 2
gc_color = colors[4 % len(colors)]
for x, (extra, spread, base) in enumerate(zip(ocaml_gc_times, ocaml_gc_error, meanData["OCaml"])):
    bar = ax.bar(x + x_offset, extra, yerr=spread, error_kw=dict(capsize=3),
                 bottom=base, width=bar_width * single_width, color=gc_color)

# Legend entry goes right after "OCaml"
handlers.insert(2, bar)
legend_labels.insert(2, "OCaml GC")


ax.set_title("Execution time")
ax.set_xticks(range(len(tests)))
ax.set_xticklabels(tests, rotation=90)
ax.set_ylabel("time (ms)")
ax.set_yscale('log')
plt.legend(handlers, legend_labels);

This seems to suggest that execution time is almost on par with C?  
It may be that the log scale is distorting the results too much. Even so, the no-GC results are clearly far better than those of JS or Grain (possibly related to how the first few samples are discarded?).  

### Relative speed of OCaml vs JS (closest competitors)

In [None]:
# OCaml (no GC), the GC overhead and JS, all expressed relative to JS.
# Reuses meanData / errorData / ocaml_gc_times / ocaml_gc_error / tests from
# the execution-time cell above.
oc_js_means = {"OCaml": meanData["OCaml"], "OCaml GC": ocaml_gc_times, "JS": meanData["JS"]}
oc_js_err = {"OCaml": errorData["OCaml"], "OCaml GC": ocaml_gc_error, "JS": errorData["JS"]}

means, errors = normalise(oc_js_means, oc_js_err, "JS", True)

# Same colours as the corresponding series in the execution-time plot
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
orange, green, purple = colors[1], colors[2], colors[4]
palette = [orange, purple, green]


# Two stacked axes: a short one on top for the outlier range and a taller
# one below for the bulk of the data.
fig = plt.figure(figsize=(14,7))
spec = gridspec.GridSpec(ncols=1, nrows=2,
                         height_ratios=[1, 2])
ax = fig.add_subplot(spec[0])
ax2 = fig.add_subplot(spec[1])

# split axis so outlier for alltrees_9 can be shown neatly
# Taken from example on matplotlib page: https://matplotlib.org/3.1.0/gallery/subplots_axes_and_figures/broken_axis.html

# The same data is drawn on both axes; only the y-limits differ
handlers, legend_labels = bar_plot(ax, means, errors=errors, colors=palette)
bar_plot(ax2, means, errors=errors, colors=palette)

ax.set_ylim(4.25, 5.)  # outliers only
ax2.set_ylim(0, 1.5)

# hide the spines between ax and ax2
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.tick_top()
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.xaxis.tick_bottom()

d = .015  # how big to make the diagonal lines in axes coordinates
# Cut marks: on the bottom edge of the upper axes, then on the top edge of
# the lower axes; left mark first, right mark second in each case.
for axis, ys in ((ax, (-d, +d)), (ax2, (1 - d, 1 + d))):
    # arguments to pass to plot, just so we don't keep repeating them
    kwargs = dict(transform=axis.transAxes, color='k', clip_on=False)
    for xs in ((-d, +d), (1 - d, 1 + d)):
        axis.plot(xs, ys, **kwargs)

plt.suptitle("Execution time", fontsize=14)
ax2.set_xticks(range(len(tests)))
ax2.set_xticklabels(tests, rotation=90)
fig.text(-0.01, 0.55, 'Time relative to JS', va='center', rotation='vertical', fontsize=10)

ax.legend(handlers, legend_labels)
plt.tight_layout()


## Memory

In [None]:
# Grouped bars of heap usage per test for C, OCaml (GC), JS and Grain, with
# the additional heap used by the no-GC OCaml build stacked on top.
tests = ocaml.keys()
# The grouped bars use the GC-enabled OCaml build; no-GC is the overlay below
versions = [c, ocaml, js, grain]
labels = ["C", "OCaml GC", "JS", "Grain"]
# 0 stands in for a missing measurement; only some versions record a heapError
heap_means = {test : [version[test]["heap"] if test in version and "heap" in version[test] else 0 for version in versions] for test in tests}
heap_errors = {test : [version[test]["heapError"] if test in version and "heapError" in version[test] else 0 for version in versions] for test in tests}

# Extra heap of the no-GC build over the GC build (index 1 in `versions`).
# The guard has to look at ocaml_nogc: `tests` already comes from `ocaml`, so
# checking `ocaml` would never protect the ocaml_nogc lookup.
ocaml_nogc_heap = [ocaml_nogc[test]["heap"]-heap_means[test][1] if test in ocaml_nogc else 0 for test in tests]


meanData, tests = reverseDict(heap_means, labels)
errorData, _ = reverseDict(heap_errors, labels)

fig, ax = plt.subplots(figsize=(14,7))

# Switch around colours so OCaml GC brown, no-gc orange, like in other two plots
# (Difference is that now the GC version is the lower bar, not the upper one)
# `colors` itself stays the unmodified default cycle; only the copy is swapped.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
swapped_colors = list(colors)
swapped_colors[1], swapped_colors[4] = colors[4], colors[1]

handlers, legend_labels = bar_plot(ax, meanData, colors=swapped_colors, errors=errorData)


# Now add the extra bar on top for OCaml without GC.
# The geometry repeats what bar_plot computes for series index 1 ("OCaml GC")
# with its default total_width of 0.6, so the overlay lines up with that bar.
single_width=1
n_bars = len(meanData)
bar_width = 0.6 / n_bars
x_offset = (1 - n_bars / 2) * bar_width + bar_width / 2
for x, y in enumerate(ocaml_nogc_heap):
    bar = ax.bar(x + x_offset, y,                                       # colour 1, the same one the no-GC OCaml bar has in the other plots
        bottom=meanData["OCaml GC"][x], width=bar_width * single_width, color=colors[1 % len(colors)])

# Legend entry goes right before "OCaml GC"
handlers.insert(1, bar)
legend_labels.insert(1, "OCaml")


ax.set_title("Heap usage")
ax.set_xticks(range(len(tests)))
ax.set_xticklabels(tests, rotation=90)
ax.set_ylabel("Memory (Bytes)")
ax.set_yscale('log')
plt.legend(handlers, legend_labels);

Do I need better tests to show the benefit of GC, or is this all there is to show? There is still about a factor of 2 difference in most cases.  

## Filesize

In [None]:
# Grouped bars of output file size per test for C, OCaml (no GC), JS and
# Grain, with the additional size of the GC-enabled OCaml build stacked on top.
tests = ocaml.keys()
# The grouped bars use the non-GC OCaml build; GC is added as an overlay below
versions = [c, ocaml_nogc, js, grain]
labels = ["C", "OCaml", "JS", "Grain"]
# 0 stands in for a missing measurement
sizes = {test : [version[test]["filesize"] if test in version else 0 for version in versions] for test in tests}

# Extra size of the GC build over the no-GC build.
# The guard has to look at ocaml_nogc: `tests` already comes from `ocaml`, so
# checking `ocaml` would never protect the ocaml_nogc lookup.
ocaml_gc_size = [ocaml[test]["filesize"]-ocaml_nogc[test]["filesize"] if test in ocaml_nogc else 0 for test in tests]

meanData, tests = reverseDict(sizes, labels)

fig, ax = plt.subplots(figsize=(14,7))

# Default colour cycle, fetched before its first use so this cell does not
# depend on whatever `colors` an earlier cell happened to leave behind.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

handlers, legend_labels = bar_plot(ax, meanData, colors=colors)

# Now add extra bar on top for OCaml with GC.
# The geometry repeats what bar_plot computes for series index 1 ("OCaml")
# with its default total_width of 0.6, so the overlay lines up with that bar.
single_width=1
n_bars = len(meanData)
bar_width = 0.6 / n_bars
x_offset = (1 - n_bars / 2) * bar_width + bar_width / 2
for x, y in enumerate(ocaml_gc_size):
    bar = ax.bar(x + x_offset, y,
        bottom=meanData["OCaml"][x], width=bar_width * single_width, color=colors[4 % len(colors)])

# Legend entry goes right after "OCaml"
handlers.insert(2, bar)
legend_labels.insert(2, "OCaml GC")


ax.set_title("Output file size")
ax.set_xticks(range(len(tests)))
ax.set_xticklabels(tests, rotation=90)
ax.set_ylabel("size (Bytes)")
plt.legend(handlers, legend_labels);