In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

import mpld3
#mpld3.enable_notebook()

In [None]:
def normalise(means, errors, baseline, keep=False):
    """Express every version's means and errors relative to a baseline version.

    Parameters
    ----------
    means : dictionary
        Maps a version name to its list of mean values, one per test.

    errors : dictionary
        Same structure as `means`, holding the error of each mean.

    baseline : key of `means`
        The version whose means every value is divided by.

    keep : bool, optional, default: False
        If true, the baseline itself is included in the result; otherwise it
        is left out.

    Returns
    -------
    (dictionary, dictionary)
        The scaled means and the scaled errors. Both are divided by the
        baseline *mean* of the same test.
    """
    reference = means[baseline]
    count = len(reference)

    scaled_means, scaled_errors = {}, {}
    for version, values in means.items():
        if not keep and version == baseline:
            continue
        scaled_means[version] = [values[k] / reference[k] for k in range(count)]
        scaled_errors[version] = [errors[version][k] / reference[k] for k in range(count)]
    return scaled_means, scaled_errors

In [None]:
def bar_plot(ax, data, errors=None, colors=None, total_width=0.8, single_width=1, legend=False, capsize=3):
    """Draws a bar plot with multiple bars per data point.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axis we want to draw our plot on.

    data: dictionary
        A dictionary containing the data we want to plot. Keys are the names of
        the data, the items are lists of the values.

        Example:
        data = {
            "x":[1,2,3],
            "y":[1,2,3],
            "z":[1,2,3],
        }

    errors: dictionary, optional
        Dictionary of standard deviations, with the same structure as data.
        If given, every bar is drawn with an error bar.

    colors : array-like, optional
        A list of colors which are used for the bars. If None, the colors
        will be the standard matplotlib color cycle. (default: None)

    total_width : float, optional, default: 0.8
        The width of a bar group. 0.8 means that 80% of the x-axis is covered
        by bars and 20% will be spaces between the bars.

    single_width: float, optional, default: 1
        The relative width of a single bar within a group. 1 means the bars
        will touch each other within a group, values less than 1 will make
        these bars thinner.

    legend: bool, optional, default: False
        If this is set to true, the handles and labels needed to build a
        legend are returned. No legend is drawn by this function itself.

    capsize: float, optional, default: 3
        Cap size passed to the error bars. Only used when errors is given.

    Returns
    -------
    (list, list) or None
        When legend is true: one handle per series that drew at least one bar,
        and the matching series names in the same order. Otherwise None.
    """
    # Nothing to draw; returning here also avoids dividing by zero below
    if not data:
        return ([], []) if legend else None

    # Check if colors were provided, otherwise use the default color cycle
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group
    n_bars = len(data)

    # The width of a single bar
    bar_width = total_width / n_bars

    # Legend handles and their labels, kept aligned with each other
    handles = []
    labels = []

    # Iterate over all data
    for i, (name, values) in enumerate(data.items()):
        # The offset in x direction of that bar
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
        color = colors[i % len(colors)]

        # Draw a bar for every value of that type
        last_bar = None
        for x, y in enumerate(values):
            bar_kwargs = dict(width=bar_width * single_width, color=color)
            if errors is not None:
                bar_kwargs.update(yerr=errors[name][x], error_kw=dict(capsize=capsize))
            last_bar = ax.bar(x + x_offset, y, **bar_kwargs)

        # A series without values draws nothing, so it gets no legend entry
        # (previously this reused the previous series' bar, or failed outright)
        if last_bar is not None:
            handles.append(last_bar[0])
            labels.append(name)

    # Return the handles/labels for a legend
    if legend:
        return handles, labels
    return None

## Comparison of All optimisations, just IR, just Wasm, None
All measurements use optimised pattern matching and no GC; where optimisation passes are applied, they are run for 3 iterations.

In [None]:
data = {}
versions = ["None", "Wasm", "IR",  "All"]
# Check file to see formatting
with open("../../wasm-of-ocaml/benchmarks/evaluation/optimisations.txt") as f:
    f.readline()
    for version in versions:
        f.readline()
        line = f.readline().strip().split()
        while line != []:
            if line[0] not in data:
                data[line[0]] = {}
            data[line[0]][version] = \
              {"time" : float(line[1]), "error" : float(line[2]), "heap": float(line[3]), "filesize" : float(line[4])}
            line = f.readline().strip().split()
tests = data.keys()

In [None]:
# Display the parsed results: test name -> version -> {time, error, heap, filesize}
data

In [None]:
# Two stacked panels over the same tests: execution time above, filesize below
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))

# Top panel: execution time of every version relative to the "None" version,
# which is kept in the plot as the reference bar
ax = axs[0]
times, errors = {}, {}
for version in versions:
    times[version] = [data[test][version]["time"] for test in tests]
    errors[version] = [data[test][version]["error"] for test in tests]
times, errors = normalise(times, errors, baseline="None", keep=True)
bar_plot(ax, times, errors=errors)
ax.set(title="Execution time", xticks=[], ylabel="relative time")

# Bottom panel: absolute filesize; only this panel carries the test names
ax = axs[1]
sizes = {}
for version in versions:
    sizes[version] = [data[test][version]["filesize"] for test in tests]
handlers, labels = bar_plot(ax, sizes, legend=True)
ax.set(title="Filesize", ylabel="size (bytes)")
ax.set_xticks(range(len(tests)))
ax.set_xticklabels(tests, rotation=90)

# One legend for the whole figure, with room made for it on the left
fig.legend(handlers, labels, loc='center left')
plt.subplots_adjust(left=0.15)

Optimisations at both the WebAssembly and IR level reduce filesize across the board. TODO: Average percentage/range?  
In many cases, execution time is significantly reduced by the IR level optimisations, although the WebAssembly optimisations have little impact on performance, possibly making it slightly worse in some cases but within 1 standard deviation of the original execution time. There are a couple of tests where execution time is not improved; for example, `nbody` is an imperative style program performing floating point calculations.

In [None]:
# Fractional filesize reduction of "All" compared with "None", one entry per test
ratio = [1 - sizes["All"][idx] / sizes["None"][idx] for idx, _ in enumerate(tests)]
# Range of the reduction, then its mean
print(min(ratio), max(ratio))
print(np.mean(ratio))

In [None]:
# Fractional execution time reduction of "All" compared with "None", one entry per test
ratio = [1 - times["All"][idx] / times["None"][idx] for idx, _ in enumerate(tests)]
# Range of the reduction, then its mean
print(min(ratio), max(ratio))
print(np.mean(ratio))

Comparing all optimisations vs none:  
The difference in filesize varies from -14% to -39%, with an average of -23%.  
Execution time at best decreases by 90%, but in the worst example only increases by 2.2%, and it decreases in the majority of cases, particularly the more functional style programs.  
TODO: Are such figures meaningful for hand picked examples? Particularly the average.

## Specific IR optimisations (inlining, tail calls, uncurrying)

## Phase order and number of iterations

In [None]:
data = {}
labels = ["0", "1", "2", "3", "4", "5"]
# Check file to see formatting
with open("../../wasm-of-ocaml/benchmarks/evaluation/iterations.txt") as f:
    for i in labels:
        f.readline()
        line = f.readline().strip().split()
        while line != []:
            if line[0] not in data:
                data[line[0]] = {}
            data[line[0]][i] = \
              {"time" : float(line[1]), "error" : float(line[2]), "heap": float(line[3]), "filesize" : float(line[4])}
            line = f.readline().strip().split()
tests = data.keys()

In [None]:
# Three stacked panels over the same tests: execution time, heap usage, filesize
fig, axs = plt.subplots(3, 1, figsize=(12,10))

# Execution time relative to the "0" iterations run, which is kept in the plot
# as the reference bar
ax = axs[0]
times = {i: [data[test][i]["time"] for test in tests] for i in labels}
errors = {i: [data[test][i]["error"] for test in tests] for i in labels}
times, errors = normalise(times, errors, "0", True)
bar_plot(ax, times, errors=errors)
ax.set_title("Execution time")
ax.set_xticks([])
ax.set_ylabel("relative time")

# Heap usage, drawn on a log scale. The legend entries are taken from the last
# panel only, so none are requested here.
ax = axs[1]
mem = {i: [data[test][i]["heap"] for test in tests] for i in labels}
bar_plot(ax, mem)
ax.set_title("Heap usage")
ax.set_xticks([])
ax.set_yscale("log")
ax.set_ylabel("Memory (bytes)")

# Filesize; only this bottom panel carries the test names
ax = axs[2]
sizes = {i: [data[test][i]["filesize"] for test in tests] for i in labels}
# `labels` is the list of iteration counts this cell reads from, so the legend
# labels get their own name instead of overwriting it. Otherwise the cell's
# input would depend on its own previous run.
handlers, legend_labels = bar_plot(ax, sizes, legend=True)
ax.set_title("Filesize")
ax.set_xticks(range(len(tests)))
ax.set_xticklabels(tests, rotation=90)
ax.set_ylabel("size (bytes)")

# One legend for the whole figure, with room made for it on the left
fig.legend(handlers, legend_labels, loc='center left')
plt.subplots_adjust(left=0.15)

From these plots we see that performance rarely gets worse with more iterations, and that further improvement after three iterations only happens in a couple of instances. Where there are improvements from performing more than three iterations, these tend to be much smaller than the improvements already achieved, hence 3 iterations is chosen as the number of times to run the set of optimisation passes for all other analysis.  

Running the set of passes for multiple iterations reduces the significance of the ordering of passes. Still, the more complex optimisations such as inlining or uncurrying functions tend to rely on some values being propagated through the code initially produced. As such, information propagation passes such as CSE and constant propagation are run first.  
Functions are then inlined where beneficial, and those functions which are fully applied but are not selected to be inlined are then uncurried. Lastly, dead assignment elimination removes any now useless definitions and tail call optimisation attempts to optimise suitable recursive functions that have not been removed.  

Inlining in particular reveals new opportunities to propagate values, hence the benefit of running the passes for multiple iterations.