In [None]:
%config InlineBackend.figure_format="svg"

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Preprocess data

In [None]:
# Calculate execution time gains for every (totalsize, number of buckets) group
def calculateGains(group, metric_cols=None):
    """Add gain columns relative to the single-process baseline row.

    For every metric ``m`` the gain is ``m[ProcessCount == 1] / m`` and is
    stored in ``m_gain``. Its standard deviation, stored in ``m_gain_std``,
    is obtained by first-order error propagation of the quotient
    ``g = base / x`` (treating ``base`` and ``x`` as independent):

        sigma_g = sqrt( (sigma_base / x)**2 + (base * sigma_x / x**2)**2 )

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group. Must contain ``ProcessCount`` and, for every
        metric, the columns ``<metric>`` and ``<metric>_std``. Exactly one
        row must have ``ProcessCount == 1``.
    metric_cols : iterable of str, optional
        Metric column names to process. Defaults to the notebook-level
        ``metrics`` list, so ``groupby(...).apply(calculateGains)`` keeps
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with the ``*_gain`` and ``*_gain_std`` columns
        added. The frame passed in is left untouched.

    Raises
    ------
    ValueError
        If the group does not contain exactly one ``ProcessCount == 1`` row.
    """
    if metric_cols is None:
        metric_cols = metrics

    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by groupby.apply.
    group = group.copy()

    baseline = group.loc[group["ProcessCount"] == 1]
    if len(baseline) != 1:
        raise ValueError(
            "expected exactly one row with ProcessCount == 1, found %d" % len(baseline)
        )

    for col in metric_cols:
        oldstd = col + "_std"
        meancol = col + "_gain"
        stdcol = meancol + "_std"
        # .iloc[0] extracts the scalar explicitly; float(Series) is deprecated.
        base = float(baseline[col].iloc[0])
        basestd = float(baseline[oldstd].iloc[0])
        group[meancol] = base / group[col]
        # The derivative of base / x with respect to x is -base / x**2, hence
        # the squared denominator in the second term.
        group[stdcol] = np.sqrt(
            (basestd / group[col]) ** 2
            + (base * group[oldstd] / group[col] ** 2) ** 2
        )
    return group

In [None]:
# Load the raw per-run measurements
rawData = pd.read_csv("results.csv")

# Program parameters and metrics
parameters = ["TotalSize", "NumBuckets", "ProcessCount"]
metrics = ["ProcTime", "MsgTime", "WaitTime", "TotalTime"]

# Mean and standard deviation over all runs of each parameter combination
runsByConfig = rawData.groupby(parameters)
dataMeans = runsByConfig.mean().reset_index()
dataStds = runsByConfig.std().reset_index()

# Attach the stds to the means frame as "<metric>_std" columns.
# Both frames come from the same groupby, so their rows line up after reset_index().
for col in metrics:
    dataMeans[col + "_std"] = dataStds[col]

# Gains are computed per (TotalSize, NumBuckets) group, i.e. over every parameter
# except the last one (ProcessCount). Slice instead of pop()/append() so the
# `parameters` list is never left in a modified state.
groupKeys = parameters[:-1]

# Iterate over the groups explicitly rather than using groupby.apply: the
# grouping columns stay regular columns of the result and no extra index
# levels are added, regardless of the pandas version. Each group is copied
# because calculateGains adds columns to the frame it receives.
data = pd.concat(
    [calculateGains(group.copy()) for _, group in dataMeans.groupby(groupKeys)]
)

# Drop the intermediates: `data` now holds means, stds and gains in one frame
del dataMeans
del dataStds

# Plotting function

In [None]:
def plot(ax, x, y_mean, y_cf, label, linestyle="-", marker="o", color="blue"):
    """Draw a mean curve with a shaded band of half-width ``y_cf`` around it.

    Parameters
    ----------
    ax : object with ``plot`` and ``fill_between`` methods (a matplotlib Axes)
        Target axes.
    x : array-like
        X coordinates.
    y_mean : array-like
        Mean values; must support element-wise ``+`` / ``-`` with ``y_cf``.
    y_cf : array-like
        Half-width of the band drawn around ``y_mean``.
    label : str
        Legend label of the mean curve.
    linestyle, marker, color : str
        Line style, point marker and colour of the curve; ``color`` is also
        used for the band.
    """
    # Plot the mean; the marker is forwarded so the argument actually takes effect
    ax.plot(x, y_mean, label=label, linestyle=linestyle, marker=marker, color=color)
    # Shade the area between the lower and upper bounds
    ax.fill_between(x, y_mean - y_cf, y_mean + y_cf, alpha=0.2, color=color)
    
def plot_slice(df, slice_dict, xcol, ycol, conf_interval=1, num_runs=10, xscale="linear", yscale="linear"):
    """Plot ``ycol`` against ``xcol`` for the rows of ``df`` selected by ``slice_dict``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``xcol``, ``ycol``, ``ycol + "_std"`` and every key
        of ``slice_dict`` as columns.
    slice_dict : dict
        ``{column: value}`` pairs; only rows where every column equals its
        value are plotted.
    xcol, ycol : str
        Columns used for the x axis and for the plotted mean.
    conf_interval : float
        Multiplier applied to the standard error to get the half-width of
        the shaded band.
    num_runs : int
        Number of runs behind each mean; the standard error is computed as
        ``std / sqrt(num_runs)``.
        NOTE(review): assumes ``ycol + "_std"`` is the standard deviation over
        those runs - confirm against how the frame was built.
    xscale, yscale : str
        Axis scales, passed to ``ax.set_xscale`` / ``ax.set_yscale``.
    """
    # Keep only the rows that match every key/value pair of slice_dict
    for key, value in slice_dict.items():
        df = df[df[key] == value]

    # Get the values to plot
    x, y_means, y_stds = df[xcol], df[ycol], df[ycol + "_std"]
    # Standard error of the mean: divide by the number of runs averaged,
    # not by the number of points on the curve.
    y_ci = conf_interval * y_stds / np.sqrt(num_runs)

    # Plot data
    fig, ax = plt.subplots(figsize=(12, 6))
    plot(ax, x, y_means, y_ci, "")
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    ax.set_xlabel(xcol)
    ax.set_ylabel(ycol)
    plt.show()

In [None]:
# Total-time gain versus process count for one (TotalSize, NumBuckets) configuration.
slice_dict = {"TotalSize": 10000000, "NumBuckets": 20}
# Use `data`: it is the frame that carries the *_gain columns, and `dataMeans`
# no longer exists once the preprocessing cell has run (it is deleted there).
# NOTE(review): the value 10 was passed positionally and therefore binds to
# conf_interval, not num_runs - confirm that a 10x multiplier is intended.
plot_slice(data, slice_dict, "ProcessCount", "TotalTime_gain", conf_interval=10)