In [None]:
%config InlineBackend.figure_format="svg"

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Load the benchmark results table; the relative path is resolved against the
# kernel's current working directory, so run the notebook from the folder that
# contains results.csv.
data = pd.read_csv("results.csv")

In [None]:
def get_means_stds(df, xcol, ycol, conf_interval, num_runs):
    """Collapse repeated runs into one mean and one error-band half-width per group.

    The rows of ``df`` are consumed in order, in consecutive chunks of
    ``num_runs`` rows. Nothing here sorts or groups by ``xcol``: the caller is
    responsible for passing rows that are already laid out that way.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose length must be a multiple of ``num_runs``.
    xcol, ycol : str
        Column holding the x value and the measured value, respectively.
    conf_interval : number
        Multiplier applied to the standard error of each chunk.
    num_runs : int
        Number of consecutive rows that make up one chunk.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y_means, y_stds)`` where ``x`` holds the ``xcol`` value of the
        first row of every chunk, ``y_means`` the chunk means of ``ycol`` and
        ``y_stds`` the value ``std * conf_interval / sqrt(num_runs)`` for every
        chunk (despite the name, this is a scaled standard error, not a raw
        standard deviation). ``std`` is ``np.std`` with its default settings.

    Raises
    ------
    AssertionError
        If ``len(df)`` is not a multiple of ``num_runs``.
    """
    assert len(df) % num_runs == 0, "Wrong number of runs"

    x_values = df[xcol].to_numpy()
    y_values = df[ycol].to_numpy()

    # First row index of every chunk of `num_runs` consecutive rows.
    chunk_count = int(len(df) / num_runs)
    starts = [chunk * num_runs for chunk in range(chunk_count)]
    chunks = [y_values[start:start + num_runs] for start in starts]

    x = np.array([x_values[start] for start in starts])
    y_means = np.array([np.mean(chunk) for chunk in chunks])
    y_stds = np.array(
        [np.std(chunk) * conf_interval / np.sqrt(num_runs) for chunk in chunks]
    )

    return x, y_means, y_stds

def plot(ax, x, y_mean, y_cf, label, linestyle="-", marker="o", color="blue"):
    """Draw a mean curve with a shaded symmetric band around it on ``ax``.

    Parameters
    ----------
    ax : object
        Target axes; only its ``plot`` and ``fill_between`` methods are used
        (e.g. a matplotlib ``Axes``).
    x : array-like
        X positions of the points.
    y_mean : array-like
        Centre line values, one per x position.
    y_cf : array-like
        Half-width of the band; the shaded area spans
        ``y_mean - y_cf`` to ``y_mean + y_cf``.
    label : str
        Legend label passed to ``ax.plot``.
    linestyle, marker, color
        Forwarded to ``ax.plot``; ``color`` is also used for the band, which is
        drawn with ``alpha=0.2``.
    """
    # Coerce up front so plain Python lists work with the arithmetic below.
    y_mean = np.asarray(y_mean)
    y_cf = np.asarray(y_cf)

    # `marker` used to be accepted but never forwarded, so it had no effect.
    ax.plot(x, y_mean, label=label, linestyle=linestyle, marker=marker, color=color)
    ax.fill_between(x, y_mean - y_cf, y_mean + y_cf, alpha=0.2, color=color)
    
def plot_slice(df, slice_dict, xcol, ycol, conf_interval=1, num_runs=10, xscale="linear", yscale="linear"):
    """Filter ``df`` to one slice and plot the per-group mean of ``ycol`` against ``xcol``.

    Parameters
    ----------
    df : pandas.DataFrame
        Full results table.
    slice_dict : dict
        Maps column name to the value that column must equal; only rows
        matching every entry are kept.
    xcol, ycol : str
        Columns used for the x axis and for the measured value.
    conf_interval : number, default 1
        Multiplier for the error band, forwarded to ``get_means_stds``.
    num_runs : int, default 10
        Number of consecutive rows per x value, forwarded to
        ``get_means_stds`` (which asserts the filtered row count is a multiple
        of it).
    xscale, yscale : str, default "linear"
        Axis scales applied via ``ax.set_xscale`` / ``ax.set_yscale``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Keep only the rows that match every column/value pair of the slice.
    for key in slice_dict:
        df = df[df[key] == slice_dict[key]]

    # Aggregate the repeated runs into a mean and a band half-width per x value.
    x, y_means, y_ci = get_means_stds(df, xcol, ycol, conf_interval, num_runs)

    fig, ax = plt.subplots(figsize=(12, 6))
    plot(ax, x, y_means, y_ci, "")

    # `xscale` / `yscale` used to be accepted but silently ignored.
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)

    # Label the axes with the column names so the figure stands on its own.
    ax.set_xlabel(xcol)
    ax.set_ylabel(ycol)

    plt.show()

In [None]:
# Keep the rows where TotalSize == 1000000 and NumBuckets == 100, then plot
# TotalTime against ProcessCount for that slice.
slice_dict = dict(TotalSize=1000000, NumBuckets=100)

# NOTE(review): the fifth positional argument of plot_slice is `conf_interval`,
# so the value 10 scales the error band; `num_runs` keeps its default.
# Confirm that a multiplier of 10 (rather than num_runs=10) is intended.
plot_slice(
    data,
    slice_dict,
    xcol="ProcessCount",
    ycol="TotalTime",
    conf_interval=10,
)