# Getting Started

If you are new to Jupyter or pandas, the following tutorials may be helpful:
+ [jupyter tutorial](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)
+ [pandas tutorial](https://pandas.pydata.org/pandas-docs/stable/10min.html)

We also provide a short tutorial video in the next input box. For more information, see the help section in MACI.

Otherwise, click `Cell`->`Run All` to start the interactive analysis.

In [None]:
# Select the next input box and click `Run` to start the video.
# The VimeoVideo object is the last expression of the cell, so the notebook
# renders it as the cell's output.
from IPython.display import VimeoVideo
VimeoVideo(253681625)

# Experiment Analysis

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import math
from pandas import Series, DataFrame
from pandas.io.json import json_normalize
import json
import random
from textwrap import wrap
from IPython.core.display import HTML
import matplotlib.pyplot as plt
from copy import deepcopy
from ipywidgets import interact, interactive, fixed, Output
import ipywidgets as widgets
from IPython.display import display, clear_output
from matplotlib.colors import Colormap

import matplotlib
# Font type 42 tells matplotlib to embed TrueType fonts in exported PDF and
# PostScript files (instead of the default Type 3 fonts).
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [None]:
# Load the measurement table: semicolon-separated, first row is the header.
df = pd.read_csv('data.csv', sep=';', header=0) # TODO fix unnamed column

#
# Uncomment to sanitize columns
#
#df["value"] = df.apply (lambda row: float(row["value"].replace(",", ".")) if isinstance(row["value"], basestring) else row["value"],axis=1)
#df["variance_bw"] = df.apply (lambda row: float(row["variance_bw"].replace(",", ".")) if isinstance(row["variance_bw"], basestring) else row["variance_bw"],axis=1) 
#df["mean_bw"] = df.apply (lambda row: float(row["mean_bw"].replace(",", ".")) if isinstance(row["mean_bw"], basestring) else row["mean_bw"],axis=1)
#

# Parse the experiment metadata. The context manager closes the file handle
# as soon as the JSON document has been read.
with open('metadata.json') as metadata_file:
    raw_metadata = json.load(metadata_file)

# Names of all parameters declared in the metadata.
experimentParams = [param['Name'] for param in raw_metadata['Parameters']]
# In most scenarios, we are not interested in the instanceId, but it takes much time... so we remove it
experimentParams.remove('simInstanceId')
# Distinct values of the "key" column, i.e. the metrics that can be analysed.
targetMetrics = df.key.unique().tolist()

def parameterValuesOf(parameterName):
    """Return the distinct values of column `parameterName` of the global `df` as a plain list."""
    column = df[parameterName]
    distinctValues = column.unique()
    return distinctValues.tolist()

def unitOf(parameterName):
    """Look up the 'Unit' of the metadata parameter called `parameterName`.

    Returns the unit of the first entry in raw_metadata['Parameters'] whose
    'Name' matches, or None when no entry matches.
    """
    for entry in raw_metadata['Parameters']:
        if entry['Name'] == parameterName:
            return entry['Unit']
    return None

# Split the metadata parameters by their 'Purpose' code: entries with
# Purpose == 0 are used as configuration parameters, entries with
# Purpose == 1 as environment parameters.
configurationParameters = [p['Name'] for p in raw_metadata['Parameters'] if p['Purpose'] == 0]
environmentParameters = [p['Name'] for p in raw_metadata['Parameters'] if p['Purpose'] == 1]

# Cleanup
Optional data cleanup (e.g., to improve performance)

In [None]:
def remove_key(key):
    """Drop all rows whose "key" column equals `key` from the global `df`.

    Rebinds the global `df` to the reduced frame and displays the row count
    before and after the removal.
    """
    global df
    rows_before = len(df)
    keep_mask = df["key"] != key
    df = df[keep_mask]
    display("Length before " + str(rows_before) + " and now " + str(len(df)))

# Example (disabled): list the keys to drop and remove them one by one.
#for key in ['...', '...']:
#    remove_key(key)

# Show which target keys are still present in df.
display("Remaining targets " + str(df["key"].unique()))

## Coefficient of Variation
If you configured the experiments to run more than once, you can use this analysis to find out how much the results of your experiments disperse. The lower the coefficient of variation (CoV), the closer the results of experiments with identical parameters were to each other.

Use the threshold slider for filtering.

In [None]:
def variance(threshold = 0.1):
    """Return dispersion statistics per metric and parameter combination.

    For every metric in `targetMetrics`, the rows of the global `df` are
    grouped by `experimentParams`, and min, mean, max, span (max - min),
    standard deviation and coefficient of variation (std / mean) of "value"
    are computed for each group.

    threshold -- only rows with cov >= threshold are returned. Rows whose cov
                 is NaN never satisfy the comparison and are therefore dropped.

    Returns a DataFrame with the experiment parameters followed by
    'metric', 'min', 'mean', 'max', 'span', 'std' and 'cov'.
    """
    rows = []
    for metric in targetMetrics:
        # Boolean indexing keeps only this metric's rows. DataFrame.where would
        # keep every row, blank the others to NaN and thereby turn integer
        # parameter columns into floats.
        metricRows = df[df['key'] == metric]
        for name, group in metricRows.groupby(experimentParams):
            # Depending on the pandas version, grouping by a single column can
            # yield a bare scalar instead of a 1-tuple.
            if not isinstance(name, tuple):
                name = (name,)
            row = dict(zip(experimentParams, name))

            values = group['value']
            row['metric'] = metric
            row['min'] = values.min()
            row['max'] = values.max()
            row['span'] = row['max'] - row['min']
            row['std'] = values.std()
            row['mean'] = values.mean()
            row['cov'] = row['std'] / row['mean']

            rows.append(row)

    vdf = pd.DataFrame(rows, columns=experimentParams + ['metric', 'min', 'mean', 'max', 'span', 'std', 'cov'])
    return vdf[vdf['cov'] >= threshold]

#
# Uncomment to use
#
#interact(variance, threshold=(0, 1, 0.01))

# Time Series

In [None]:
from collections import defaultdict

def timeAggregater(data, scale):
    """Aggregate the latest value of every simulation instance per offset.

    `data` needs the columns "simInstanceId", "offset" and "value". Rows are
    processed in ascending offset order. For every distinct offset, the mean
    and standard deviation over the most recent (scaled) value of each
    instance are recorded. Instances that have not reported a value yet
    count as 0.

    data  -- DataFrame holding the measurements
    scale -- function applied to every "value" entry before aggregation

    Returns a tuple (timeList, valueList, stdList) with one entry per
    distinct offset, including the last one. Empty input yields three empty
    lists.
    """
    # Stable sort, so rows sharing an offset keep their original order.
    data = data.sort_values("offset", kind="mergesort")

    lastValueForExperimentDict = {key: 0 for key in data["simInstanceId"].unique().tolist()}
    timeList = []
    valueList = []
    stdList = []

    def emit(offset):
        # list(...) is required on Python 3, where dict.values() is a view
        # that numpy cannot reduce directly.
        current = list(lastValueForExperimentDict.values())
        timeList.append(offset)
        valueList.append(np.mean(current))
        stdList.append(np.std(current))

    lastOffset = None
    for simInstanceId, offset, rawValue in zip(data["simInstanceId"], data["offset"], data["value"]):
        value = scale(rawValue)

        # A new offset starts: the previous one is complete, so record it.
        if lastOffset is not None and lastOffset != offset:
            emit(lastOffset)

        lastOffset = offset
        lastValueForExperimentDict[simInstanceId] = value

    # The loop only records an offset when the next one begins, so the final
    # offset has to be recorded here.
    if lastOffset is not None:
        emit(lastOffset)
    return (timeList, valueList, stdList)

def timeplot(groupBy, target, filters, pipe=False, callback=None, scaleX=lambda x: x):
    """Plot the mean of `target` over time, one line per parameter combination.

    Rows of the global `df` with key == `target` are narrowed by `filters`
    and labelled with the values of every experiment parameter that is NOT
    in `groupBy`. Each label is aggregated with timeAggregater() and drawn as
    one line. The figure is saved to a randomly named PDF, and a download
    link plus a code snippet for re-plotting are displayed.

    groupBy  -- parameter names that are left out of the line label
    target   -- value of the "key" column to plot
    filters  -- dict mapping a parameter name to its allowed values
    pipe     -- if true, shade mean +/- standard deviation around each line
    callback -- optional zero-argument function, called before tight_layout()
    scaleX   -- handed to timeAggregater() as its `scale` argument
    """
    df1 = df[df['key'] == target]
    for paramName, enabledValues in filters.items():
        df1 = df1[df1[paramName].isin(enabledValues)]
    # A helper column is added below; work on an explicit copy so pandas does
    # not raise SettingWithCopyWarning for writing to a filtered slice.
    df1 = df1.copy()

    param = "param"
    labelColumns = [x for x in experimentParams if x not in groupBy]
    df1[param] = df1.filter(labelColumns).apply(lambda row: np.array2string(row, separator=', ').strip('[]'), axis=1)
    params = sorted(df1[param].unique().tolist())
    fig = plt.gcf()
    fig.set_size_inches(6, 3)

    for cfgValue in params:
        (x, y, std) = timeAggregater(df1[df1[param] == cfgValue], scaleX)
        line = plt.plot(x, y, label = cfgValue)
        if pipe:
            upper = [a_i + b_i for a_i, b_i in zip(y, std)]
            lower = [a_i - b_i for a_i, b_i in zip(y, std)]
            # Reuse the line colour for the shaded band.
            plt.fill_between(x, upper, lower, color=line[0].get_color(), alpha=0.1)

    plt.suptitle("")
    plt.title(str(target))
    ax = plt.subplot(111)
    ax.legend(ncol=1, loc='center left', bbox_to_anchor=(1, 0.5))
    plt.xlabel("Time")

    if callback:
        callback()

    plt.tight_layout()

    fileName = 'fig_' + str(random.randint(0, 99999999)).zfill(8) + '.pdf'
    plt.savefig(fileName)
    display(HTML('<a href="' + fileName + '">download pdf</a>'))

    # Snippet the user can paste into a new cell to re-create and tune the figure.
    rnd_name = str(random.randint(0, 99999999)).zfill(8)
    display(HTML('<p>Copy this to tune this Figure.</p><textarea>'
                 + 'def fig_time_' + rnd_name + '():\n\tpass\n\t#plt.title("Test")\n'
                 + 'def scale_' + rnd_name + '(y):\n\treturn y\n'
                 + 'timeplot(' + str(groupBy) + ', "' + str(target) + '", ' + str(filters) + ',' + str(pipe) + ', ' + 'fig_time_' + rnd_name + ', ' + 'scale_' + rnd_name + ')</textarea>'))
    
# Build the time-series UI: one multi-select filter per experiment parameter,
# with all values selected initially.
filterSelectsTime = []
for parameter in experimentParams:
    values = parameterValuesOf(parameter)
    widget = widgets.SelectMultiple(options=values, value=values, description='{0}'.format(parameter))
    filterSelectsTime.append(widget)

# Lay the filter widgets out in rows of at most three.
hboxesTime = [widgets.HBox(selects) for selects in [filterSelectsTime[i:i + 3] for i in range(0, len(filterSelectsTime), 3)]]    
    
groupSelectTime = widgets.SelectMultiple(options=experimentParams, value=experimentParams, description='Group By')
targetSelectTime = widgets.Select(options=targetMetrics, description='Target')

containerTime = widgets.VBox(hboxesTime + [widgets.HBox([groupSelectTime, targetSelectTime])])
plotButtonTime = widgets.Button(description="Plot")
clearButtonTime = widgets.Button(description="Clear")
pipeToggleTime = widgets.Checkbox(value=False, description='Pipe', icon='check')

# The lambdas read the widget values at click time; each filter widget's
# description doubles as the parameter name in the filters dict.
plotButtonTime.on_click(lambda b: timeplot(groupSelectTime.value, targetSelectTime.value, {s.description: s.value for s in filterSelectsTime}, pipeToggleTime.value))
clearButtonTime.on_click(lambda b: clear_output(wait=False))

display(containerTime, plotButtonTime, clearButtonTime, pipeToggleTime)

## Boxplot analysis
Use Ctrl+Click to select the parameters you want to activate and group by.

In [None]:
def boxplot(groupBy, target, filters, details, callback=None):
    """Draw a box plot of `target` values grouped by `groupBy`.

    Rows of the global `df` with key == `target` are narrowed by `filters`
    and plotted with DataFrame.boxplot. The figure is saved to a randomly
    named PDF, and a download link plus a code snippet for re-plotting are
    displayed. All output goes into the global Output widget `out`.

    groupBy  -- parameter names to group the boxes by
    target   -- value of the "key" column to plot
    filters  -- dict mapping a parameter name to its allowed values
    details  -- if true, the title also lists the filters and the grouping
    callback -- optional zero-argument function, called before tight_layout()
    """
    # Fix to enable clearoutput https://github.com/jupyter-widgets/ipywidgets/issues/1744
    display(out)
    with out:
        df1 = df[df['key'] == target]
        for paramName, enabledValues in filters.items():
            df1 = df1[df1[paramName].isin(enabledValues)]
        plot = df1.boxplot(by=list(groupBy), column='value')
        plot.set_xticklabels(plot.xaxis.get_majorticklabels(), rotation=90)

        # Blank the super-title in both modes; only the axes title is set below.
        plt.suptitle("")
        if details:
            plt.title("\n".join(wrap("Target: " + str(target) + " Filter:" + str(filters) + " Group:" + str(groupBy), 100)))
        else:
            plt.title(str(target))

        if callback:
            callback()

        plt.tight_layout()

        fileName = 'fig_' + str(random.randint(0, 99999999)).zfill(8) + '.pdf'
        plt.savefig(fileName)
        display(HTML('<a href="' + fileName + '">download pdf</a>'))

        # Snippet for re-creating the figure. print(...) with one argument is
        # valid on Python 2 and 3, unlike the print statement.
        rnd_name = 'fig_boxplot_' + str(random.randint(0, 99999999)).zfill(8)
        display(HTML('<p>Copy this to tune this Figure.</p><textarea>def ' + rnd_name + '():\n\tprint("Called")\n\tplt.title("Test")\nboxplot(' + str(groupBy) + ', "' + str(target) + '", ' + str(filters) + ', False, ' + rnd_name + ')</textarea>'))

        plt.show()
    
# Build the box-plot UI: one multi-select filter per experiment parameter,
# with all values selected initially.
filterSelectsBpa = []
for parameter in experimentParams:
    values = parameterValuesOf(parameter)
    widget = widgets.SelectMultiple(options=values, value=values, description='{0}'.format(parameter))
    filterSelectsBpa.append(widget)

# Lay the filter widgets out in rows of at most three.
hboxesBpa = [widgets.HBox(selects) for selects in [filterSelectsBpa[i:i + 3] for i in range(0, len(filterSelectsBpa), 3)]]    
    
groupSelectBpa = widgets.SelectMultiple(options=experimentParams, value=experimentParams, description='Group By')
targetSelectBpa = widgets.Select(options=targetMetrics, description='Target')

containerBpa = widgets.VBox(hboxesBpa + [widgets.HBox([groupSelectBpa, targetSelectBpa])])
plotButtonBpa = widgets.Button(description="Plot")
clearButtonBpa = widgets.Button(description="Clear")
detailsToggleBpa = widgets.Checkbox(value=False, description='Details', icon='check')

# The lambdas run at click time, so `out` (created at the end of this cell)
# already exists when they look it up.
plotButtonBpa.on_click(lambda b: boxplot(groupSelectBpa.value, targetSelectBpa.value, {s.description: s.value for s in filterSelectsBpa}, detailsToggleBpa.value))
clearButtonBpa.on_click(lambda b: out.clear_output(wait=False))

display(containerBpa, plotButtonBpa, clearButtonBpa, detailsToggleBpa)

# Output widget that boxplot() displays and renders into.
out = Output()

## Target in Environment Comparison

In [None]:
import time

def metricPlotter(environmentParam, targetMetric, filters, pipe=False, callback=None):
    """Plot the mean of `targetMetric` against one environment parameter.

    Rows of the global `df` with key == `targetMetric` are narrowed by
    `filters`. One line is drawn per distinct combination of the
    configuration parameters, showing the mean "value" for every value of
    `environmentParam`. The figure is saved to a timestamped PDF, and a
    download link plus a code snippet for re-plotting are displayed.

    environmentParam -- column used for the x axis
    targetMetric     -- value of the "key" column to plot
    filters          -- dict mapping a parameter name to its allowed values
    pipe             -- if true, shade mean +/- standard deviation per line
    callback         -- optional zero-argument function, called before
                        tight_layout()
    """
    df1 = df[df['key'] == targetMetric]
    for paramName, enabledValues in filters.items():
        df1 = df1[df1[paramName].isin(enabledValues)]
    # A helper column is added below; work on an explicit copy so pandas does
    # not raise SettingWithCopyWarning for writing to a filtered slice.
    targetMetricDf = df1.copy()

    # Label every row with its configuration values, joined by ', '.
    configurationParam = "configurationParam"
    targetMetricDf[configurationParam] = targetMetricDf.filter(configurationParameters).apply(lambda row: np.array2string(row, separator=', ').strip('[]'), axis=1)
    relevantCfgValues = sorted(targetMetricDf[configurationParam].unique().tolist())

    fig = plt.gcf()
    fig.set_size_inches(6, 3)

    for cfgValue in relevantCfgValues:
        grouped = targetMetricDf[targetMetricDf[configurationParam] == cfgValue].groupby([configurationParam, environmentParam])
        tmp = grouped.mean().reset_index()
        tmp_std = grouped.std().reset_index()
        # Line style depends on the label; a later match overrides an earlier one.
        linetype = "-"
        if "DASH.JS" in cfgValue:
            linetype = '--'
        if "APlayer" in cfgValue:
            linetype = ':'
        line = plt.plot(tmp[environmentParam], tmp["value"], label = cfgValue, linestyle=linetype)
        if pipe:
            plt.fill_between(tmp_std[environmentParam], tmp["value"]+tmp_std["value"], tmp["value"]-tmp_std["value"], color=line[0].get_color(), alpha=0.1)

    # Shrink current axis by 20%
    ax = plt.subplot(111)
    #ax.set_color_cycle(palettable.colorbrewer.qualitative.Dark2_8.mpl_colors)
    plt.subplots_adjust(right=1)
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    xunit = unitOf(environmentParam)
    yunit = unitOf(targetMetric)
    #legendtitle = np.array2string(list(targetMetricDf.filter(configurationParameters).columns), separator=', ').strip('[]')
    # Put a legend to the right of the current axis
    ax.legend(ncol=1, loc='center left', bbox_to_anchor=(1, 0.5))
    plt.xlabel('{0} ({1})'.format(environmentParam, xunit) if xunit else environmentParam)
    plt.ylabel('{0} ({1})'.format(targetMetric, yunit) if yunit else targetMetric)

    if callback:
        callback()

    plt.tight_layout()

    fileName = 'fig_{}.pdf'.format(time.strftime("%Y%m%d-%H%M%S"))
    plt.savefig(fileName, bbox_inches='tight')
    display(HTML('<a href="' + fileName + '">download pdf</a>'))

    # Snippet for re-creating the figure. `pipe` must be passed explicitly so
    # that the callback lands in the fifth positional slot, not in `pipe`.
    rnd_name = 'fig_metricPlotter_' + str(random.randint(0, 99999999)).zfill(8)
    display(HTML('<p>Copy this to tune this Figure.</p><textarea>def ' + rnd_name + '():\n\tprint("Called")\n\tplt.title("Test")\nmetricPlotter("' + str(environmentParam) + '", "' + str(targetMetric) + '", ' + str(filters) + ', ' + str(pipe) + ', ' + rnd_name + ')</textarea>'))

# Build the environment-comparison UI: one multi-select filter per experiment
# parameter, with all values selected initially.
filterSelectsMp = []
for parameter in experimentParams:
   values = parameterValuesOf(parameter)
   widget = widgets.SelectMultiple(options=values, value=values, description='{0}'.format(parameter))
   filterSelectsMp.append(widget)

# Lay the filter widgets out in rows of at most three.
hboxesMp = [widgets.HBox(selects) for selects in [filterSelectsMp[i:i + 3] for i in range(0, len(filterSelectsMp), 3)]]    
env1SelectMp = widgets.Dropdown(options=environmentParameters, value=environmentParameters[0], description='Environment')
targetSelectMp = widgets.Dropdown(options=targetMetrics, description='Target Metric')

containerMp = widgets.VBox(hboxesMp + [widgets.HBox([env1SelectMp]), widgets.HBox([targetSelectMp])])
plotButtonMp = widgets.Button(description="Plot")
clearButtonMp = widgets.Button(description="Clear")
# NOTE(review): despite the "Bpa" suffix, this checkbox is only used by the
# metricPlotter handler below.
pipeToggleBpa = widgets.Checkbox(value=False, description='Pipe', icon='check')

# The lambdas read the widget values at click time.
plotButtonMp.on_click(lambda b: metricPlotter(env1SelectMp.value, targetSelectMp.value, {s.description: s.value for s in filterSelectsMp}, pipeToggleBpa.value))
clearButtonMp.on_click(lambda b: clear_output(wait=False))

display(containerMp, plotButtonMp, clearButtonMp, pipeToggleBpa)

## Best configuration
You can use this analysis to find which configuration performed best for each combination of two environment parameters.
Note that this analysis will fail if you do not have at least two environment parameters.

The colored dots denote the best configuration value for experiments from your data set while the colored areas are estimations based on the classifier you chose.

In [None]:
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Classifiers offered in the "Best configuration" UI, keyed by the label shown
# in the dropdown. bestConfigScatter() clones the selected entry before
# fitting, so the instances stored here are never fitted themselves.
classifiers = {
    "KNeighborsClassifier(3, weights='distance')": KNeighborsClassifier(3, weights='distance'),
    'GaussianNB': GaussianNB(),
    'MLPClassifier(alpha=1)': MLPClassifier(alpha=1),
    'DecisionTree(max_depth=5)': DecisionTreeClassifier(max_depth=5),
    'SVC(gamma=2, C=1)': SVC(gamma=2, C=1),
    'GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)': GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
}

def bestConfigScatter(clfName, configurationParams, environmentParams, targetMetric, filters, smallestIsBest=False, callback=None):
    """Show which configuration is best for each pair of environment values.

    Rows of the global `df` with key == `targetMetric` are narrowed by
    `filters` and averaged per (environment values, configuration). For every
    environment combination, the configuration with the best mean "value" is
    kept and drawn as a dot. A clone of the selected classifier is fitted on
    these points, and its predictions over a grid colour the background. The
    figure is saved to a randomly named PDF, and a download link plus a code
    snippet for re-plotting are displayed.

    clfName             -- key into the global `classifiers` dict
    configurationParams -- columns whose values are joined with '_' into the
                           configuration label
    environmentParams   -- list of exactly two column names (x axis, y axis)
    targetMetric        -- value of the "key" column to analyse
    filters             -- dict mapping a parameter name to its allowed values
    smallestIsBest      -- if true, the smallest mean wins, else the largest
    callback            -- optional zero-argument function, called after
                           tight_layout()

    Raises AssertionError if `environmentParams` does not have two entries.
    """
    assert len(environmentParams) == 2
    subset = df[df['key'] == targetMetric]
    for paramName, enabledValues in filters.items():
        subset = subset[subset[paramName].isin(enabledValues)]
    # A helper column is added below; work on an explicit copy so pandas does
    # not raise SettingWithCopyWarning for writing to a filtered slice.
    subset = subset.copy()

    # add a column "configurationParam" which is a string concat of all configurationParams
    configurationParam = "configurationParam"
    subset[configurationParam] = subset.apply(lambda row: "_".join([s.strip() for s in row[list(configurationParams)].to_string(header=False, index=False).split('\n')]), axis=1)
    subset = subset.groupby(environmentParams + [configurationParam]).mean().reset_index()

    # Sort so that the best row comes first, then keep the first row per
    # environment combination.
    subset = subset.sort_values(by='value', ascending=smallestIsBest).groupby(environmentParams).first().reset_index()

    colors = plt.cm.get_cmap('Set3')

    relevantCfgValues = sorted(subset[configurationParam].unique().tolist())

    def colorIndex(cfgValue):
        # Position of the label within the colormap range [0, 1].
        if len(relevantCfgValues) == 1:
            return 0
        return float(relevantCfgValues.index(cfgValue)) / float((len(relevantCfgValues) - 1))

    # Pad the plotted area by a tenth of the largest value (rounded up) and
    # sample the classifier on a grid with a tenth of that padding as step.
    xorder = math.ceil(subset[environmentParams[0]].max() / 10.0)
    yorder = math.ceil(subset[environmentParams[1]].max() / 10.0)
    xmin, xmax = subset[environmentParams[0]].min() - xorder, subset[environmentParams[0]].max() + xorder
    ymin, ymax = subset[environmentParams[1]].min() - yorder, subset[environmentParams[1]].max() + yorder

    xstep = xorder / 10.0
    ystep = yorder / 10.0
    xx, yy = np.meshgrid(np.arange(xmin, xmax, xstep), np.arange(ymin, ymax+ystep, ystep))

    clf = sklearn.base.clone(classifiers[clfName])
    # .loc replaces the removed .ix indexer; selecting columns by label is the same.
    clf.fit(subset.loc[:, environmentParams], subset[configurationParam])

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = np.vectorize(colorIndex)(Z)
    Z = Z.reshape(xx.shape)
    plt.pcolormesh(xx, yy, Z, cmap=colors)

    for cfgValue in relevantCfgValues:
        color = colors(colorIndex(cfgValue))
        subsubset = subset[subset[configurationParam] == cfgValue]
        xs = subsubset[environmentParams[0]].values
        ys = subsubset[environmentParams[1]].values

        for i, (px, py) in enumerate(zip(xs, ys)):
            # Only the first dot of a configuration carries the legend label.
            plt.scatter(px, py, s=100, c=color, linewidth=1, edgecolor='black', label=cfgValue if i == 0 else "")
            # uncomment to have annotations
            #ax.annotate(cfgValue, (px, py))

    lgd = plt.legend(framealpha=0.1, scatterpoints=1, bbox_to_anchor=(2.0, 0.5))
    plt.title('Best {0} by {1}'.format(targetMetric, configurationParam))
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    xunit = unitOf(environmentParams[0])
    plt.xlabel('{0} ({1})'.format(environmentParams[0], xunit) if xunit else environmentParams[0])
    yunit = unitOf(environmentParams[1])
    plt.ylabel('{0} ({1})'.format(environmentParams[1], yunit) if yunit else environmentParams[1])
    plt.tight_layout()

    if callback:
        callback()

    fileName = 'fig_' + str(random.randint(0, 99999999)).zfill(8) + '.pdf'
    plt.savefig(fileName, bbox_extra_artists=(lgd,), bbox_inches='tight')
    display(HTML('<a href="' + fileName + '">download pdf</a>'))

    # Snippet for re-creating the figure. print(...) with one argument is
    # valid on Python 2 and 3, unlike the print statement.
    rnd_name = 'fig_metricPlotter_' + str(random.randint(0, 99999999)).zfill(8)
    display(HTML('<p>Copy this to tune this Figure.</p><textarea>def ' + rnd_name +
                 '():\n\tprint("Called")\n\tplt.title("Test")\nbestConfigScatter("' + str(clfName) + '", ' + str(configurationParams) + ', ' + str(environmentParams) + ', "' + str(targetMetric) + '", ' + str(filters) + ', ' + str(smallestIsBest) + ',' + rnd_name + ')</textarea>'))

# Build the best-configuration UI: one multi-select filter per experiment
# parameter, with all values selected initially.
filterSelectsBc = []
for parameter in experimentParams:
    values = parameterValuesOf(parameter)
    widget = widgets.SelectMultiple(options=values, value=values, description='{0}'.format(parameter))
    filterSelectsBc.append(widget)

# Lay the filter widgets out in rows of at most three.
hboxesBc = [widgets.HBox(selects) for selects in [filterSelectsBc[i:i + 3] for i in range(0, len(filterSelectsBc), 3)]]    

classifierSelectBc = widgets.Dropdown(options=classifiers.keys(), value="KNeighborsClassifier(3, weights='distance')", description='Classifier')

configSelectBc = widgets.SelectMultiple(options=configurationParameters, value=configurationParameters, description='Config')
# The X/Y defaults index environmentParameters[0] and [1], so this cell raises
# IndexError when fewer than two environment parameters exist.
env1SelectBc = widgets.Dropdown(options=environmentParameters, value=environmentParameters[0], description='Environment X')
env2SelectBc = widgets.Dropdown(options=environmentParameters, value=environmentParameters[1], description='Environment Y')
targetSelectBc = widgets.Dropdown(options=targetMetrics, description='Target Metric')
orderToggleBc = widgets.Checkbox(value=False, description='Smallest target value is best', icon='check')

containerBc = widgets.VBox(hboxesBc + [widgets.HBox([configSelectBc, env1SelectBc, env2SelectBc]), widgets.HBox([targetSelectBc, orderToggleBc, classifierSelectBc])])
plotButtonBc = widgets.Button(description="Plot")
clearButtonBc = widgets.Button(description="Clear")

# The lambdas read the widget values at click time.
plotButtonBc.on_click(lambda b: bestConfigScatter(classifierSelectBc.value, configSelectBc.value, [env1SelectBc.value, env2SelectBc.value], targetSelectBc.value, {s.description: s.value for s in filterSelectsBc}, orderToggleBc.value))
clearButtonBc.on_click(lambda b: clear_output(wait=False))

display(containerBc, plotButtonBc, clearButtonBc)

## Pareto frontier

In [None]:
# TODO unify filter code

def is_pareto_front(df_target, row, xlabel, ylabel):
    """Tell whether `row` lies on the Pareto front of `df_target`.

    Larger values are treated as better on both axes. A row is on the front
    when no row of `df_target` has the same y and a larger x, the same x and
    a larger y, or a larger x and a larger y.

    df_target -- DataFrame containing the columns `xlabel` and `ylabel`
    row       -- the row to test (indexable by `xlabel` and `ylabel`)
    xlabel    -- name of the first objective column
    ylabel    -- name of the second objective column

    Returns a plain bool.
    """
    x = row[xlabel]
    y = row[ylabel]
    xs = df_target[xlabel]
    ys = df_target[ylabel]

    # Reduce only the objective column: taking max() over the whole frame
    # would also reduce unrelated columns, which is wasted work and can
    # raise when such a column holds values that cannot be compared.

    # look for points with the same y value but larger x value
    is_max_x = xs[ys == y].max() <= x
    # look for points with the same x value but larger y value
    is_max_y = ys[xs == x].max() <= y
    # look for points that are larger in both x and y
    is_double = not ((xs > x) & (ys > y)).any()

    return bool(is_max_x and is_max_y and is_double)

def paretoPlot(target_1, target_2, inverse_x, inverse_y, filters, details, callback=None):
    """Scatter two target metrics per configuration and mark the Pareto front.

    The global `df` is narrowed by `filters`. The mean "value" of `target_1`
    and of `target_2` is computed per combination of configuration
    parameters, and the two results are joined on those parameters. Pareto
    rows (see is_pareto_front) are drawn in red, the others in blue. The
    figure is saved to a randomly named PDF, and a download link, the table
    of Pareto configurations and a code snippet for re-plotting are
    displayed.

    target_1  -- key plotted on the x axis
    target_2  -- key plotted on the y axis
    inverse_x -- if true, negate the x values, so smaller originals rank as better
    inverse_y -- if true, negate the y values, so smaller originals rank as better
    filters   -- dict mapping a parameter name to its allowed values
    details   -- if true, show the filters as title and apply tight_layout()
    callback  -- optional zero-argument function, called before saving
    """
    df_tmp = df
    for paramName, enabledValues in filters.items():
        df_tmp = df_tmp[df_tmp[paramName].isin(enabledValues)]

    df_first = df_tmp[df_tmp["key"] == target_1].groupby(configurationParameters).mean().reset_index()
    df_second = df_tmp[df_tmp["key"] == target_2].groupby(configurationParameters).mean().reset_index()

    if inverse_x:
        df_first["value"] = df_first["value"] * (-1)

    if inverse_y:
        df_second["value"] = df_second["value"] * (-1)
    # The merge suffixes the clashing "value" columns as value_x / value_y.
    df_merged = pd.merge(df_first, df_second, on=configurationParameters)

    # array of True/False indicating whether the corresponding row is on the pareto frontier
    is_pareto = df_merged.apply(lambda row: is_pareto_front(df_merged, row, 'value_x', 'value_y'), axis=1)
    # .loc replaces the removed .ix indexer; boolean-mask selection is the same.
    df_merged_pareto = df_merged.loc[is_pareto]
    df_merged_nonpareto = df_merged.loc[~is_pareto]

    display(str(len(df_merged_pareto)) + " of " + str(len(df_merged)) + " configurations are on the pareto front")

    plt.scatter(df_merged_nonpareto['value_x'], df_merged_nonpareto['value_y'], marker='o', color = "blue", label="non-pareto")
    plt.scatter(df_merged_pareto['value_x'], df_merged_pareto['value_y'], marker='v', color = "red", label="pareto") #, '--', label='Pareto frontier', alpha=0.5)
    plt.legend(bbox_to_anchor=(2, 0.5))
    plt.xlabel(target_1)
    plt.ylabel(target_2)

    if details:
        plt.title("\n".join(wrap(str(filters), 100)))
        plt.tight_layout()

    if callback:
        callback()

    fileName = 'fig_' + str(random.randint(0, 99999999)).zfill(8) + '.pdf'
    plt.savefig(fileName)
    display(HTML('<a href="' + fileName + '">download pdf</a>'))

    display(df_merged_pareto[(configurationParameters + ["value_x", "value_y"])])

    # Snippet for re-creating the figure. print(...) with one argument is
    # valid on Python 2 and 3, unlike the print statement.
    rnd_name = 'fig_metricPlotter_' + str(random.randint(0, 99999999)).zfill(8)
    display(HTML('<p>Copy this to tune this Figure.</p><textarea>def ' + rnd_name +
                 '():\n\tprint("Called")\n\tplt.title("Test")\nparetoPlot("' + str(target_1) + '", "' + str(target_2) +
                 '", ' + str(inverse_x) + ', ' + str(inverse_y) + ', ' + str(filters) + ', False,' + rnd_name + ')</textarea>'))


# Build the Pareto-frontier UI: one multi-select filter per experiment
# parameter, with all values selected initially.
filterSelectsPf = []
for parameter in experimentParams:
    values = parameterValuesOf(parameter)
    widget = widgets.SelectMultiple(options=values, value=values, description='{0}'.format(parameter))
    filterSelectsPf.append(widget)

# Lay the filter widgets out in rows of at most three.
hboxesPf = [widgets.HBox(selects) for selects in [filterSelectsPf[i:i + 3] for i in range(0, len(filterSelectsPf), 3)]]    
containerPf = widgets.VBox(hboxesPf)

# Each axis gets a metric dropdown plus a checkbox; the checkbox values are
# passed to paretoPlot() as inverse_x / inverse_y.
targetSelectXPf = widgets.Dropdown(options=targetMetrics, description='Target Metric X')
orderToggleXPf = widgets.Checkbox(value=False, description='Smallest target value is best', icon='check')
targetSelectYPf = widgets.Dropdown(options=targetMetrics, description='Target Metric Y')
orderToggleYPf = widgets.Checkbox(value=False, description='Smallest target value is best', icon='check')

plotButtonPf = widgets.Button(description="Plot")
clearButtonPf = widgets.Button(description="Clear")

detailsTogglePf = widgets.Checkbox(value=False, description='Details', icon='check')

# The lambdas read the widget values at click time.
plotButtonPf.on_click(lambda b: paretoPlot(targetSelectXPf.value, targetSelectYPf.value, orderToggleXPf.value, \
                                         orderToggleYPf.value, {s.description: s.value for s in filterSelectsPf}, detailsTogglePf.value))
clearButtonPf.on_click(lambda b: clear_output(wait=False))
display(containerPf, widgets.HBox([targetSelectXPf, orderToggleXPf]), widgets.HBox([targetSelectYPf, orderToggleYPf]), plotButtonPf, clearButtonPf, detailsTogglePf)

# Add Utility Values

In [None]:
def utility_helper_multiple_rows(key, row):
    """Return the "value" entries of the global `df` for one key and one instance.

    The instance is identified by the first "simInstanceId" entry of `row`.
    """
    instance_id = row["simInstanceId"].iloc[0]
    rows_for_key = df[df['key'] == key]
    rows_for_instance = rows_for_key[rows_for_key['simInstanceId'] == instance_id]
    return rows_for_instance["value"]

def pivot_data():
    """Rebuild the global `df_p`: one row per simInstanceId, one column per key.

    Missing combinations are filled with 0. The row counts of `df` and of
    the pivoted frame are displayed.
    """
    global df
    global df_p

    pivoted = df.pivot_table(values="value", index=["simInstanceId"], columns="key", fill_value=0)
    df_p = pivoted.reset_index()
    display("Number of denormalized rows " + str(len(df)) + " and now " + str(len(df_p)))

def add_utility(name, func, update=False, show=False):
    """Add a derived metric `name` to the global `df`, one row per simInstanceId.

    The data is pivoted first (see pivot_data). For every simulation
    instance, `func` receives the matching one-row slice of `df_p`, and its
    result is stored as "value" of a new row with key == `name` and
    offset == 0. The other columns are copied from the first `df` row of
    that instance. `name` is appended to `targetMetrics`.

    name   -- key of the new metric
    func   -- function mapping the pivoted row(s) of one instance to a value
    update -- if `name` already exists: replace it when true, otherwise
              return without changes
    show   -- if true, display describe() of the new values per 'player'
    """
    global df
    global df_p

    pivot_data()

    if name in targetMetrics:
        display("The key " + str(name) + " is already present.")
        if update:
            df = df[df['key']!=name]
            targetMetrics.remove(name)
        else:
            return

    combinations = df.groupby(["simInstanceId"]).first().reset_index()
    new_rows = []

    for _, template in combinations.iterrows():
        row_p = df_p[df_p["simInstanceId"] == template["simInstanceId"]]
        new_row = template.copy()
        new_row["key"] = name
        new_row["offset"] = 0
        new_row["value"] = func(row_p)
        new_rows.append(new_row)

    # DataFrame.append has been removed from pandas; pd.concat is the
    # supported way to add the rows and keeps the row labels as before.
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)])
    targetMetrics.append(name)

    if show:
        display(df.filter(['player', 'value'])[df['key']==name].groupby('player').describe())

In [None]:
def utility_function_avg_stall(row):
    """Return the average duration of one stalling event for an instance.

    `row` is the pivoted data of one simulation instance and must provide
    the columns 'stalling_duration_sum' and 'stalling_event_count'. The
    duration sum is divided by 1000 (presumably milliseconds to seconds --
    TODO confirm the unit) and then by the event count.

    Returns 0.0 when no stalling event was counted, so the division by zero
    that would otherwise produce NaN or inf is avoided.
    """
    stalling_sum = row['stalling_duration_sum'].mean()/1000.0
    stalling_count = row['stalling_event_count'].mean()

    if stalling_count == 0:
        return 0.0

    return stalling_sum/stalling_count

# Register the derived "avg_stall" metric; update=True replaces an existing
# "avg_stall" key, so re-running this cell does not stop at the early return.
add_utility("avg_stall", utility_function_avg_stall, update=True, show=False)