In [9]:
import yaml
import os
import logging
import pprint
import plotly
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import psutil


log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.propagate = False
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
ch.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
# add the handlers to the logger
if (log.hasHandlers()):
    log.handlers.clear()
log.addHandler(ch)
# "application" code
log.debug("debug message")
log.info("info message")
log.warn("warn message")
log.error("error message")
log.critical("critical message")

AttributeError: 'Logger' object has no attribute 'hasHandlers'

In [None]:
# Parse experiment yaml file
experiments_path="../experiments/regression_test.yaml"

# Get experiment information from yaml file.
experiment_params = yaml.load(open(experiments_path))

regression_tests_dir = os.path.expandvars(experiment_params['regression_tests_dir'])
params_dir = os.path.expandvars(experiment_params['params_dir'])
dataset_dir = os.path.expandvars(experiment_params['dataset_dir'])
executable_path = os.path.expandvars(experiment_params['executable_path'])

datasets_to_run = experiment_params['datasets_to_run']
regression_params = experiment_params['regression_parameters']

# Build dictionary from parameter name to list of parameter values
param_name_to_values = dict()
for regression_param in regression_params:
    param_name_to_values[regression_param['name']] = regression_param['values']

In [None]:
# Retrieve stats, if they are not there, try to collect them:
full_stats_path = os.path.join(regression_tests_dir, "all_stats.yaml")
stats = dict()
if os.path.isfile(full_stats_path):
    log.info("Found existent stats. Opening full stats from:" + full_stats_path)
    stats = yaml.load(open(full_stats_path))
else:
    log.info("Collecting full stats.")
    # Collect all yaml results for a given parameter name:
    for regression_param in regression_params:
        # Redirect to param_name_value dir param_name = regression_param['name']
        param_name = regression_param['name']
        stats[param_name] = dict()
        for param_value in regression_param['values']:
            results_dir = os.path.join(regression_tests_dir, param_name, str(param_value))
            # Redirect to modified params_dir
            params_dir = os.path.join(results_dir, 'params')
            stats[param_name][param_value] = dict()
            for dataset in datasets_to_run:
                dataset_name = dataset['name']
                pipelines_to_run = dataset['pipelines']
                stats[param_name][param_value][dataset_name] = dict()
                for pipeline in pipelines_to_run:
                    results_file = os.path.join(results_dir, dataset_name, pipeline, "results.yaml")
                    if os.path.isfile(results_file):
                        stats[param_name][param_value][dataset_name][pipeline] = yaml.load(open(results_file,'r'))
                    else:
                        log.warning("Could not find results file: {}. Adding cross to boxplot...".format(results_file))
                        stats[param_name][param_value][dataset_name][pipeline] = False

    # Save all stats in regression tests root directory for future usage.
    with open(full_stats_path, 'w') as outfile:
        outfile.write(yaml.dump(stats))
    
    # Push to the cloud?!

In [None]:
# Display plots for that result
# pprint.pprint(stats)

In [None]:


def plot(x_data, y_data):
    colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
              'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

    fig = go.Figure()

    for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Box(
                y=yd,
                name=xd,
                boxpoints='all',
                jitter=0.5,
                whiskerwidth=0.2,
                marker_size=2,
                line_width=1)
            )

    fig.update_layout(
        title='Parameter: ' + param_name + ', dataset: ' + dataset_name,
        yaxis=dict(
            autorange=True,
            showgrid=True,
            zeroline=True,
            dtick=5,
            gridcolor='rgb(255, 255, 255)',
            gridwidth=1,
            zerolinecolor='rgb(255, 255, 255)',
            zerolinewidth=2,
        ),
        margin=dict(
            l=40,
            r=30,
            b=80,
            t=100,
        ),
        paper_bgcolor='rgb(243, 243, 243)',
        plot_bgcolor='rgb(243, 243, 243)',
        showlegend=False
    )

    fig.show()
    
def plot2():
    colors = ['#3D9970',  '#FF4136', '#FF851B']
    
    names = ['S', 'SP', 'SPR']
    
    x_data = [1, 1, 1, 1, 1, 1,
              2, 2, 2, 2, 2, 2]
    
    y_data= [[0.2, 0.2, 0.6, 1.0, 0.5, 0.4, 0.2, 0.7, 0.9, 0.1, 0.5, 0.3],
            [0.6, 0.7, 0.3, 0.6, 0.0, 0.5, 0.7, 0.9, 0.5, 0.8, 0.7, 0.2],
            [0.1, 0.3, 0.1, 0.9, 0.6, 0.6, 0.9, 1.0, 0.3, 0.6, 0.8, 0.5]]

    fig = go.Figure()
    for i in xrange(len(names)):
        fig.add_trace(go.Box(
            y=y_data[i],
            x=x_data,
            name=names[i],
            marker_color=colors[i]
        ))

    fig.update_layout(
        yaxis_title='ATE errors',
        boxmode='group' # group together boxes of the different traces for each value of x
    )
    fig.show()

In [None]:
# Store stats in a tidy Pandas DataFrame # TODO(Toni): this should be done in the evaluation_lib.py script...
stats_list = []
for param_name in stats:
    for param_value in stats[param_name]:
        for dataset_name in stats[param_name][param_value]:
            for pipeline in stats[param_name][param_value][dataset_name]:
                result = stats[param_name][param_value][dataset_name][pipeline]
                if result != False:
                    result = result['absolute_errors'].np_arrays['error_array']
                    stats_list.append([param_name, param_value, dataset_name, pipeline, result])

df = pd.DataFrame.from_records(stats_list)
df.columns = ['Param Name', 'Param Value', 'Dataset Name', 'Pipe Type', 'ATE errors']
df.set_index(['Param Name', 'Dataset Name'], inplace = True)
df

In [None]:
def boxplot(param_name, dataset_name, tidy):
    tidy.set_index(['Param Value', 'Pipe Type'], inplace = True)
    tidy_2 = tidy['ATE errors'].apply(lambda x: pd.Series(x)).stack().reset_index(level=2, drop=True).to_frame('ATE errors')
    tidy_2.reset_index(level=['Pipe Type', 'Param Value'], drop=False, inplace=True)
    tidy_2.rename(columns={'Param Value': param_name}, inplace=True)
    fig = px.box(tidy_2, x=param_name, y="ATE errors", points="all", color="Pipe Type")

    fig.update_layout(
    title=go.layout.Title(
        text="Dataset: " + dataset_name
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text=param_name
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text="ATE [m]"
            )
        )
    )
    #plotly.offline.plot(fig, filename='regression_test_' + dataset_name + '_' + param_name + '.html')
    return fig

In [None]:
# Generate figures
figures = [boxplot(x, y, df.loc[x].loc[y]) for x in df.index.levels[0] for y in df.index.levels[1]]

In [None]:
# Show figures
for figure in figures:
    figure.show()

In [None]:
import plotly.io as pio
pio.orca.status
plotly.io.orca.config.executable = 'venv/bin/orca-server'

In [None]:
# Save figures
if not os.path.exists("figures"):
    os.mkdir("figures")

#for figure in figures:
#    figure.write_image("figures/"+ figure.layout.title.text + ".svg")