In [31]:
import yaml
import os
import logging
import pprint
import plotly
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import psutil


log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.propagate = False
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
ch.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
# add the handlers to the logger
if not log.handlers:
    log.addHandler(ch)

In [35]:
# Parse experiment yaml file
experiments_path="../experiments/regression_test.yaml"

# Get experiment information from yaml file.
experiment_params = yaml.load(open(experiments_path))

regression_tests_dir = os.path.expandvars(experiment_params['regression_tests_dir'])

datasets_to_run = experiment_params['datasets_to_run']
regression_params = experiment_params['regression_parameters']

In [36]:
# Retrieve stats, if they are not there, try to collect them:
full_stats_path = os.path.join(regression_tests_dir, "all_stats.yaml")
stats = dict()
if os.path.isfile(full_stats_path):
    log.info("Found existent stats. Opening full stats from:" + full_stats_path)
    stats = yaml.load(open(full_stats_path))
else:
    log.info("Collecting full stats.")
    # TODO(Toni): recollection of results should be automatic by looking for results.yaml files in the
    # regression_tests_dir file system.
    # Collect all yaml results for a given parameter name:
    for regression_param in regression_params:
        # Redirect to param_name_value dir param_name = regression_param['name']
        param_name = regression_param['name']
        stats[param_name] = dict()
        for param_value in regression_param['values']:
            results_dir = os.path.join(regression_tests_dir, param_name, str(param_value))
            # Redirect to modified params_dir
            params_dir = os.path.join(results_dir, 'params')
            stats[param_name][param_value] = dict()
            for dataset in datasets_to_run:
                dataset_name = dataset['name']
                pipelines_to_run = dataset['pipelines']
                stats[param_name][param_value][dataset_name] = dict()
                for pipeline in pipelines_to_run:
                    results_file = os.path.join(results_dir, dataset_name, pipeline, "results.yaml")
                    if os.path.isfile(results_file):
                        stats[param_name][param_value][dataset_name][pipeline] = yaml.load(open(results_file,'r'))
                    else:
                        log.warning("Could not find results file: {}. Adding cross to boxplot...".format(results_file))
                        stats[param_name][param_value][dataset_name][pipeline] = False

    # Save all stats in regression tests root directory for future usage.
    with open(full_stats_path, 'w') as outfile:
        outfile.write(yaml.dump(stats))
    
    # Push to the cloud?!

INFO - Collecting full stats.


In [37]:
# Store stats in a tidy Pandas DataFrame # TODO(Toni): this should be done in the evaluation_lib.py script...
stats_list = []
for param_name in stats:
    for param_value in stats[param_name]:
        for dataset_name in stats[param_name][param_value]:
            for pipeline in stats[param_name][param_value][dataset_name]:
                result = stats[param_name][param_value][dataset_name][pipeline]
                if result != False:
                    result = result['absolute_errors'].np_arrays['error_array']
                    stats_list.append([param_name, param_value, dataset_name, pipeline, result])

df = pd.DataFrame.from_records(stats_list)
df.columns = ['Param Name', 'Param Value', 'Dataset Name', 'Pipe Type', 'ATE errors']
df.set_index(['Param Name', 'Dataset Name'], inplace = True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Param Value,Pipe Type,ATE errors
Param Name,Dataset Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
smartNoiseSigma,V1_01_easy,2.0,S,"[0.10997670458005289, 0.10517572913663793, 0.1..."
smartNoiseSigma,MH_01_easy,2.0,S,"[0.2151444667711187, 0.2157468309814195, 0.216..."
smartNoiseSigma,V1_01_easy,3.0,S,"[0.10524378348464582, 0.10130184371967772, 0.0..."
smartNoiseSigma,MH_01_easy,3.0,S,"[0.2621647134833297, 0.26193382036350643, 0.26..."
smartNoiseSigma,V1_01_easy,3.2,S,"[0.09785664059397901, 0.09267139762164138, 0.0..."
smartNoiseSigma,MH_01_easy,3.2,S,"[0.22771362099988496, 0.22756626438640226, 0.2..."
smartNoiseSigma,V1_01_easy,2.2,S,"[0.15221248678273705, 0.14793509054390908, 0.1..."
smartNoiseSigma,MH_01_easy,2.2,S,"[0.20966290130595294, 0.20999767462721666, 0.2..."
smartNoiseSigma,V1_01_easy,3.4,S,"[0.10878469304380378, 0.09976441568897954, 0.0..."
smartNoiseSigma,MH_01_easy,3.4,S,"[0.22442579681956074, 0.2248948126062282, 0.22..."


In [47]:
def boxplot(param_name, dataset_name, tidy):
    tidy.set_index(['Param Value', 'Pipe Type'], inplace = True)
    tidy_2 = tidy['ATE errors'].apply(lambda x: pd.Series(x)).stack().reset_index(level=2, drop=True).to_frame('ATE errors')
    tidy_2.reset_index(level=['Pipe Type', 'Param Value'], drop=False, inplace=True)
    fig = px.box(tidy_2, x='Param Value', y="ATE errors", points="all", color="Pipe Type")

    fig.update_layout(
    title=go.layout.Title(
        text="Dataset: " + dataset_name
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text=param_name
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text="ATE [m]"
            ),
        rangemode='tozero'
        )
    )
    return fig

In [48]:
# Generate figures
figures = [boxplot(x, y, df.loc[x].loc[y]) for x in df.index.levels[0] for y in df.index.levels[1]]

In [50]:
# Show figures
for figure in figures:
    figure.show()

In [10]:
import plotly.io as pio
pio.orca.status
plotly.io.orca.config.executable = 'venv/bin/orca-server'

In [52]:
# Save figures
if not os.path.exists("figures"):
    os.mkdir("figures")
for fig in figures:
    plotly.offline.plot(fig, filename='figures/regression_test_' + fig.layout.title.text + '_' + fig.layout.xaxis.title.text + '.html')

#for figure in figures:
#    figure.write_image("figures/"+ figure.layout.title.text + ".svg")

In [51]:
# Save figures online
for fig in figures:
    plotly.online.plot(fig, filename='figures/regression_test_' + fig.layout.title.text + '_' + fig.layout.xaxis.title.text + '.html')

AttributeError: 'module' object has no attribute 'online'