# Visualizations in Plotly for the DMP Aspect Test Experiment

In [1]:
%matplotlib notebook

In [2]:
import sys
sys.path.append("../../")

from dmp.data.logging import _get_sql_engine
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
from joblib import Memory
from ipywidgets import interact, interact_manual
import ipywidgets as widgets

import plotly.graph_objects as go

In [3]:
import scipy
import scipy.interpolate

In [4]:
db = _get_sql_engine()

memory = Memory(location="./cache", verbose=0)

def query(query_string):
    with db.connect() as engine:
        return pd.read_sql(query_string, engine)

cached_query = memory.cache(query)

def clear_cache():
    memory.clear()
    
options = {
    "dataset": ['529_pollen',
        'sleep',
#         'adult',
        '537_houses',
#         'nursery',
        '201_pol',
        'mnist',
        'connect_4',
        'wine_quality_white'],
    "agg": ["avg", "min", "max"],
    "topology" : [
        "rectangle", "trapezoid", "exponential",
        "wide_first_2x", "wide_first_4x", "wide_first"],
     "loss": ['loss', 'hinge', 'accuracy',
       'val_loss', 'val_hinge', 'val_accuracy',
       'squared_hinge', 'cosine_similarity',
       'val_squared_hinge', 'mean_squared_error',
       'mean_absolute_error', 'val_cosine_similarity',
       'val_mean_squared_error', 'root_mean_squared_error',
       'val_mean_absolute_error',
       'kullback_leibler_divergence',
       'val_root_mean_squared_error',
       'mean_squared_logarithmic_error',
       'val_kullback_leibler_divergence',
       'val_mean_squared_logarithmic_error'],
    "residual_mode": ["none", "full"],
    "group_select" : ["min", "max"],
    'depth':[2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20],
    'budget': [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
                32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304,
                8388608, 16777216, 33554432],
    'color_range': (1.05, 2.5, .05),
    'epoch_axis' : ['epoch', 'log_effort', 'effort'],
    'learning_rate': [0.001, 0.0001],
    'label_noise': [0.0, 0.05, 0.10, 0.15, 0.20],
    'groupname' : ['fixed_3k_1', 'fixed_3k_0', 'fixed_01', 'exp00', 'exp01']
    
}

import plotly.io as pio
pio.templates["dmp_template"] = go.layout.Template(
    layout=go.Layout(
        colorscale={
            'diverging':'Temps',
            'sequential': 'Viridis_r',
            'sequentialminus':'Viridis_r',
#             'sequential': 'Agsunset_r',
#             'sequentialminus':'Agsunset_r',
        }
    )
)
pio.templates.default = 'dmp_template'

In [5]:
clear_cache()



In [5]:
string_map_df = pd.read_sql(
        f'''SELECT id, value from strings''',
        db.execution_options(stream_results=True, postgresql_with_hold=True), coerce_float=False,
        params=())

string_to_id_map = {}
id_to_string_map = {}

values = string_map_df['value'].to_list()
for i, str_id in enumerate(string_map_df['id'].to_list()):
    string_to_id_map[values[i]] = str_id
    id_to_string_map[str_id] = values[i]
    
def string_to_id(s):
    if isinstance(s, str):
        return string_to_id_map[s]
    return [string_to_id(e) for e in s]

def id_to_string(i):
    if isinstance(i, int):
        return id_to_string_map[i]
    return [id_to_string(e) for e in i]

print(string_to_id_map)

{'exp00': 1309, '529_pollen': 1118, 'wine_quality_white': 1119, 'wide_first': 1120, 'exponential': 1123, 'trapezoid': 1124, 'full': 1125, 'adult': 1316, '537_houses': 1122, 'connect_4': 1126, 'mnist': 1127, 'nursery': 1327, 'sleep': 1131, 'exp01': 1328, 'fixed_3k_1': 1176, 'fixed_01': 1340, 'wide_first_4x': 1213, 'wide_first_2x': 1214, 'fixed_3k_0': 1112, '201_pol': 1113, 'rectangle': 1114, 'none': 1115, 'adam': 1116, 'relu': 1117}


In [8]:
def setup_value(df, loss, color_range):
    z_label = loss
    #     df["value"] = -np.log(np.minimum(df["value"], np.min(df["value"])*4))   
#     df["value"] = -np.log(df['value'] / np.min(df['value']))
#     df["value"] = -np.log(df['value'] / np.min(df['value']))
#     df["value"] = -(df['value'] / np.min(df['value']))

    minimizing = True
    if 'accuracy' in loss:
        df['value'] = 1 - df['value']
        df['value'] = np.minimum(np.min(df['value']) * color_range, df['value'])
        df["value"] = np.log(df['value'])/np.log(10)
        z_label = f'log(1-{loss})'
    elif 'loss' in loss:
#         df["value"] = -np.exp(1 - df['value']/np.min(df['value']))
        df['value'] = np.minimum(np.min(df['value']) * color_range, df['value'])
#         df["value"] = -np.log(df['value'])/np.log(10)
#         z_label = f'-log({loss})'
        
        df["value"] = df['value'] / np.abs(np.min(df['value']))
        z_label = f'loss / abs(min(loss))'
#         df["value"] = -np.log(df['value'] / np.min(df['value']))/np.log(10)
#         df["value"] = -df['value'] / np.min(df['value'])
    elif 'error' in loss:
        df["value"] = df['value'] / np.min(df['value'])
        df['value'] = np.minimum(color_range, df['value'])
        z_label = f'error / min(error)'
    
    if minimizing:
        best = np.nanmin(df['value'])
    else:
        best = np.nanmax(df['value'])
        
    return z_label, minimizing, best

def compute_effort(df):
    df["effort"] = (df["epoch"] * df["budget"].astype("float")).astype("float")
    df["log_effort"] = np.log(df["effort"]) / np.log(10)
    df['relative_effort'] = df['effort'] / np.min(df['effort'])
    df['relative_log_effort'] = np.log(df['relative_effort']) / np.log(10)


def get_values_for_categorical_keys(df, partition_keys):
    partitions = []
    for partition_key in partition_keys:
        partition_values = sorted(df[partition_key].unique())
        partitions.append(
            (partition_key,
            {key : index for index, key in enumerate(partition_values)},
            partition_values
            ))
    return tuple(partitions)

def partitioned_interpolation(df, partition_keys, interpolation_key, value_key, resolution):
    partitions = get_values_for_categorical_keys(df, partition_keys)
    
    def make_partition_accumulator(i):
        index = partitions[i][1]
        return [make_partition_accumulator(i + 1) if i < len(partitions) - 1 else ([], []) 
                for p in range(len(index))]
    acc = make_partition_accumulator(0)
    
    for _, row in df.iterrows():
        a = acc
        for partition_key, index, _ in partitions:
            a = a[index[row[partition_key]]]
        a[0].append(row[interpolation_key])
        a[1].append(row[value_key])

    interpolation_series = df[interpolation_key]
    interpolation_index = np.linspace(np.min(interpolation_series), np.max(interpolation_series), resolution)
    partition_indexes = [np.linspace(0, len(p), len(p)) for p in partitions]
    
    def do_interpolation(a):
        if type(a) is list:
            return [do_interpolation(p) for p in a]
        func = scipy.interpolate.interp1d(a[0], a[1], kind='linear', bounds_error=False, fill_value=np.NaN)
        return func(interpolation_index)            
    interpolated = np.array(do_interpolation(acc))
    print(f'interpolated {interpolated.shape}')
    return partitions, interpolation_index, interpolated


def make_2d_heatmap_viz(df, group, dataset, topology, loss, agg, residual_mode, viz, color_range):
    z_label, minimizing, best = setup_value(df, loss, color_range)
        
    if viz == "imshow":
        img = df.pivot_table(columns="epoch", index="budget", values="value")
        fig = px.imshow(img)
        fig.update_yaxes(type='category')
    elif viz == "scatter":
        df = df.sort_values(["epoch", "budget"], ascending=[True, False])
        df["budget"] = df["budget"].astype("str")
        df["epoch"] = df["epoch"].astype("str")
        fig = px.scatter(df, x="epoch", y="budget", size="count", color="value")
    elif viz == "effort":
        compute_effort(df)
        key = 'log_effort'
        x_res = 4000
        partitions, x_index, interpolated = partitioned_interpolation(df, ['budget'], key, 'value', x_res)
        fig= px.imshow(interpolated, aspect='auto', zmin=np.min(interpolated), zmax=np.max(interpolated),
            x = x_index,
            y = [str(b) for b in partitions[0][1]],
            labels=dict(x="log(Effort)", y="# Parameters", color=z_label),)
        fig.update_yaxes(type='category')
    else:
        return None
    fig.update_layout(title=f"{z_label} using {loss} for {dataset}, {topology}, residual {residual_mode}")
    return fig


In [38]:
@interact_manual(**options, viz=["volume", "scatter"])
def heatmap_app_3d(group='fixed_3k_1',
                   learning_rate=0.0001,
                   label_noise = 0.0,
                   dataset="201_pol", 
                   topology="rectangle",
                   residual_mode="none",
                   agg="avg", 
                   loss="val_loss", 
                   viz="volume", 
                   epoch_axis = 'epoch',
                   color_range=1.25):
    query_string = f'''
    select budget, depth, {agg}(a.val) as value, count(a.val) as count, a.epoch
    from
        materialized_experiments_3_base base,
        materialized_experiments_3_loss loss,
        unnest(loss.{loss}) WITH ORDINALITY as a(val, epoch)
    WHERE
        base.id = loss.id and 
        groupname = {string_to_id(group)} and
        dataset = {string_to_id(dataset)} and
        learning_rate = {learning_rate}::real and
        label_noise = {label_noise}::real and
        topology = {string_to_id(topology)} and
        residual_mode = {string_to_id(residual_mode)}
    GROUP BY budget, depth, epoch
    ORDER BY budget, depth, epoch;
    '''
#     df = cached_query(query_string).query("count >= 7")    
    df = cached_query(query_string)
    value_label, minimizing, best = setup_value(df, loss, color_range)    
    if 'effort' in epoch_axis:
        compute_effort(df)        
    
    if viz=="scatter":        
        return px.scatter_3d(df,
                    x='config.depth',
                    y='config.budget',
                    z=epoch_axis,
                    color='value',
                    log_y=True,
                    opacity=0.25)
    elif viz == "volume":
        x_res = 2000
        partition_keys = ['budget', 'depth']
        
        if 'effort' in epoch_axis:
            partitions, z_index, values = partitioned_interpolation(df, partition_keys, epoch_axis, 'value', x_res)
        else:
            partition_keys.append(epoch_axis)
            partitions = get_values_for_categorical_keys(df, partition_keys)
            z_index = np.array(list(partitions[2][1].keys()))
            values = np.empty((len(partitions[0][2]), len(partitions[1][2]), len(partitions[2][2])))
            values[:] = np.NaN
            for _, row in df.iterrows():
                values[partitions[0][1][row[partition_keys[0]]], 
                       partitions[1][1][row[partition_keys[1]]], 
                       partitions[2][1][row[partition_keys[2]]]] = row['value']
        
        max_value = np.nanmax(values)
        values = np.nan_to_num(values, copy=False, nan=max_value + 1e-12) # set NaNs to just over the max value
        
        # trim data to the visible range
        z_start = next((z for z in range(values.shape[2]) if np.any(values[:,:,z] < max_value)), 0)
        z_end = next((z for z in reversed(range(values.shape[2])) if np.any(values[:,:,z] < max_value))
                     , values.shape[2])
        values = values[:,:,z_start:z_end+1]
        z_index = z_index[z_start:z_end+1]
            
        print(f'value: {np.nanmin(values)} {np.nanmax(values)} z_start {z_start} z_end {z_end}')
        mesh = [dim.flatten() for dim in np.meshgrid(
            np.array(range(len(partitions[0][2]))),
            np.array(range(len(partitions[1][2]))),
            z_index,            
            indexing='ij'
            )]
        print(f'{np.min(mesh[0])} {np.max(mesh[0])} {np.min(mesh[1])} {np.max(mesh[1])} {np.min(mesh[2])} {np.max(mesh[2])}')
        fig = go.Figure(data=go.Volume(
            x=mesh[0],
            y=mesh[1],
            z=mesh[2],
            value=values.flatten(),
            opacity=1.0,
            isomin= np.nanmin(values),
            isomax= max_value - 1e-12,
            caps= dict(x_show=False, y_show=False, z_show=False),
            surface_count=5,
            opacityscale="min",
            colorscale="YlOrRd_r"
            ))
        
        print(z_index[0], z_index[-1])

        axis_data = [dict(ticktext=p[2],
                         tickvals=list(range(len(p[2]))),
                         title=p[0]) 
                     for p in partitions]

        fig.update_layout(
            scene = dict(
                xaxis = axis_data[0],
                yaxis = axis_data[1],
                zaxis = dict(
                    nticks = 10,
                    range=[z_index[0], z_index[-1]],
                    title=epoch_axis
                )
            ),
            width=950, height=950,
            title=f"{value_label} using {loss} for {dataset}, {topology}, residual {residual_mode}",
            )
        return fig



interactive(children=(Text(value='fixed_3k_1', description='group'), Dropdown(description='learning_rate', ind…

In [8]:
@interact_manual(**options, 
                 statistic=["epoch", "effort", "log_effort", 'relative_effort', 'relative_log_effort', 'relative_log_effort_to_minimize', 'best'], 
                 viz=['imshow', 'depth_lines', 'budget_lines'],
                 depths=widgets.IntRangeSlider(min=2, max=20, step=1, value=(2,20)))
def heatmap_app(group='fixed_3k_1',
                learning_rate=0.0001,
                label_noise = 0.0,
                dataset="201_pol", 
                topology="trapezoid", 
                residual_mode="none",
                loss="history_val_loss", 
                agg="avg",
                statistic = 'relative_log_effort', 
                color_range=1.10,
                viz='imshow'):
    
    query_string = f'''
    select budget, depth, {agg}(a.val) as value, count(a.val) as count, a.epoch
    from
        materialized_experiments_3_base base,
        materialized_experiments_3_loss loss,
        unnest(loss.{loss}) WITH ORDINALITY as a(val, epoch)
    WHERE
        base.id = loss.id and 
        groupname = {string_to_id(group)} and
        dataset = {string_to_id(dataset)} and
        learning_rate = {learning_rate}::real and
        label_noise = {label_noise}::real and
        topology = {string_to_id(topology)} and
        residual_mode = {string_to_id(residual_mode)}
    GROUP BY budget, depth, epoch
    ORDER BY budget, depth, epoch;
    '''
    print('Querying...')
    df = cached_query(query_string)
    print('Done.')
#     z_label, minimizing, best = setup_value(df, loss, color_range)    
        
#     if viz=="efficiency":        
#         z_label = setup_value(df, loss, color_range)
    partition_keys = ['depth', 'budget']
    partitions = get_values_for_categorical_keys(df, partition_keys)
    
    filtered = df
    if statistic != 'relative_log_effort_to_minimize':
        best = np.nanmin(filtered['value']) # find best value
        threshold = best * color_range
        filtered = filtered[filtered['value'] <= threshold]
            
    minimize = 'epoch'
    select = 'epoch'
    if statistic == 'relative_log_effort_to_minimize':
        minimize = 'value'
    elif statistic == 'best':
        minimize = 'value'
        select = 'value'

    def find_first_epoch(group):
        return group.sort_values(minimize).iloc[0][select]

    composite = filtered.groupby(partition_keys).apply(find_first_epoch).reset_index(name=select)

    z_label = statistic
    if statistic == 'best':
        z_label, minimizing, best = setup_value(composite, loss, color_range)
        composite[statistic] = composite[select] / np.min(composite[select])
    else:
        compute_effort(composite)
        if statistic == 'relative_log_effort_to_minimize':
            composite['relative_log_effort_to_minimize'] = composite['relative_log_effort']
    
    color_continuous_scale = 'Greens_r'
    if statistic in ['epoch']:
        color_continuous_scale = 'Sunsetdark'

    if viz == 'imshow':
        x_key = 'depth'
        y_key = 'budget'
        img = composite.pivot_table(columns=x_key, index=y_key, values=statistic)
        fig = px.imshow(img, color_continuous_scale=color_continuous_scale,
                       labels=dict(x=x_key, y=y_key, color=z_label))
        axis_data = [dict(
            categoryorder='array',
            categoryarray=p[2],
            title=p[0],
            type='category') 
            for p in partitions]    
        fig.update_layout(
            xaxis = axis_data[0],
            yaxis = axis_data[1],
            title=f'{dataset}, {topology}, residual {residual_mode}<br>{statistic} to reach {"{:.2f}".format(100*(color_range-1))}% of best {loss}',
#             coloraxis=dict(
#                 title=statistic
#                 )
            coloraxis_colorbar_x=.7,
            )
    elif viz == 'depth_lines':
        fig = px.line(composite, x='budget', y=statistic, color='depth', log_x=True)
        
    elif viz == 'budget_lines':
        fig = px.line(composite, x='depth', y=statistic, color='budget')
    return fig



interactive(children=(Text(value='fixed_3k_1', description='group'), Dropdown(description='learning_rate', ind…

In [9]:
### Heapmap App using Imshow

@interact_manual(**options, 
                 viz=["imshow", "scatter", "effort"], 
                 depths=widgets.IntRangeSlider(min=2, max=20, step=1, value=(2,20)))
def heatmap_app(
    group='fixed_3k_1',
    learning_rate=0.0001,
    label_noise = 0.0,
    dataset="201_pol", 
    topology="trapezoid", 
    residual_mode="none",
    loss="val_loss", 
    agg="avg",
    viz="effort", 
    depths=(2,20),
    color_range=1.3):
    
    query_string = f'''
    select budget, {agg}(a.val) as value, count(a.val) as count, a.epoch
    from
        materialized_experiments_3_base base,
        materialized_experiments_3_loss loss,
        unnest(loss.{loss}) WITH ORDINALITY as a(val, epoch)
    WHERE
        base.id = loss.id and 
        groupname = {string_to_id(group)} and
        dataset = {string_to_id(dataset)} and
        learning_rate = {learning_rate}::real and
        label_noise = {label_noise}::real and
        topology = {string_to_id(topology)} and
        residual_mode = {string_to_id(residual_mode)} and
        depth BETWEEN {depths[0]} and {depths[1]}
    GROUP BY budget, epoch
    ORDER BY budget, epoch;
    '''
    print('Querying...')
    df = cached_query(query_string)
    print('done.')
    return make_2d_heatmap_viz(df, group, dataset, topology, loss, agg, residual_mode, viz, color_range)

interactive(children=(Text(value='fixed_3k_1', description='group'), Dropdown(description='learning_rate', ind…