In [17]:
import pandas as pd 
import os 
import plotly.express as px 
import plotly.graph_objects as go


def parse_experiment_name(filename): 
    """Split an experiment name into its five dash-separated components.

    The name must consist of exactly five '-'-separated fields:
    ``sample-model-dataset-<key>=<num_hidden>-<key>=<outputscale>``.
    The name is echoed to stdout before parsing.

    Returns:
        Tuple ``(sample, model, dataset, num_hidden, outputscale)``; every
        element is a string (the last two are the text after the first '=').

    Raises:
        ValueError: if the name does not have exactly five fields.
        IndexError: if either of the last two fields contains no '='.
    """
    print(filename)

    fields = filename.split('-')
    sample, model, dataset, hidden_field, scale_field = fields
    # Keep only the value part of the two "key=value" fields.
    hidden_value, scale_value = (
        field.split("=")[1] for field in (hidden_field, scale_field)
    )
    return sample, model, dataset, hidden_value, scale_value


# Root of the logged test metrics: one sub-directory per experiment, each
# containing one sub-directory per run with a `metrics.csv` file inside.
root_dir = '../logs/csv/test/'

dfs = []
for experiment_name in os.listdir(root_dir): 
    # Only experiments whose name starts with a known sample prefix are loaded;
    # anything else (e.g. the performance vs depth experiments) is skipped.
    if experiment_name.startswith('naive'): 
        parse_name = experiment_name
    elif experiment_name.startswith(('parametrised', 'intrinsic', 'rotated')): 
        # Insert "geometric_manifold" as the second field so the name has the
        # five-field layout that parse_experiment_name unpacks.
        prefix, *remaining_fields = experiment_name.split('-')
        parse_name = '-'.join([prefix, "geometric_manifold", *remaining_fields])
    else: 
        continue

    sample, model, dataset, num_hidden, outputscale = parse_experiment_name(parse_name)
    experiment_path = os.path.join(root_dir, experiment_name)
    for version in os.listdir(experiment_path):
        file_path = os.path.join(experiment_path, version, 'metrics.csv')
        # Check for the metrics file first, so stray entries that are not run
        # directories never reach the `version.split('_')[1]` lookup below.
        if not os.path.exists(file_path):
            continue

        # One-row frame of experiment metadata; the run id is the text after
        # the first '_' in the run directory name.
        experiment_df = pd.DataFrame.from_dict(dict(
            sample=[sample], model=[model], dataset=[dataset], num_hidden=[num_hidden], outputscale=[outputscale], run=[version.split('_')[1]]
        ))
        metrics_df = pd.read_csv(file_path)
        df = pd.concat([experiment_df, metrics_df], axis=1)
        dfs.append(df)

# The metadata frame has a single row, so when a metrics file has more rows
# the metadata columns are NaN below the first one; forward-fill propagates
# them down. NOTE: the fill runs over the whole concatenated frame, so missing
# metric values are also filled from the preceding row.
# `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
data = pd.concat(dfs).ffill()
data['num_hidden'] = data['num_hidden'].astype(int)
data['outputscale']= data['outputscale'].astype(float)
data['run'] = data['run'].astype(int)
# Keep only the 'singular' dataset at outputscale 0.01 with fewer than five hidden layers.
data = data[(data['num_hidden'] < 5) & (data['dataset'] == 'singular') & (data['outputscale'] == 0.01)]
data = data[['sample', 'num_hidden', 'outputscale', 'run', 'test-negative_log_predictive_density', 'test-mean_squared_error']]

naive-geometric_manifold-smooth-num_hidden=3-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=1-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=1-outputscale=0.1
parametrised-geometric_manifold-singular-num_hidden=4-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=5-outputscale=0.01
intrinsic-geometric_manifold-singular-num_hidden=4-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=4-outputscale=0.0001
naive-geometric_manifold-smooth-num_hidden=2-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=2-outputscale=0.0001
parametrised-geometric_manifold-singular-num_hidden=1-outputscale=0.01
intrinsic-geometric_manifold-singular-num_hidden=1-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=0-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=4-outputscale=0.01
naive-geometric_manifold-singular-num_hidden=1-outputscale=0.0001
naive-geometric_manifold-singular-num_hidden=0-outputscale=0.1
naive-geometric_ma

In [14]:
# Rows with at least one hidden layer, re-indexed (the old index is kept as an
# 'index' column), with the sample names swapped for plot labels. Samples
# without an entry in the mapping (e.g. 'rotated') become NaN.
df = (
    data.loc[data['num_hidden'] > 0]
    .reset_index()
    .assign(sample=lambda frame: frame['sample'].map({
        'naive': 'projected',
        'intrinsic': 'coord. frame<br>fixed',
        'parametrised': 'coord. frame<br>parametrised',
    }))
)

In [41]:
# Box + strip plot of test NLPD against the number of hidden layers for the
# 'naive' and 'intrinsic' samples, with a dashed reference line at the mean
# NLPD of the num_hidden == 0 rows. The figure is shown and exported to PDF.
df = data[data['num_hidden'] > 0]
# Take an explicit copy before relabelling: assigning a column on a filtered
# slice of `data` would otherwise trigger pandas' SettingWithCopyWarning.
df = df[df['sample'].isin({'naive', 'intrinsic'})].copy()
df['sample'] = df['sample'].map({'naive': 'projected', 'intrinsic': 'coordinate<br>frame'})
fig = px.box(df, y="test-negative_log_predictive_density", x="num_hidden", color="sample", hover_data=df.columns)

# Make the boxes semi-transparent so the overlaid points stay visible
for trace in fig.data:
    trace.opacity = 0.6

# Overlay a scatter plot on top of the box plot; the strip traces are hidden
# from the legend so each sample is listed only once.
scatter_fig = px.strip(df, y="test-negative_log_predictive_density", x="num_hidden", color="sample", hover_data=df.columns)
for trace in scatter_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)


# Reference level: mean NLPD over all rows with no hidden layers.
mean = data['test-negative_log_predictive_density'][data['num_hidden'] == 0].mean()
fig.add_shape(
    type='line',
    y0=mean, y1=mean,
    x0=.5, x1=4.5,
    line=dict(
        color='Gray',
        width=1,
        dash="dash",
    ), 
    layer='below',
    name='mean',
    opacity=0.8,
)
# Data-less trace styled like the reference line, added only so that the line
# gets a legend entry.
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(
        color="Gray", 
        width=1, 
        dash="dash",
    ),
    showlegend=True,
    name='GP Mean',
))


fig.update_layout(
    font_family="Computer Modern",
    title=dict(
        text="NLPD by GVF Construction vs Number of Hidden Layers",
        # xanchor='center', 
        # y=0.96
    ),
    legend=dict(
        title="GVF Construction", 
        x=1.05, 
        y=0.55,
        yanchor='middle',
    ),
    xaxis_title=dict(
        text="Number of Hidden Layers",
    ),
    yaxis_title=dict(
        text="NLPD", 
    ),
)

# Max width 453
width = 380
ratio = 0.4
height = width * ratio
fig.update_layout(
    width=width,
    height=height,
    title_font_size=12,
    xaxis_title_font_size=10,
    xaxis_tickfont_size=10,
    yaxis_title_font_size=10,
    yaxis_tickfont_size=10,
    legend_font_size=10, 
    legend_title_font_size=10,
    font_size=10,
    xaxis_title_standoff=5,  # Adjust this value for the x-axis title
    yaxis_title_standoff=5,   # Adjust this value for the y-axis title
    margin=dict(b=10, l=10, r=10, t=20),

)
# Applies the marker size to every trace in the figure.
fig.update_traces({'marker':{'size': 4}})

fig.show()
fig.write_image('../report_plots/nlpd_by_gvf_construction.pdf', scale=6, height=height, width=width)

In [62]:
# Box + strip plot of test NLPD for the runs with exactly one hidden layer,
# one box per sample, with a dashed reference line at the mean NLPD of the
# num_hidden == 0 rows. The figure is shown and exported to PDF.
df = data[data['num_hidden'] == 1]
# Take an explicit copy before relabelling: assigning a column on a filtered
# slice of `data` would otherwise trigger pandas' SettingWithCopyWarning.
df = df[df['sample'].isin({'rotated', 'intrinsic', 'parametrised', 'naive'})].copy()
df['sample'] = df['sample'].map({'naive': 'projected', 'intrinsic': 'fixed', 'rotated': 'fixed rotated', 'parametrised': 'parametrised'})
df = df[['sample', 'run', 'test-negative_log_predictive_density']]

fig = px.box(df, y="test-negative_log_predictive_density", color="sample", hover_data=df.columns)

# Make the boxes semi-transparent so the overlaid points stay visible
for trace in fig.data:
    trace.opacity = 0.6

# Overlay a scatter plot on top of the box plot; the strip traces are hidden
# from the legend so each sample is listed only once.
scatter_fig = px.strip(df, y="test-negative_log_predictive_density", color="sample", hover_data=df.columns)
for trace in scatter_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)


# Reference level: mean NLPD over all rows with no hidden layers.
mean = data['test-negative_log_predictive_density'][data['num_hidden'] == 0].mean()
fig.add_shape(
    type='line',
    y0=mean, y1=mean,
    x0=-.5, x1=0.5,
    line=dict(
        color='Gray',
        width=1,
        dash="dash",
    ), 
    layer='below',
    name='mean',
    opacity=0.8,
)
# Data-less trace styled like the reference line, added only so that the line
# gets a legend entry.
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(
        color="Gray", 
        width=1, 
        dash="dash",
    ),
    showlegend=True,
    name='GP Mean',
))


fig.update_layout(
    font_family="Computer Modern",
    title=dict(
        text="NLPD by GVF Construction for One Hidden Layer",
        # xanchor='center', 
        # y=0.96
    ),
    legend=dict(
        title="Coordinate Frame", 
        x=1.05, 
        y=0.55,
        yanchor='middle',
    ),
    yaxis_title=dict(
        text="NLPD", 
    ),
)

# Max width 453
width = 280
ratio = 0.7
height = width * ratio
fig.update_layout(
    width=width,
    height=height,
    title_font_size=12,
    xaxis_title_font_size=10,
    xaxis_tickfont_size=10,
    yaxis_title_font_size=10,
    yaxis_tickfont_size=10,
    legend_font_size=10, 
    legend_title_font_size=10,
    font_size=10,
    xaxis_title_standoff=5,  # Adjust this value for the x-axis title
    yaxis_title_standoff=5,   # Adjust this value for the y-axis title
    margin=dict(b=10, l=10, r=10, t=20),
    legend_tracegroupgap=0,
)
# Applies the marker size to every trace in the figure.
fig.update_traces({'marker':{'size': 4}})

fig.show()
fig.write_image('../report_plots/nlpd_by_gvf_construction_one_hidden.pdf', scale=6, height=height, width=width)