# Validation metrics 

In [152]:
import pandas as pd 
import os 


def parse_experiment_name(filename): 
    num_train, model, dataset, num_hidden, outputscale = filename.split('-')
    num_hidden = num_hidden.split("=")[1]
    outputscale = outputscale.split("=")[1]
    num_train = num_train.split("=")[1]
    return num_train, model, dataset, num_hidden, outputscale


root_dir = '../logs/csv/test/'

dfs = []
for experiment_name in os.listdir(root_dir): 
    # exclude performance vs depth experiments 
    if experiment_name.startswith('naive'): 
        parse_name = 'num_train=400-' + '-'.join(experiment_name.split('-')[1:])
    elif experiment_name.startswith('num_train=200') or experiment_name.startswith('num_train=100'): 
        if experiment_name.endswith("'naive'"): 
            parse_name = '-'.join(experiment_name.split('-')[:-1])
        else: 
            parse_name = experiment_name
    else: 
        continue 

    num_train, model, dataset, num_hidden, outputscale = parse_experiment_name(parse_name)
    experiment_path = os.path.join(root_dir, experiment_name)
    for version in os.listdir(experiment_path):
        experiment_df = pd.DataFrame.from_dict(dict(
            num_train=[num_train], model=[model], dataset=[dataset], num_hidden=[num_hidden], outputscale=[outputscale], run=[version.split('_')[1]]
        ))
        file_path = os.path.join(experiment_path, version, 'metrics.csv')
        if os.path.exists(file_path):
            metrics_df = pd.read_csv(file_path)
            df = pd.concat([experiment_df, metrics_df], axis=1)
            dfs.append(df)

data = pd.concat(dfs).fillna(method='ffill')
data['num_hidden'] = data['num_hidden'].astype(int)
data_smooth = data[data['dataset'] == 'smooth']
data = data[(data['num_hidden'] < 5) & (data['dataset'] == 'singular')]

In [202]:
import plotly.express as px 
import plotly.graph_objects as go

# Create a box plot
df = data_smooth[data_smooth['num_hidden'] > 0]

box_fig = px.box(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)

# Create an empty figure
fig = go.Figure()

# Add each box trace to the new figure
for trace in box_fig.data:
    trace.showlegend=False
    trace.opacity = 0.6
    fig.add_trace(trace)

# Overlay a scatter plot on top of the box plot
scatter_fig = px.strip(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)
for trace in scatter_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)

mean = data_smooth['test-negative_log_predictive_density'][data_smooth['num_hidden'] == 0].mean()
fig.add_shape(
    type='line',
    y0=mean, y1=mean,
    x0=.5, x1=4.5,
    line=dict(
        color='Gray',
        width=2,
        dash="dash",
    ), 
    layer='below',
    name='mean',
    opacity=0.8,
)
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(color="Gray", width=2, dash="dash"),
    showlegend=True,
    name='GP Mean',
))

fig.update_layout(
    font_family="Serif",
    title=dict(
        text="Negative Log Predictive Density vs Number of Hidden Layers<br>with Smooth Target Function", 
        font_size=18,
        y=0.85, 
        x=0.5,
    ),
    legend=dict(
        x=0.0, 
        y=1.00,
        yanchor='top',
        xanchor='left',
        orientation='h',
        bgcolor='rgba(255,255,255,0.3)'
    ),
    xaxis_title=dict(
        text="Number of Hidden Layers",
        font_size=16,
    ),
    yaxis_title=dict(
        text="Negative Log Predictive Density", 
        font_size=16, 
    ),
    font_size=14, 
    width=500, 
    height=350, 
)

fig.show()
fig.write_image('../report_plots/nlpd_vs_num_hidden_smooth.jpg', scale=6, height=350, width=500)
fig.write_image('../report_plots/nlpd_vs_num_hidden_smooth.svg', scale=6, height=350, width=500)

In [37]:
import plotly.express as px 
import plotly.graph_objects as go

# Create a box plot
df = data[(data['num_train'] == '100') & (data['num_hidden'] > 0)]

fig = px.box(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)

# Create an empty figure
# fig = go.Figure()

# Add each box trace to the new figure
for trace in fig.data:
    # trace.showlegend=False
    trace.opacity = 0.6
    # fig.add_trace(trace)

# Overlay a scatter plot on top of the box plot
scatter_fig = px.strip(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)
for trace in scatter_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)

hidden_num_0_mean = data['test-negative_log_predictive_density'][(data['num_hidden'] == 0) & (data['num_train'] == '100')].mean()
fig.add_shape(
    type='line',
    y0=hidden_num_0_mean, y1=hidden_num_0_mean,
    x0=.5, x1=4.5,
    line=dict(
        color='Gray',
        width=2,
        dash="dash",
    ), 
    layer='below',
    name='mean',
    opacity=0.8,
)
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(color="Gray", width=2, dash="dash"),
    showlegend=True,
    name='GP Mean',
))

fig.update_layout(
    width=700, 
    height=400, 
    font_family="Serif",
    title=dict(
        text="Negative Log Predictive Density by Hidden Layer Standard Deviation Prior<br>vs Number of Hidden Layers", 
        font_size=20,
        y=0.96
    ),
    legend=dict(
        title="Prior Mean and<br>Standard Deviation", 
        x=1.05, 
        y=0.55,
        yanchor='middle',
    ),
    xaxis_title=dict(
        text="Number of Hidden Layers",
        font_size=16,
    ),
    yaxis_title=dict(
        text="Negative Log Predictive Density", 
        font_size=16, 
    ),
    font_size=14, 
)

fig.show()
# fig.write_image('../report_plots/nlpd_vs_num_hidden_layers_and_outputscale.png', scale=6, height=400, width=700)

In [36]:
import plotly.express as px 
import plotly.graph_objects as go

# Create a box plot
df = data[(data['num_train'] == '200') & (data['num_hidden'] > 0)]

fig = px.box(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)

# Create an empty figure
# fig = go.Figure()

# Add each box trace to the new figure
for trace in fig.data:
    # trace.showlegend=False
    trace.opacity = 0.6
    # fig.add_trace(trace)

# Overlay a scatter plot on top of the box plot
scatter_fig = px.strip(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", hover_data=df.columns)
for trace in scatter_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)

hidden_num_0_mean = data['test-negative_log_predictive_density'][(data['num_hidden'] == 0) & (data['num_train'] == '200')].mean()
fig.add_shape(
    type='line',
    y0=hidden_num_0_mean, y1=hidden_num_0_mean,
    x0=.5, x1=4.5,
    line=dict(
        color='Gray',
        width=2,
        dash="dash",
    ), 
    layer='below',
    name='mean',
    opacity=0.8,
)
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(color="Gray", width=2, dash="dash"),
    showlegend=True,
    name='GP Mean',
))

fig.update_layout(
    width=700, 
    height=400, 
    font_family="Serif",
    title=dict(
        text="Negative Log Predictive Density by Hidden Layer Standard Deviation Prior<br>vs Number of Hidden Layers", 
        font_size=20,
        y=0.96
    ),
    legend=dict(
        title="Prior Mean and<br>Standard Deviation", 
        x=1.05, 
        y=0.55,
        yanchor='middle',
    ),
    xaxis_title=dict(
        text="Number of Hidden Layers",
        font_size=16,
    ),
    yaxis_title=dict(
        text="Negative Log Predictive Density", 
        font_size=16, 
    ),
    font_size=14, 
)

fig.show()

In [151]:
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create a box plot
df = data[data['num_hidden'] > 0]

fig = make_subplots(rows=1, cols=3, shared_yaxes=False, vertical_spacing=0.02, horizontal_spacing=0.05, x_title="Number of Hidden Layers", y_title="Negative Log Predictive Density", 
                    column_titles=["100 training points", "200 training points", "400 training points"])

box_fig = px.box(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", facet_col='num_train', hover_data=df.columns)
for trace in box_fig.data:
    trace.opacity = 0.6
    fig.add_trace(trace)

# Overlay a scatter plot on top of the box plot
strip_fig = px.strip(df, y="test-negative_log_predictive_density", x="num_hidden", color="outputscale", facet_col='num_train', hover_data=df.columns)
for trace in strip_fig.data:
    trace.showlegend=False
    fig.add_trace(trace)

for i, num_train in enumerate(['100', '200', '400'], 1): 
    mean = data['test-negative_log_predictive_density'][(data['num_train'] == num_train) & (data['num_hidden'] == 0)].mean()
    fig.add_shape(
        type='line',
        y0=mean, y1=mean,
        x0=.5, x1=4.5,
        line=dict(
            color='Gray',
            width=2,
            dash="dash",
        ), 
        layer='below',
        name='mean',
        opacity=0.8,
        col=i, row=1, 
    )

# hidden_num_0_mean = data['test-negative_log_predictive_density'][(data['num_hidden'] == 0) & (data['num_train'] == '200')].mean()

fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='lines',
    line=dict(color="Gray", width=2, dash="dash"),
    showlegend=True,
    name='GP Mean',
), row=1, col=1)

fig.update_layout(
    font_family="Serif",
    title=dict(
        text="Negative Log Predictive Density by Hidden Layer Standard Deviation Prior and Number of Training Points vs Number of Hidden Layers", 
        font_size=20,
        y=0.9, 
        xanchor='center',
        x=.5,
    ),
    legend=dict(
        title="Prior Mean and<br>Standard Dev.", 
        x=1.01, 
        y=0.55,
        yanchor='middle',
        tracegroupgap=2,
    ),
    font_size=14, 
    boxmode='group', 
    width=1200, 
    height=350, 
)

fig.update_yaxes(dtick=0.2, col=1, row=1)
fig.update_yaxes(dtick=0.2, col=2, row=1)
fig.update_yaxes(dtick=0.2, col=3, row=1)

fig.update_annotations(font_size=18)

fig.show()
fig.write_image('../report_plots/nlpd_vs_outputscale_vs_num_traing_vs_num_hidden.jpg', scale=6, width=1200, height=350)
fig.write_image('../report_plots/nlpd_vs_outputscale_vs_num_traing_vs_num_hidden.svg', scale=6, width=1200, height=350)

In [28]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Read the data

# Get unique values for the facets and categories
num_train_values = df['num_train'].unique()
num_hidden_values = df['num_hidden'].unique()
outputscale_values = df['outputscale'].unique()

# Create subplots for different num_train values
fig = make_subplots(rows=len(num_train_values), cols=1, shared_xaxes=True, subplot_titles=num_train_values)

# Iterate through the unique values to create the box traces
for row, num_train in enumerate(num_train_values, start=1):
    for x_val, num_hidden in enumerate(num_hidden_values):
        for color_val, outputscale in enumerate(outputscale_values):
            # Filter the DataFrame for the current combination of values
            sub_df = df[(df['num_hidden'] == num_hidden) &
                        (df['outputscale'] == outputscale) &
                        (df['num_train'] == num_train)]
            
            # Create a Box trace for the filtered data
            trace = go.Box(
                y=sub_df["test-negative_log_predictive_density"],
                x=[num_hidden] * len(sub_df),
                marker=dict(color=color_val), # You can map color_val to specific color names
                name=f"Outputscale {outputscale}"
            )
            
            # Add the trace to the subplot for the current num_train value
            fig.add_trace(trace, row=row, col=1)

# Update layout if necessary
fig.update_layout(title="Your Title", xaxis_title="num_hidden", yaxis_title="test-negative_log_predictive_density")

# Show the plot
fig.show()


ValueError: 
    Invalid value of type 'builtins.int' received for the 'color' property of box.marker
        Received value: 0

    The 'color' property is a color and may be specified as:
      - A hex string (e.g. '#ff0000')
      - An rgb/rgba string (e.g. 'rgb(255,0,0)')
      - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
      - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
      - A named CSS color:
            aliceblue, antiquewhite, aqua, aquamarine, azure,
            beige, bisque, black, blanchedalmond, blue,
            blueviolet, brown, burlywood, cadetblue,
            chartreuse, chocolate, coral, cornflowerblue,
            cornsilk, crimson, cyan, darkblue, darkcyan,
            darkgoldenrod, darkgray, darkgrey, darkgreen,
            darkkhaki, darkmagenta, darkolivegreen, darkorange,
            darkorchid, darkred, darksalmon, darkseagreen,
            darkslateblue, darkslategray, darkslategrey,
            darkturquoise, darkviolet, deeppink, deepskyblue,
            dimgray, dimgrey, dodgerblue, firebrick,
            floralwhite, forestgreen, fuchsia, gainsboro,
            ghostwhite, gold, goldenrod, gray, grey, green,
            greenyellow, honeydew, hotpink, indianred, indigo,
            ivory, khaki, lavender, lavenderblush, lawngreen,
            lemonchiffon, lightblue, lightcoral, lightcyan,
            lightgoldenrodyellow, lightgray, lightgrey,
            lightgreen, lightpink, lightsalmon, lightseagreen,
            lightskyblue, lightslategray, lightslategrey,
            lightsteelblue, lightyellow, lime, limegreen,
            linen, magenta, maroon, mediumaquamarine,
            mediumblue, mediumorchid, mediumpurple,
            mediumseagreen, mediumslateblue, mediumspringgreen,
            mediumturquoise, mediumvioletred, midnightblue,
            mintcream, mistyrose, moccasin, navajowhite, navy,
            oldlace, olive, olivedrab, orange, orangered,
            orchid, palegoldenrod, palegreen, paleturquoise,
            palevioletred, papayawhip, peachpuff, peru, pink,
            plum, powderblue, purple, red, rosybrown,
            royalblue, rebeccapurple, saddlebrown, salmon,
            sandybrown, seagreen, seashell, sienna, silver,
            skyblue, slateblue, slategray, slategrey, snow,
            springgreen, steelblue, tan, teal, thistle, tomato,
            turquoise, violet, wheat, white, whitesmoke,
            yellow, yellowgreen