# Batch run models
1. Running model
2. Evaluating results

In [None]:
%load_ext lab_black
import os, json, multiprocessing
import pandas as pd
import altair as alt
import papermill as pm
from time import sleep
from meta import model_cfg, make_batch_cfg, check_cfgs_params, parse_batch_results
from evaluate import make_df_wnw

## Run batch
Make configs

In [None]:
# Name of this sweep and the folder that receives its evaluation output.
# (Every run uses the fixed `rng_seed` below; an earlier variant that drew
#  ten random seeds has been retired.)
batch_name = "O2P_rer2019"
batch_output_dir = "batch_eval/{}/".format(batch_name)

# Hyper-parameters that vary across runs: one list of candidate values each.
param_grid = dict(
    p_noise=[0.0, 1.0, 2.0, 3.0],
    hidden_units=[50, 100, 150, 200],
    learning_rate=[0.001, 0.005, 0.01],
    cleanup_units=[10, 50],
)

# Hyper-parameters held constant for every run in the batch.
static_hpar = dict(
    sample_name="jay",
    rng_seed=4321,
    use_semantic=False,
    input_dim=119,
    output_dim=250,
    use_attractor=False,
    rnn_activation="sigmoid",
    regularizer_const=None,
    w_initializer="glorot_uniform",
    tau=0.2,
    max_unit_time=4.0,
    optimizer="adam",
    n_mil_sample=1.0,
    batch_size=128,
    save_freq=10,
    bq_dataset=batch_name,
)

# One config per run, all executed from the OSP_master.ipynb template.
batch_cfgs = make_batch_cfg(batch_name, static_hpar, param_grid, "OSP_master.ipynb")

Parallel run

In [None]:
# Run
def run_batch(cfg):
    """
    Execute a single parameterized notebook run through papermill.

    `cfg` is a mapping providing:
      'sn'            label printed when the run starts
      'model_folder'  directory created for the run if it does not exist
      'in_notebook'   notebook handed to papermill as input
      'out_notebook'  path of the executed notebook papermill writes
      'params'        parameters forwarded to papermill
    """
    run_label = cfg["sn"]
    print("Running model {}".format(run_label))

    # Create the run's model folder; an already existing folder is fine.
    model_dir = cfg["model_folder"]
    os.makedirs(model_dir, exist_ok=True)

    source_nb = cfg["in_notebook"]
    executed_nb = cfg["out_notebook"]
    pm.execute_notebook(source_nb, executed_nb, parameters=cfg["params"])

# Distribute the configs over 4 worker processes and block until all are done;
# the pool is terminated afterwards whether or not a run raised.
pool = multiprocessing.Pool(4)
try:
    pool.map(run_batch, batch_cfgs)
finally:
    pool.terminate()

Compile and save results

In [None]:
# Parse the results of the batch into a single DataFrame.
# The config list is named `batch_cfgs`; the name `cfgs` used here before is
# not defined anywhere in the notebook and fails on a fresh kernel.
df = parse_batch_results(batch_cfgs)

# Ensure the output directory exists before writing the compiled results.
os.makedirs(batch_output_dir, exist_ok=True)
df.to_csv(batch_output_dir + 'bcdf.csv')

Shut down the compute engine (powers off the machine once the batch has finished)

In [None]:
# Presumably sends a notification e-mail for this batch.
# NOTE(review): `send_mail` is not imported or defined in any cell shown in
# this notebook -- confirm where it comes from, otherwise this raises NameError.
send_mail(batch_name)
# Wait 30 seconds before shutting down.
sleep(30)
# Power off the host machine (IPython shell escape). The kernel goes down with
# it, so the plotting cells further down cannot run in the same session.
!sudo poweroff  

## Plotting

Review the batch structure

In [None]:
# Inspect the parameters of the batch configs built in the "Make configs" cell.
# (`batch_cfgs` is the name defined there; `cfgs` is never defined.)
check_cfgs_params(batch_cfgs)

Create the reusable overview heatmap and the word vs. nonword DataFrame

In [None]:
# Use Altair's default data transformer with the row limit switched off.
alt.data_transformers.enable("default")
alt.data_transformers.disable_max_rows()

# Click selections shared by the charts in the following cells:
#   sel_run  -- selects runs by `code_name`
#   sel_cond -- selects conditions by `cond`, also bound to the legend
sel_run = alt.selection(type="multi", on="click", fields=["code_name"])
sel_cond = alt.selection(type="multi", on="click", fields=["cond"], bind="legend")

# Overview data: only rows at the last epoch AND the last time step.
df_ov = df.loc[(df.epoch == df.epoch.max()) & (df.timestep == df.timestep.max())]

# Heatmap of accuracy over the hyper-parameter grid; clicking a cell picks
# that run for the detail plots.
# NOTE(review): if `df` holds several `cond` rows per run, each cell draws one
# rect per condition on top of each other -- confirm this is intended.
overview = (
    alt.Chart(df_ov)
    .mark_rect()
    .encode(
        x="p_noise:O",
        y="hidden_units:O",
        row="learning_rate:O",
        column="cleanup_units:O",
        color=alt.Color("acc", scale=alt.Scale(scheme="redyellowgreen")),
        opacity=alt.condition(sel_run, alt.value(1), alt.value(0)),
        tooltip=["code_name", "acc"],
    )
    .add_selection(sel_run)
    .properties(title="Overall accuracy")
)

# Word vs. nonword accuracy frame built from three selected conditions.
df_wnw = make_df_wnw(df, selected_cond=["INC_HF", "ambiguous", "unambiguous"])

Single run plots

In [None]:
# Keep only the final time step; the line chart then shows accuracy per epoch
# for the run(s) currently picked in the overview heatmap.
df_laststep = df.loc[df.timestep == df.timestep.max()]

acc_plot = (
    alt.Chart(df_laststep)
    .mark_line(point=True)
    .encode(
        y=alt.Y("acc:Q", scale=alt.Scale(domain=(0, 1))),
        x="epoch",
        color="cond",
        opacity=alt.condition(sel_cond, alt.value(1), alt.value(0)),
        tooltip=["code_name", "acc"],
    )
    .add_selection(sel_cond)
    .transform_filter(sel_run)
    .properties(title="Full model at final time step")
)

# Word accuracy (x) against nonword accuracy (y), colored by epoch.
wnw_plot = (
    alt.Chart(df_wnw)
    .mark_point()
    .encode(
        y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1))),
        x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1))),
        color=alt.Color("epoch", scale=alt.Scale(scheme="redyellowgreen")),
        tooltip=["epoch", "word_acc", "nonword_acc"],
    )
    .transform_filter(sel_run)
    .properties(title="Word vs. Nonword accuracy at final time step")
)

# Black identity line from (0, 0) to (1, 1); reused by the multi-run cell.
diagline = (
    alt.Chart(pd.DataFrame(dict(x=[0, 1], y=[0, 1])))
    .mark_line(color="black")
    .encode(x="x", y="y")
)

# Layout: overview on the left, the two detail plots stacked on the right.
wnw_with_diag = alt.layer(wnw_plot, diagline)
mainplots = alt.vconcat(acc_plot, wnw_with_diag)
splot = alt.hconcat(overview, mainplots)

splot.save(batch_output_dir + "single_run.html")
splot

Multi-run plots

In [None]:
# Long format: one row per (run, epoch, word/nonword) carrying its accuracy.
wnw_mdf = pd.melt(
    df_wnw,
    id_vars=["code_name", "epoch"],
    value_vars=["word_acc", "nonword_acc"],
    var_name="wnw",
    value_name="acc",
)

# Accuracy by epoch for the selected runs; marker shape separates word/nonword.
plot_epoch = (
    alt.Chart(wnw_mdf)
    .mark_point(size=80)
    .encode(
        y=alt.Y("acc:Q", scale=alt.Scale(domain=(0, 1))),
        x="epoch:Q",
        color="code_name:N",
        shape="wnw:N",
        opacity=alt.condition(sel_run, alt.value(1), alt.value(0)),
        tooltip=["code_name", "epoch", "acc"],
    )
    .add_selection(sel_run)
    .transform_filter(sel_run)
    .properties(title="Plot word and nonword accuracy by epoch")
)

# Word vs. nonword trajectory, one line per run; unselected runs are hidden.
plot_wnw = (
    alt.Chart(df_wnw)
    .mark_line(point=True)
    .encode(
        y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1))),
        x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1))),
        color="code_name:N",
        opacity=alt.condition(sel_run, alt.value(1), alt.value(0)),
        tooltip=["code_name", "epoch", "word_acc", "nonword_acc"],
    )
    .add_selection(sel_run)
    .properties(title="Word vs. Nonword accuracy at final time step")
)

# `diagline` is the identity line defined in the single-run cell.
plot_wnw_diag = alt.layer(plot_wnw, diagline)

multi_plot = alt.hconcat(overview, alt.vconcat(plot_epoch, plot_wnw_diag))
multi_plot.save(batch_output_dir + "multi_runs.html")
multi_plot

## Save and shutdown

In [None]:
# !jupyter nbconvert --output-dir=$batch_output_dir --to html batch.ipynb

### Maybe useful

In [None]:
def main_effect_plot(df, var):
    """
    Plot the main effect of one variable on word and nonword accuracy.

    Accuracies are averaged (the `pivot_table` default) over all rows that
    share the same `epoch` and `var` value. The result is shown as three
    panels side by side:

      1. heatmap of word accuracy by `var` and epoch
      2. heatmap of word advantage (word_acc - nonword_acc) by `var` and epoch
      3. word vs. nonword accuracy lines, one per level of `var`, drawn over
         the identity diagonal

    Clicking a heatmap cell selects that level of `var`; unselected levels
    are dimmed in the heatmaps and hidden in the line plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'epoch', 'word_acc', 'nonword_acc' and `var`.
    var : str
        Name of the column whose main effect is plotted.

    Returns
    -------
    The horizontally concatenated Altair chart of the three panels.
    """
    # Mean accuracy per (epoch, var) plus the derived word advantage.
    pdf = (
        df.pivot_table(index=["epoch", var], values=["word_acc", "nonword_acc"])
        .reset_index()
        .assign(word_advantage=lambda d: d.word_acc - d.nonword_acc)
    )

    sel_var = alt.selection(type="multi", on="click", fields=[var])

    def _heatmap(color_field, color_domain, title):
        """Build an epoch-by-`var` heatmap colored by `color_field`."""
        return (
            alt.Chart(pdf)
            .mark_rect()
            .encode(
                y=alt.Y(var, type="ordinal"),
                x="epoch:O",
                color=alt.Color(
                    color_field,
                    scale=alt.Scale(scheme="redyellowgreen", domain=color_domain),
                ),
                opacity=alt.condition(sel_var, alt.value(1), alt.value(0.1)),
                tooltip=["word_acc", "nonword_acc"],
            )
            .add_selection(sel_var)
            .properties(title=title)
        )

    overview_bias = _heatmap(
        "word_advantage",
        (-0.2, 0.2),
        "Word - Nonword accuracy (word_advantage) heatmap by {} and epoch".format(
            var
        ),
    )

    # This panel is colored by word_acc, so its title says so; it previously
    # carried the word_advantage title copied from the panel above.
    overview_wacc = _heatmap(
        "word_acc",
        (0, 1),
        "Word accuracy heatmap by {} and epoch".format(var),
    )

    wnw_line = alt.Chart(pdf).mark_line().encode(
        y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1))),
        x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1))),
        color=alt.Color(var, type="ordinal", scale=alt.Scale(scheme="magma")),
        opacity=alt.condition(sel_var, alt.value(0.9), alt.value(0)),
        tooltip=[var, "epoch", "word_acc", "nonword_acc"],
    )

    # Identity line: points above it have nonword accuracy > word accuracy.
    diagonal = alt.Chart(pd.DataFrame({
        'x': [0, 1],
        'y': [0, 1]
    })).mark_line(color='black').encode(x='x', y='y')

    wnw = diagonal + wnw_line

    return overview_wacc | overview_bias | wnw

In [None]:
def main_dashboard(df):
    """
    Build the interactive multi-run dashboard for a word/nonword results frame.

    Layout: a hyper-parameter heatmap of word accuracy at the last epoch on
    the left; on the right, accuracy by epoch stacked above the word vs.
    nonword trajectory drawn over the identity diagonal. Clicking selects
    runs by `code_name`; unselected runs are dimmed in the heatmap and hidden
    in the other two panels.

    `df` needs the columns code_name, epoch, word_acc, nonword_acc, p_noise,
    hidden_units, cleanup_units and learning_rate. Returns the combined
    Altair chart.
    """
    sel_run = alt.selection(type="multi", on="click", fields=["code_name"])

    # Heatmap data: rows belonging to the last epoch only.
    final_epoch_rows = df.loc[df.epoch == df.epoch.max()]

    grid_heatmap = (
        alt.Chart(final_epoch_rows)
        .mark_rect()
        .encode(
            x="p_noise:O",
            y="hidden_units:O",
            row="learning_rate:O",
            column="cleanup_units:O",
            color=alt.Color(
                "word_acc", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1))
            ),
            opacity=alt.condition(sel_run, alt.value(1), alt.value(0.1)),
            tooltip=[
                "code_name",
                "p_noise",
                "hidden_units",
                "cleanup_units",
                "learning_rate",
                "word_acc",
                "nonword_acc",
            ],
        )
        .add_selection(sel_run)
        .properties(title="Overall accuracy")
    )

    # Long format: one row per (run, epoch, word/nonword) with its accuracy.
    long_acc = pd.melt(
        df,
        id_vars=["code_name", "epoch"],
        value_vars=["word_acc", "nonword_acc"],
        var_name="wnw",
        value_name="acc",
    )

    by_epoch = (
        alt.Chart(long_acc)
        .mark_point(size=80)
        .encode(
            y=alt.Y("acc:Q", scale=alt.Scale(domain=(0, 1))),
            x="epoch:Q",
            color="code_name:N",
            shape="wnw:N",
            opacity=alt.condition(sel_run, alt.value(1), alt.value(0)),
            tooltip=["code_name", "epoch", "acc"],
        )
        .add_selection(sel_run)
        .properties(title="Plot word and nonword accuracy by epoch")
    )

    trajectory = (
        alt.Chart(df)
        .mark_line(point=True)
        .encode(
            y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1))),
            x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1))),
            color="code_name:N",
            opacity=alt.condition(sel_run, alt.value(1), alt.value(0)),
            tooltip=["code_name", "epoch", "word_acc", "nonword_acc"],
        )
        .add_selection(sel_run)
        .properties(title="Word vs. Nonword accuracy at final time step")
    )

    # Black identity line from (0, 0) to (1, 1), drawn beneath the trajectory.
    identity_line = (
        alt.Chart(pd.DataFrame(dict(x=[0, 1], y=[0, 1])))
        .mark_line(color="black")
        .encode(x="x", y="y")
    )

    right_column = alt.vconcat(by_epoch, alt.layer(identity_line, trajectory))
    return alt.hconcat(grid_heatmap, right_column)