# Examine Rueckl19 dataset

In [None]:
%load_ext lab_black
import pandas as pd
import altair as alt
# Select Altair's "default" data transformer and switch off its max-rows
# check (presumably so the full results frame can be charted -- confirm).
alt.data_transformers.enable("default")
alt.data_transformers.disable_max_rows()

### Ingest, tidy

In [None]:
# Build the tidy analysis frame in a single pipeline: load the raw results,
# derive the two accuracy scores, normalise the column names, then keep only
# the columns and hyper-parameter settings examined in the rest of the notebook.
#
# Alternative scoring, kept for reference (average over every condition):
#   word_acc    = mean of HF_CON_Accuracy, HF_INC_Accuracy,
#                 LF_CON_Accuracy, LF_INC_Accuracy
#   nonword_acc = mean of NW_AMB_Accuracy, NW_UN_Accuracy
df = (
    pd.read_csv('plotdf.csv', index_col=0)
    .assign(
        # Row-wise mean over a one-column selection, so each score currently
        # reflects a single condition.
        word_acc=lambda raw: raw[['HF_INC_Accuracy']].mean(axis=1),
        nonword_acc=lambda raw: raw[['NW_UN_Accuracy']].mean(axis=1),
    )
    .rename(
        columns={
            'ID': 'code_name',  # "ID" is called code_name from here on
            'Trial.Scaled': 'epoch',  # "Trial.Scaled" is called epoch from here on
            'Pnoise': 'p_noise',
            'Hidden': 'hidden_units',
            'Epsilon': 'learning_rate',
            'PhoHid': 'cleanup_units',
            'Classification': 'group',
        }
    )
    .loc[
        :,
        [
            'code_name', 'epoch', 'hidden_units', 'cleanup_units', 'p_noise',
            'learning_rate', 'word_acc', 'nonword_acc', 'group'
        ],
    ]
    # Keep only the grid of settings of interest.
    .loc[
        lambda tidy: tidy.p_noise.isin([0, 2, 4, 8])
        & tidy.hidden_units.isin([50, 100, 250])
        & tidy.cleanup_units.isin([20])
        & tidy.learning_rate.isin([.002, .004, .006, .008, .01])
    ]
)

### Is there more than one model per unique combination of settings? Yes...

In [None]:
def count_models(df):
    """Print the number of models in *df* and chart the count per setting.

    The number of distinct ``code_name`` values is printed first. The frame is
    then collapsed to one row per ``code_name`` and the rows are counted for
    every combination of ``p_noise``, ``hidden_units``, ``learning_rate`` and
    ``cleanup_units``.

    Args:
        df: Frame containing the columns ``code_name``, ``p_noise``,
            ``hidden_units``, ``learning_rate`` and ``cleanup_units``.

    Returns:
        An Altair rect chart titled "Model counts" with ``p_noise`` on x,
        ``hidden_units`` on y, one facet row per ``learning_rate``, one facet
        column per ``cleanup_units``, coloured by the count ``n``.
    """
    setting_cols = ['p_noise', 'hidden_units', 'learning_rate', 'cleanup_units']

    n_models = len(df.code_name.unique())
    print('There are {} models in the datafile'.format(n_models))

    # One row per code_name: pivot_table's default aggregation collapses the
    # repeated rows that belong to the same model.
    per_model = df[['code_name'] + setting_cols].pivot_table(index='code_name')
    # Copy the identifier back into a regular column so it can be counted.
    per_model['code_name'] = per_model.index

    # Count identifiers within each combination of settings.
    counts = per_model.pivot_table(
        index=setting_cols, values='code_name', aggfunc='count'
    )
    counts = counts.reset_index().rename(columns={'code_name': 'n'})

    heatmap = alt.Chart(counts).mark_rect()
    heatmap = heatmap.encode(
        x="p_noise:O",
        y="hidden_units:O",
        row="learning_rate:O",
        column="cleanup_units:O",
        color="n:O",
        tooltip=[
            "p_noise", "hidden_units", "cleanup_units", "learning_rate", "n"
        ],
    )
    return heatmap.properties(title="Model counts")


# One sub-frame per classification group.
df_upper = df[df.group == 'Upper']
df_mid = df[df.group == 'Mid']
df_lower = df[df.group == 'Lower']

# Model-count charts side by side, left to right: all data, Upper, Mid, Lower.
count_by_group = count_models(df)
for _subset in (df_upper, df_mid, df_lower):
    count_by_group = count_by_group | count_models(_subset)

# Keep a standalone HTML copy of the combined chart.
count_by_group.save('count_model_selgridall.html')

count_by_group

# Replicate Fig2.

In [None]:
# Radio-button selection over the three classification groups.
sel_group = alt.selection(
    type="single",
    on="click",
    fields=['group'],
    bind=alt.binding_radio(
        name="Classification: ", options=['Upper', 'Mid', 'Lower']
    ),
)

# Word accuracy (x) against nonword accuracy (y), one point per row of `df`,
# coloured by epoch. Points matching the selection are drawn at opacity 0.2;
# all other points are fully transparent.
base = (
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X("word_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))),
        y=alt.Y(
            "nonword_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))
        ),
        color=alt.Color(
            "epoch", scale=alt.Scale(domain=(0, 1), scheme="redyellowgreen")
        ),
        opacity=alt.condition(sel_group, alt.value(0.2), alt.value(0)),
        tooltip=["code_name", "epoch", "word_acc", "nonword_acc"],
    )
    .add_selection(sel_group)
)

# Black reference line from (0, 0) to (1, 1): equal word and nonword accuracy.
diagonal = (
    alt.Chart(pd.DataFrame({'x': [0, 1], 'y': [0, 1]}))
    .mark_line(color='black')
    .encode(x='x', y='y')
)

alt.layer(diagonal, base)

### Group average plots

In [None]:
# Show which columns the tidy frame carries before aggregating.
df.columns

In [None]:
# Average accuracy per classification group and epoch. Only the two accuracy
# columns are aggregated: without an explicit `values`, pivot_table would also
# try to average every other column (including the identifier `code_name`),
# which is meaningless and fails on non-numeric columns in recent pandas.
dfg = df.pivot_table(
    index=['group', 'epoch'], values=['word_acc', 'nonword_acc']
).reset_index()

# Radio-button selection over the three classification groups.
sel_group = alt.selection(
    type="single",
    on="click",
    fields=['group'],
    bind=alt.binding_radio(
        options=['Upper', 'Mid', 'Lower'], name="Classification: "
    )
)

# Group-mean word accuracy (x) against nonword accuracy (y), coloured by
# epoch. Only the selected group is visible (opacity 1 vs. 0).
base = alt.Chart(dfg).mark_point().encode(
    y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1))),
    x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1))),
    color=alt.Color(
        "epoch", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1))
    ),
    opacity=alt.condition(sel_group, alt.value(1), alt.value(0)),
    # Rows are group-level averages, so the tooltip identifies the group
    # rather than an individual model's code_name.
    tooltip=["group", "epoch", "word_acc", "nonword_acc"],
).add_selection(sel_group)

# Black reference line from (0, 0) to (1, 1): equal word and nonword accuracy.
diagonal = alt.Chart(pd.DataFrame({
    'x': [0, 1],
    'y': [0, 1]
})).mark_line(color='black').encode(x='x', y='y')

diagonal + base

### Aggregate cell heatmaps

In [None]:
# Cell-level averages: one row per epoch and hyper-parameter combination.
# `values` is given explicitly so that only the two accuracy scores are
# averaged; otherwise pivot_table would also attempt to aggregate `code_name`
# and the string-valued `group` column, which recent pandas rejects.
dfc = df.pivot_table(
    index=[
        'epoch', 'hidden_units', 'cleanup_units', 'p_noise', 'learning_rate'
    ],
    values=['word_acc', 'nonword_acc'],
).reset_index()

# Positive values mean words are read more accurately than nonwords.
dfc['word_advantage'] = dfc.word_acc - dfc.nonword_acc

In [None]:
# Display the aggregated cell-level frame.
dfc

In [None]:
# Radio-button selector over the epochs present in the aggregated data.
# `.tolist()` turns the NumPy scalars returned by `unique()` into native
# Python numbers, so the binding options always serialise to JSON cleanly.
sel_epoch = alt.selection(
    type="single",
    on="click",
    fields=['epoch'],
    bind=alt.binding_radio(options=dfc.epoch.unique().tolist(), name="Epoch: ")
)

# Heatmap of word accuracy filtered by `sel_epoch`: p_noise on x,
# hidden_units on y, one facet row per learning rate.
alt.Chart(dfc).mark_rect().encode(
    x="p_noise:O",
    y="hidden_units:O",
    row="learning_rate:O",
    color=alt.Color(
        "word_acc", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1))
    ),
    tooltip=[
        "p_noise", "hidden_units", "cleanup_units", "learning_rate", "word_acc",
        "nonword_acc"
    ],
).add_selection(sel_epoch).transform_filter(sel_epoch).properties(
    title="Word acc"
)

In [None]:
# Heatmap of nonword accuracy filtered by `sel_epoch`: p_noise on x,
# hidden_units on y, one facet row per learning rate.
(
    alt.Chart(dfc)
    .mark_rect()
    .encode(
        x=alt.X("p_noise", type="ordinal"),
        y=alt.Y("hidden_units", type="ordinal"),
        row=alt.Row("learning_rate", type="ordinal"),
        color=alt.Color(
            "nonword_acc", scale=alt.Scale(domain=(0, 1), scheme="redyellowgreen")
        ),
        tooltip=[
            "p_noise",
            "hidden_units",
            "cleanup_units",
            "learning_rate",
            "word_acc",
            "nonword_acc",
        ],
    )
    .add_selection(sel_epoch)
    .transform_filter(sel_epoch)
    .properties(title="Nonword acc")
)

In [None]:
# Heatmap of the word advantage (word_acc - nonword_acc) filtered by
# `sel_epoch`: p_noise on x, hidden_units on y, one facet row per learning
# rate. The colour scale is clamped to [-0.3, 0.3].
alt.Chart(dfc).mark_rect().encode(
    x="p_noise:O",
    y="hidden_units:O",
    row="learning_rate:O",
    color=alt.Color(
        "word_advantage",
        scale=alt.Scale(scheme="redyellowgreen", domain=(-0.3, 0.3))
    ),
    # Include the plotted quantity itself so hovering a cell shows the value
    # that drives its colour, alongside the two scores it is derived from.
    tooltip=[
        "p_noise", "hidden_units", "cleanup_units", "learning_rate", "word_acc",
        "nonword_acc", "word_advantage"
    ],
).add_selection(sel_epoch).transform_filter(sel_epoch).properties(
    title="Word advantage (Word - Nonword)"
)

In [None]:
# # Get model level mean word advantage sorting
# # Merge it back to cell level df

# dfm = dfc.pivot_table(
#     index=['hidden_units', 'cleanup_units', 'p_noise', 'learning_rate']
# ).reset_index()

# dfm['cell_id'] = dfm.index
# dfms = dfm.sort_values('word_advantage').reset_index(drop=True)
# dfms['sorted_adv'] = dfms.index
# dfms = dfms[['code_name', 'cell_id', 'sorted_adv']]

# dfc = dfc.merge(dfms, on='code_name')

In [None]:
# Display the aggregated cell-level frame again.
dfc

### P-noise without aggregation

In [None]:
# Black reference line from (0, 0) to (1, 1): equal word and nonword accuracy.
diagonal = (
    alt.Chart(pd.DataFrame({'x': [0, 1], 'y': [0, 1]}))
    .mark_line(color='black')
    .encode(x='x', y='y')
)

# Data-less line layer: word accuracy (x) against nonword accuracy (y), one
# line per p_noise level. The data is supplied by the enclosing layer below.
plot_pnoise = (
    alt.Chart()
    .mark_line()
    .encode(
        x=alt.X("word_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))),
        y=alt.Y(
            "nonword_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))
        ),
        color=alt.Color("p_noise:O", scale=alt.Scale(scheme="reds")),
        tooltip=[
            "epoch",
            "hidden_units",
            "cleanup_units",
            "p_noise",
            "learning_rate",
            "word_acc",
            "nonword_acc",
        ],
    )
)

# Facet grid: hidden units down the rows, learning rate across the columns.
alt.layer(alt.layer(diagonal, plot_pnoise), data=dfc).facet(
    row="hidden_units:O", column="learning_rate:O"
)

### Hidden units effect

In [None]:
# Data-less line layer: word accuracy (x) against nonword accuracy (y), one
# line per hidden-unit count. The data is supplied by the enclosing layer below.
plot_hidden = (
    alt.Chart()
    .mark_line()
    .encode(
        x=alt.X("word_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))),
        y=alt.Y(
            "nonword_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))
        ),
        color=alt.Color("hidden_units:O", scale=alt.Scale(scheme="blues")),
        tooltip=[
            "epoch",
            "hidden_units",
            "cleanup_units",
            "p_noise",
            "learning_rate",
            "word_acc",
            "nonword_acc",
        ],
    )
)

# Facet grid: p_noise down the rows, learning rate across the columns.
alt.layer(alt.layer(diagonal, plot_hidden), data=dfc).facet(
    row="p_noise:O", column="learning_rate:O"
)

### Learning rate effect

In [None]:
# Data-less line layer: word accuracy (x) against nonword accuracy (y), one
# line per learning rate. The data is supplied by the enclosing layer below.
plot_lr = (
    alt.Chart()
    .mark_line()
    .encode(
        x=alt.X("word_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))),
        y=alt.Y(
            "nonword_acc", type="quantitative", scale=alt.Scale(domain=(0, 1))
        ),
        color=alt.Color("learning_rate:O", scale=alt.Scale(scheme="greens")),
        tooltip=[
            "epoch",
            "hidden_units",
            "cleanup_units",
            "p_noise",
            "learning_rate",
            "word_acc",
            "nonword_acc",
        ],
    )
)

# Facet grid: hidden units down the rows, p_noise across the columns.
alt.layer(alt.layer(diagonal, plot_lr), data=dfc).facet(
    row="hidden_units:O", column="p_noise:O"
)