# Individual differences (part 3)

In [None]:
%load_ext lab_black
import helper
import altair as alt

### Import parsed datafile

In [None]:
# Load both simulation result files through the project parser, in order:
# sim1_df is the source for the baseline, sim2_df for the part-3 analyses.
sim1_df, sim2_df = [
    helper.parse_from_file(csv_path)
    for csv_path in ("../sims/1250_sims.csv", "../sims/part3_1750.csv")
]

### Interactive plot

In [None]:
# Reference point: the first simulation set, narrowed to the control-parameter
# values listed below.
baseline_grid = {
    "hidden_units": [100, 150, 200],
    "p_noise": [1, 2, 3],
    "learning_rate": [0.004, 0.006, 0.008],
    "cleanup_units": [20],
}
baseline = helper.SimResults(sim1_df)
baseline.select_by_control(**baseline_grid)


## Development baseline
# Average the baseline rows per (condition, epoch) for the two conditions that
# are drawn as reference lines.
# numeric_only=True is spelled out because pandas >= 2.0 raises TypeError on
# non-numeric columns instead of silently dropping them, which is what older
# versions did by default. NOTE(review): assumes "score" is numeric -- confirm
# against helper.parse_from_file.
base_dev_df = (
    baseline.df.loc[baseline.df.cond.isin(["HF_INC", "NW_UN"])]
    .groupby(["cond", "epoch"])
    .mean(numeric_only=True)
    .reset_index()
)

# Faint dashed line per condition: mean score (y, fixed to [0, 1]) over epoch
# (x). The legend is suppressed and opacity lowered so the layer reads as a
# background reference.
base_dev = (
    alt.Chart(base_dev_df)
    .mark_line(strokeDash=[10, 10], size=5)
    .encode(
        y=alt.Y("score:Q", scale=alt.Scale(domain=(0, 1)), title="Accuracy"),
        x=alt.X("epoch:Q", title="Sample (M)"),
        color=alt.Color("cond:N", legend=None),
        opacity=alt.value(0.3),
    )
)

## Performance baseline

# Average the word/nonword table per epoch.
# numeric_only=True is spelled out because pandas >= 2.0 raises TypeError on
# non-numeric columns instead of silently dropping them, which is what older
# versions did by default. NOTE(review): assumes word_acc / nonword_acc are
# numeric -- confirm against SimResults.make_wnw.
base_per_df = baseline.make_wnw().groupby("epoch").mean(numeric_only=True).reset_index()

# Dashed black trajectory in word-accuracy (x) vs nonword-accuracy (y) space,
# both axes fixed to [0, 1].
base_wnw = (
    alt.Chart(base_per_df)
    .mark_line(color="black", strokeDash=[10, 10], size=5)
    .encode(
        y=alt.Y("nonword_acc:Q", scale=alt.Scale(domain=(0, 1)), title="Nonword"),
        x=alt.X("word_acc:Q", scale=alt.Scale(domain=(0, 1)), title="Word"),
        tooltip=["epoch", "word_acc:Q", "nonword_acc:Q"],
        opacity=alt.value(0.5),
    )
    .transform_calculate(
        # Literal colour per row: red at epoch 0.05, green at epoch 0.3,
        # empty string for every other epoch.
        color="if(datum.epoch===0.05, 'red', if(datum.epoch === 0.3, 'green', ''))"
    )
)

# Point layer derived from the line chart; scale=None makes Altair use the
# calculated strings directly as colours. It must be built before the in-place
# layering below so it derives from the plain line chart.
points = base_wnw.mark_point(size=200).encode(color=alt.Color("color:N", scale=None))
base_wnw += points

In [None]:
# Second simulation set, limited to the two conditions listed, rendered as an
# interactive chart on top of the two baseline layers and written to HTML.
sim2 = helper.SimResults(sim2_df)
sim2.select_by_cond(["HF_INC", "NW_UN"])

sim2_chart = sim2.plot_interactive(
    title="Sim 2 interactive plot",
    show_sd=False,
    base_dev=base_dev,
    base_wnw=base_wnw,
)
sim2_chart.save("sim2_interactive.html")

### SD in baseline

In [None]:
# Separate SimResults over the first simulation set, using the control grid
# below, so the condition filter applied here stays local to this object.
sd_grid = {
    "hidden_units": [100, 150, 200],
    "p_noise": [1, 2, 3],
    "learning_rate": [0.004, 0.006, 0.008],
    "cleanup_units": [20],
}
baseline_sd = helper.SimResults(sim1_df)
baseline_sd.select_by_control(**sd_grid)

# Keep the four word conditions only.
baseline_sd.select_by_cond(["HF_INC", "HF_CON", "LF_INC", "LF_CON"])

# Build the mean/SD development chart, title it, and export it.
sd_chart = baseline_sd.plot_mean_dev(show_sd=True, by_cond=False)
sd_chart = sd_chart.properties(
    title="Mean and SD of word accuracy (all 4 conditions) over Epoch in Baseline"
)
sd_chart.save("part3 plots/baseline_dev_sd.html")

# Sanity check: list the conditions that remain after filtering.
print(
    f"These conditions are included when calculating baseline: {baseline_sd.df.cond.unique()}"
)

### Reading disability grouping

In [None]:
# Rebuild the baseline from the first simulation set with the control grid
# below, so this cell does not depend on earlier filtering.
rd_baseline_grid = {
    "hidden_units": [100, 150, 200],
    "p_noise": [1, 2, 3],
    "learning_rate": [0.004, 0.006, 0.008],
    "cleanup_units": [20],
}
baseline = helper.SimResults(sim1_df)
baseline.select_by_control(**rd_baseline_grid)

# Fresh view of the second simulation set, keeping only rows whose risk_count
# is at least 1.
sim2 = helper.SimResults(sim2_df)
risk_mask = sim2.df["risk_count"] >= 1
sim2.df = sim2.df.loc[risk_mask]

# Last expression of the cell: shows the remaining column names.
sim2.df.columns

In [None]:
# Hand the filtered second-simulation rows and the baseline rows to the
# project's RDGrouping helper; `rd` drives the heatmap cells that follow.
# NOTE(review): how RDGrouping uses the baseline frame is defined in helper
# and is not visible here.
rd = helper.RDGrouping(sim2.df, baseline.df)

### Raw score / percentage / z-score over epoch

In [None]:
# One heatmap export per measure; each pair is (plot_heatmap argument,
# destination HTML file).
for measure, html_path in (
    ("score", "part3 plots/epoch_score.html"),
    ("pc", "part3 plots/epoch_pc.html"),
    ("z_deviance", "part3 plots/epoch_z.html"),
):
    rd.plot_heatmap(measure).save(html_path)

### Interactive cutoff

In [None]:
# One interactive grouping heatmap per version; each pair is (version
# keyword, destination HTML file).
for cutoff_version, html_path in (
    ("z", "part3 plots/grouping_z.html"),
    ("pc", "part3 plots/grouping_pc.html"),
):
    rd.plot_interactive_group_heatmap(version=cutoff_version).save(html_path)