# Part III: Subtype

In [None]:
%load_ext lab_black
import pandas as pd
import numpy as np
import altair as alt
import helper
from altair import datum, expr

# Remove Altair's row limit on embedded datasets so large frames can be charted.
alt.data_transformers.disable_max_rows()

### Create baseline

In [None]:
# Load the baseline simulations and select the models whose control
# parameters lie on the grid below.
baseline_df = helper.parse_from_file("../sims/1520_sims.csv")

control_grid = {
    "hidden_units": [100, 150, 200],
    "p_noise": [1, 2, 3],
    "learning_rate": [0.004, 0.006, 0.008],
}

baseline = helper.Select_Model(baseline_df)
baseline.select_by_control(**control_grid)

### Load Part 3 data (n=1750)

In [None]:
df = helper.parse_from_file("../sims/part3_1750.csv")

# Count, per row, how many of the three control parameters fall outside the
# thresholds below (each comparison contributes 0 or 1).
few_hidden_units = df.hidden_units < 100
high_noise = df.p_noise > 3
low_learning_rate = df.learning_rate < 0.004
df["risk_count"] = few_hidden_units * 1 + high_noise * 1 + low_learning_rate * 1

# Keep only rows with at least one out-of-threshold parameter.
df = df[df.risk_count >= 1]

# Number of distinct code_name values that remain.
print(len(df["code_name"].unique()))

In [None]:
# Per-(cond, epoch) mean and standard deviation of the baseline scores, stored
# as a nested dict:
#   base_stat["mean"][(cond, epoch)] and base_stat["std"][(cond, epoch)]
# Only the `score` column is aggregated. Aggregating every column first and
# selecting `score` afterwards fails on non-numeric columns in pandas >= 2.0
# and does unnecessary work on the numeric ones.
# NOTE(review): the statistics are computed from baseline_df, not from the
# `baseline` selection object -- confirm that this is the intended reference set.
base_stat = (
    baseline_df.groupby(["cond", "epoch"])["score"].agg(["mean", "std"]).to_dict()
)
# Example lookup: base_stat["mean"][("HF_INC", 0.01)]


def calcuate_z_deviance(row):
    """Return the z-score of ``row["score"]`` against the baseline statistics.

    The baseline mean and standard deviation are read from the module-level
    ``base_stat`` mapping, keyed by ``(row["cond"], row["epoch"])``.
    """
    key = (row["cond"], row["epoch"])
    baseline_mean = base_stat["mean"][key]
    baseline_sd = base_stat["std"][key]

    # A zero spread would divide by zero; substitute a tiny positive value.
    denominator = 1e-6 if baseline_sd == 0 else baseline_sd

    return (row["score"] - baseline_mean) / denominator

In [None]:
def reduce_epoch_resolution(
    df, sel_epoch=(0.01, 0.02, 0.03, 0.05, 0.07, 0.09, 0.2, 0.4, 0.6, 0.8, 1.0),
):
    """Keep only the rows of ``df`` whose ``epoch`` is one of ``sel_epoch``.

    Args:
        df: DataFrame with an ``epoch`` column.
        sel_epoch: Iterable of epoch values to keep. Defaults to the coarse
            grid of eleven epochs between 0.01 and 1.0.

    Returns:
        A new DataFrame holding the matching rows with their original index
        labels. It is an independent copy, so adding columns to the result
        neither touches ``df`` nor triggers a chained-assignment warning.
    """
    keep = df.epoch.isin(list(sel_epoch))
    return df.loc[keep].copy()

# Restrict df to the epochs selected by reduce_epoch_resolution.
df = reduce_epoch_resolution(df)

In [None]:
# Row-wise z-score of each score against the baseline mean/std for the same
# (cond, epoch), as computed by calcuate_z_deviance.
df["z"] = df.apply(calcuate_z_deviance, axis=1)

In [None]:
# Columns used as the row index when the z-scores are pivoted to wide format:
# the model identifier, its control parameters, and the epoch.
m_idx = [
    "code_name",
    "hidden_units",
    "learning_rate",
    "p_noise",
    "cleanup_units",
    "epoch",
]



### Make z datafile

In [None]:
# Long -> wide: one row per m_idx combination, one z column per condition.
z_long = df[[*m_idx, "cond", "z"]]
sdf = z_long.pivot_table(index=m_idx, columns=["cond"]).reset_index()

# Flatten the two-level column labels by concatenating the levels,
# e.g. ("z", "HF_INC") -> "zHF_INC" and ("epoch", "") -> "epoch".
sdf.columns = ["".join(levels).strip() for levels in sdf.columns]
sdf.columns

In [None]:
# Add 0/1 indicator columns for z-score thresholds from -1.0 down to -3.0 in
# steps of 0.1; the column suffix is the threshold magnitude times ten.
for tenths in range(10, 31):
    threshold = -tenths / 10
    sdf[f"word_cutoff_{tenths}"] = (sdf.zHF_INC > threshold) * 1
    sdf[f"nonword_cutoff_{tenths}"] = (sdf.zNW_UN > threshold) * 1

In [None]:
# Difference between the HF_INC and NW_UN z-scores (positive when zHF_INC is
# the larger of the two). Presumably a "word advantage" measure -- confirm.
sdf["z_wadv"] = sdf.zHF_INC - sdf.zNW_UN

In [None]:
# Interval selections over the x encoding. They are not attached to any chart
# in this cell. `alt.selection(type="interval", ...)` is deprecated since
# Altair 5; `alt.selection_interval` is the equivalent constructor.
brush_word = alt.selection_interval(encodings=["x"])
brush_nonword = alt.selection_interval(encodings=["x"])

# Heatmap template: one rect per (p_noise, hidden_units) cell, faceted by
# learning_rate (rows) and epoch (columns), coloured by count(code_name).
# NOTE(review): with an ordinal (:O) colour encoding, domain=(0, 10) lists two
# discrete values rather than a 0-10 range -- confirm this is intended.
base = (
    alt.Chart(sdf)
    .mark_rect()
    .encode(
        x="p_noise:O",
        y=alt.Y("hidden_units:O", sort="descending"),
        row=alt.Row("learning_rate:O", sort="descending"),
        column="epoch:O",
        color=alt.Color("count(code_name):O", scale=alt.Scale(domain=(0, 10))),
    )
)

# z-score threshold separating "below cut" from "at or above cut".
cut = -2.0

# The four comparisons that the panels below combine.
word_below = datum.zHF_INC < cut
word_at_or_above = datum.zHF_INC >= cut
nonword_below = datum.zNW_UN < cut
nonword_at_or_above = datum.zNW_UN >= cut

# iW: only zHF_INC is below the cut.
iW = base.transform_filter(word_below & nonword_at_or_above).properties(title="iW")

# iNW: only zNW_UN is below the cut.
iNW = base.transform_filter(word_at_or_above & nonword_below).properties(title="iNW")

# iBoth: both z-scores are below the cut.
iBoth = base.transform_filter(word_below & nonword_below).properties(title="iBoth")

# iNone: neither z-score is below the cut.
iNone = base.transform_filter(word_at_or_above & nonword_at_or_above).properties(
    title="iNone"
)

# Stack the four panels vertically and write them to a standalone HTML file.
(iNone & iBoth & iW & iNW).save("impairment_2.html")
# word_density | nonword_density