In [None]:
%load_ext lab_black
import pandas as pd
import numpy as np
import altair as alt
from tqdm import tqdm

# Select Altair's "default" data transformer and switch off its max-rows
# check, so charts built from frames above the row limit are not rejected.
alt.data_transformers.enable("default")
alt.data_transformers.disable_max_rows()

# Predicted accuracy over epoch

In [None]:
# Fitted growth-curve results; get_params() later reads the code_name, cond,
# max_acc, k and x0 columns from this frame.
df_growth = pd.read_csv("req1_results.csv", index_col=0)

# Epoch grid: 11 evenly spaced points on [0, 0.1] followed by 9 on [0.2, 1.0],
# rounded to 3 decimals.
epochs = np.round(
    np.concatenate((np.linspace(0.0, 0.1, 11), np.linspace(0.2, 1.0, 9))), 3
)

# One row of hyper-parameters per model (pivot_table averages within code_name).
df_hpar = df_growth.pivot_table(
    values=["cleanup_units", "hidden_units", "learning_rate", "p_noise"],
    index="code_name",
).reset_index()


# Simulation results, restricted to the rows whose Measure is "Accuracy".
df_actual = pd.read_csv("1250_sims.csv").loc[lambda sims: sims.Measure == "Accuracy"]

In [None]:
def vonb(x, max_acc, k, x0):
    """Von Bertalanffy (1938) growth curve.

    The model assumes that growth slows down as size approaches its ceiling:
    dl/dt = K (L_inf - l), or in this notebook's terms dy/dx = k (max_acc - y).

    x: value(s) at which the curve is evaluated (scalar or numpy array)
    max_acc: maximum accuracy / upper asymptote
    k: growth rate
    x0: x value where the model starts to learn (the curve is 0 at x == x0)

    Returns max_acc * (1 - exp(-k * (x - x0))). The result is not clipped,
    so it is negative for x < x0.
    """
    elapsed = x - x0
    decay = np.exp(-k * elapsed)
    return max_acc * (1 - decay)


def clipped_vonb(x, max_acc, k, x0):
    """Von Bertalanffy growth curve with the output limited to [0, 1].

    Same formula and parameters as vonb(); values below 0 are raised to 0 and
    values above 1 are lowered to 1.
    """
    unclipped = max_acc * (1 - np.exp(-k * (x - x0)))
    return np.clip(unclipped, 0, 1)


def get_params(df, code_name, cond):
    """Return the growth parameters (max_acc, k, x0) for one model and condition.

    df: frame with code_name, cond, max_acc, k and x0 columns
    code_name: model identifier to match
    cond: condition label to match

    Returns a 1-D numpy array [max_acc, k, x0] taken from the first matching
    row. Indexing that first row raises IndexError when nothing matches.
    """
    is_model = df.code_name == code_name
    is_cond = df.cond == cond
    matches = df.loc[is_model & is_cond, ["max_acc", "k", "x0"]]
    return matches.to_numpy()[0]

### Predict and merge dataset

In [None]:
# Conditions for which a growth curve is evaluated, and the final column order
# of df_pred (kept identical to the original layout).
pred_conds = ["HF_INC", "HF_CON", "LF_INC", "LF_CON", "NW_UN", "NW_AMB"]
pred_columns = [
    "HF_INC",
    "word_pred",
    "HF_CON",
    "LF_INC",
    "LF_CON",
    "NW_UN",
    "nonword_pred",
    "NW_AMB",
    "epoch",
    "code_name",
]

# Collect one frame per model and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on every
# iteration.
pred_frames = []

for model in tqdm(df_growth.code_name.unique()):
    # Clipped growth curve for every condition, evaluated on the epoch grid.
    this_df = pd.DataFrame(
        {
            cond: clipped_vonb(epochs, *get_params(df_growth, model, cond))
            for cond in pred_conds
        }
    )
    this_df["word_pred"] = this_df["HF_INC"]  # Copy for convenience
    this_df["nonword_pred"] = this_df["NW_UN"]  # Copy for convenience
    this_df["epoch"] = np.round(epochs, 3)
    this_df["code_name"] = model
    pred_frames.append(this_df[pred_columns])

# pd.concat raises on an empty list, so fall back to an empty frame that still
# carries the expected columns.
df_pred = (
    pd.concat(pred_frames, ignore_index=True)
    if pred_frames
    else pd.DataFrame(columns=pred_columns)
)

### Export predicted values to Jay

In [None]:
# Reshape the predictions to long format (one row per code_name, epoch and
# variable), attach the hyper-parameters and write the result to predicted.csv.
# The original referenced an undefined name `pdf`; the frame built above is
# `df_pred`.
df_pred_melt = df_pred.melt(id_vars=["code_name", "epoch"])
df_pred_melt.merge(df_hpar, on="code_name").to_csv("predicted.csv")

### W vs. NW plot for predicted accuracy 

In [None]:
# Scatter of predicted word accuracy (x) against predicted nonword accuracy
# (y), one point per row of df_pred, coloured by epoch on a fixed 0-1 scale.
predicted_wnw = (
    alt.Chart(df_pred)
    .mark_point()
    .encode(
        alt.X("word_pred"),
        alt.Y("nonword_pred"),
        alt.Color(
            "epoch:Q",
            scale=alt.Scale(scheme="redyellowgreen", domain=[0, 1]),
        ),
        opacity=alt.value(0.2),
    )
)

# Actual accuracy 

In [None]:
# Wide table of the actual results: one row per (ID, Trial.Scaled), with one
# column per Type under each aggregated value column.
pvt = df_actual.pivot_table(columns="Type", index=["ID", "Trial.Scaled"]).reset_index()

# Keep only what the word-vs-nonword comparison needs, under the same
# code_name / epoch names that df_pred uses. Epoch is rounded to 3 decimals.
rdf = pd.DataFrame(
    {
        "code_name": pvt["ID"],
        "epoch": pvt["Trial.Scaled"].round(3),
        "word": pvt["Score"]["HF_INC"],
        "nonword": pvt["Score"]["NW_UN"],
    }
)

In [None]:
# Scatter of actual word accuracy (x) against actual nonword accuracy (y),
# one point per row of rdf, coloured by epoch on a fixed 0-1 scale.
actual_wnw = (
    alt.Chart(rdf)
    .mark_point()
    .encode(
        alt.X("word"),
        alt.Y("nonword"),
        alt.Color(
            "epoch:Q",
            scale=alt.Scale(scheme="redyellowgreen", domain=[0, 1]),
        ),
        opacity=alt.value(0.2),
    )
)

In [None]:
# Predicted (left) and actual (right) scatter plots side by side.
alt.hconcat(predicted_wnw, actual_wnw)

### Merge data sets

In [None]:
# Join predicted and actual accuracy on model and epoch (inner join, so only
# the (code_name, epoch) pairs present in both frames are kept).
df = pd.merge(df_pred, rdf, on=["code_name", "epoch"])

In [None]:
# Residuals (actual - predicted) for words and nonwords, the word-minus-nonword
# gap in the actual and the predicted data, and the residual of that gap.
# assign() evaluates its arguments in order, so d_wnw can use the two gap
# columns created just before it.
df = df.assign(
    d_word=lambda d: d.word - d.word_pred,
    d_nw=lambda d: d.nonword - d.nonword_pred,
    wnw_real=lambda d: d.word - d.nonword,
    wnw_pred=lambda d: d.word_pred - d.nonword_pred,
    d_wnw=lambda d: d.wnw_real - d.wnw_pred,
)

# Merge h-params
df = pd.merge(df, df_hpar, on=["code_name"])

In [None]:
# Display the merged frame (predictions, actual scores, residuals, h-params).
df

In [None]:
# Actual word / nonword accuracy in long format: one row per
# (code_name, epoch, variable).
actual = pd.melt(
    df[["code_name", "epoch", "word", "nonword"]],
    id_vars=["code_name", "epoch"],
)

In [None]:
# Predicted word / nonword accuracy in long format, with the variable labels
# renamed to "word" / "nonword" so they line up with the labels in `actual`.
predicted = (
    df[["code_name", "epoch", "word_pred", "nonword_pred"]]
    .melt(id_vars=["code_name", "epoch"])
    .assign(
        variable=lambda long: long["variable"].map(
            lambda label: "word" if label == "word_pred" else "nonword"
        )
    )
)

In [None]:
# Pair each actual value with its prediction. The merge suffixes the two
# "value" columns with _x (actual) and _y (predicted); give them and the
# "variable" column descriptive names.
df1 = actual.merge(predicted, on=["code_name", "epoch", "variable"]).rename(
    columns={"value_x": "actual", "value_y": "predicted", "variable": "type"}
)

In [None]:
# Display the paired actual / predicted frame.
df1

In [None]:
# Stack the "actual" and "predicted" columns into a single "acc" column; the
# melt's default "variable" column records which of the two each row holds.
df1 = df1.melt(id_vars=["code_name", "epoch", "type"], value_name="acc")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Split violins of accuracy by epoch: the two halves are the "variable"
# levels produced by the melt above (actual vs. predicted).
# Draw explicitly on `ax` and set the title on it; the original rebound `ax`
# to the Text object returned by set_title().
fig, ax = plt.subplots(figsize=(18, 8))
sns.violinplot(x="epoch", y="acc", hue="variable", split=True, data=df1, ax=ax)
ax.set_title("Predicted vs. Actual Accuracy by Epoch");

In [None]:
# Word and nonword residuals in long format: "variable" is d_word or d_nw and
# "value" is the residual.
df2 = pd.melt(
    df[["code_name", "epoch", "d_word", "d_nw"]],
    id_vars=["code_name", "epoch"],
)
df2

In [None]:
# Split violins of the residuals by epoch: d_word on one side, d_nw on the
# other, with an inner box plot.
# Draw explicitly on `ax` and set the title on it; the original rebound `ax`
# to the Text object returned by set_title().
fig, ax = plt.subplots(figsize=(18, 8))
sns.violinplot(
    x="epoch",
    y="value",
    hue="variable",
    split=True,
    inner="box",
    data=df2,
    ax=ax,
)
ax.set_title("Residual Accuracy (actual-predicted) by Epoch")

sns.despine(left=True)

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Mixed-effects model of the residuals: fixed effects for epoch, variable
# (d_word vs. d_nw) and their interaction, with rows grouped by code_name.
m_epoch = smf.mixedlm(
    formula="value ~ epoch * variable",
    data=df2,
    groups="code_name",
).fit()

In [None]:
# Print the text summary of the fitted mixed model.
print(m_epoch.summary())

# Whether the shape of W vs. NW changes

In [None]:
# Residuals together with the hyper-parameters, for the heatmaps below.
# learning_rate is rounded to 3 decimals with a chained assign(); the original
# wrote into a column subset of `df` through .loc, the pattern that triggers
# pandas' SettingWithCopyWarning.
df3 = df[
    [
        "code_name",
        "d_word",
        "d_nw",
        "hidden_units",
        "p_noise",
        "learning_rate",
        "cleanup_units",
    ]
].assign(learning_rate=lambda d: d["learning_rate"].round(3))

In [None]:
# Shared heatmap layout: p_noise (x) by hidden_units (y) cells, faceted by
# learning_rate (rows) and cleanup_units (columns).
heatmap_base = (
    alt.Chart(df3)
    .mark_rect()
    .encode(
        alt.X("p_noise:O"),
        alt.Y("hidden_units:O"),
        alt.Row("learning_rate:O"),
        alt.Column("cleanup_units:O"),
    )
)

# Colour every cell by the mean word residual, on a fixed +/-0.05 scale.
heatmap_base.encode(
    alt.Color(
        "mean(d_word)",
        scale=alt.Scale(scheme="redyellowgreen", domain=[-0.05, 0.05]),
    )
).properties(title="WORD residual")

In [None]:
# Same layout, coloured by the mean nonword residual on the same +/-0.05 scale.
heatmap_base.encode(
    alt.Color(
        "mean(d_nw)",
        scale=alt.Scale(scheme="redyellowgreen", domain=[-0.05, 0.05]),
    )
).properties(title="NONWORD residual")

### Other conditions

In [None]:
# Display the accuracy rows of the simulation results.
df_actual

In [None]:
# Predicted accuracy for the six conditions in long format: one row per
# (code_name, epoch, cond), with the prediction in "acc_pred".
p = df_pred.melt(
    id_vars=["code_name", "epoch"],
    value_vars=["HF_INC", "HF_CON", "LF_INC", "LF_CON", "NW_UN", "NW_AMB"],
    var_name="cond",
    value_name="acc_pred",
)

In [None]:
# Actual accuracy for every condition, joined to the matching prediction.
# The join key includes the float "epoch". The predicted epochs are rounded to
# 3 decimals, so the actual ones are rounded the same way here (as is done
# when building `rdf`); otherwise tiny float differences would silently drop
# rows from this inner join.
df_allcond = (
    df_actual.rename(
        columns={
            "ID": "code_name",
            "Trial.Scaled": "epoch",
            "Type": "cond",
            "Score": "acc",
        }
    )[["code_name", "epoch", "cond", "Hidden", "PhoHid", "Pnoise", "Epsilon", "acc"]]
    .assign(epoch=lambda d: d["epoch"].round(3))
    .merge(p, on=["code_name", "epoch", "cond"])
)

# Residual: actual minus predicted accuracy.
df_allcond["d"] = df_allcond.acc - df_allcond.acc_pred

In [None]:
# Display the per-condition actual / predicted frame with its residual "d".
df_allcond

In [None]:
# Aggregate over epochs: pivot_table averages the value columns within each
# (code_name, cond). Epsilon is then rounded to 3 decimals.
df_allcond_aggepoch = (
    df_allcond.pivot_table(index=["code_name", "cond"])
    .reset_index()
    .assign(Epsilon=lambda agg: agg.Epsilon.round(3))
)

In [None]:
from altair.expr import datum

# Heatmap template for the epoch-averaged residual "d": Pnoise (x) by Hidden
# (y) cells, faceted by Epsilon (rows) and PhoHid (columns).
base = (
    alt.Chart(df_allcond_aggepoch)
    .mark_rect()
    .encode(
        x="Pnoise:O",
        y="Hidden:O",
        row="Epsilon:O",
        column="PhoHid:O",
        color=alt.Color(
            "d:Q", scale=alt.Scale(scheme="redyellowgreen", domain=(-0.05, 0.05))
        ),
    )
)

# One panel per condition, laid out horizontally. The accumulator must be an
# hconcat to match the `|=` operator: starting from alt.vconcat() made `|`
# wrap an empty vertical concat as a stray first panel.
chart = alt.hconcat()

for cond in df_allcond_aggepoch.cond.unique():
    chart |= base.transform_filter(datum.cond == cond).properties(title=cond)


chart

In [None]:
# Residuals by epoch for every condition, for epochs up to and including 0.1.
# Draw explicitly on `ax` and set the title on it; the original rebound `ax`
# to the Text object returned by set_title().
fig, ax = plt.subplots(figsize=(36, 8))
sns.violinplot(
    x="epoch",
    y="d",
    hue="cond",
    data=df_allcond.loc[df_allcond.epoch <= 0.1],
    ax=ax,
)
ax.set_title("Residual Accuracy (actual-predicted) by Epoch in all conditions")


fig.savefig("all_cond_vio_1.png")

In [None]:
# Residuals by epoch for every condition, for epochs above 0.1.
# Draw explicitly on `ax` and set the title on it; the original rebound `ax`
# to the Text object returned by set_title().
fig, ax = plt.subplots(figsize=(36, 8))
sns.violinplot(
    x="epoch",
    y="d",
    hue="cond",
    data=df_allcond.loc[df_allcond.epoch > 0.1],
    ax=ax,
)
ax.set_title("Residual Accuracy (actual-predicted) by Epoch in all conditions")


fig.savefig("all_cond_vio_2.png")

In [None]:
# List the column names of df_allcond.
df_allcond.columns

In [None]:
# Display df_allcond again (same frame as shown earlier in the notebook).
df_allcond

In [None]:
# Line chart (with point markers) over epoch for df_allcond.
# NOTE(review): this cell looks unfinished. The y encoding is an empty string,
# so no field is plotted; presumably "d" or "acc" was intended. TODO confirm
# and fill in. It also rebinds `base`, which the per-condition heatmap cell
# defined earlier.
base = alt.Chart(df_allcond).mark_line(point=True).encode(
    x="epoch", y=""
    )