In [None]:
%load_ext lab_black
import pandas as pd
import numpy as np
import altair as alt
from tqdm import tqdm

# Select Altair's "default" data transformer and switch off its row limit so
# the charts below can be built directly from the full data frames.
alt.data_transformers.enable("default")
alt.data_transformers.disable_max_rows()

# Predicted accuracy over epoch

In [None]:
# Per-model results table; the first CSV column becomes the index.
df = pd.read_csv("req1_results.csv", index_col=0)

# Evaluation grid: 11 evenly spaced points on [0.0, 0.1] followed by 9 evenly
# spaced points on [0.2, 1.0], rounded to 3 decimals.
early_epochs = np.linspace(0.0, 0.1, 11)
late_epochs = np.linspace(0.2, 1.0, 9)
epochs = np.round(np.concatenate([early_epochs, late_epochs]), 3)
print(epochs)

In [None]:
def vonb(x, max_acc, k, x0):
    """Evaluate the von Bertalanffy (1938) growth curve at ``x``.

    The model assumes that growth slows as size approaches its limit:
    dl/dt = K (L_inf - l), or in this notebook's terms
    dy/dx = k (max_acc - y).

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the curve is evaluated.
    max_acc : float
        Maximum accuracy, i.e. the upper asymptote.
    k : float
        Growth rate.
    x0 : float
        x value at which the model starts to learn (the curve equals 0 there).

    Returns
    -------
    ``max_acc * (1 - exp(-k * (x - x0)))``; not clipped, so it is negative
    for ``x < x0`` when ``max_acc`` and ``k`` are positive.
    """
    remaining = np.exp(-k * (x - x0))
    return max_acc * (1 - remaining)


def clipped_vonb(x, max_acc, k, x0):
    """Von Bertalanffy growth curve restricted to the range [0, 1].

    Computes ``max_acc * (1 - exp(-k * (x - x0)))`` and clips the result so
    that values below 0 become 0 and values above 1 become 1.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the curve is evaluated.
    max_acc : float
        Upper asymptote of the unclipped curve.
    k : float
        Growth rate.
    x0 : float
        x value at which the unclipped curve crosses 0.
    """
    unclipped = max_acc * (1 - np.exp(-k * (x - x0)))
    return np.clip(unclipped, 0, 1)


def get_params(df, code_name, cond):
    """Return the fitted curve parameters for one model and condition.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``code_name``, ``cond``, ``max_acc``, ``k`` and ``x0``
        columns.
    code_name : value compared against ``df.code_name``
    cond : value compared against ``df.cond``

    Returns
    -------
    numpy.ndarray
        ``[max_acc, k, x0]`` of the first matching row, in the order that
        ``vonb`` / ``clipped_vonb`` take them after ``x``.

    Raises
    ------
    IndexError
        If no row matches ``code_name`` and ``cond``.
    """
    selected = (df.code_name == code_name) & (df.cond == cond)
    params = df.loc[selected, ["max_acc", "k", "x0"]].to_numpy()
    if len(params) == 0:
        # Same exception type as indexing an empty array, but with a message
        # that says which model/condition is missing.
        raise IndexError(
            f"no fitted parameters for code_name={code_name!r}, cond={cond!r}"
        )
    return params[0]

In [None]:
# One row per model: pivot_table aggregates (mean by default) the listed
# hyper-parameter columns over all rows that share a code_name.
hyper_params = ["cleanup_units", "hidden_units", "learning_rate", "p_noise"]
model_hp = df.pivot_table(index="code_name", values=hyper_params).reset_index()

In [None]:
# Predicted accuracy for every model: one column per condition, one row per
# epoch in `epochs`, plus the epoch and the model's code_name.
conditions = ["HF_INC", "HF_CON", "LF_INC", "LF_CON", "NW_UN", "NW_AMB"]

# Collect the per-model frames and concatenate once at the end; concatenating
# inside the loop re-copies the accumulated frame on every iteration.
model_frames = []
for model in tqdm(df.code_name.unique()):
    this_df = pd.DataFrame(
        {
            cond: clipped_vonb(epochs, *get_params(df, model, cond))
            for cond in conditions
        }
    )
    this_df["epoch"] = np.round(epochs, 3)
    this_df["code_name"] = model
    model_frames.append(this_df)

# pd.concat rejects an empty list, so keep the empty-frame result the original
# loop produced when there are no models.
pdf = pd.concat(model_frames, ignore_index=True) if model_frames else pd.DataFrame()

### Export predicted values to Jay

In [None]:
# Reshape to long format (one row per model, epoch and condition), attach the
# model hyper-parameters and write the result to predicted.csv.
predicted_long = pdf.melt(id_vars=["code_name", "epoch"])
predicted_long = predicted_long.merge(model_hp, on="code_name")
predicted_long.to_csv("predicted.csv")

### Word (W) vs. nonword (NW) plot for predicted accuracy

In [None]:
# Predicted HF_INC accuracy against predicted NW_UN accuracy, one point per
# model and epoch, coloured by epoch on a fixed 0-1 scale.
epoch_color = alt.Color(
    "epoch:Q", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1))
)
alt.Chart(pdf).mark_point().encode(
    x="HF_INC", y="NW_UN", color=epoch_color, opacity=alt.value(0.2)
)

In [None]:
# Predicted word vs. nonword accuracy, coloured by epoch.
# pdf holds one column per condition and has no "word"/"nonword" columns, so
# plot HF_INC against NW_UN (the conditions the actual-accuracy frame maps to
# word and nonword) and title the axes to match the actual-accuracy chart.
predicted = (
    alt.Chart(pdf)
    .mark_point()
    .encode(
        x=alt.X("HF_INC", title="word"),
        y=alt.Y("NW_UN", title="nonword"),
        color=alt.Color(
            "epoch:Q", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1)),
        ),
        opacity=alt.value(0.2),
    )
)

# Actual accuracy 

In [None]:
# Simulation results; only the accuracy rows of the two plotted conditions
# are kept for the word/nonword comparison.
real_df = pd.read_csv("1250_sims.csv")
sdf = real_df.loc[
    (real_df.Measure == "Accuracy") & real_df.Type.isin(["HF_INC", "NW_UN"]),
]

# Aggregate only Score. Without `values`, pivot_table tries to average every
# remaining column, including the string column Measure, which raises in
# recent pandas. A list keeps the (Score, <Type>) column layout used below.
pvt = sdf.pivot_table(
    index=["ID", "Trial.Scaled"], columns="Type", values=["Score"]
).reset_index()

# One row per model and epoch, with HF_INC as "word" and NW_UN as "nonword".
rdf = pd.DataFrame()
rdf["code_name"] = pvt.ID
rdf["epoch"] = np.round(pvt["Trial.Scaled"], 3)
rdf["word"] = pvt.Score.HF_INC
rdf["nonword"] = pvt.Score.NW_UN

In [None]:
# Observed word vs. nonword accuracy, one point per model and epoch, coloured
# by epoch on a fixed 0-1 scale.
epoch_color = alt.Color(
    "epoch:Q", scale=alt.Scale(scheme="redyellowgreen", domain=(0, 1))
)
actual = alt.Chart(rdf).mark_point().encode(
    x="word", y="nonword", color=epoch_color, opacity=alt.value(0.2)
)

In [None]:
# Show the actual and predicted charts side by side.
alt.hconcat(actual, predicted)

### Merge data sets

In [None]:
# Merge actual and predicted df
# pdf stores one column per condition; expose HF_INC / NW_UN under the names
# rdf uses so the merge yields word_x / nonword_x (predicted) next to
# word_y / nonword_y (actual).
predicted_wnw = pdf.assign(word=pdf["HF_INC"], nonword=pdf["NW_UN"])
# NOTE: this rebinds `df`, which until now held the table read from
# req1_results.csv; cells that need that table must run before this one.
df = predicted_wnw.merge(rdf, on=["code_name", "epoch"])
df["d_word"] = df.word_x - df.word_y  # predicted minus actual
df["d_nw"] = df.nonword_x - df.nonword_y

# Merge h-params
merge_df = real_df[["ID", "Trial.Scaled", "Hidden", "PhoHid", "Pnoise", "Epsilon"]]
merge_df = merge_df.rename(columns={"ID": "code_name", "Trial.Scaled": "epoch"})
# Round the key the same way rdf/pdf do so float keys compare equal, and keep
# one row per distinct combination: real_df repeats the hyper-parameters for
# every Measure/Type row, which would otherwise multiply the rows of df.
merge_df = merge_df.assign(epoch=np.round(merge_df["epoch"], 3)).drop_duplicates()
df = df.merge(merge_df, on=["code_name", "epoch"])

In [None]:
# Word-accuracy difference (predicted minus actual) at each epoch.
alt.Chart(df).mark_point().encode(
    alt.X("epoch"), alt.Y("d_word"), tooltip=["d_word"]
)

In [None]:
# Nonword-accuracy difference (predicted minus actual) at each epoch.
alt.Chart(df).mark_point().encode(
    alt.X("epoch"), alt.Y("d_nw"), tooltip=["d_nw"]
)

In [None]:
# Mean signed difference (predicted minus actual) for word accuracy.
df["d_word"].mean()

In [None]:
# Mean signed difference (predicted minus actual) for nonword accuracy.
df["d_nw"].mean()