# Figure 1. Accuracy over training epoch

## Import libraries

In [2]:
import pandas as pd
import altair as alt
from helper import RawData

## Import part I data

In [3]:
# Load the part I results through the project helper; `raw.df` is the DataFrame
# passed to fig1 further down. The path is relative, so it resolves against the
# kernel's working directory.
raw = RawData("../../data/data_part1_1250.csv")

## Function for plotting figure 1

In [4]:
def fig1(sim_df: pd.DataFrame, font_size: int = 18) -> alt.Chart:
    """Plot mean accuracy by condition over epoch as a line chart.

    The two nonword conditions ("NW_AMB" and "NW_UN") are merged into a single
    "NW" condition, scores are averaged within each (epoch, condition) pair,
    and one line is drawn per condition. The "NW" line is dashed; every other
    line is solid.

    Args:
        sim_df: Data with at least the columns `epoch`, `cond` and `score`.
            The frame is not modified.
        font_size: Font size applied to axis, header and legend labels/titles.

    Returns:
        A 400x300 Altair chart with `epoch` on x, mean `score` on y (domain
        fixed to 0-1) and colour encoding the condition.
    """
    # .assign returns a new frame, so the caller's DataFrame is left untouched.
    labelled = sim_df.assign(
        condition=sim_df["cond"].apply(
            lambda x: "NW" if x in ("NW_AMB", "NW_UN") else x
        )
    )

    # numeric_only=True is required: the frame still carries non-numeric
    # columns (e.g. `cond`). pandas >= 2.0 raises TypeError when asked to
    # average them, whereas older versions silently dropped them. Passing the
    # flag gives the same result on every version.
    averaged = (
        labelled.groupby(["epoch", "condition"])
        .mean(numeric_only=True)
        .reset_index()
    )

    chart = (
        alt.Chart(averaged)
        .mark_line()
        .encode(
            x=alt.X("epoch:Q", title="Sample (Mil.)"),
            y=alt.Y("score:Q", title="Accuracy", scale=alt.Scale(domain=(0, 1))),
            color=alt.Color(
                "condition:N",
                legend=alt.Legend(orient="bottom-right"),
                title="Condition",
            ),
            # Dash only the nonword line so it stands out from the word conditions.
            strokeDash=alt.condition(
                alt.datum.condition == "NW", alt.value([5, 5]), alt.value([0])
            ),
        )
        .properties(width=400, height=300)
    )

    # Top-level configuration goes last: a configured chart can no longer be
    # layered or concatenated.
    return (
        chart.configure_axis(labelFontSize=font_size, titleFontSize=font_size)
        .configure_header(labelFontSize=font_size, titleFontSize=font_size)
        .configure_legend(labelFontSize=font_size, titleFontSize=font_size)
    )

## Plotting figure 1 from part I raw data 

In [5]:
# Build Figure 1 from the loaded data with the default font size; as the cell's
# last expression, the returned chart is rendered as the cell output.
fig1(raw.df)

Figure 1. Accuracy over training epoch (expressed in fractions of a million samples) for high-frequency consistent words (HF_CON), high-frequency inconsistent words (HF_INC), low-frequency consistent words (LF_CON), low-frequency inconsistent words (LF_INC), and nonwords overall (NW; the NW_AMB and NW_UN conditions combined, drawn as a dashed line).