# Figure 5. Nonword versus word accuracy (performance space) at different levels of control parameter settings

## Import libraries

In [None]:
import pandas as pd
import altair as alt
from helper import RawData, long_to_wide, parse_df, alt_diagonal, apply_font_size

## Import part II data (Expanded control parameter range)

In [None]:
# Part II results file (expanded control parameter range).
raw = RawData("../../data/data_part2_1750.csv")
# Fetch the records with the epoch_less_than=0.3 filter, then pass them through
# the parse_df and long_to_wide helpers in a single expression.
df = long_to_wide(parse_df(raw.get(epoch_less_than=0.3)))

## Function for plotting figure 5

In [None]:
def plot_performance_space(df: pd.DataFrame) -> alt.LayerChart:
    """Plot nonword accuracy against word accuracy (a "performance space").

    The returned chart stacks four layers, in this order:

    1. small black points, one per row of ``df``;
    2. large circles at the per-epoch mean of both scores, coloured red for
       epoch 0.05 and green for epoch 0.3;
    3. a black LOESS curve of ``score_nonword`` on ``score_word``;
    4. whatever ``alt_diagonal()`` returns (presumably a diagonal reference
       line -- see the helper module).

    Args:
        df: Data to plot. Must contain the columns ``score_word``,
            ``score_nonword`` and ``epoch``.

    Returns:
        The layered Altair chart. Layering charts with ``+`` produces an
        ``alt.LayerChart`` rather than a plain ``alt.Chart``.
    """
    # Points: both axes are pinned to [0, 1]; hovering a point shows its epoch.
    base = (
        alt.Chart(df)
        .mark_circle(color="black", size=10)
        .encode(
            x=alt.X("score_word:Q", scale=alt.Scale(domain=(0, 1)), title="Word"),
            y=alt.Y("score_nonword:Q", scale=alt.Scale(domain=(0, 1)), title="Nonword"),
            tooltip=["epoch"],
        )
    )

    # LOESS curve through the raw points
    loess = base.transform_loess(
        "score_word", "score_nonword", bandwidth=0.4
    ).mark_line(color="black")

    # Color points to indicate epoch: one large circle per epoch, placed at the
    # mean of both scores. scale=None makes Vega use the computed string as the
    # literal color. Epochs other than 0.05 and 0.3 get an empty color string
    # (presumably so they are not drawn -- TODO confirm in the rendered figure).
    color_points = (
        base.mark_circle(size=200)
        .encode(x="mean_x:Q", y="mean_y:Q", color=alt.Color("color:N", scale=None))
        .transform_aggregate(
            mean_x="mean(score_word)",
            mean_y="mean(score_nonword)",
            groupby=["epoch"],
        )
        .transform_calculate(
            color="if(datum.epoch===0.05, 'red', if(datum.epoch === 0.3, 'green', ''))"
        )
    )

    return base + color_points + loess + alt_diagonal()

## Plotting figure 5 in selected control parameter settings

In [None]:
# Layout: one column per (learning rate, p_noise) pair, placed left to right;
# inside each column, one panel per hidden-unit count, stacked top to bottom.
plot_main = alt.hconcat()
for e in (0.01, 0.002):
    for p in (0, 8):
        plot_column = alt.vconcat()
        for h in (250, 50):
            # Rows belonging to this control parameter setting
            in_setting = (
                df["learning_rate"].eq(e)
                & df["p_noise"].eq(p)
                & df["hidden_units"].eq(h)
            )
            subset = df[in_setting]
            # Single panel, titled with its setting
            panel = plot_performance_space(subset)
            panel = panel.properties(title=f"Pnoise={p}, HU={h}, Epsilon={e}")
            # Append below the panels already in this column
            plot_column &= panel

        # Append the finished column to the right of the figure
        plot_main |= plot_column

apply_font_size(plot_main, 18)