In [None]:
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
from yaml import safe_load

from hytea.utils import DotDict

### Set global variables

In [None]:
# Parse hytea/config.yaml (resolved relative to the current working directory)
# and wrap the resulting mapping in a DotDict.
config_file = Path('hytea', 'config.yaml')
with config_file.open(mode='r') as f:
    raw_config = safe_load(f)
CFG = DotDict.from_dict(raw_config)

In [None]:
# Directory the experiment CSV files are read from, and directory the figures
# are written to (both relative to the current working directory).
DATA = Path('results', 'data')
PLOTS = Path('results', 'plots')

# Create the figure directory (and any missing parents) if it is not there yet.
PLOTS.mkdir(exist_ok=True, parents=True)

### Define function to load data

In [None]:
def load_data(name: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """ Get both dataframes for a given experiment name.

    The experiment is expected to be stored in ``DATA`` as numbered file pairs
    ``{name}{i}tr.csv`` (training rewards) and ``{name}{i}s.csv`` (summary),
    with ``i`` running from 1 up to the number of ``{name}*tr.csv`` files found.

    Args:
        name: Prefix of the experiment's CSV files (e.g. ``'ab'``).

    Returns:
        A ``(trdf, sdf)`` tuple, both with an index named ``runID``:

        * ``trdf``: all training files concatenated column-wise, with the
          ``__MIN`` / ``__MAX`` columns dropped and the `` - train_reward``
          suffix stripped from the column names, then transposed so that every
          remaining column becomes one row.
        * ``sdf``: all summary files concatenated row-wise, restricted to the
          hyperparameter columns plus ``group_name`` and ``test_reward``.

    Raises:
        FileNotFoundError: If no ``{name}*tr.csv`` file exists in ``DATA``, or
            (raised by pandas) if one of the numbered files is missing.
        KeyError: If a summary file lacks one of the selected columns.
    """

    summary_columns = [
        'agent.bl_sub', 'agent.ent_reg_weight', 'agent.gamma', 'group_name',
        'network.hidden_activation', 'network.hidden_size', 'network.num_layers',
        'optimizer.lr', 'optimizer.lr_decay', 'optimizer.lr_step', 'test_reward',
    ]

    # The number of training files determines how many numbered parts to read.
    n_parts = len(list(DATA.glob(f'{name}*tr.csv')))
    if n_parts == 0:
        # Fail with a clear message instead of pandas' "No objects to concatenate".
        raise FileNotFoundError(f"no '{name}*tr.csv' files found in '{DATA}'")
    part_ids = range(1, n_parts + 1)

    trdf = pd.concat(
        [pd.read_csv(DATA / f'{name}{i}tr.csv', index_col=0) for i in part_ids],
        axis=1,
    )

    # Drop the columns with __MIN or __MAX in the name in a single pass.
    is_min_or_max = trdf.columns.str.contains('__MIN|__MAX', regex=True)
    trdf = trdf.loc[:, ~is_min_or_max]

    # Strip the " - train_reward" suffix; regex=False keeps the match literal
    # regardless of the pandas version's default.
    trdf.columns = trdf.columns.str.replace(' - train_reward', '', regex=False)

    # Transpose so the remaining columns become the rows.
    trdf = trdf.T
    trdf.index.name = 'runID'

    sdf = pd.concat(
        [pd.read_csv(DATA / f'{name}{i}s.csv', index_col=0) for i in part_ids],
        axis=0,
    )

    # Keep only the columns we are interested in.
    sdf = sdf.loc[:, summary_columns]
    sdf.index.name = 'runID'

    return trdf, sdf

### Define function to plot global rewards

In [None]:
def plot_rewards(
    grouper: pd.core.groupby.DataFrameGroupBy, df_tr: pd.DataFrame, df_s: pd.DataFrame,
    title: str, max: bool = False
) -> plt.Figure:
    """
    Plot the rewards on a global episode scale.

    Every group of ``grouper`` contributes one training curve; the curves are
    laid out one after another on the x-axis, so group ``i`` (0-based, in
    natural name order) covers episodes ``i*L + 1 .. (i+1)*L`` where ``L`` is
    the number of columns of ``df_tr`` rows of the first group. The raw curve
    is drawn semi-transparent with a Savitzky-Golay smoothed curve on top. At
    the end of each segment the group's test reward is marked with a red cross,
    and a smoothed dashed line connects those test rewards.

    Args:
        grouper: Group-by object whose group names select the rows of ``df_s``
            via its ``group_name`` column.
        df_tr: Training rewards; rows are looked up by the index labels of the
            matching ``df_s`` rows.
        df_s: Summary frame with ``group_name`` and ``test_reward`` columns.
        title: Figure title; `` (avg)`` or `` (max)`` is appended.
        max: If True, the maximum reward is plotted instead of the mean.

    Returns:
        The created matplotlib figure.

    Raises:
        ValueError: If ``grouper`` contains no groups.
    """

    def natural_key(group_name) -> list:
        # re.split with a capturing group alternates text / digit-run tokens,
        # so odd positions are always digit runs and compare as integers.
        tokens = re.split(r'(\d+)', str(group_name))
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    def smooth(values, window: int, polyorder: int) -> np.ndarray:
        # Savitzky-Golay smoothing that degrades gracefully on short inputs:
        # the window is clamped to an odd length not exceeding the data, and
        # the data is returned unsmoothed when no valid window remains.
        values = np.asarray(values)
        window = min(window, len(values))
        if window % 2 == 0:
            window -= 1
        if window <= polyorder:
            return values
        return savgol_filter(values, window, polyorder)

    # The parameter name shadows the builtin; alias it for readability.
    use_max = max

    # Group names are ordered lexicographically by pandas ('Gen10' < 'Gen2');
    # order them naturally so the segments appear in numeric order.
    group_names = sorted(grouper.groups.keys(), key=natural_key)
    if not group_names:
        raise ValueError('grouper contains no groups to plot')

    dfs: list[pd.DataFrame] = [
        df_tr.loc[df_s.loc[df_s['group_name'] == name].index]
        for name in group_names
    ]

    fig, ax = plt.subplots(figsize=(10, 5))

    # Width of one segment on the global episode axis.
    n_episodes = dfs[0].shape[1]

    test_points = []

    for i, df in enumerate(dfs):
        episodes = np.arange(i*n_episodes+1, (i+1)*n_episodes+1)
        data = df.max(axis=0) if use_max else df.mean(axis=0)
        ax.plot(episodes, data, alpha=0.5, color='tab:blue', zorder=1)
        ax.plot(episodes, smooth(data, 51, 3), color='tab:blue', zorder=2)

        # take average/max test reward and mark it at the end of the segment
        test_rewards = df_s.loc[df.index, 'test_reward']
        data_point = test_rewards.max() if use_max else test_rewards.mean()
        ax.scatter((i+1)*n_episodes, data_point, color='tab:red', marker='x', s=100, zorder=4)
        test_points.append(data_point)

    # plot the (smoothed) average/max test reward across the segments
    smoothed_test_points = smooth(test_points, 5, 2)
    ax.plot(
        np.arange(1, len(test_points)+1)*n_episodes, smoothed_test_points,
        linestyle='--', color='tab:red', zorder=3,
    )

    ax.set_xlabel('global episode')
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    # show tick values in thousands, e.g. 1500 -> '1.5k'
    ax.xaxis.set_major_formatter(lambda x, pos: f'{x/1000:.1f}k')

    ax.legend(handles=[
        plt.Line2D([0], [0], color='tab:blue', label='train reward'),
        plt.Line2D([0], [0], color='tab:red', label='test reward'),
    ])

    ax.set_ylabel('max. reward' if use_max else 'avg. reward')
    ax.set_title(title + (' (max)' if use_max else ' (avg)'))
    fig.tight_layout()

    return fig

### Load data

In [None]:
# Collect the training ('tr') and summary ('s') frames of every experiment
# under its short key, then wrap the nested mapping in a DotDict.
db = {}
for name in ('ab', 'cp', 'll'):
    trdf, sdf = load_data(name)
    db[name] = dict(tr=trdf, s=sdf)

DB: DotDict[str, DotDict[str, pd.DataFrame]] = DotDict.from_dict(db)

### Plot global rewards

In [None]:
# Draw and save the average-reward figure of every experiment.
for env_key, env_title, filename in (
    ('ab', 'AcroBot-v1', 'ab_avg_rewards.png'),
    ('cp', 'CartPole-v1', 'cp_avg_rewards.png'),
    ('ll', 'LunarLander-v2', 'll_avg_rewards.png'),
):
    experiment = getattr(DB, env_key)
    fig = plot_rewards(experiment.s.groupby('group_name'), experiment.tr, experiment.s, env_title)
    fig.savefig(PLOTS / filename, dpi=500)

In [None]:
# Draw and save the maximum-reward figure of every experiment.
for env_key, env_title, filename in (
    ('ab', 'AcroBot-v1', 'ab_max_rewards.png'),
    ('cp', 'CartPole-v1', 'cp_max_rewards.png'),
    ('ll', 'LunarLander-v2', 'll_max_rewards.png'),
):
    experiment = getattr(DB, env_key)
    fig = plot_rewards(
        experiment.s.groupby('group_name'), experiment.tr, experiment.s, env_title, max=True
    )
    fig.savefig(PLOTS / filename, dpi=500)

### Explore population diversity per generation

In [None]:
def print_occurrences_table(
    grouper: pd.core.groupby.DataFrameGroupBy, caption: str, label: str
) -> None:
    """Print a LaTeX table with the occurrences of each hyperparameter value per generation.

    The table has one row per (hyperparameter, value) pair and one column per
    group of ``grouper``; groups are taken in natural name order (so 'Gen2'
    precedes 'Gen10') and labelled 1..N in the header. The candidate values of
    a hyperparameter are read from the module-level ``CFG`` at
    ``CFG.<dotted column name>.vals``; values that do not occur in a group are
    printed as 0.

    Args:
        grouper: Group-by object over a summary frame. Every column except
            ``test_reward`` and ``group_name`` is treated as a hyperparameter.
        caption: Text placed in the table's ``\\caption``.
        label: Text placed in the table's ``\\label``.

    Raises:
        ValueError: If ``grouper`` contains no groups.
    """

    def natural_key(group_name) -> list:
        # re.split with a capturing group alternates text / digit-run tokens,
        # so odd positions are always digit runs and compare as integers.
        tokens = re.split(r'(\d+)', str(group_name))
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    # Iterating a group-by yields (name, frame) pairs in lexicographic name
    # order; reorder them so column i of the table really is generation i.
    generations = [
        group for _, group in sorted(grouper, key=lambda item: natural_key(item[0]))
    ]
    if not generations:
        raise ValueError('grouper contains no groups to tabulate')
    n_generations = len(generations)

    # All groups share the same columns, so the first one is representative.
    relevant_columns = [
        name for name in generations[0].columns
        if name not in ['test_reward', 'group_name']
    ]

    # Per generation: {hyperparameter: {value: number of rows with that value}}.
    occurrences = [
        {param: group[param].value_counts().to_dict() for param in relevant_columns}
        for group in generations
    ]

    tab = '    '

    print(r'\begin{table*}[htbp]')
    print(tab + r'\centering')
    print(tab + r'\begin{tabular}')
    print(tab * 2 + r'{|c|' + 'c' * n_generations + '|}')
    print(tab * 2 + r'\toprule')
    print(tab * 2 + r'\textbf{param - value} & ' + ' & '.join([f'\\textbf{{{gen}}}' for gen in range(1, n_generations+1)]) + r'\\')
    print(tab * 2 + r'\midrule')
    for param in relevant_columns:
        # Walk the dotted column name through CFG with getattr instead of eval.
        node = CFG
        for part in param.split('.'):
            node = getattr(node, part)

        parsed_param = param.replace('_', ' ').replace('agent.', '').replace('network.', '').replace('optimizer.', '').replace('hidden ', '')
        for val in node.vals:
            # NOTE(review): counts are halved as in the original code —
            # presumably every configuration appears twice in the summary
            # frame; confirm against how the runs are logged.
            cells = [f'& {occurrences[i][param].get(val, 0) // 2}' for i in range(n_generations)]
            print(' '.join([tab * 2 + f'{parsed_param} - {val}', *cells, r'\\']))
        print(tab * 2 + r'\midrule')
    print(tab * 2 + r'\bottomrule')
    print(tab + r'\end{tabular}')
    print(tab + rf'\caption{{{caption}}}')
    print(tab + rf'\label{{{label}}}')
    print(r'\end{table*}')

In [None]:
# LaTeX occurrence table for the 'ab' experiment.
print_occurrences_table(
    grouper=DB.ab.s.groupby('group_name'),
    caption='AcroBot-v1',
    label='tab:ab_occ',
)

In [None]:
# LaTeX occurrence table for the 'cp' experiment.
print_occurrences_table(
    grouper=DB.cp.s.groupby('group_name'),
    caption='CartPole-v1',
    label='tab:cp_occ',
)

In [None]:
# LaTeX occurrence table for the 'll' experiment.
print_occurrences_table(
    grouper=DB.ll.s.groupby('group_name'),
    caption='LunarLander-v2',
    label='tab:ll_occ',
)