In [1]:
import os.path
from glob import glob

import numpy as np
import pandas as pd

from base_rl.eval_policy import EvalDiscreteStatePolicy
from benchmarks.policy_benchmarks import PolicyBenchmarksParallel
from envs.env_creator import env_creator
from experiments.eval_configs import ExperimentsEvaluationConfigs

In [2]:
# Evaluation settings shared by every benchmarked policy.
steps_per_episode = 1_000  # passed to the environment as 'steps_per_episode'
device = 'cpu'  # passed to the environment as 'device'


# Load All Policies
Load all the saved policies from the experiment folder.

In [3]:
# Recursively collect every saved policy pickle below the experiments folder.
# glob returns matches in arbitrary (filesystem-dependent) order, so the list is
# sorted to make the evaluation order reproducible across runs and machines.
# The pattern is a plain string: the original f-string had no placeholders.
policy_files = sorted(glob('tmp/experiments/**/*policy.pkl', recursive=True))
len(policy_files)

168

# Build Configs
The configs object contains the metadata needed to test a policy: the experiment id, the model used to generate the policy, the model path and the policy path.

In [4]:


# Build one evaluation config per policy file. The metadata is read from the
# policy's location relative to 'tmp/experiments/': the first path component is
# the experiment id and the third is the name of the model behind the policy.
eval_configs = []
for policy_path in policy_files:
    path_parts = policy_path.split('tmp/experiments/')[-1].split('/')
    exp_id = path_parts[0]
    model_name = path_parts[2]
    eval_configs.append(
        ExperimentsEvaluationConfigs(
            experiment_id=exp_id,
            model_name=model_name,
            # The model lives in the experiment's 'state_quantization' folder.
            model_path=os.path.join('tmp', 'experiments', exp_id, 'state_quantization', model_name),
            policy_path=policy_path,
        )
    )

# Benchmark all policies
This step builds the evaluator objects and benchmarks the policies in parallel.

In [None]:


benchmarks = {}

# Wrap every policy in an evaluator. Each evaluator is tagged with its policy
# path so its results can be traced back to the policy file afterwards.
evaluators = []
for eval_config in eval_configs:
    # Shared episode/device settings plus the model this particular policy uses.
    env_kwargs = dict(
        steps_per_episode=steps_per_episode,
        device=device,
        model_path=eval_config.model_path,
    )
    evaluators.append(
        EvalDiscreteStatePolicy(
            policy=eval_config.get_policy(),
            env_creator=env_creator,
            env_kwargs=env_kwargs,
            tag=eval_config.policy_path,
        )
    )

# Run all evaluators through the parallel benchmark (10 epochs, pool of 4).
policy_benchmarks = PolicyBenchmarksParallel(evaluators=evaluators, epochs=10, pool_size=4)
policy_benchmarks.benchmark()

  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  8%|▊         | 14/168 [03:39<44:15, 17.25s/it]  

In [None]:

from tbparse import SummaryReader

# Algorithm folder names that the results cell treats as offline algorithms:
# for these it records a dataset size instead of a save type.
offline_algos = ['rmin', 'policy_iteration']

# Accumulator for the per-epoch evaluation rows.
results = []

# Read the scalar events logged below the experiments folder, keeping the
# directory name of each event file as an extra column.
summary_reader = SummaryReader(
    'tmp/experiments',
    event_types={'scalars'},
    extra_columns={'dir_name'},
)
scalars = summary_reader.scalars

# Build and Save Results Dataframe

In [None]:
from tensorboard_utils.scalar import get_last_scalar, get_first_scalar


df_save_path = 'tmp/evaluation_results'

# Start from an empty list every time this cell runs. Without this reset the
# rows are appended to whatever an earlier run of this cell left in `results`,
# so re-running the cell silently duplicates every row in the saved DataFrame.
results = []

for evaluator in policy_benchmarks.evaluated_evaluators:
    # The evaluator tag is the policy path. Relative to 'tmp/experiments/' its
    # components are <exp_id>/<algo>/<model_name>/...; split it only once.
    path_parts = evaluator.tag.split('tmp/experiments/')[-1].split('/')
    exp_id, algo, model_name = path_parts[0], path_parts[1], path_parts[2]

    is_offline = algo in offline_algos
    # Offline algorithms carry the dataset size as the fourth path component;
    # the others encode a save type as the file-name prefix before '_policy'.
    dataset_size = int(path_parts[3]) if is_offline else None
    save_type = None if is_offline else path_parts[-1].split('_policy')[0]

    is_untrained = 'untrained' in model_name
    trained_model = not is_untrained
    model_path = os.path.join('tmp', 'experiments', exp_id, 'state_quantization', model_name)

    if is_untrained:
        # Untrained models are looked up under the model name without the
        # 'untrained_' prefix, taking the first logged eval loss
        # (presumably the loss before any training -- confirm).
        model_loss = get_first_scalar(
            scalars=scalars,
            exp_id=exp_id,
            exp_type='forecasting_models',
            model=model_name.split('untrained_')[-1],
            scalar_tag='Forecasting/Eval/loss'
        )
    else:
        # Trained models use the last logged eval loss.
        model_loss = get_last_scalar(
            scalars=scalars,
            exp_id=exp_id,
            exp_type='forecasting_models',
            model=model_name,
            scalar_tag='Forecasting/Eval/loss'
        )

    # Offline policies expose a policy table, the others a Q-table.
    total_states = evaluator.policy.policy_table.size if is_offline else len(evaluator.policy.q_table)

    # One row per evaluation epoch of this policy.
    results.extend(
        {
            'exp_id': exp_id,
            'rewards': epoch_reward,
            'model_name': model_name,
            'algo': algo,
            'dataset_size': dataset_size,
            'save_type': save_type,
            'trained_model': trained_model,
            'unique_obs': np.unique(traj).size,
            'model_path': model_path,
            'model_loss': model_loss,
            'total_states': total_states
        }
        for epoch_reward, traj in zip(evaluator.eval_rewards_per_epoch, evaluator.eval_trajectories)
    )

all_results_df = pd.DataFrame(results)
# Keep the rows saved as 'x_interval' plus the rows without a save type
# (i.e. the offline algorithms).
keep_mask = (all_results_df['save_type'] == 'x_interval') | (all_results_df['save_type'].isna())
results_df = all_results_df[keep_mask]
results_df.to_pickle(df_save_path)
results_df