In [2]:
import json
import os
import re
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import ray

from rl_trading.simulation.env import StockExchangeEnv0
from rl_trading.data.indicators import *

# Root of the experiment results. load_checkpoints() walks it as
# <root_dir>/<alg_name>/<exp_name>/<result_dir>/<checkpoint entry>.
root_dir = '../exp_results/baseline_correct'

In [3]:
# Observation layout passed to StockExchangeEnv0 through its `state_config`
# argument. Every `technical_indicators` entry is an
# (indicator, keyword arguments, timeframe) triple.
# NOTE(review): the timeframe strings presumably select the bar resolution the
# indicator is computed on -- confirm in rl_trading.data.indicators.
state_config = {
    'market_state': ['vwap'],
    'technical_indicators': [
        # --- '1min' timeframe ---
        (RPC, {}, '1min'),
        (EMA, {'timeperiod': 5, 'normalize': True}, '1min'),
        (EMA, {'timeperiod': 13, 'normalize': True}, '1min'),
        (RSI, {'timeperiod': 7, 'normalize': True}, '1min'),
        (BBANDS, {'timeperiod': 10}, '1min'),
        # --- '1h' timeframe ---
        (EMA, {'timeperiod': 20, 'normalize': True}, '1h'),
        (EMA, {'timeperiod': 50, 'normalize': True}, '1h'),
        (RSI, {'timeperiod': 14, 'normalize': True}, '1h'),
        (BBANDS, {'timeperiod': 20}, '1h'),
        (
            MACD_DIFF,
            {'fastperiod': 12, 'slowperiod': 26, 'signalperiod': 9, 'normalize': True},
            '1h',
        ),
        # --- '1d' timeframe ---
        (EMA, {'timeperiod': 50, 'normalize': True}, '1d'),
        (EMA, {'timeperiod': 200, 'normalize': True}, '1d'),
        (RSI, {'timeperiod': 14, 'normalize': True}, '1d'),
        (BBANDS, {'timeperiod': 20}, '1d'),
        (
            MACD_DIFF,
            {'fastperiod': 12, 'slowperiod': 26, 'signalperiod': 9, 'normalize': True},
            '1d',
        ),
    ],
}

In [4]:
from rl_trading.utils import load_model

def _sorted_subdirs(path):
    """Return the names of the sub-directories of ``path`` in sorted order.

    Plain files are skipped so that stray entries (notes, OS metadata files)
    do not make the directory walk fail.
    """
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )


def load_checkpoints(root=None):
    """Collect one checkpoint path per algorithm and ``steps=<N>`` value.

    The tree is walked as
    ``<root>/<alg_name>/<exp_name>/<result_dir>/<checkpoint entry>``.
    Only result directories whose name contains ``steps=<digits>`` are
    considered, and inside them every entry whose name contains
    ``'checkpoint'`` is a candidate.

    Args:
        root: Directory to scan. When omitted, the notebook-level ``root_dir``
            is used, which keeps the original zero-argument call working.

    Returns:
        dict: ``{alg_name: {n_steps: checkpoint_path}}``. Every algorithm
        directory gets a key, even when nothing was found below it. When
        several candidates map to the same ``(alg_name, n_steps)`` the one
        visited last wins; directories and entries are visited in
        lexicographic order, so the result is deterministic.
    """
    if root is None:
        root = root_dir

    checkpoints = {}
    for alg_name in _sorted_subdirs(root):
        alg_dir = os.path.join(root, alg_name)
        found = checkpoints[alg_name] = {}
        for exp_name in _sorted_subdirs(alg_dir):
            exp_dir = os.path.join(alg_dir, exp_name)
            for result_name in _sorted_subdirs(exp_dir):
                # Names that mention "steps" without a number are skipped
                # instead of raising IndexError.
                match = re.search('steps=([0-9]+)', result_name)
                if match is None:
                    continue
                n_steps = int(match.group(1))

                result_dir = os.path.join(exp_dir, result_name)
                # os.listdir order is arbitrary; sort so the chosen
                # checkpoint does not depend on the file system.
                for entry in sorted(os.listdir(result_dir)):
                    if 'checkpoint' in entry:
                        found[n_steps] = os.path.join(result_dir, entry)
    return checkpoints

def _start_episode(n_steps):
    """Create a ``StockExchangeEnv0`` and reset it.

    The environment is built with ``max_steps=n_steps``, ``_n_days=1`` and
    ``seed=42`` and is reset right after construction, so every environment
    produced here has gone through exactly the same sequence of calls.

    Returns:
        tuple: ``(env, initial_state)`` where ``initial_state`` is the first
        element returned by ``env.reset()``.
    """
    env = StockExchangeEnv0(
        sim_config={'max_steps': n_steps},
        state_config=state_config,
        _n_days=1,
        seed=42,
    )
    initial_state, _ = env.reset()
    return env, initial_state


def _lookahead_action(env):
    """Choose the baseline action by peeking at the next ``'1min'`` price.

    Returns 1 when the next price is higher than the current one, 2 when it
    is lower and 0 when it is unchanged.
    NOTE(review): presumably 1 = buy, 2 = sell, 0 = hold -- confirm against
    the environment's action space.
    """
    prices = env.price_data['1min']
    idx = env.current_idx
    current_price = prices[idx]
    next_price = prices[idx + 1]
    if next_price > current_price:
        return 1
    if next_price < current_price:
        return 2
    return 0


def evaluate_policies(checkpoints):
    """Print the reward of every checkpointed policy next to a look-ahead baseline.

    For each ``(n_steps, checkpoint)`` pair the model is loaded with
    ``load_model`` and run greedily (``explore=False``) for one episode in a
    fresh environment. A second, identically configured environment is driven
    by a baseline that looks at the next price before acting. The summed
    rewards of both runs are printed.

    Args:
        checkpoints: Mapping ``{alg_name: {n_steps: checkpoint_path}}`` as
            produced by ``load_checkpoints``.

    Returns:
        None. Results are reported through ``print`` only.
    """
    for alg_name in checkpoints:
        for n_steps, checkpoint in checkpoints[alg_name].items():
            model = load_model(checkpoint)

            # Both environments are constructed and reset the same way, so the
            # baseline is not stepped from an un-reset environment.
            sim_env, state = _start_episode(n_steps)
            eval_env, _ = _start_episode(n_steps)

            reward_total = 0
            baseline_reward = 0
            policy_done = False
            baseline_done = False
            # Each episode ends on its own flags; an environment that has
            # already finished is not stepped again.
            while not (policy_done and baseline_done):
                if not baseline_done:
                    action = _lookahead_action(eval_env)
                    _, reward, terminated, truncated, _ = eval_env.step(action)
                    baseline_reward += reward
                    # The 4th element of the step result is treated as the
                    # truncation flag (Gymnasium convention), so the loop
                    # also stops for time-limit endings.
                    baseline_done = bool(terminated or truncated)

                if not policy_done:
                    action = model.compute_single_action(state, explore=False)
                    state, reward, terminated, truncated, _ = sim_env.step(action)
                    reward_total += reward
                    policy_done = bool(terminated or truncated)

            print('n_steps:', n_steps)
            print('baseline_reward:', baseline_reward)
            print('reward_total:', reward_total)
            # Drop the reference before the next model is loaded.
            del model

In [5]:
# Discover the saved checkpoints under root_dir, then print each policy's total
# reward next to the look-ahead baseline's reward for the same episode length.
checkpoints = load_checkpoints()
evaluate_policies(checkpoints)

2023-06-16 00:17:45,019	INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265
2023-06-16 00:18:00,924	INFO trainable.py:172 -- Trainable.setup took 17.769 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


n_steps: 30
baseline_reward: 43.69773366187292
reward_total: 14.533179177537022




n_steps: 10
baseline_reward: 23.910725800918954
reward_total: 23.32282505819785




n_steps: 5
baseline_reward: 3.312600487506643
reward_total: 3.312600487506643


2023-06-16 00:18:31,217	INFO trainable.py:172 -- Trainable.setup took 10.095 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


n_steps: 60
baseline_reward: 111.7008164300205
reward_total: 80.91736596007468


2023-06-16 00:18:41,635	INFO trainable.py:172 -- Trainable.setup took 10.163 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


n_steps: 120
baseline_reward: 206.08981530180426
reward_total: 129.2678949645051




n_steps: 300
baseline_reward: 631.5881569909507
reward_total: 254.78855158085207


2023-06-16 00:19:03,214	INFO trainable.py:172 -- Trainable.setup took 10.063 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


n_steps: 600
baseline_reward: 1179.012786375477
reward_total: 271.1943010224495
