In [1]:
import pfrl
import torch.nn.functional as F
import torch.nn as nn
import torch
import random
import numpy
import datetime
import sys




In [2]:
from stocknet.envs.bc_env import BC5Env
from stocknet.envs.market_clients.csv.client import CSVClient

In [3]:
dtype = torch.float32
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device)

device: cuda:0


In [4]:
data_client = CSVClient('../../data_source/bitcoin_5_2017T0710-2021T103022.csv')
env = BC5Env(data_client, columns=["Open", "High", "Low", "Close"], useBudgetColumns=True)

In [6]:
import torch
import torch.nn as nn
from torch.optim import SGD
import math
import numpy as np

class PredictorSimple(nn.Module):
    def __init__(self, size, inputDim, n_actions, removeHistoryData = True):
        super().__init__()
        self.size = size
        self.rhd = removeHistoryData
        self.inDim = inputDim
        self.ActionHistoryDim = 3
        self.conv1 = nn.Conv1d(inputDim, inputDim*3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(inputDim*3, inputDim*2, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(inputDim*2*size, inputDim*size)
        self.fc2 = nn.Linear(inputDim*size, size)
        self.output_layer = nn.Linear(size, n_actions)
        self.softmax = nn.Softmax(1)

    def forward(self, inputs):
        batch_size, feature_len, seq_len  = inputs.shape[0], inputs.shape[1],inputs.shape[2]
        if self.rhd:
            out = inputs[:,0: feature_len - self.ActionHistoryDim, :]
        else:
            out = inputs
        out = torch.tanh(self.conv1(out))
        out = torch.tanh(self.conv2(out))
        out = out.view(-1, self.inDim*2*self.size)
        out = torch.tanh(self.fc1(out))
        out = torch.tanh(self.fc2(out))
        out = self.output_layer(out)
        return pfrl.action_value.DiscreteActionValue(out)

In [7]:
training_size = 1000 #traning dataのデータ数
epochs_num = 10000 #traningのepoch回数
hidden_size = 500 #LSTMの隠れ層の次元数
batch_size = 32

In [8]:
obs = env.reset()
inputDim = obs.shape[0]
size = obs.shape[1]

In [9]:
obs.shape

(6, 28)

In [10]:
model = PredictorSimple(size, inputDim, 3, False) #modelの宣言

#model = Predictor(6, hidden_size, 5) #modelの宣言
criterion = nn.MSELoss() #評価関数の宣言

In [16]:
#optimizer = SGD(model.parameters(), lr=0.0001) #最適化関数の宣言
optimizer = torch.optim.Adam(model.parameters(), eps=1e-4)
# Set the discount factor that discounts future rewards.
gamma = 0.9

# Use epsilon-greedy for exploration
explorer = pfrl.explorers.ConstantEpsilonGreedy(
    epsilon=0.1, random_action_func=env.action_space.sample)

# DQN uses Experience Replay.}
# Specify a replay buffer and its capacity.
replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=10**2)

# Since observations from CartPole-v0 is numpy.float64 while
# As PyTorch only accepts numpy.float32 by default, specify
# a converter as a feature extractor function phi.
phi = lambda x: x.astype(numpy.float32, copy=False)

# Set the device id to use GPU. To use CPU only, set it to -1.
gpu = 0

# Now create an agent that will interact with the environment.
agent = pfrl.agents.DoubleDQN(
    model,
    optimizer,
    replay_buffer,
    gamma,
    explorer,
    replay_start_size=40,
    update_interval=5,
    target_update_interval=20,
    phi=phi,
    gpu=gpu,
)

In [17]:
n_episodes = 500
max_step_len = 10000
mr = 0
pl = 0
print(datetime.datetime.now(),'start episodes')
for i in range(1, n_episodes + 1):
    obs = env.reset()
    #obs = obs.to('cpu').detach().numpy().copy()
    R = 0  # return (sum of rewards)
    t = 0  # time step
    while True:
        # Uncomment to watch the behavior in a GUI window
        #env.render()
        action = agent.act(obs)
        obs, reward, done, ops = env.step(action)
        #obs = obs.to('cpu').detach().numpy().copy()
        R += reward
        t += 1
        reset = t == max_step_len
        agent.observe(obs, reward, done, reset)
        if reset:
            print("max steps")
            break
        elif done:
            break
    if i % 10 == 0:
        print('statistics:', agent.get_statistics(), 'R:', R/t, 'Mean R:', mr/10, 'PL:', env.pl, 'Mean PL:', pl/10)
        env.render()
        mr = 0
        pi = 0
    else:
        pl += env.pl
        mr += R/t
print('Finished.')

2022-03-16 15:02:43.819698 start episodes
max steps
max steps
max steps
statistics: [('average_q', 31.767239), ('average_loss', 3.699133024215698), ('cumulative_steps', 38481), ('n_updates', 7689), ('rlen', 100)] R: -0.019442876537156362 Mean R: 1.0954418741442282 PL: -1.003199271297293 Mean PL: 33688.07650264995
pl: -1.003199271297293
max steps
max steps
statistics: [('average_q', 52.23422), ('average_loss', 6.085895175933838), ('cumulative_steps', 62283), ('n_updates', 12449), ('rlen', 100)] R: -0.011851847258422242 Mean R: 1.575044573494469 PL: -7.951915339945941 Mean PL: 83541.00820695575
pl: -7.951915339945941
max steps
statistics: [('average_q', 0.14172278), ('average_loss', 0.0002792327536371886), ('cumulative_steps', 91365), ('n_updates', 18266), ('rlen', 100)] R: -0.0014808287719497147 Mean R: 0.30175289874105715 PL: -1.0770170204134135 Mean PL: 90077.56734440688
pl: -1.0770170204134135
max steps
statistics: [('average_q', 33.415085), ('average_loss', 2.8401054835319517), ('cu

max steps
statistics: [('average_q', 16.425484), ('average_loss', 1.192324856519699), ('cumulative_steps', 797678), ('n_updates', 159528), ('rlen', 100)] R: -0.0034147670785224897 Mean R: 0.19861003857973752 PL: -1.2109794838637773 Mean PL: 781185.7237962111
pl: -1.2109794838637773
max steps
max steps
max steps
max steps
statistics: [('average_q', 42.944096), ('average_loss', 5.816650772094727), ('cumulative_steps', 842691), ('n_updates', 168531), ('rlen', 100)] R: 4.296956452594272 Mean R: 1.1496184731671397 PL: 139141.87127407407 Mean PL: 814484.040848593
pl: 139141.87127407407
max steps
max steps
statistics: [('average_q', 37.161118), ('average_loss', 3.234438171386719), ('cumulative_steps', 877409), ('n_updates', 175474), ('rlen', 100)] R: -0.00203714998718376 Mean R: 1.6643493558379536 PL: -1.2616271241567278 Mean PL: 863773.6777722139
pl: -1.2616271241567278
max steps
max steps
statistics: [('average_q', 9.384017), ('average_loss', 0.46726679474115373), ('cumulative_steps', 90584

In [18]:
torch.save(model.state_dict(), 'bc_rl_5min_macd-bolinger_with_budgets_v1')

In [12]:
model.load_state_dict(torch.load('fx_rl_5min_macd_v1', map_location=device))

<All keys matched successfully>

### DoubleDQN with LSTM

In [5]:
data_client = CSVClient()
env = BC5Env(data_client, columns=["macd"], useBudgetColumns=True, featureFirst=False,use_diff=True)

In [6]:
training_size = 1000 #traning dataのデータ数
epochs_num = 10000 #traningのepoch回数
hidden_size = 500 #LSTMの隠れ層の次元数
batch_size = 32

In [7]:
obs = env.reset()
size = obs.shape[0]
inputDim = obs.shape[1]

In [8]:
inputDim

6

In [9]:
import torch
import torch.nn as nn
from torch.optim import SGD
import math
import numpy as np

class PredictorLSTM(nn.Module):
    def __init__(self, inputDim, hiddenDim,  n_actions):
        super(PredictorLSTM, self).__init__()
        self.ActionHistoryDim = 2
        self.rnn = nn.LSTM(input_size = inputDim - self.ActionHistoryDim,
                            hidden_size = hiddenDim,
                            batch_first=True)
        self.rnn.to(device)
        self.output_layer = nn.Linear(hiddenDim, n_actions)#+self.ActionHistoryDim, n_actions)
        self.output_layer.to(device)
    
    def forward(self, inputs, hidden0=None):
        batch_size, seq_len, feature_len = inputs.shape[0], inputs.shape[1],inputs.shape[2]
        ohlc_inputs = inputs[:,:, 0: feature_len - self.ActionHistoryDim]
        last_actions = inputs[:, -1, -self.ActionHistoryDim:] # [1, ActionHistoryDim] (ex.torch.Size([1, 3]))
        output, (hidden, cell) = self.rnn(ohlc_inputs, hidden0) #LSTM層
        output = output[:, -1, :] # [1, hidden_size] (ex. torch.Size([1, 50]))
        #output = torch.cat((output, last_actions), dim=1) #[1, hidden_size+ActionHistoryDim] (ex.torch.Size([1, 53]))
        output = self.output_layer(output) #全結合層
        return pfrl.action_value.DiscreteActionValue(output)

In [10]:
model = PredictorLSTM(inputDim, 50, 3) #modelの宣言
criterion = nn.MSELoss() #評価関数の宣言

In [11]:
#optimizer = SGD(model.parameters(), lr=0.0001) #最適化関数の宣言
optimizer = torch.optim.Adam(model.parameters(), eps=1e-3)
# Set the discount factor that discounts future rewards.
gamma = 0.9

# Use epsilon-greedy for exploration
explorer = pfrl.explorers.ConstantEpsilonGreedy(
    epsilon=0.1, random_action_func=env.action_space.sample)

# DQN uses Experience Replay.
# Specify a replay buffer and its capacity.
replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=10**2)

# Since observations from CartPole-v0 is numpy.float64 while
# As PyTorch only accepts numpy.float32 by default, specify
# a converter as a feature extractor function phi.
phi = lambda x: x.astype(numpy.float32, copy=False)

# Set the device id to use GPU. To use CPU only, set it to -1.
gpu = 0

# Now create an agent that will interact with the environment.
agent = pfrl.agents.DoubleDQN(
    model,
    optimizer,
    replay_buffer,
    gamma,
    explorer,
    replay_start_size=40,
    update_interval=5,
    target_update_interval=20,
    phi=phi,
    gpu=gpu,
)

In [12]:
n_episodes = 5000
max_step_len = 10000
mr = 0
pl = 0
print(datetime.datetime.now(),'start episodes')
for i in range(1, n_episodes + 1):
    obs = env.reset()
    #obs = obs.to('cpu').detach().numpy().copy()
    R = 0  # return (sum of rewards)
    t = 0  # time step
    while True:
        # Uncomment to watch the behavior in a GUI window
        #env.render()
        action = agent.act(obs)
        obs, reward, done, ops = env.step(action)
        #obs = obs.to('cpu').detach().numpy().copy()
        R += reward
        t += 1
        reset = t == max_step_len
        agent.observe(obs, reward, done, reset)
        if reset:
            print("max steps")
            break
        elif done:
            break
    if i % 10 == 0:
        print('statistics:', agent.get_statistics(), 'R:', R/t, 'Mean R:', mr/10, 'PL:', env.pl, 'Mean PL:', pl/10)
        mr = 0
        pl = 0
    else:
        pl += env.pl
        mr += R/t
print('Finished.')

2022-04-01 22:51:43.079519 start episodes


KeyboardInterrupt: 

結果

損益がプラスの場合でもRewardがマイナスになっている。
Mean R: -350.2810497539654 PL: -25455.236926080022 Mean PL: 582725.8480107023

これよりも高いRewardだが、損益はマイナスとなるケースがある
Mean R: -192.19638108751334 PL: 2673622.0302433698 Mean PL: -19351.08942841183

stayによるrewardか、invalid_rewardのどちらか、あるいは両方が影響しているため、期待どおりの報酬となるように調査と修正が必要

In [15]:
torch.save(model.state_dict(), 'bc_rl_5min_macd_with_budget_LSTM_v2')

In [11]:
model.load_state_dict(torch.load('bc_rl_5min_macd_with_budget_LSTM_v1', map_location=device))

<All keys matched successfully>

改善点

hidden_sizeは隠れ層のFeatureSizeでござった

Task 1: num_layersが隠れ層の数なので、こちらを増やしてみる


replay bufferはバッファーされたExperience(state, action, rewards)からランダムにサンプリングして学習に用いる
このためLSTMのように前回の出力を入力に用いるモデルとは相性が悪いと思われる

ゲームのような場合、直ぐに詰んで同じ状況ばかりが入力になるので、Replay bufferによって入力をランダムにすることで、入力の状態がばらけて学習が安定するのだろうが、reset毎に初期位置をランダム化している現在のEnvの実装では、Replay bufferが無いほうがLSTMとしては都合が良さそうである

Task 2:Replay Bufferを無効にする方法はないだろうか

In [13]:
data_client = CSVClient()
env = BC5Env(data_client, columns=["macd"], useBudgetColumns=True, featureFirst=False,use_diff=True)

In [14]:
import torch
import torch.nn as nn
from torch.optim import SGD
import math
import numpy as np

class PredictorLSTM(nn.Module):
    def __init__(self, inputDim, hiddenDim, num_layers, n_actions):
        super(PredictorLSTM, self).__init__()
        self.ActionHistoryDim = 2
        self.rnn = nn.LSTM(input_size = inputDim - self.ActionHistoryDim,
                            hidden_size = hiddenDim,
                            batch_first=True,
                            num_layers=num_layers)
        self.rnn.to(device)
        self.output_layer = nn.Linear(hiddenDim, n_actions)#+self.ActionHistoryDim, n_actions)
        self.output_layer.to(device)
    
    def forward(self, inputs, hidden0=None):
        batch_size, seq_len, feature_len = inputs.shape[0], inputs.shape[1],inputs.shape[2]
        ohlc_inputs = inputs[:,:, 0: feature_len - self.ActionHistoryDim]
        last_actions = inputs[:, -1, -self.ActionHistoryDim:] # [1, ActionHistoryDim] (ex.torch.Size([1, 3]))
        output, (hidden, cell) = self.rnn(ohlc_inputs, hidden0) #LSTM層
        output = output[:, -1, :] # [1, hidden_size] (ex. torch.Size([1, 50]))
        #output = torch.cat((output, last_actions), dim=1) #[1, hidden_size+ActionHistoryDim] (ex.torch.Size([1, 53]))
        output = self.output_layer(output) #全結合層
        return pfrl.action_value.DiscreteActionValue(output)

In [15]:
model = PredictorLSTM(inputDim, inputDim,30, 3) #modelの宣言
criterion = nn.MSELoss() #評価関数の宣言

In [16]:
batch_size=10

In [17]:
#optimizer = SGD(model.parameters(), lr=0.0001) #最適化関数の宣言
optimizer = torch.optim.Adam(model.parameters(), eps=1e-3)
# Set the discount factor that discounts future rewards.
gamma = 0.9

# Use epsilon-greedy for exploration
explorer = pfrl.explorers.ConstantEpsilonGreedy(
    epsilon=0.1, random_action_func=env.action_space.sample)

# DQN uses Experience Replay.
# Specify a replay buffer and its capacity.
#replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=batch_size)
replay_buffer = pfrl.replay_buffers.EpisodicReplayBuffer(capacity=batch_size)

# Since observations from CartPole-v0 is numpy.float64 while
# As PyTorch only accepts numpy.float32 by default, specify
# a converter as a feature extractor function phi.
phi = lambda x: x.astype(numpy.float32, copy=False)

# Set the device id to use GPU. To use CPU only, set it to -1.
gpu = 0

# Now create an agent that will interact with the environment.
agent = pfrl.agents.DoubleDQN(
    model,
    optimizer,
    replay_buffer,
    gamma,
    explorer,
    minibatch_size=batch_size,
    replay_start_size=batch_size,
    update_interval=1,
    target_update_interval=100,
    phi=phi,
    gpu=gpu
)

In [8]:
n_episodes = 5000
max_step_len = 10000
mr = 0
pl = 0
print(datetime.datetime.now(),'start episodes')
for i in range(1, n_episodes + 1):
    obs = env.reset()
    #obs = obs.to('cpu').detach().numpy().copy()
    R = 0  # return (sum of rewards)
    t = 0  # time step
    while True:
        # Uncomment to watch the behavior in a GUI window
        #env.render()
        action = agent.act(obs)
        obs, reward, done, ops = env.step(action)
        #obs = obs.to('cpu').detach().numpy().copy()
        R += reward
        t += 1
        reset = t == max_step_len
        agent.observe(obs, reward, done, reset)
        if reset:
            print("max steps")
            break
        elif done:
            break
    pl += env.pl
    mr += R/t
    if i % 10 == 0:
        print('statistics:', agent.get_statistics(), 'R:', R/t, 'Mean R:', mr/10, 'PL:', env.pl, 'Mean PL:', pl/10)
        mr = 0
        pl = 0
print('Finished.')

2022-03-29 19:57:36.389617 start episodes


NameError: name 'agent' is not defined

改善点

replay bufferはバッファーされたExperience(state, action, rewards)からランダムにサンプリングして学習に用いる
このためLSTMのように前回の出力を入力に用いるモデルとは相性が悪いと思われる

ゲームのような場合、直ぐに詰んで同じ状況ばかりが入力になるので、Replay bufferによって入力をランダムにすることで、入力の状態がばらけて学習が安定するのだろうが、reset毎に初期位置をランダム化している現在のEnvの実装では、Replay bufferが無いほうがLSTMとしては都合が良さそうである

Task 1: LSTMと同じだけの隠れ層（30）を持つ全結合グラフのモデルを試してLSTMの結果と比較する
MACDの場合、既に平滑化されているためConv層がいるか微妙であり、またLSTMのグラフもConv層はないはずなので、Conv層無しで試してみる。

Task 2:OHLCの差分配列にした際、値は-1から1の範囲に収まっているが、最大値は0.5程度となっており、他の指標（Volume等）と比べて、0.5の値の持つ意味（あるいは重み）が異なっていた。
MACDの場合はどうなっているか確認し、値の範囲が適切でなければ正規化を追加で、あるいは差分をやめて正規化のみを適用する

In [4]:
data_client = CSVClient('../../data_source/bitcoin_5_2017T0710-2021T103022.csv')
env = BC5Env(data_client, columns=["macd"], useBudgetColumns=True, use_diff=True)

In [5]:
import torch
import torch.nn as nn
from torch.optim import SGD
import math
import numpy as np

class PredictorMultiple(nn.Module):
    def __init__(self, layer_num, size, inputDim, n_actions, removeHistoryData = True):
        super().__init__()
        self.size = size
        self.rhd = removeHistoryData
        self.ActionHistoryDim = 2
        if removeHistoryData:
            input_dims = inputDim - self.ActionHistoryDim
        else:
            input_dims = inputDim
        self.layerDips = size * input_dims
        self.layers = nn.ModuleList()
        for i in range(0, layer_num):
            layer = nn.Linear(self.layerDips, self.layerDips)
            self.layers.append(layer)
        
        out_in_dims = self.layerDips
        if self.rhd:
            out_in_dims += self.ActionHistoryDim
        self.output_layer = nn.Linear( out_in_dims , n_actions)

    def forward(self, inputs):
        batch_size, feature_len,seq_len = inputs.shape[0], inputs.shape[1],inputs.shape[2]
        if self.rhd:
            feature_len = feature_len - self.ActionHistoryDim
            last_actions = inputs[:, -self.ActionHistoryDim:,-1] # [1, ActionHistoryDim] (ex.torch.Size([1, 3]))
        out = inputs[:, :feature_len, :]
        layerDips = feature_len * seq_len
        #assert layerDips == self.layerDips
        out = out.view(-1, layerDips)
        for layer in self.layers:
            out = torch.tanh(layer(out))
        if self.rhd:
            out = torch.cat((out, last_actions), dim=1)
        out = torch.tanh(self.output_layer(out))
        return pfrl.action_value.DiscreteActionValue(out)

In [6]:
obs = env.reset()
inputDim, size = obs.shape

In [7]:
size

28

In [8]:
batch_size = 2

In [9]:
model = PredictorMultiple(30,size, inputDim, 3, removeHistoryData=False) #modelの宣言
criterion = nn.MSELoss() #評価関数の宣言

In [10]:
#optimizer = SGD(model.parameters(), lr=0.0001) #最適化関数の宣言
optimizer = torch.optim.Adam(model.parameters(), eps=1e-7)
# Set the discount factor that discounts future rewards.
gamma = 0.9

# Use epsilon-greedy for exploration
explorer = pfrl.explorers.ConstantEpsilonGreedy(epsilon=0.1, random_action_func=env.action_space.sample)

# DQN uses Experience Replay.
# Specify a replay buffer and its capacity.
#replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=batch_size)
replay_buffer = pfrl.replay_buffers.ReplayBuffer(capacity=batch_size)

# Since observations from CartPole-v0 is numpy.float64 while
# As PyTorch only accepts numpy.float32 by default, specify
# a converter as a feature extractor function phi.
phi = lambda x: x.astype(numpy.float32, copy=False)

# Set the device id to use GPU. To use CPU only, set it to -1.
gpu = 0

# Now create an agent that will interact with the environment.
agent = pfrl.agents.DoubleDQN(
    model,
    optimizer,
    replay_buffer,
    gamma,
    explorer,
    minibatch_size=batch_size,
    replay_start_size=batch_size,
    update_interval=1,
    target_update_interval=1000,
    phi=phi,
    gpu=gpu
)

In [11]:
import stocknet.logger as lg

In [12]:
n_episodes = 1000
max_step_len = 1000
mr = 0
pl = 0
ep_consumed_total_time = datetime.timedelta(0)
print(datetime.datetime.now(),'start episodes')

logger = lg.pt_logs(env, folder='logs/bc_rl_5min_macd_budgets_v2')
## show details
##
for i in range(1, n_episodes + 1):
    obs = env.reset()
    #obs = obs.to('cpu').detach().numpy().copy()
    R = 0  # return (sum of rewards)
    t = 0  # time step
    logs = []
    ep_start_time = datetime.datetime.now()
    while True:
        # Uncomment to watch the behavior in a GUI window
        #env.render()
        action = agent.act(obs)
        obs, reward, done, ops = env.step(action)
        R += reward
        t += 1
        reset = t == max_step_len
        agent.observe(obs, reward, done, reset)
        logger.store(obs, action, reward)
        if reset or done:
            break
    ep_end_time = datetime.datetime.now()
    ep_consumed_time = ep_end_time - ep_start_time
    ep_consumed_total_time += ep_consumed_time
    logger.save(i)
    pl += env.pl
    mr += R/t
    if i % 10 == 0:
        print('statistics:', agent.get_statistics(), 'R:', R/t, 'Mean R:', mr/10, 'PL:', env.pl, 'Mean PL:', pl/10)
        print(f"consumed time: {ep_consumed_time}, may end on :{ep_end_time + (n_episodes -i) *  ep_consumed_total_time/10}")
        ep_consumed_total_time = datetime.timedelta(0)
        mr = 0
        pl = 0
print(f'Finished on {datetime.datetime.now()}')

2022-04-09 12:57:27.476374 start episodes
statistics: [('average_q', -0.0063182074), ('average_loss', 0.00015820811412323998), ('cumulative_steps', 10000), ('n_updates', 9999), ('rlen', 2)] R: -0.0012459722706180524 Mean R: 0.00021950411447532937 PL: -0.6449722706180783 Mean PL: 0.5290041144753187
consumed time: 0:00:36.211206, may end on :2022-04-09 22:49:38.081821
statistics: [('average_q', -0.07034607), ('average_loss', 0.03429108972026346), ('cumulative_steps', 20000), ('n_updates', 19999), ('rlen', 2)] R: -0.0009424651942673742 Mean R: -0.0009292130255372219 PL: -0.0284651942673734 Mean PL: -0.050713025537221124
consumed time: 0:00:36.837387, may end on :2022-04-09 22:47:06.524317
statistics: [('average_q', -0.107270755), ('average_loss', 0.06716067193352378), ('cumulative_steps', 30000), ('n_updates', 29999), ('rlen', 2)] R: -0.0009338362112315788 Mean R: -0.0009381361457170194 PL: -0.02583621123157808 Mean PL: -0.035936145717018676
consumed time: 0:00:40.056035, may end on :2022

In [15]:
torch.save(model.state_dict(), 'bc_rl_5min_macd_50_with_budgets_v1')