In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
# Only let TensorFlow's Python logger emit records at ERROR level or above.
tf_logger = tf.get_logger()
tf_logger.setLevel('ERROR')

# Enable memory growth on every GPU TensorFlow reports; with no GPU present
# the list is empty and the loop body never runs.
visible_gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in visible_gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

from keras import layers, models

from ConvertCSV import DataReady
from FinRockDataFeeder import PdDataFeeder
from FinRockTradingEnv import TradingEnv, ActionSpace
from FinRockScaler import ZScoreScaler
from FinRockReward import AccountValueChangeReward
from FinRockMetrics import DifferentActions, AccountValue, MaxDrawdown, SharpeRatio
from Indicators import BolingerBands, RSI, PSAR, SMA, MACD

from rockrl.utils.misc import MeanAverage
from rockrl.utils.memory import MemoryManager
from rockrl.tensorflow import PPOAgent
from rockrl.utils.vectorizedEnv import VectorizedEnv

2024-04-03 08:04:06.174361: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-03 08:04:06.218537: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-03 08:04:06.219232: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Location of the 5-minute ATOMUSDT candle export. This is a machine-specific
# absolute path: change it here (one place) when running elsewhere.
DATA_CSV_PATH = "/home/zymantas/Desktop/Training_data/5minData/ATOMUSDT5min.csv"
# Number of trailing rows held out of training.
TEST_ROWS = 1000

raw_df = pd.read_csv(DATA_CSV_PATH)

# Project helper from ConvertCSV. Its return value is not used here, so any
# effect it has must be in place on `raw_df` or printed output.
# NOTE(review): confirm what DataReady actually does to the frame.
DataReady(raw_df)

# Fail early instead of silently producing an empty training frame.
if len(raw_df) <= TEST_ROWS:
    raise ValueError(
        f"Need more than {TEST_ROWS} rows to split off a test set, got {len(raw_df)}"
    )

# Keep the held-out tail under its own name instead of discarding it.
test_df = raw_df[-TEST_ROWS:]
df = raw_df[:-TEST_ROWS]  # training frame used by the following cells

                timestamp    open    high     low   close   volume
0     2024-03-11 02:00:00  13.190  13.233  13.179  13.219  10036.8
1     2024-03-11 02:05:00  13.219  13.276  13.217  13.243  20072.7
2     2024-03-11 02:10:00  13.243  13.262  13.201  13.262   8939.3
3     2024-03-11 02:15:00  13.262  13.264  13.203  13.205   9896.9
4     2024-03-11 02:20:00  13.205  13.209  13.150  13.167   8269.5
...                   ...     ...     ...     ...     ...      ...
6529  2024-04-02 19:05:00  10.943  11.030  10.931  11.013  22089.6
6530  2024-04-02 19:10:00  11.013  11.031  10.992  11.031  16393.6
6531  2024-04-02 19:15:00  11.031  11.031  10.950  10.976  10379.1
6532  2024-04-02 19:20:00  10.976  10.976  10.920  10.959  12796.1
6533  2024-04-02 19:25:00  10.959  10.959  10.939  10.949   2230.7

[6534 rows x 6 columns]


In [3]:
# Indicator objects, each constructed from the same frame that is handed to
# the feeder below.
indicator_set = [
    BolingerBands(data=df, period=20, std=2),
    RSI(data=df, period=14),
    PSAR(data=df),
    MACD(data=df),
    SMA(data=df, period=7),
]

# Feeder wrapping the candle frame together with the indicators above.
pd_data_feeder = PdDataFeeder(df, indicators=indicator_set)

In [4]:
# Number of environment copies managed by the vectorized wrapper.
num_envs = 10

# Collaborators handed to every TradingEnv, built in the same order as the
# keyword arguments that consume them.
observation_scaler = ZScoreScaler()
reward_fn = AccountValueChangeReward()
tracked_metrics = [DifferentActions(), AccountValue(), MaxDrawdown()]

env = VectorizedEnv(
    env_object=TradingEnv,
    num_envs=num_envs,
    data_feeder=pd_data_feeder,
    output_transformer=observation_scaler,
    initial_balance=1000.0,
    max_episode_steps=1000,
    window_size=50,
    reward_function=reward_fn,
    action_space=ActionSpace.CONTINUOUS,
    metrics=tracked_metrics,
)

# Values the model-building cell needs: what the env reports as its action
# space, and the shape of its observation space.
action_space = env.action_space
input_shape = env.observation_space.shape

In [5]:
def actor_model(input_shape, action_space):
    """Build the actor network used by the PPO agent.

    The observation is flattened, passed through three ELU dense layers
    (512, 256 and 64 units) and a dropout layer with rate 0.2. Two heads
    branch off that shared trunk:

    * an action head: a dense layer with ``action_space`` units and ``tanh``
      activation;
    * a sigma head: a linear dense layer with ``action_space`` units followed
      by a single sigmoid unit.

    The model output is the concatenation of the action head and the sigma
    unit, in that order.

    Args:
        input_shape: Passed to ``layers.Input`` as the observation shape.
        action_space: Passed to ``layers.Dense`` as the unit count of the
            action head and of the first sigma layer.

    Returns:
        A ``models.Model`` mapping the input tensor to the concatenated output.
    """
    observation = layers.Input(shape=input_shape, dtype=tf.float32)

    # Shared trunk.
    trunk = layers.Flatten()(observation)
    for units in (512, 256, 64):
        trunk = layers.Dense(units, activation='elu')(trunk)
    trunk = layers.Dropout(0.2)(trunk)

    # Heads (created in the same order as before so auto-generated layer
    # names are unchanged).
    action_head = layers.Dense(action_space, activation="tanh")(trunk)
    sigma_head = layers.Dense(action_space)(trunk)
    sigma_head = layers.Dense(1, activation='sigmoid')(sigma_head)

    # continuous action space
    policy_output = layers.concatenate([action_head, sigma_head])
    return models.Model(inputs=observation, outputs=policy_output)

def critic_model(input_shape):
    """Build the critic network used by the PPO agent.

    The observation is flattened, passed through three ELU dense layers
    (512, 256 and 64 units) and a dropout layer with rate 0.2, then reduced
    to a single linear output unit.

    Args:
        input_shape: Passed to ``layers.Input`` as the observation shape.

    Returns:
        A ``models.Model`` mapping the input tensor to the one-unit output.
    """
    observation = layers.Input(shape=input_shape, dtype=tf.float32)

    hidden = layers.Flatten()(observation)
    for units in (512, 256, 64):
        hidden = layers.Dense(units, activation='elu')(hidden)
    hidden = layers.Dropout(0.2)(hidden)

    # No activation: the output is an unbounded scalar.
    value = layers.Dense(1, activation=None)(hidden)
    return models.Model(inputs=observation, outputs=value)


# Build both networks up front so the agent constructor call stays short.
actor_net = actor_model(input_shape, action_space)
critic_net = critic_model(input_shape)

agent = PPOAgent(
    actor=actor_net,
    critic=critic_net,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005),
    batch_size=128,
    lamda=0.95,
    kl_coeff=0.5,
    c2=0.01,
    writer_comment='ppo_sinusoid',
    action_space="continuous",
)

# Ask the data feeder and then the wrapped environment to save their
# configuration, passing the agent's log directory as the destination.
for configurable in (pd_data_feeder, env.env):
    configurable.save_config(agent.logdir)

Failed to save config to file


In [6]:
# Training-loop settings (previously inline literals).
MAX_EPOCHS = 20000           # stop once agent.epoch reaches this value
KL_DECAY_THRESHOLD = 0.2     # a kl_div above this triggers a learning-rate reduction...
KL_DECAY_MIN_EPOCH = 1000    # ...but only once agent.epoch exceeds this value
LR_DECAY_FACTOR = 0.995      # factor passed to agent.reduce_learning_rate

memory = MemoryManager(num_envs=num_envs)
meanAverage = MeanAverage(best_mean_score_episode=1000)
states, infos = env.reset()

# try/finally guarantees env.close() runs even when the (long) loop is
# interrupted or raises; previously close() was skipped in that case.
try:
    while True:
        # One step in every environment, recorded in the rollout memory.
        action, prob = agent.act(states)

        next_states, reward, terminated, truncated, infos = env.step(action)
        memory.append(states, action, reward, prob, terminated, truncated, next_states, infos)
        states = next_states

        # presumably the indices of environments whose episode just ended -
        # verify against MemoryManager.done_indices
        for index in memory.done_indices():
            env_memory = memory[index]
            history = agent.train(env_memory)
            mean_reward = meanAverage(np.sum(env_memory.rewards))

            if meanAverage.is_best(agent.epoch):
                agent.save_models('ppo_sinusoid')

            # Shrink the learning rate when the reported KL divergence is
            # large, but not during the first epochs.
            if history['kl_div'] > KL_DECAY_THRESHOLD and agent.epoch > KL_DECAY_MIN_EPOCH:
                agent.reduce_learning_rate(LR_DECAY_FACTOR, verbose=False)

            # Progress line: epoch, episode reward sum, running mean,
            # final account value, KL divergence.
            info = env_memory.infos[-1]
            print(agent.epoch, np.sum(env_memory.rewards), mean_reward, info["metrics"]['account_value'], history['kl_div'])
            agent.log_to_writer(info['metrics'])

            # Restart only the finished environment and splice its fresh
            # state into the batch used for the next step.
            states[index], infos[index] = env.reset(index=index)

        if agent.epoch >= MAX_EPOCHS:
            break
finally:
    env.close()

# NOTE(review): inside a notebook, exit() shuts the kernel down, so `agent`
# and the other in-memory objects are lost afterwards - confirm this is wanted.
exit()

1 -0.004384427442823635 -0.004384427442823635 993.5058660373886 0.47585535
2 -0.016554312786515972 -0.010469370114669804 981.3367469436496 0.4507783
3 0.007013673151919041 -0.004641689025806855 1004.9443111603944 0.43849286
4 -0.07257887777067439 -0.02162598621202374 927.6663340156654 0.43192577
5 -0.05110487861864464 -0.027521764693347918 948.2513135482995 0.41806927
6 -0.0056696171654057395 -0.023879740105357553 992.3276692400724 0.43470556
7 0.0050238260730619926 -0.019750659222726192 1003.0871878400271 0.42156616
8 -0.01799760052461267 -0.019531526885462003 979.7083838197415 0.42614332
9 0.0015619255237801326 -0.017187809951101765 999.7398497182492 0.41109324
10 -0.07723196162169456 -0.023192225118161047 923.48336106292 0.44688314
11 -0.10580866199758213 -0.030702810289017505 897.5005796009834 0.3340033
12 -0.07041825190663145 -0.034012430423818664 929.8987532729063 0.36458513
13 -0.08377531743664132 -0.03784034480942041 917.068395122879 0.3444623
14 -0.0775047680448277 -0.04067351