# RL Financial model
Uses the PPO method to train the model in a realistic market simulator built on the dataset prices.

## Adding imports

In [None]:
import time, random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import util

from sklearn.preprocessing import MinMaxScaler
from util import DataManager
from env import MarketEnv, MarketEnvProvider
from model import TransformerModel
from agent import Agent, AgentMemory

%matplotlib inline

## Setting up the environment

In [None]:
# Fix one seed for every random number source used in this notebook
# (Python's `random`, NumPy and TensorFlow).
seed = 42

# Compact array printing: three decimals, no scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Reset the Keras backend session before seeding TensorFlow.
tf.keras.backend.clear_session()

for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

## Preparing the dataset

### Reading initial OHLCV data

In [None]:
# Load the 5-minute BTC/USDT candles and discard the 'timestamp' column,
# keeping the remaining columns as they appear in the file.
df = pd.read_csv('datasets/btcusdt_5m.csv').drop(columns='timestamp')
df.head(20)

### Adding indicators + Scaling data
Includes RSI, MA, ATR and ADX indicators of several lengths, plus a MACD indicator

In [None]:
# Min-max scale every column into [0, 1] and relabel the result with short
# OHLCV column names.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_ohlcv = scaler.fit_transform(df)
df = pd.DataFrame(scaled_ohlcv, columns=['o', 'h', 'l', 'c', 'v'])

In [None]:
# Derive the model features from the min-max scaled OHLCV columns.

# Bar-over-bar relative change of the close, measured against the PREVIOUS
# bar. The earlier version used shift(-1), i.e. the NEXT row, which put the
# following bar's close into the current bar's features (look-ahead leakage).
# Assumes rows are in chronological order (oldest first) - TODO confirm for
# the dataset in use.
# The first row has no predecessor and becomes NaN; a previous close of 0
# yields inf/NaN. Both are expected to be removed by the cleaning step.
# NOTE(review): this is computed on the scaled close, not on raw prices.
prev_close = df['c'].shift(1)
df['pct_change'] = (df['c'] - prev_close) / prev_close

# Indicator columns produced by the project's `util` helpers; the second
# argument is presumably the look-back length - verify against util.
df['rsi7'] = util.rsi(df, 7)
df['rsi14'] = util.rsi(df, 14)
df['rsi28'] = util.rsi(df, 28)
df['ma20'] = util.ma(df, 20)
df['ma30'] = util.ma(df, 30)
df['ma40'] = util.ma(df, 40)
df['atr7'] = util.atr(df, 7)
df['atr14'] = util.atr(df, 14)
df['atr28'] = util.atr(df, 28)
df['adx14'] = util.adx(df, 14)
df['adx28'] = util.adx(df, 28)
df['adx56'] = util.adx(df, 56)
# presumably (fast, slow, signal) periods - verify against util.macd
df['macd12-26-9'] = util.macd(df, 12, 26, 9)

### Cleaning data from NaNs, zeros and INF

In [None]:
# Column labels of the full feature table, in the order the columns were added.
feature_columns = [
    'o', 'h', 'l', 'c', 'v',
    'pct_change',
    'rsi7', 'rsi14', 'rsi28',
    'ma20', 'ma30', 'ma40',
    'atr7', 'atr14', 'atr28',
    'adx14', 'adx28', 'adx56',
    'macd12-26-9',
]

# Turn infinities into NaN so they are removed together with the other NaNs.
df = df.replace([np.inf, -np.inf], np.nan)

# Re-fit the same scaler on the whole feature table (this replaces its earlier
# fit) and rebuild the frame with the labels above.
df = pd.DataFrame(scaler.fit_transform(df), columns=feature_columns)

# Drop rows containing NaN, then keep only rows where every value is non-zero.
df = df.dropna()
df = df.loc[(df != 0).all(axis=1)]
df.head(20)

## Plotting indicators

In [None]:
# Plot a 500-row window of the feature table together with its indicators.
start = 60000
end = start + 500


def _window(column):
    """Return the rows start..end of one feature column."""
    return df[column][start:end]


# All three ATR panels use the same upper bound: 1.5x the ATR14 maximum
# inside the window.
atr_upper = _window('atr14').max() * 1.5

# Overlays passed as `indicators`: [color, label, series].
moving_averages = [
    [color, label, _window(column)]
    for color, label, column in (
        ('cornflowerblue', 'MA20', 'ma20'),
        ('b', 'MA30', 'ma30'),
        ('midnightblue', 'MA40', 'ma40'),
    )
]

# Panels passed as `separated_indicators`: [color, label, lower, upper, series].
# A -1/-1 pair is presumably "no fixed limits" - verify against util.plot.
panels = [
    ['lightcoral', label, lower, upper, _window(column)]
    for label, lower, upper, column in (
        ('Percentage Change', -1, -1, 'pct_change'),
        ('Volume', -1, -1, 'v'),
        ('RSI7', 0, 1, 'rsi7'),
        ('RSI14', 0, 1, 'rsi14'),
        ('RSI28', 0, 1, 'rsi28'),
        ('ATR7', 0, atr_upper, 'atr7'),
        ('ATR14', 0, atr_upper, 'atr14'),
        ('ATR28', 0, atr_upper, 'atr28'),
        ('ADX14', 0, 1, 'adx14'),
        ('ADX28', 0, 1, 'adx28'),
        ('ADX56', 0, 1, 'adx56'),
        ('MACD12-26-9', -1, -1, 'macd12-26-9'),
    )
]

util.plot(
    df[start:end],
    main_color='lightcoral',
    indicators=moving_averages,
    separated_indicators=panels,
)

## Building agent
Building based on configuration and saved runtime data

In [None]:
# Create the DataManager for the info file under cache/; later cells read the
# configuration and weight locations from it.
runtime_info_path = 'cache/info.json'
data = DataManager(runtime_info_path)

In [None]:
# Pull the run hyper-parameters out of the stored configuration.
cfg = data.get_config()
initial_cap = 1000
# The original author's inline note listed 64, 64, 32, 16, 16, 1024 for
# these six settings, in this order.
env_size, timestamps, batch_size, n_epochs, n_memory_envs, episodes = (
    cfg[key]
    for key in ('env_size', 'timestamps', 'batch_size', 'n_epochs', 'n_memory_envs', 'episodes')
)

In [None]:
# Environment source built on the cleaned feature table.
provider = MarketEnvProvider(
    data=df,
    initial_capital=initial_cap,
    position_size=0.98,
    commission=0.001,
    timestamps=timestamps,
    env_size=env_size,
)

# Experience buffer with room for `n_memory_envs` environments of `env_size` entries.
memory = AgentMemory(
    batch_size=batch_size,
    max_capacity=(env_size * n_memory_envs),
)

# Width of one observation row: the dataframe columns plus the extra values
# an environment reports through get_additional_info_len().
observation_width = df.shape[1] + provider.get_env().get_additional_info_len()

agent = Agent(
    input_shape=(timestamps, observation_width),
    n_actions=3,
    gamma=0.99,
    lr=0.001,
    gae_lambda=0.95,
    policy_clip=0.25,
    entropy_coef=0.01,
    batch_size=batch_size,
    n_epochs=n_epochs,
    memory=memory,
)

In [None]:
# Restore previously saved actor/critic weights when the data manager knows
# where they are; an actor location of -1 means nothing is injected.
actor_weights_loc, critic_weights_loc = data.get_weights_loc()
weights_available = actor_weights_loc != -1
if weights_available:
    print('Weights found: injecting...')
    agent.inject_weights(actor_weights_loc, critic_weights_loc)
    print('Weights injected')

In [None]:
# Call the agent's info() method (its output is defined in the project's
# agent module; presumably a summary of the models - verify there).
agent.info()

## Training

In [None]:
# Training loop. Each episode runs `n_epochs` rollouts on fresh environments
# via agent.go(), then calls agent.train() once and plots the losses.
# A checkpoint is written every `checkpoint_timeout` loop iterations.
checkpoint_timeout = 1

# `offset` is the difference between the configured episode count and the
# 'episodes' value of the updated config; it is used as the number of
# already-completed episodes (presumably the updated config holds the
# remaining episodes - verify against DataManager.get_updated_config).
offset = episodes - data.get_updated_config(checkpoint_timeout)['episodes']

for episode in range(episodes - offset):
    # Absolute, 1-based episode number. It is used in every log line so that
    # the header and the checkpoint message agree when a run is resumed
    # (the checkpoint message previously ignored `offset`).
    episode_number = offset + episode + 1
    print(f'episode {episode_number} out of {episodes}')
    rewards_buf, cap_gain_buf = [], []

    for epoch in range(n_epochs):
        print(f'experience epoch {epoch + 1} out of {n_epochs}')
        env = provider.get_env()
        price_change, cap_change, rewards, actions, probabilities = agent.go(env, log=True)
        history = env.get_history()

        # Trade and capital plots are drawn only for the first rollout of an episode.
        if epoch == 0:
            util.plot_trades(history, price_change, True)
            util.plot_trades(history, price_change, False)
            plt.figure(figsize=(20, 6))
            plt.plot(cap_change, color='lightcoral')
            plt.legend(['Capital Change'], loc='upper left')
            plt.grid(True)
            plt.show()

        util.plot_rewards_distribution(rewards, actions, colors=['indianred', 'mediumblue', 'forestgreen'], zero_value_percent=0.025, zero_value_line_color='k')
        util.plot_probabilities_distribution(probabilities, colors=['indianred', 'mediumblue', 'forestgreen'])

        rewards_buf.append(sum(rewards))
        # Relative gain of the MEAN capital over the rollout versus the initial
        # capital. NOTE(review): this is not the final capital - confirm intent.
        cap_gain_buf.append((np.mean(cap_change) - initial_cap) / initial_cap)
        print(f'avg_reward={np.mean(rewards)}')

    actor_losses, critic_losses = agent.train(log=True)

    if episode % checkpoint_timeout == 0:
        actor, critic = agent.get_models()
        ts = int(time.time())
        print(f'saving checkpoint at episode {episode_number} in timestamp {ts}')
        # Hands the mean losses, both models and this episode's buffers to the
        # data manager under the timestamp `ts`.
        data.load_runtime(ts, np.mean(actor_losses), np.mean(critic_losses), actor, critic, rewards_buf, cap_gain_buf)
        print('checkpoint saved')

    # Loss curves returned by this episode's training step.
    plt.figure(figsize=(20, 6))
    plt.plot(actor_losses, color='g')
    plt.plot(critic_losses, color='r')
    plt.legend(['Actor Losses', 'Critic Losses'], loc='upper left')
    plt.grid(True)
    plt.show()

## Delete history

In [None]:
# data.delete_runtimes()