In [None]:
import lib._util.visualplot as vp
import lib._util.fileproc as fp

In [None]:
# Environment
from lib._class.environment.ForexEnv import ForexEnv

# Agent
from lib._class.agent.Agent import Agent

# Agent (Matrix)
from lib._class.agent.QLearningAgent import QLearningAgent
from lib._class.agent.SarsaAgent import SarsaAgent
from lib._class.agent.SarsaLambdaAgent import SarsaLambdaAgent

# Agent (Neural Network)
from lib._class.agent.QNetworkAgent import QNetworkAgent
from lib._class.agent.DQNAgent import DQNAgent
from lib._class.agent.D2QNAgent import D2QNAgent
from lib._class.agent.D3QNAgent import D3QNAgent

In [None]:
# Change tensorflow default behavior (where it uses all of the memory at the outset):
# `allow_growth = True` asks TensorFlow to grow its GPU memory allocation on demand.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): the session is created here and never closed in this notebook;
# presumably it is meant to live for the whole kernel lifetime — confirm.
session = InteractiveSession(config=config)

# Plotly
import plotly.express as px
import plotly.graph_objects as go

# Time measurement
import time
from datetime import timedelta

# Sound notification
import winsound

import numpy as np
import pandas as pd

from IPython.display import clear_output

# Profiling
%load_ext line_profiler

# Useful Functions

In [None]:
# Input / output locations (relative paths)
SOURCE_PATH_DATA = 'resources/output/eda/file/'   # feature CSV read by ForexEnv
OUT_PATH_IMAGE   = 'resources/output/RL/image/'   # model diagrams
OUT_PATH_GRAPH   = 'resources/output/RL/graph/'   # exported charts
OUT_PATH_FILE    = 'resources/output/RL/file/'    # model checkpoints and CSV exports

def time_taken(seconds):
    """Print the elapsed time as H:MM:SS and play a short two-tone beep.

    Args:
        seconds: Elapsed duration in seconds.
    """
    elapsed = timedelta(seconds=seconds)
    print(f'\nTime Taken: {elapsed}')

    # (frequency in Hz, duration in ms) of each notification tone, played in order
    for tone, length in ((1000, 100), (1500, 50)):
        winsound.Beep(frequency=tone, duration=length)

# Initialize Environment

In [None]:
currency_pair = 'EURUSD'
# Bar size of the feature file to load: 'Day', 'Hour' or 'Min'
timeframe     = 'Day'
filename      = f'DAT_ASCII_{currency_pair}_{timeframe}_Feature.csv'

# Build the trading environment from the selected feature file
# (train_size=.5 is passed straight through to ForexEnv)
env = ForexEnv(SOURCE_PATH_DATA, filename, train_size=.5)

### Chart: Environment

In [None]:
EXEC_START = time.time()


# Both price series share the same time axis, so build it once
timestamps = pd.DataFrame(env.datetimes)[0]
price_types = ['bid', 'ask']

# One line trace per price series
data = [
    go.Scattergl(
        x = timestamps,
        y = series,
        mode = 'lines',
        name = label.title()
    )
    for label, series in zip(price_types, [env.bids, env.asks])
]

title = f'{currency_pair} - Forex Environment ({len(env.datetimes) :,} Timesteps)'
vp.plot_graph(data, title, 'Date Time', 'Price', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

# Training

In [None]:
def _build_agent(agent_type, pretrain_size, sample_size, memory_size, neurons):
    """Create the agent selected by `agent_type` on the notebook-level `env`.

    Args:
        agent_type: One of 'Normal', 'Q-Learning', 'SARSA', 'SARSA Lambda',
            'Q-Network', 'DQN', 'D2QN', 'D3QN'.
        pretrain_size, sample_size, memory_size: Forwarded to the neural-network agents.
        neurons: Forwarded to the DQN / D2QN / D3QN agents.

    Returns:
        The constructed agent.

    Raises:
        ValueError: If `agent_type` is not one of the supported names.
    """
    # Agent
    if agent_type == 'Normal':
        return Agent(env)

    # Off-Policy agent
    if agent_type == 'Q-Learning':
        return QLearningAgent(env)

    # On-Policy agent
    if agent_type == 'SARSA':
        return SarsaAgent(env)
    if agent_type == 'SARSA Lambda':
        return SarsaLambdaAgent(env, episodic_trace=False)

    # Neural-network agents share the same memory settings
    memory_kwargs = dict(pretrain_size=pretrain_size, sample_size=sample_size, memory_size=memory_size)
    if agent_type == 'Q-Network':
        agent = QNetworkAgent(env, **memory_kwargs)
    elif agent_type == 'DQN':
        agent = DQNAgent(env, neurons=neurons, **memory_kwargs)
    elif agent_type == 'D2QN':
        agent = D2QNAgent(env, neurons=neurons, **memory_kwargs)
    elif agent_type == 'D3QN':
        agent = D3QNAgent(env, neurons=neurons, **memory_kwargs)
    else:
        # Fail early instead of hitting an unbound `agent` further down
        raise ValueError(f'Unknown agent_type: {agent_type!r}')

    # Export the network diagram of every neural-network agent
    agent.main_model.model_diagram(OUT_PATH_IMAGE, agent_type)
    return agent


def train(episodes, agent_type):
    """Train an agent on the notebook-level `env` and collect per-episode statistics.

    A KeyboardInterrupt stops training early; the statistics of the episodes
    completed so far are still returned.

    Args:
        episodes: Number of episodes to run.
        agent_type: Name of the agent to train (see `_build_agent`).

    Returns:
        Tuple `(result_dict, agent)`. `result_dict` maps 'total_reward',
        'max_reward', 'min_reward', 'trades', 'step_count' and 'memory_count'
        to lists with one entry per completed episode. `agent` is None only
        when training was interrupted before the agent had been constructed.

    Raises:
        ValueError: If `agent_type` is not supported.
    """
    # Training parameters
    pretrain_size = 2_500
    sample_size   = 1_000
    memory_size   = 10_000
    neurons       = [32, 64, 128]

    # Performance tracking
    # (created before the try block so the interrupt path can always return it)
    result_dict = {
        'total_reward': [],
        'max_reward':   [],
        'min_reward':   [],
        'trades':       [],
        'step_count':   [],
        'memory_count': []
    }
    agent = None

    try:
        agent = _build_agent(agent_type, pretrain_size, sample_size, memory_size, neurons)

        # Training iteration
        learn_ep = 0
        for episode in range(episodes):
            # Performance tracking
            # NOTE: min / max start at 0, so min_reward is never above 0 and
            # max_reward is never below 0.
            total_timestep = 0
            total_reward   = 0
            min_reward     = 0
            max_reward     = 0

            # Reward function: rewards collected since the current trade was opened
            avg_rewards = []

            # Walkthrough environment
            done  = False
            state = env.reset()

            # Choose action
            norm_state = env.normalize_state(state)
            action     = agent.choose_action(norm_state)

            while not done:
                # Take action
                next_state, reward, done, info_dict = env.step(action)
                trade_done       = info_dict['trade_done']
                trade_next_state = info_dict['trade_next_state']

                # Performance tracking (uses the raw environment reward)
                total_reward   += reward
                total_timestep += 1
                min_reward     = reward if reward < min_reward else min_reward
                max_reward     = reward if reward > max_reward else max_reward

                # Reward engineering: realised ROI when the trade closed, floating ROI
                # while a trade is open, otherwise 0
                entry_action = next_state[1]
                reward       = info_dict['roi'] if trade_done else info_dict['float_roi'] if info_dict['have_open'] else 0

                # presumably entry_action == -1 means "no entry yet" — TODO confirm against ForexEnv
                if entry_action != -1:
                    # Learn from the running mean of the rewards seen during this trade
                    avg_rewards.append(reward)
                    reward = np.mean(avg_rewards)
                    reward = round(reward, 5)

                if trade_done:
                    avg_rewards = []

                # Normalize reward
                reward = env.normalize_reward(reward)

                # Choose next action (based on trade state) - only used for learning
                norm_next_state = env.normalize_state(trade_next_state)
                next_action     = agent.choose_action(norm_next_state)

                # Learning
                norm_state = env.normalize_state(state)
                experience = (norm_state, action, reward, norm_next_state, trade_done)
                learned    = agent.learn(experience, next_action, learn_ep)

                # Choose next action (based on actual state) - the one actually taken
                norm_next_state = env.normalize_state(next_state)
                next_action     = agent.choose_action(norm_next_state)

                state  = next_state
                action = next_action

            # Increase learned episode (uses the result of the episode's last learn() call)
            learn_ep += int(learned)

            # Result summary
            result_dict['total_reward'].append(total_reward)
            result_dict['max_reward'].append(max_reward)
            result_dict['min_reward'].append(min_reward)
            # NOTE(review): env.trade_dict is stored by reference and tagged in place;
            # assumes env.reset() creates a fresh trade_dict each episode — TODO confirm.
            result_dict['trades'].append(env.trade_dict)
            result_dict['trades'][-1]['episode'] = episode +1
            result_dict['step_count'].append(total_timestep)
            try:
                memory_count = agent.memory.counter
            except AttributeError:
                # Agent does not expose a memory counter
                memory_count = -1
            result_dict['memory_count'].append(memory_count)

            # Progress
            # clear_output(wait=True)
            ε = agent.hyperparams_dict['epsilon']['value']

            print(f'EP: {episode +1 :,} ({learn_ep :,}) | ε: {ε :.3f} | SUM(R): {total_reward :>7,.1f} | MAX(R): {max_reward :>7,.1f} | MIN(R): {min_reward :>7,.1f} | SUM(T): {total_timestep :>5} | M: {memory_count :,}')

    except KeyboardInterrupt:
        print('\n!!! KeyboardInterrupt Exception !!!')

    return result_dict, agent

In [None]:
EXEC_START = time.time()


# Select the agent to train: exactly one `agent_type` line below is left uncommented.
# agent_type = 'Normal'
# agent_type = 'Q-Learning'
# agent_type = 'SARSA'
# agent_type = 'SARSA Lambda'
# agent_type = 'Q-Network'
agent_type = 'DQN'
# agent_type = 'D2QN'
# agent_type = 'D3QN'

# Number of training episodes passed to train()
episodes = 20_000

# FOR PROFILING PURPOSE
# (presumably uncommenting the %lprun line profiles the train() call on the next line,
#  joined to it by the trailing backslash — confirm)
# %lprun -f train \
result_dict, agent = train(episodes, agent_type)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Save Model

In [None]:
# Save the trained agent's checkpoint under OUT_PATH_FILE.
# NOTE(review): presumably not every agent type implements save_model_checkpoint — confirm.
agent.save_model_checkpoint(OUT_PATH_FILE)

# Training Result

In [None]:
EXEC_START = time.time()


# Per-episode metrics: one row per episode, numbered from 1
metric_columns = ['total_reward', 'max_reward', 'min_reward', 'memory_count', 'step_count']
result_df = pd.DataFrame({column: result_dict[column] for column in metric_columns})
result_df['episode'] = list(range(1, len(result_df) + 1))

# All recorded trades of every episode stacked into one frame
trade_frames = [pd.DataFrame(episode_trades) for episode_trades in result_dict['trades']]
trade_df = pd.concat(trade_frames, ignore_index=True)

# Profits
profit_df = (
    trade_df
    .groupby('episode')
    .agg({'profits': 'sum', 'pip_change': 'sum'})
    .reset_index()
)
result_df = result_df.merge(profit_df, on='episode', how='left')
# Episodes missing from profit_df get 0 instead of NaN after the left merge
for earn_column in ['profits', 'pip_change']:
    result_df[earn_column] = result_df[earn_column].fillna(0)

# Export trades
fp.generate_csv(trade_df, out_path=OUT_PATH_FILE, out_filename=f'{agent_type}_trades.csv', export_index=False)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Rolling Reward

In [None]:
EXEC_START = time.time()


reward_types = ['total_reward', 'max_reward', 'min_reward']

# Rolling mean computed once for the plotted columns only.
# With a 1,000-episode window the first 999 points are NaN.
rolling_df = result_df[reward_types].rolling(1_000).mean()

data = []
for reward_type in reward_types:
    data.append(go.Scattergl(
        # Use the episodes actually recorded: an interrupted training run returns
        # fewer rows than `episodes`, which would misalign x and y.
        x = result_df['episode'],
        y = rolling_df[reward_type],
        mode = 'lines',
        name = f'Rolling {reward_type.title()}'
    ))

title = f'{currency_pair} - Rolling Reward - {agent_type}'
vp.plot_graph(data, title, 'Episode', 'Value', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Rolling Profit

In [None]:
EXEC_START = time.time()


earn_types = ['profits', 'pip_change']

# Rolling mean computed once for the plotted columns only.
# With a 1,000-episode window the first 999 points are NaN.
rolling_df = result_df[earn_types].rolling(1_000).mean()

data = []
for earn_type in earn_types:
    data.append(go.Scattergl(
        # Use the episodes actually recorded: an interrupted training run returns
        # fewer rows than `episodes`, which would misalign x and y.
        x = result_df['episode'],
        y = rolling_df[earn_type],
        mode = 'lines',
        name = f'Rolling {earn_type.title()}'
    ))

title = f'{currency_pair} - Rolling Profit - {agent_type}'
vp.plot_graph(data, title, 'Episode', 'Amount', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Memory Collected

In [None]:
EXEC_START = time.time()


data = []
data.append(go.Scattergl(
    # Use the episodes actually recorded: an interrupted training run returns
    # fewer rows than `episodes`, which would misalign x and y.
    x = result_df['episode'],
    # memory_count is -1 for agents that expose no memory counter
    y = result_df['memory_count'],
    mode = 'lines',
    name = f'Memory Collected'
))

title = f'{currency_pair} - Memory Collected - {agent_type}'
vp.plot_graph(data, title, 'Episode', 'Count', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Rolling Step Taken

In [None]:
EXEC_START = time.time()


data = []
data.append(go.Scattergl(
    # Use the episodes actually recorded: an interrupted training run returns
    # fewer rows than `episodes`, which would misalign x and y.
    x = result_df['episode'],
    # Rolling mean of the plotted column only; the first 999 points are NaN
    y = result_df['step_count'].rolling(1_000).mean(),
    mode = 'lines',
    name = f'Rolling Step Taken'
))

title = f'{currency_pair} - Rolling Step Taken - {agent_type}'
vp.plot_graph(data, title, 'Episode', 'Count', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Action Profit-Loss

In [None]:
EXEC_START = time.time()


# Look the environment constants up once
constants        = env.constant_values()
closing_statuses = [
    constants['TRADE_STATUS']['CLOSE_TRADE'],
    constants['TRADE_STATUS']['MARGIN_CALL'],
]

# Keep only rows that ended a trade, then total the profit per episode / action / status
closed_df = trade_df[trade_df['status'].isin(closing_statuses)]
profit_df = (
    closed_df
    .groupby(['episode', 'action', 'status'])
    .agg({'profits': 'sum'})
    .reset_index()
)


# NOTE(review): BUY rows are labelled 'Sell Entry' and SELL rows 'Buy Entry' —
# presumably because the action on a closing row is the opposite of the entry; confirm.
labelled_actions = [
    ('Sell Entry', constants['TRADE_ACTION']['BUY']),
    ('Buy Entry',  constants['TRADE_ACTION']['SELL']),
]

# One line per (action, closing status) combination
data = []
for label, action in labelled_actions:
    for status in closing_statuses:
        is_selected = (profit_df['action'] == action) & (profit_df['status'] == status)
        pnl_df = profit_df[is_selected]
        data.append(go.Scattergl(
            x = pnl_df['episode'],
            y = pnl_df['profits'],
            mode = 'lines',
            name = f'{label} - {status} ({len(pnl_df) :,})'
        ))

title = f'{currency_pair} - Action Profit-Loss - {agent_type}'
vp.plot_graph(data, title, 'Episode', 'Profit-Loss', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

# Testing

In [None]:
# TODO
# agent.load_model_checkpoint(OUT_PATH_FILE)

### Chart: Trade

In [None]:
EXEC_START = time.time()


# Price lines; hover is skipped on them so only the trade markers show hover text
timestamps = pd.DataFrame(env.datetimes)[0]
price_types = ['bid', 'ask']
data = [
    go.Scattergl(
        x = timestamps,
        y = series,
        mode = 'lines',
        name = label.title(),

        # Additional settings
        hoverinfo='skip'
    )
    for label, series in zip(price_types, [env.bids, env.asks])
]

# Trade markers: rows whose `action` equals 0 use the 'buy' style, 1 the 'sell' style
markers = ['triangle-up', 'triangle-down']
trade_actions = ['buy', 'sell']
for trade_index, (trade_action, marker_symbol) in enumerate(zip(trade_actions, markers)):
    action_df = trade_df[trade_df['action'] == trade_index]

    # Hover text labels even row indexes as "Open" and odd ones as "Closed"
    hover_lines = [
        f'Date Time: {row.datetime}<br />Action Index: {row.Index}<br />{"Open" if row.Index % 2 == 0 else "Closed"} at {row.price}<br />Profit: {row.profits}'
        for row in action_df.itertuples()
    ]

    data.append(go.Scattergl(
        x = action_df['datetime'],
        y = action_df['price'],
        mode = 'markers',
        name = trade_action.title(),

        # Additional settings
        marker = dict(
            size=15,
            symbol=marker_symbol
        ),
        hovertext=hover_lines,
        hoverinfo='text'
    ))

title = f'{currency_pair} - Trade - {agent_type}'
vp.plot_graph(data, title, 'Date Time', 'Price', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Trade Profits

In [None]:
EXEC_START = time.time()


# Profit of every recorded trade row, plotted against its 0-based position in trade_df
profit_series = trade_df['profits']
trade_positions = list(range(len(trade_df)))

data = [
    go.Scattergl(
        x = trade_positions,
        y = profit_series,
        mode = 'lines',
        # The legend entry carries the overall profit total
        name = f'Profits ({sum(profit_series) :,.2f})'
    )
]

title = f'{currency_pair} - Trade Profits - {agent_type}'
vp.plot_graph(data, title, '', 'Amount', out_path=OUT_PATH_GRAPH)


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)