In [None]:
import pandas as pd

# Raise warning as error to capture floating-point errors during computation:
# https://stackoverflow.com/questions/34955158/what-might-be-the-cause-of-invalid-value-encountered-in-less-equal-in-numpy/34955622
# https://www.soa.org/news-and-publications/newsletters/compact/2014/may/com-2014-iss51/losing-my-precision-tips-for-handling-tricky-floating-point-arithmetic/
import numpy as np
np.seterr(all='raise')

# Plotly
from plotly.offline import iplot, plot, init_notebook_mode
import plotly.graph_objects as go
init_notebook_mode(connected=True)

# Time measurement
import time
from datetime import timedelta

import os
import random
from IPython.display import clear_output

# Profiling
%load_ext line_profiler

# Useful Functions

In [None]:
# Directory prefixes used by the helpers below. Each keeps its trailing slash
# because callers concatenate them directly with a filename.
SOURCE_PATH_DATA = 'resources/data/'          # input tick-data CSV files
OUT_PATH_GRAPH = 'resources/output/graph/'    # HTML charts written by plot_graph
OUT_PATH_FILE = 'resources/output/file/'      # output directory for exported files

def create_directory(directory):
    """Create ``directory`` (including missing parents) unless the path already exists.

    Args:
        directory: Path of the directory to create.
    """
    if os.path.exists(directory):
        # Nothing to do: the path is already present
        return
    os.makedirs(directory)

def plot_graph(data, title, xlabel=None, ylabel=None, generate_file=True):
    """Build a Plotly figure from ``data`` and either save it or show it inline.

    Args:
        data: Trace(s) handed to ``go.Figure``.
        title: Chart title; also used as the HTML file name when saving.
        xlabel: Optional x-axis title.
        ylabel: Optional y-axis title.
        generate_file: When True the chart is written to ``OUT_PATH_GRAPH`` as
            ``<title>.html``; otherwise it is rendered in the notebook.
    """
    grid_colour = 'rgb(159, 197, 232)'
    x_axis = {'title': xlabel, 'gridcolor': grid_colour}
    y_axis = {'title': ylabel, 'gridcolor': grid_colour}

    layout = go.Layout(
        title=title,
        xaxis=x_axis,
        yaxis=y_axis,
        hovermode='x',
        showlegend=True,
        legend_orientation='h',
        plot_bgcolor='rgba(0, 0, 0, 0)'
    )

    fig = go.Figure(data=data, layout=layout)
    # Show y values with 5 decimal places on hover
    fig.update_yaxes(hoverformat=".5f")

    if not generate_file:
        generate_plot(fig)
        return

    generate_plot(fig, f'{OUT_PATH_GRAPH}', f'{title}.html')

def generate_plot(fig, out_path=None, out_filename=None):
    """Render ``fig`` inline, or write it to an HTML file when ``out_path`` is given.

    Args:
        fig: Plotly figure to render.
        out_path: Output directory prefix; ``None`` means display in the notebook.
        out_filename: File name appended directly to ``out_path``.
    """
    # No destination: display inside the notebook
    if out_path is None:
        iplot(fig)
        return

    # Destination given: make sure it exists, then write the file without opening it
    create_directory(out_path)
    out_file = f'{out_path}{out_filename}'
    plot(fig, filename=out_file, auto_open=False)

    print(f'Generated: {out_file}')

def generate_csv(df, out_path, out_filename, export_index=None):
    """Write ``df`` to a semicolon-separated CSV file with a header row.

    Args:
        df: DataFrame to export.
        out_path: Output directory prefix (created if missing).
        out_filename: File name appended directly to ``out_path``.
        export_index: Forwarded to ``DataFrame.to_csv`` as ``index``.
    """
    create_directory(out_path)

    out_file = f'{out_path}{out_filename}'
    csv_options = {'sep': ';', 'index': export_index, 'header': True}
    df.to_csv(out_file, **csv_options)

    print(f'Generated: {out_file}')

def time_taken(seconds):
    """Print an elapsed duration, given in seconds, formatted via ``timedelta``.

    Args:
        seconds: Elapsed time in seconds.
    """
    elapsed = str(timedelta(seconds=seconds))
    print(f'\nTime Taken: {elapsed}')

# Environment

In [None]:
class Environment:
    """Tick-by-tick trading environment exposing a ``reset``/``step`` interface.

    The environment loads bid/ask ticks from a CSV file, keeps either the
    leading (train) or trailing (test) portion of them, and lets an agent
    BUY, SELL or HOLD at every tick while tracking an account balance and a
    trade ledger.

    Attributes set by the constructor:
        indexes, datetimes, bids, asks: NumPy arrays holding the selected ticks.

    Attributes set by ``reset``:
        state: Current observation ``[entry_action, bid_fluctuation_pct, ask_fluctuation_pct]``.
        timestep: Dict with the current ``index``, ``datetime``, ``bid`` and ``ask``.
        trading_params_dict: Account balances, trade unit and the trade ledger.
    """

    def __init__(self, source_path, filename, nrows=None, train_size=.7, train=True):
        """Load the tick file and keep the train or test portion.

        Args:
            source_path: Directory prefix of the data file (concatenated with ``filename``).
            filename: Name of the CSV file to load.
            nrows: Optional maximum number of rows to read.
            train_size: Fraction of the loaded rows assigned to the training split.
            train: Keep the training split when True, otherwise the test split.
        """
        # Use the caller's arguments; the class must not depend on notebook globals.
        self.__train_test_split(source_path, filename, nrows=nrows, train_size=train_size, train=train)

    def __train_test_split(self, source_path, filename, chunk_size=50_000, nrows=None, train_size=.7, train=True):
        """Read the CSV in chunks and store the requested split as NumPy arrays."""
        source_file = f'{source_path}{filename}'
        df_chunks = pd.read_csv(source_file, sep=',',
                                header=None, names=['datetime', 'bid', 'ask', 'vol'],
                                usecols=['datetime', 'bid', 'ask'],
                                dtype={'datetime': str},
                                chunksize=chunk_size, nrows=nrows)
        timeseries_df = pd.concat(df_chunks, ignore_index=True)

        # Parse timestamps after loading, with an explicit format, instead of
        # using the deprecated ``date_parser`` argument of ``read_csv``.
        timeseries_df['datetime'] = pd.to_datetime(timeseries_df['datetime'], format="%Y%m%d %H%M%S%f")

        # Split on the rows actually loaded: the file may hold fewer than ``nrows``.
        split_size = round(len(timeseries_df) * train_size)
        selected_df = timeseries_df[:split_size] if train else timeseries_df[split_size:]
        selected_df = selected_df.reset_index(drop=True)

        self.indexes   = selected_df.index.values
        self.datetimes = selected_df['datetime'].values
        self.bids      = selected_df['bid'].values
        self.asks      = selected_df['ask'].values

    def constant_values(self):
        """Return the trade status labels and the integer codes of the trade actions."""
        return {
            'TRADE_STATUS': {
                'OPEN': 'OPEN',
                'CLOSE': 'CLOSED',
                'CLOSE_TRADE': 'CLOSE_TRADE'
            },
            'TRADE_ACTION': {
                'BUY': 0,
                'SELL': 1,
                'HOLD': 2
            }
        }

    def state_space(self):
        """Return the names of the state features, in state-vector order."""
        return np.array(['entry_action', 'bid_fluctuation_pct', 'ask_fluctuation_pct'])

    def state_size(self):
        """Return the number of state features."""
        return len(self.state_space())

    def action_space(self):
        """Return a new list with the action codes ``[BUY, SELL, HOLD]``."""
        const_action_dict = self.constant_values()['TRADE_ACTION']
        return [const_action_dict['BUY'], const_action_dict['SELL'], const_action_dict['HOLD']]

    def action_size(self):
        """Return the number of actions."""
        return len(self.action_space())

    def available_actions(self):
        """Return the actions allowed now; requires ``reset`` to have been called.

        While a trade is open, the action that opened it is removed so that the
        agent can hold only one open trade at a time.
        """
        const_status_dict = self.constant_values()['TRADE_STATUS']
        actions = self.action_space()

        # Have open trades
        trade_dict = self.trading_params_dict['trade_dict']
        if const_status_dict['OPEN'] in trade_dict['status']:
            open_index  = trade_dict['status'].index(const_status_dict['OPEN'])
            open_action = trade_dict['action'][open_index]

            # Ensure agent is able to have only 1 open trade while trading
            actions.remove(open_action)
        return actions

    def __price_by_action(self, action, bid, ask, closed_trade):
        """Return the transaction price for ``action`` at the given bid/ask.

        NOTE(review): a closing SELL is priced at ask and a closing BUY at bid;
        confirm this matches the intended execution model.
        """
        const_action_dict = self.constant_values()['TRADE_ACTION']

        # Close trade by Selling at Ask price, and Buying at Bid price
        if closed_trade:
            return bid if action == const_action_dict['BUY'] else ask

        # Open trade by Buying at Ask price, and Selling at Bid price
        return ask if action == const_action_dict['BUY'] else bid

    def __profit_by_action(self, entry_action, entry_price, curr_bid, curr_ask):
        """Return the per-unit profit of closing a trade opened with ``entry_action``."""
        const_action_dict = self.constant_values()['TRADE_ACTION']
        if entry_action == const_action_dict['BUY']:
            return curr_ask - entry_price

        if entry_action == const_action_dict['SELL']:
            return entry_price - curr_bid
        return 0

    def update_timestep(self, index):
        """Move to the tick at ``index``.

        Returns:
            False when the tick exists; True (and an empty ``timestep``) when
            ``index`` is past the end of the data.
        """
        try:
            self.timestep = {
                'index': self.indexes[index],
                'datetime': self.datetimes[index],
                'bid': self.bids[index],
                'ask': self.asks[index]
            }
            return False

        # Only running off the end of the data means "done"; other errors must surface.
        except IndexError:
            self.timestep = {}
            return True

    def reset(self):
        """Reset state, timestep and account to their initial values and return the state."""
        # State
        self.default_entry_action        = -1
        self.default_bid_fluctuation_pct = 0
        self.default_ask_fluctuation_pct = 0

        self.observe_bid = None
        self.observe_ask = None
        self.state = np.array([
            self.default_entry_action,
            self.default_bid_fluctuation_pct,
            self.default_ask_fluctuation_pct
        ])

        # Timestep
        self.update_timestep(0)

        # Trading
        self.trading_params_dict = {
            'orig_bal': 100_000.,
            'acct_bal': 100_000.,
            'unit':     100_000.,

            'trade_dict': {
                'action':   [],
                'datetime': [],
                'price':    [],
                'status':   [],
                'profits':  [],
                'acct_bal': []
            }
        }
        return self.state

    def step(self, action):
        """Apply ``action`` at the current tick and advance to the next one.

        A BUY/SELL opposite to the open trades closes all of them; otherwise a
        BUY/SELL opens a new trade. HOLD leaves the ledger untouched.

        Args:
            action: One of the codes returned by ``action_space``.

        Returns:
            Tuple ``(next_state, reward, done, info_dict)`` where ``reward`` is the
            realised profit of this step and ``info_dict['closed_trade']`` tells
            whether open trades were closed.
        """
        const_action_dict = self.constant_values()['TRADE_ACTION']
        const_status_dict = self.constant_values()['TRADE_STATUS']

        curr_bid = self.timestep['bid']
        curr_ask = self.timestep['ask']
        unit     = self.trading_params_dict['unit']

        if self.observe_bid is None:
            self.observe_bid = curr_bid
        if self.observe_ask is None:
            self.observe_ask = curr_ask

        trade_dict = self.trading_params_dict['trade_dict']
        statuses   = trade_dict['status']

        # Get entry action & prices of the open trades
        # - if there's no entry action, treat current action as action to open a trade
        # - if there's entry action, treat current action as action to close a trade
        # NOTE: not to use pd.DataFrame() to convert trade_dict to dataframe, as it is slower
        if const_status_dict['OPEN'] in statuses:
            # Every ledger entry from the first OPEN onwards is still open,
            # because a closing action closes all open trades at once.
            open_index   = statuses.index(const_status_dict['OPEN'])
            entry_action = trade_dict['action'][open_index]

            # Not allowed to close open trades with same entry action
            trade_prices = [] if entry_action == action else trade_dict['price'][open_index:]
        else:
            open_index   = len(statuses)
            entry_action = self.default_entry_action
            trade_prices = []

        profit = 0
        closed_trade = False
        sufficient_margin = True
        if action in [const_action_dict['BUY'], const_action_dict['SELL']]:
            # Close open trades: scale each trade's profit by the unit exactly once
            for trade_offset, trade_price in enumerate(trade_prices):
                profit += self.__profit_by_action(entry_action, trade_price, curr_bid, curr_ask) * unit
                statuses[open_index + trade_offset] = const_status_dict['CLOSE']
                closed_trade = True

            if closed_trade:
                profit = round(profit, 5)

            # Add trade transaction
            self.trading_params_dict['acct_bal'] += profit
            price = self.__price_by_action(action, curr_bid, curr_ask, closed_trade)

            # Add back free margin upon close trade
            if closed_trade:
                self.trading_params_dict['acct_bal'] += (sum(trade_prices) * unit)

            # Deduct required margin upon opening trade
            else:
                required_margin = price * unit
                if self.trading_params_dict['acct_bal'] >= required_margin:
                    self.trading_params_dict['acct_bal'] -= required_margin
                else:
                    sufficient_margin = False

            trade_dict['action'].append(action)
            # The 'datetime' ledger column stores the timestep index
            trade_dict['datetime'].append(self.timestep['index'])
            trade_dict['price'].append(price)
            trade_dict['status'].append(const_status_dict['CLOSE_TRADE'] if closed_trade else const_status_dict['OPEN'])
            trade_dict['profits'].append(profit)
            trade_dict['acct_bal'].append(round(self.trading_params_dict['acct_bal'], 5))

        # TODO: floating P/L and equity % are not part of the state yet.

        # Observe the price at current timestep if trades were opened or closed,
        # else keep observing the previous reference price
        is_hold = action == const_action_dict['HOLD']
        if not is_hold:
            self.observe_bid = curr_bid
            self.observe_ask = curr_ask

        # Calculate fluctuation % relative to the observed reference price
        bid_fluctuation_pct = (curr_bid - self.observe_bid) / self.observe_bid * 100
        ask_fluctuation_pct = (curr_ask - self.observe_ask) / self.observe_ask * 100

        # State
        if closed_trade:
            state_entry_action = self.default_entry_action
        elif is_hold:
            state_entry_action = self.state[0]
        else:
            state_entry_action = action
        next_state = np.array([state_entry_action, bid_fluctuation_pct, ask_fluctuation_pct])

        # Remember the new state so the next HOLD carries the entry action forward
        self.state = next_state

        # Reward
        reward = profit

        done = self.update_timestep(self.timestep['index'] + 1)
        if not done:
            # Stop trading if do not have balance, and there's no open trade
            if (self.trading_params_dict['acct_bal'] <= 0) and (const_status_dict['OPEN'] not in statuses):
                done = True

            # Stop trading if do not have enough balance to pay for required margin
            elif not sufficient_margin:
                done = True

        # Additional information
        info_dict = {
            'closed_trade': closed_trade
        }
        return (next_state, reward, done, info_dict)

# Agent

In [None]:
class Agent:
    """Baseline agent that picks actions uniformly at random."""

    def __init__(self, env):
        """Store the environment and enumerate its action indices.

        Args:
            env: Object exposing ``action_size()``.
        """
        self.env = env
        action_count = self.env.action_size()
        self.action_space = list(range(action_count))

    def choose_action(self, state):
        """Return a random action index; ``state`` is not used by this agent."""
        candidates = self.action_space
        return random.choice(candidates)

## Off-Policy Agent

### Q-Matrix

### Q-Network

### Deep Q-Network (DQN)

### Double DQN

### Dueling Double DQN

## On-Policy Agent

### SARSA

### SARSA (λ)

# Initialize Environment

In [None]:
# Currency pair to trade; it is part of the tick-data file name and of the chart titles
currency_pair = 'EURUSD'
filename      = f'DAT_ASCII_{currency_pair}_T_201901.csv'

# Small sample for quick experiments: read at most 200 rows, 50% of them for training
env = Environment(SOURCE_PATH_DATA, filename, nrows=200, train_size=.5)

# Training

In [None]:
EXEC_START = time.time()


# Agent bound to the environment created above
agent = Agent(env)

# Per-episode performance history
result_dict = {
    'total_profit': [],
    'used_margin': [],
    'acct_bal': [],
    'trades': []
}

# Training iteration
episodes = 100
for episode in range(episodes):
    # Start every episode from a freshly reset environment
    state = env.reset()
    done = False

    # Walk through the environment until it signals termination
    while True:
        action = agent.choose_action(state)
        next_state, reward, done, info_dict = env.step(action)
        if done:
            break
        state = next_state

    # Result summary: realised profit, margin locked in open trades, final balance
    trading_params = env.trading_params_dict
    trade_df       = pd.DataFrame(trading_params['trade_dict'])
    total_profits  = sum(trade_df['profits'])

    open_status   = env.constant_values()['TRADE_STATUS']['OPEN']
    open_trade_df = trade_df[trade_df['status'].isin([open_status])]
    used_margin   = sum(open_trade_df['price'] * trading_params['unit'])

    result_dict['total_profit'].append(round(total_profits, 5))
    result_dict['used_margin'].append(round(used_margin, 5))
    result_dict['acct_bal'].append(round(trading_params['acct_bal'], 5))
    result_dict['trades'].append(trading_params['trade_dict'])

    # Progress: overwrite the previous line instead of flooding the output
    clear_output(wait=True)
    print(f'EP: {episode+1 :,} / {episodes :,} | R: {total_profits :,.2f}')

EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Training Equity

In [None]:
EXEC_START = time.time()


# Equity per episode = final account balance + margin still locked in open trades
equity = np.array(result_dict['acct_bal']) + np.array(result_dict['used_margin'])

data = [
    go.Scattergl(
        x=list(range(1, episodes + 1)),
        y=equity,
        mode='lines',
        name='Equity'
    )
]

title = f'{currency_pair} - Training Equity'
plot_graph(data, title, 'Episode', 'Equity')


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Training Profits

In [None]:
EXEC_START = time.time()

# Profit of every ledger entry in ``trade_df`` (the last episode summarised by the training cell)
y = trade_df['profits']

data = [
    go.Scattergl(
        x=list(range(len(trade_df))),
        y=y,
        mode='lines',
        name=f'Profits ({sum(y) :,.2f})'
    )
]

title = f'{currency_pair} - Training Profits'
plot_graph(data, title, '', 'Amount')


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)

### Chart: Training Trades

In [None]:
EXEC_START = time.time()


# Price series: one line per quote side
data = []
price_types = ['bid', 'ask']
for price_index, prices in enumerate([env.bids, env.asks]):
    data.append(go.Scattergl(
        x = env.indexes,
        y = prices,
        mode = 'lines',
        name = price_types[price_index].title(),

        # Additional settings
        hoverinfo='skip'
    ))

# Ledger rows with this status are closing transactions; every other row opened a trade.
# The row position cannot be used for this: two opening trades can follow each other.
close_trade_status = env.constant_values()['TRADE_STATUS']['CLOSE_TRADE']

# Trade markers: the action code (0 = buy, 1 = sell) matches the position in these lists
markers = ['triangle-up', 'triangle-down']
trade_actions = ['buy', 'sell']
for trade_index, trade_action in enumerate(trade_actions):
    action_df = trade_df[trade_df['action'] == trade_index]

    hover_labels = []
    for row in action_df.itertuples():
        open_or_closed = "Closed" if row.status == close_trade_status else "Open"
        hover_labels.append(
            f'Date Time: {row.datetime}<br />Action Index: {row.Index}<br />{open_or_closed} at {row.price}<br />Profit: {row.profits}'
        )

    data.append(go.Scattergl(
        x = action_df['datetime'],
        y = action_df['price'],
        mode = 'markers',
        name = trade_action.title(),

        # Additional settings
        marker = dict(
            size=15,
            symbol=markers[trade_index]
        ),
        hovertext=hover_labels,
        hoverinfo='text'
    ))

title = f'{currency_pair} - Training Trades'
plot_graph(data, title, 'Date Time', 'Price')


EXEC_END = time.time()
time_taken(EXEC_END - EXEC_START)