# Import Libraries

In [None]:
import numpy as np 
import pandas as pd

In [None]:
import chainer
import chainer.functions as F
import chainer.links as L

In [None]:
from plotly import tools
from plotly.graph_objs import *
from plotly.offline import init_notebook_mode, iplot, iplot_mpl, plot

# Test if running in Jupyter Notebook

In [None]:
def is_notebook():
    """Report whether the code is running under a Jupyter (ZMQ) shell.

    Returns:
        True when the active IPython shell class is named
        ``ZMQInteractiveShell`` (Jupyter notebook or qtconsole); False for any
        other shell class, and False when ``get_ipython`` is not defined
        (probably a standard Python interpreter).
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # No get_ipython in scope: not running under IPython at all.
        return False

    # Terminal IPython and any other shell type both count as "not a notebook".
    return shell_name == 'ZMQInteractiveShell'

# Environment Class

In [None]:
# Action constants: integer action code -> human-readable label.
# The codes are the values passed to Environment1.step().
act_dict = {
    0: "HOLD",
    1: "BUY",
    2: "SELL",
}

In [None]:
class Environment1:
    """Single-instrument trading environment driven by a table of closing prices.

    Each call to ``step`` moves one row forward in ``data``. The agent may
    hold, buy one unit at the current close, or sell every open unit at the
    current close. The observation is ``[position_value] + history`` where
    ``history`` holds the last ``history_t`` close-to-close price changes
    (all zeros at the start of an episode).

    Args:
        data: Price table. ``step`` reads ``data.iloc[i, :]['Close']``, so this
            is presumably a pandas DataFrame with a ``Close`` column.
        tnx_cost: Factor multiplied by the purchase price to form the BUY reward.
        funding_cost: Factor multiplied by the invested capital to form the
            HOLD reward.
        clip_rewards: When true, a non-zero reward is replaced by its sign
            (+1 or -1).
        debug: When true, print a trace of every step.
        history_t: Number of past price changes kept in the observation.

    NOTE(review): both cost factors enter the reward with their own sign, so
    they only act as a penalty when they are negative - confirm with callers.
    """

    def __init__(self, data, tnx_cost=0, funding_cost=0, clip_rewards=False, debug=False, history_t=90):
        self.data = data
        self.history_t = history_t
        self.tnx_cost = tnx_cost
        self.funding_cost = funding_cost
        self.clip_rewards = clip_rewards
        self.debug = debug
        self.reset()

    def reset(self):
        """Start a new episode and return the initial observation.

        Returns:
            ``[position_value] + history``: a list of ``history_t + 1`` zeros.
        """
        self.t = 0                  # index of the current row in ``data``
        self.done = False           # becomes True once the last row is reached
        self.profits = 0            # realised profit accumulated by SELL actions
        self.positions = []         # purchase price of every open unit
        self.position_value = 0     # unrealised profit of the open units
        self.invested_capital = 0   # sum of the purchase prices of the open units
        self.history = [0 for _ in range(self.history_t)]
        return [self.position_value] + self.history

    def _open_pnl(self, price):
        """Return the unrealised profit of all open units valued at ``price``."""
        # Plain accumulation loop (not sum()) so float results keep the same
        # left-to-right rounding on every Python version.
        pnl = 0
        for trade_price in self.positions:
            pnl += (price - trade_price)
        return pnl

    def step(self, act):
        """Advance one row and apply ``act``.

        Args:
            act: Action code, a key of ``act_dict`` (0: HOLD, 1: BUY, 2: SELL).

        Returns:
            Tuple ``(observation, reward, done)``. ``observation`` is
            ``[position_value] + history``; ``done`` is True once the step
            has landed on the last row of ``data``.

        Raises:
            KeyError: ``act`` is not a key of ``act_dict``.

        Stepping beyond the last row propagates whatever the positional lookup
        on ``data`` raises (IndexError for a pandas DataFrame). In both failure
        cases the environment state is left exactly as it was before the call.
        """
        # Validate before touching any state; previously the lookup failed only
        # after the time index had already been advanced.
        if act not in act_dict:
            raise KeyError(f"Invalid Action:{act}")
        action = act_dict[act]

        if self.debug: print(f"\n\nStep:{self.t} Action: {action}")

        # Read both prices before committing the new time index, so that
        # running off the end of the data does not corrupt the episode.
        next_t = self.t + 1
        prev_price = self.data.iloc[(next_t - 1), :]['Close']
        curr_price = self.data.iloc[next_t, :]['Close']
        self.t = next_t
        # The episode ends when the last available row has been consumed.
        self.done = self.t >= len(self.data) - 1

        step_reward = 0

        if action == 'BUY':
            # Value the units held so far; the unit bought now contributes
            # nothing because it is bought at the current price.
            self.position_value = self._open_pnl(curr_price)

            self.positions.append(curr_price)
            self.invested_capital += curr_price
            if self.debug: print(f"{action} @ {curr_price} New Position: {self.positions}")

            # Transaction-cost term, meant to discourage unnecessary frequent buying.
            step_reward = curr_price * self.tnx_cost

        elif action == 'SELL':
            self.position_value = 0

            if not self.positions:
                # Nothing to sell: no reward, no transaction cost, no funding cost.
                step_reward = 0
                if self.debug: print(f"Action:{action} No open position. Reward:{step_reward}")

            else:
                # Close every open unit at the current price.
                step_profits = 0

                for trade_price in self.positions:
                    trade_profit = (curr_price - trade_price)
                    step_profits += trade_profit
                    if self.debug: print(f"{action} @ {curr_price} for purchase @ {trade_price}. Profit: {trade_profit}. Cumm step profit: {step_profits}")

                step_reward = step_profits
                self.profits += step_profits
                if self.debug: print(f"Step Profit = Step Reward:{step_reward} Cummulative Strategy Profits:{self.profits}")

                self.positions = []
                self.invested_capital = 0

        elif action == 'HOLD':
            self.position_value = self._open_pnl(curr_price)

            # Funding-cost term; it is zero whenever there is no open position.
            step_reward = self.invested_capital * self.funding_cost
            if self.debug: print(f"{action} Curr Position:{self.positions} InvestedCap:{self.invested_capital} MTM:{self.position_value} Reward:{step_reward}")

        else:
            # Only reachable if act_dict gains a label this method does not handle.
            print(f"Invalid Action:{act}:{action}")

        # Slide the window of price changes used by the DQN for training & prediction.
        self.history.pop(0)
        self.history.append(curr_price - prev_price)

        reward = step_reward
        if self.clip_rewards:
            if   step_reward > 0: reward = 1
            elif step_reward < 0: reward = -1

        if self.debug: print(f"Raw Reward:{step_reward}  Actual Reward:{reward}")  # this is the reward at current step

        return [self.position_value] + self.history, reward, self.done

# Q Network Class

In [None]:
class Q_Network(chainer.Chain):
    """Fully connected network with two ReLU hidden layers and a linear output.

    Args:
        input_size: Width of the input vector. The default of 91 equals one
            position value plus the 90 history entries that Environment1
            returns with its default ``history_t``.
        hidden_size: Width of each of the two hidden layers.
        output_size: Number of output values. The default of 3 equals the
            number of action codes in ``act_dict``.
    """

    def __init__(self, input_size=91, hidden_size=100, output_size=3):
        # Links are registered under the attribute names fc1, fc2 and fc3.
        layers = {
            'fc1': L.Linear(input_size, hidden_size),
            'fc2': L.Linear(hidden_size, hidden_size),
            'fc3': L.Linear(hidden_size, output_size),
        }
        super().__init__(**layers)

    def __call__(self, x):
        """Run the forward pass and return the output of the last linear layer."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def reset(self):
        """Zero the gradients of the network via ``zerograds``."""
        self.zerograds()

# Candlestick plot of train and test datasets

In [None]:
def plot_train_test(train=[], test=[], date_split=''):
    """Draw the train and test price data as candlestick traces in one figure.

    Args:
        train: Training data; when non-empty its index and its ``Open``,
            ``High``, ``Low`` and ``Close`` columns are plotted as the trace
            named ``train``.
        test: Test data, plotted the same way as the trace named ``test``.
        date_split: x position of the train/test boundary. When it is not the
            empty string, a vertical line and the labels "train data" /
            "test data" are added at that position.

    The figure is shown with ``iplot`` inside a notebook and with ``plot``
    otherwise. Nothing is returned.
    """
    traces = []
    for frame, label in ((train, 'train'), (test, 'test')):
        if len(frame) > 0:
            traces.append(
                Candlestick(x=frame.index, open=frame['Open'], high=frame['High'],
                            low=frame['Low'], close=frame['Close'], name=label)
            )

    figure_kwargs = {'data': traces}
    if date_split != '':
        # Full-height vertical line at the split; 'paper' makes y relative to the plot area.
        split_line = {
            'x0': date_split, 'x1': date_split, 'y0': 0, 'y1': 1,
            'xref': 'x', 'yref': 'paper',
            'line': {'color': 'rgb(0,0,0)', 'width': 1},
        }
        # One label on each side of the line, both anchored at the top.
        test_label = {
            'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper',
            'showarrow': False, 'xanchor': 'left', 'text': ' test data',
        }
        train_label = {
            'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper',
            'showarrow': False, 'xanchor': 'right', 'text': 'train data ',
        }
        figure_kwargs['layout'] = {
            'shapes': [split_line],
            'annotations': [test_label, train_label],
        }

    figure = Figure(**figure_kwargs)

    render = iplot if is_notebook() else plot
    render(figure)

# Applying the Strategy to the Train & Test Datasets

In [None]:
def _rollout_greedy(Policy_Network, env):
    """Run one greedy episode of ``Policy_Network`` on ``env`` and record each step.

    Returns a tuple of five lists (actions, rewards, invested capital,
    position value, profits), one entry per step. All lists are empty when
    ``env.data`` is empty.
    """
    actions = []
    rewards = []
    invested_capital = []
    position_value = []
    profits = []

    if len(env.data) == 0:
        return actions, rewards, invested_capital, position_value, profits

    state = env.reset()
    for _ in range(len(env.data) - 1):
        observation = np.array(state, dtype=np.float32).reshape(1, -1)
        action_value = Policy_Network(observation)

        # With nothing to sell, a top-ranked "Sell" (code 2) is pushed down to
        # the minimum value so that the next-best action is chosen instead.
        if len(env.positions) == 0 and np.argmax(action_value.data) == 2:
            sell_index = np.argmax(action_value.data)
            action_value.data[0][sell_index] = np.min(action_value.data)

        action = np.argmax(action_value.data)
        state, reward, done = env.step(action)

        actions.append(action)
        rewards.append(reward)
        invested_capital.append(env.invested_capital)
        position_value.append(env.position_value)
        profits.append(env.profits)

    return actions, rewards, invested_capital, position_value, profits


def apply_trained_model(Policy_Network, train_env=Environment1([]), test_env=Environment1([])):
    """Apply a trained policy greedily to the train and test environments.

    Args:
        Policy_Network: Callable that maps a ``(1, n)`` float32 array to an
            object whose ``.data`` holds one value per action.
        train_env: Environment over the training data; skipped when its
            ``data`` is empty.
        test_env: Environment over the test data; skipped when its ``data``
            is empty.

    Returns:
        A 12-tuple: ``train_env``, ``test_env``, then actions, rewards,
        invested capital, position value and profits for the train run,
        followed by the same five lists for the test run.
    """
    train_trace = _rollout_greedy(Policy_Network, train_env)
    test_trace = _rollout_greedy(Policy_Network, test_env)
    return (train_env, test_env, *train_trace, *test_trace)

# Plot of the actions on candlestick price movements 

In [None]:
def _candlesticks_by_action(env_data, actions, rewards, colors):
    """Return one single-colour candlestick trace per action code for ``env_data``."""
    frame = env_data.copy()
    # No action is recorded for the final row, so pad the columns with NaN.
    # list() lets any sequence be passed, not only plain lists.
    frame['action'] = list(actions) + [np.nan]
    frame['reward'] = list(rewards) + [np.nan]

    traces = []
    for code, color in enumerate(colors):
        rows = frame[frame['action'] == code]
        traces.append(
            Candlestick(x=rows.index, open=rows['Open'], high=rows['High'],
                        low=rows['Low'], close=rows['Close'],
                        increasing=dict(line=dict(color=color)),
                        decreasing=dict(line=dict(color=color)))
        )
    return traces


def plot_train_test_by_q(algorithm_name, 
                         train_env=Environment1([]), 
                         test_env=Environment1([]), 
                         date_split='',
                         train_actions=[],
                         train_rewards=[],
                         train_position_value=[],
                         train_profits=[],
                         test_actions=[],
                         test_rewards=[],
                         test_position_value=[],
                         test_profits=[]):
    """Plot the price candlesticks coloured by the action taken on each row.

    Args:
        algorithm_name: Label used as the prefix of the figure title.
        train_env: Environment holding the training data in ``data``; skipped
            when that data is empty.
        test_env: Environment holding the test data in ``data``; skipped when
            that data is empty.
        date_split: x position of the train/test boundary. When it is not the
            empty string, a vertical line and two labels are drawn there.
        train_actions, train_rewards: Per-step action codes and rewards for
            the training data, one entry fewer than the number of rows.
        train_position_value: Per-step position values (summary only).
        train_profits: Per-step cumulative profits; the last entry is shown
            in the title, so it must be non-empty when training data is given.
        test_actions, test_rewards, test_position_value, test_profits: Same
            as above for the test data.

    The figure is shown with ``iplot`` inside a notebook and with ``plot``
    otherwise, then a colour legend and a short summary are printed. Summary
    lines for which no values were supplied are omitted. Nothing is returned.
    """
    # Colours for action codes 0 (HOLD), 1 (BUY) and 2 (SELL), in that order.
    action_colors = ('gray', 'cyan', 'magenta')
    has_train = len(train_env.data) != 0
    has_test = len(test_env.data) != 0

    data = []
    if has_train:
        data += _candlesticks_by_action(train_env.data, train_actions, train_rewards, action_colors)
    if has_test:
        data += _candlesticks_by_action(test_env.data, test_actions, test_rewards, action_colors)

    title = '{}:'.format(algorithm_name)
    if has_train:
        title += 'train s-reward {}, profits {} '.format(int(sum(train_rewards)), int(train_profits[-1]))
    if has_test:
        title += 'test s-reward {}, profits {}'.format(int(sum(test_rewards)), int(test_profits[-1]))

    layout = {
        'title': title,
        'showlegend': False
    }

    if date_split != '':
        # Full-height vertical line at the split, with a label on each side.
        layout['shapes'] = [
            {'x0': date_split, 'x1': date_split, 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper',
             'line': {'color': 'rgb(0,0,0)', 'width': 1}}
        ]
        layout['annotations'] = [
            {'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper', 'showarrow': False,
             'xanchor': 'left', 'text': ' test data'},
            {'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper', 'showarrow': False,
             'xanchor': 'right', 'text': 'train data '}
        ]

    figure = Figure(data=data, layout=layout)

    if is_notebook():
        iplot(figure)
    else:
        plot(figure)

    print("Gray: HOLD, Cyan: BUY, Magenta: SELL")

    # max()/min() and [-1] fail on empty sequences; with the declared defaults
    # (or two empty environments) there is nothing to summarise, so skip.
    position_values = list(train_position_value) + list(test_position_value)
    if position_values:
        print("Max Position Value: " + str(max(position_values)))
        print("Min Position Value: " + str(min(position_values)))

    all_actions = list(train_actions) + list(test_actions)
    if all_actions:
        print("Last Action Taken: " + str(act_dict[all_actions[-1]]))