Installing TensorFlow, TensorTrade and the `ta` technical-analysis library

In [None]:
!pip install tf
!pip install tensortrade
!pip3 install ta

Collecting tf
  Downloading tf-1.0.0.tar.gz (620 bytes)
Building wheels for collected packages: tf
  Building wheel for tf (setup.py) ... [?25l[?25hdone
  Created wheel for tf: filename=tf-1.0.0-py3-none-any.whl size=1285 sha256=5f84d74b012d5655e84e3d3d08851f1c95e6999e19274b9c2555998a41c67a31
  Stored in directory: /root/.cache/pip/wheels/db/c7/58/cca67875b41ff853d3fdaa20b54a780ef2e045fbcacaef1ee3
Successfully built tf
Installing collected packages: tf
Successfully installed tf-1.0.0
Collecting tensortrade
  Downloading tensortrade-1.0.3.tar.gz (32.6 MB)
[K     |████████████████████████████████| 32.6 MB 17 kB/s 
Collecting pyyaml>=5.1.2
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 14.2 MB/s 
[?25hCollecting stochastic>=0.6.0
  Downloading stochastic-0.6.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 4.7 MB/s 
Collecting ipython>=7.12.0
  Downloading ipython-7.28.0-py3-none-any.whl

Collecting ta
  Downloading ta-0.7.0.tar.gz (25 kB)
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.7.0-py3-none-any.whl size=28718 sha256=bf95dfad34a57b9250c9186882d40f091866df3cdc497bf989305008696a2c1f
  Stored in directory: /root/.cache/pip/wheels/5e/74/e0/72395003bd1d3c8f3f5860c2d180ff15699e47a2733d8ebd38
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.7.0


importing necessary libraries and components

In [None]:
import ta
import pandas as pd
import tensortrade.env.default as default
from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed, NameSpace
from tensortrade.oms.instruments import USD, BTC, ETH, LTC
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order

Loading the crypto price data and computing the technical-analysis features (log returns, RSI and a MACD built from exponential moving averages)

In [None]:
# Data client; also reused by timeofaction() to re-fetch the same dataset.
cdd = CryptoDataDownload()

# Hourly ("1h") Bitfinex candles for the USD/BTC pair. The cells below read the
# "date", "open", "high", "low", "close" and "volume" columns of this frame.
data = cdd.fetch("Bitfinex", "USD", "BTC", "1h")
def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Build a Relative Strength Index stream (0-100 scale) from a price stream.

    Args:
        price: Stream of prices.
        period: Smoothing period; the exponential averages use alpha = 1 / period.

    Returns:
        A stream computed as 100 * (1 - 1 / (1 + RS)), where RS is the ratio of
        the smoothed upward moves to the smoothed downward moves.
    """
    change = price.diff()
    # Split each move into its upward and downward magnitude.
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()
    avg_gain = gains.ewm(alpha=1 / period).mean()
    avg_loss = losses.ewm(alpha=1 / period).mean()
    relative_strength = avg_gain / avg_loss
    return 100*(1 - (1 + relative_strength) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Build a MACD-based stream from a price stream.

    Args:
        price: Stream of prices.
        fast: Span of the fast exponential moving average.
        slow: Span of the slow exponential moving average.
        signal: Span of the exponential moving average applied to the MACD line.

    Returns:
        The MACD line (fast EMA minus slow EMA) minus its own `signal`-span EMA.
    """
    fast_ema = price.ewm(span=fast, adjust=False).mean()
    slow_ema = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line


# One float stream per column of `data` after the first, named after its column.
features = [
    Stream.source(list(data[column]), dtype="float").rename(data[column].name)
    for column in data.columns[1:]
]

# Pick the closing-price stream out of the list by name.
cp = Stream.select(features, lambda stream: stream.name == "close")

# The observation features are all derived from the closing price.
features = [
    cp.log().diff().rename("lr"),
    rsi(cp, period=20).rename("rsi"),
    macd(cp, fast=10, slow=50, signal=5).rename("macd"),
]

feed = DataFeed(features)
feed.compile()  # compiles the streams together so they run in an organized manner

Creating the environment components (action scheme, reward scheme, portfolio, data feed, exchange, traded pair, ...) and then the environment itself, which is a gym object.

In [None]:
# Simulated exchange whose USD-BTC price stream is the closing-price column.
price_stream = Stream.source(list(data["close"]), dtype="float").rename("USD-BTC")
bitfinex = Exchange("bitfinex", service=execute_order)(price_stream)

# Portfolio denominated in USD: 10000 USD and 0 BTC, both held on the exchange.
portfoliox = Portfolio(
    USD,
    [Wallet(bitfinex, 10000 * USD), Wallet(bitfinex, 0 * BTC)],
)

# Streams handed to the renderer: the date column as-is, the rest as floats.
renderer_streams = [Stream.source(list(data["date"])).rename("date")]
renderer_streams += [
    Stream.source(list(data[column]), dtype="float").rename(column)
    for column in ("open", "high", "low", "close", "volume")
]
renderer_feed = DataFeed(renderer_streams)

from tensortrade.env.default.actions import ManagedRiskOrders,BSH

# Chart renderer used by the environment.
chart_renderer = default.renderers.PlotlyTradingChart()

# Assemble the trading environment from the components defined above.
env = default.create(
    feed=feed,
    portfolio=portfoliox,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    window_size=20,
    renderer_feed=renderer_feed,
    renderer=chart_renderer,
)

# Pull one observation from the environment's feed to inspect its contents.
env.observer.feed.next()

{'external': {'lr': nan, 'macd': 0.0, 'rsi': nan},
 'internal': {'bitfinex:/BTC:/free': 0.0,
  'bitfinex:/BTC:/locked': 0.0,
  'bitfinex:/BTC:/total': 0.0,
  'bitfinex:/BTC:/worth': 0.0,
  'bitfinex:/USD-BTC': 8739.0,
  'bitfinex:/USD:/free': 10000.0,
  'bitfinex:/USD:/locked': 0.0,
  'bitfinex:/USD:/total': 10000.0,
  'net_worth': 10000.0},
 'renderer': {'close': 8739.0,
  'date': Timestamp('2018-05-15 06:00:00'),
  'high': 8793.0,
  'low': 8714.9,
  'open': 8723.8,
  'volume': 8988053.53}}

A function to look up the time at which an action was taken, given the price of that action.

In [None]:
def timeofaction(price, candles=None):
    """Print and return the time of the first candle whose close equals `price`.

    Args:
        price: Price to look for; converted with ``float()`` and compared for
            exact equality against the "close" column.
        candles: Optional DataFrame with a "close" column and the timestamp in
            its first column. When omitted, the hourly Bitfinex USD/BTC data
            is fetched through the module-level ``cdd`` client (network call).

    Returns:
        The first-column value of the first matching row, or ``None`` when no
        row has that close price (nothing is printed in that case).
    """
    if candles is None:
        candles = cdd.fetch("Bitfinex", "USD", "BTC", "1h")

    target = float(price)
    # enumerate() gives the position of the matching row itself; the previous
    # hand-rolled counter was incremented before use and so pointed one row late.
    for row, close in enumerate(candles.loc[:, "close"]):
        if close == target:
            moment = candles.iloc[row, 0]
            print(moment)
            return moment
    return None
           

This is the most important piece of code: it defines the agent, including the training function (`train`), the gradient-descent function, the neural network and the deep Q-learning algorithm.

In [None]:
import random
import numpy as np
import tensorflow as tf
from tensortrade.data.cdd import CryptoDataDownload
from collections import namedtuple
from tensortrade.oms.instruments import ExchangePair
from tensortrade.agents import Agent, ReplayMemory
from datetime import datetime
from itertools import product
import logging
from abc import abstractmethod
from itertools import product
from typing import Union, List, Any

from gym.spaces import Space, Discrete

from tensortrade.core import Clock
from tensortrade.env.generic import ActionScheme, TradingEnv
from tensortrade.oms.instruments import ExchangePair
from tensortrade.oms.orders import (
    Broker,
    Order,
    OrderListener,
    OrderSpec,
    proportion_order,
    risk_managed_order,
    TradeSide,
    TradeType
)
from tensortrade.oms.wallets import Portfolio
from tensortrade.env.default.actions import ManagedRiskOrders,BSH

# Record type for one replay-memory entry; DQNAgents.train passes it to
# ReplayMemory as `transition_type`.
DQNTransition = namedtuple('DQNTransition', ['state', 'action', 'reward', 'next_state', 'done'])
# Stand-alone action scheme that DQNAgents.train uses to rebuild the orders it logs.
# NOTE(review): the positional arguments presumably map to stop, take,
# trade_sizes, durations, trade_type, order_listener, min_order_pct and
# min_order_abs -- confirm against the installed tensortrade version.
# NOTE(review): this is a separate object from the scheme the environment builds
# from "managed-risk"; verify that both decode an action index the same way.
actionscheme = ManagedRiskOrders([0.02, 0.04, 0.06],[0.01, 0.02, 0.03],10, None,TradeType.MARKET,None,0.02,0.00)
#actionscheme = BSH(cash=portfoliox.wallets[0],asset=portfoliox.wallets[1])
actionscheme.portfolio = portfoliox

 
# Displayed as the cell output: the action space of the stand-alone scheme.
actionscheme.action_space





class DQNAgents(Agent,ManagedRiskOrders):
    """Deep Q-learning agent that trades in a TensorTrade environment.

    The agent keeps two Keras models: a trainable policy network that selects
    actions and is optimized, and a frozen target network that supplies the
    bootstrap values for the learning target and is periodically re-synced
    from the policy network.

    NOTE(review): ManagedRiskOrders is listed as a base class but its
    ``__init__`` is never called here and order reconstruction for logging
    goes through the module-level ``actionscheme`` -- confirm the extra base
    class is actually needed.
    """

    def __init__(self,
                 env: 'TradingEnv',
                 policy_network: tf.keras.Model = None):
        """Bind the agent to `env` and create the policy/target networks.

        Args:
            env: Environment providing ``action_space.n``,
                ``observation_space.shape``, ``reset`` and ``step``.
            policy_network: Optional pre-built model; when omitted,
                ``build_policy_network()`` is used.
        """
        self.env = env
        self.n_actions = env.action_space.n
        self.observation_shape = env.observation_space.shape

        self.policy_network = policy_network or self.build_policy_network()
        self._sync_target_network()

        self.env.agent_id = self.id

    def _sync_target_network(self):
        """Replace the target network with a frozen copy of the policy network."""
        self.target_network = tf.keras.models.clone_model(self.policy_network)
        # clone_model() only reproduces the architecture and gives the clone
        # freshly initialized weights, so the weights must be copied explicitly.
        self.target_network.set_weights(self.policy_network.get_weights())
        self.target_network.trainable = False

    def build_policy_network(self):
        """Return the default model: two LSTM layers followed by two dense layers."""
        # NOTE(review): the softmax output constrains every value to [0, 1]
        # with the row summing to 1, while these outputs are trained as
        # Q-values against reward-based targets -- confirm this is intended.
        network = tf.keras.Sequential([
            tf.keras.layers.LSTM(40,input_shape=self.observation_shape,return_sequences=True),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.LSTM(40,return_sequences=False),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(self.n_actions, activation="sigmoid"),
            tf.keras.layers.Dense(self.n_actions, activation="softmax")
        ])
        return network

    def restore(self, path: str, **kwargs):
        """Load the policy network from `path` and re-sync the target network."""
        self.policy_network = tf.keras.models.load_model(path)
        self._sync_target_network()

    def save(self, path: str, **kwargs):
        """Save the policy network under `path`.

        The file name is ``policy_network__<first 7 chars of id>__<timestamp>.hdf5``
        and is appended to `path` verbatim (no separator is added). An
        ``episode`` keyword is accepted for caller compatibility but does not
        influence the file name.
        """
        # Both branches of the former episode check built the same name.
        filename = "policy_network__" + self.id[:7] + "__" + datetime.now().strftime("%Y%m%d_%H%M%S") + ".hdf5"

        self.policy_network.save(path + filename)

    def get_action(self, state: np.ndarray, **kwargs) -> int:
        """Choose an action for `state` with an epsilon-greedy rule.

        Args:
            state: A single observation (without batch dimension).
            **kwargs: ``threshold`` (default 0) is the probability of picking
                a uniformly random action instead of the greedy one.

        Returns:
            The index of the selected action.
        """
        threshold: float = kwargs.get('threshold', 0)
        if random.random() < threshold:
            # Explore: uniformly random action.
            return np.random.choice(self.n_actions)
        # Exploit: action with the largest policy-network output.
        return np.argmax(self.policy_network(np.expand_dims(state, 0)))

    def apply_gradient_descent(self, memory: ReplayMemory, batch_size: int, learning_rate: float, discount_factor: float):
        """Run one optimization step on a batch sampled from `memory`.

        The target for each transition is ``reward + discount_factor * max_a
        target_network(next_state)``, with the bootstrap term zeroed for
        terminal transitions; the Huber loss between that target and the
        policy network's value for the taken action is minimized with Adam.

        Args:
            memory: Replay memory holding ``DQNTransition`` records.
            batch_size: Number of transitions to sample.
            learning_rate: Adam learning rate.
            discount_factor: Discount applied to the next-state value.
        """
        # NOTE(review): a new Adam instance is created on every call, so its
        # moment estimates do not persist between steps -- confirm intended.
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        loss = tf.keras.losses.Huber()

        transitions = memory.sample(batch_size)
        batch = DQNTransition(*zip(*transitions))

        # Convert each field of the batch into a tensor.
        state_batch = tf.convert_to_tensor(batch.state)
        action_batch = tf.convert_to_tensor(batch.action)
        reward_batch = tf.convert_to_tensor(batch.reward, dtype=tf.float32)
        next_state_batch = tf.convert_to_tensor(batch.next_state)
        done_batch = tf.convert_to_tensor(batch.done)

        with tf.GradientTape() as tape:
            # Value predicted by the policy network for the action actually taken.
            state_action_values = tf.math.reduce_sum(
                self.policy_network(state_batch) * tf.one_hot(action_batch, self.n_actions),
                axis=1)
            # Best next-state value according to the target network (0 when done).
            next_state_values = tf.where(
                done_batch,
                tf.zeros(batch_size),
                tf.math.reduce_max(self.target_network(next_state_batch), axis=1))
            expected_state_action_values = reward_batch + (discount_factor * next_state_values)
            loss_value = loss(expected_state_action_values, state_action_values)

        variables = self.policy_network.trainable_variables
        gradients = tape.gradient(loss_value, variables)
        optimizer.apply_gradients(zip(gradients, variables))

    def _log_step(self, episode, action, info):
        """Print the order implied by `action` and the info of the step just taken.

        `info` is the info value returned by the training step itself; the
        environment is not stepped again here, so logging does not disturb
        the episode.
        """
        orders = actionscheme.get_orders(action=action, portfolio=portfoliox)
        try:
            print("episode: {},action: {},time: {},quantity: {},price: {},score: {}".format(
                episode, orders[0].side, timeofaction(orders[0].price),
                orders[0].quantity, orders[0].price, info))
        except IndexError:
            # No order was produced for this action.
            print("no action has been done in this timestep")
        finally:
            print("episode: {},score: {}".format(episode, info))

    def train(self,
              n_steps: int = None,
              n_episodes: int = None,
              save_every: int = None,
              save_path: str = None,
              callback: callable = None,
              **kwargs) -> float:
        """Train the agent with deep Q-learning.

        Args:
            n_steps: Maximum steps per episode (checked once the replay memory
                holds at least one batch).
            n_episodes: Number of episodes; when only `n_steps` is given the
                episode count is effectively unbounded.
            save_every: Save the policy network every this many episodes.
            save_path: Location prefix for saved models; nothing is saved when
                empty.
            callback: Accepted but not used by this implementation.
            **kwargs: Optional hyper-parameters: ``batch_size`` (50),
                ``discount_factor`` (0.9999), ``learning_rate`` (0.005),
                ``eps_start`` (0.9), ``eps_end`` (0.05), ``eps_decay_steps``
                (200), ``update_target_every`` (100), ``memory_capacity``
                (500), ``render_interval`` (50; steps between log lines, a
                falsy value logs at episode end only).

        Returns:
            The mean reward per step over all steps taken (0.0 if none).
        """
        batch_size: int = kwargs.get('batch_size', 50)
        discount_factor: float = kwargs.get('discount_factor', 0.9999)
        learning_rate: float = kwargs.get('learning_rate', 0.005)
        eps_start: float = kwargs.get('eps_start', 0.9)
        eps_end: float = kwargs.get('eps_end', 0.05)
        eps_decay_steps: int = kwargs.get('eps_decay_steps', 200)
        update_target_every: int = kwargs.get('update_target_every', 100)
        memory_capacity: int = kwargs.get('memory_capacity', 500)
        render_interval: int = kwargs.get('render_interval', 50)  # in steps, None for episode end only

        if n_steps and not n_episodes:
            n_episodes = np.iinfo(np.int32).max

        print('====      AGENT ID: {}      ===='.format(self.id))

        memory = ReplayMemory(memory_capacity, transition_type=DQNTransition)
        episode = 0
        total_steps_done = 0
        total_reward = 0
        stop_training = False
        while episode < n_episodes and not stop_training:
            state = self.env.reset()
            done = False
            steps_done = 0

            while not done:
                # Exploration probability decays exponentially with total steps.
                threshold = eps_end + (eps_start - eps_end) * np.exp(-total_steps_done / eps_decay_steps)
                action = self.get_action(state, threshold=threshold)
                # Keep the step's info so logging never has to step the env again.
                next_state, reward, done, info = self.env.step(action)
                memory.push(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
                steps_done += 1
                total_steps_done += 1

                # Wait until a full batch is available before learning.
                if len(memory) < batch_size:
                    continue
                self.apply_gradient_descent(memory, batch_size, learning_rate, discount_factor)

                if n_steps and steps_done >= n_steps:
                    done = True

                # Truthiness check also protects against render_interval == 0.
                if render_interval and steps_done % render_interval == 0:
                    self._log_step(episode, action, info)

                if steps_done % update_target_every == 0:
                    self._sync_target_network()

            is_checkpoint = save_every and episode % save_every == 0

            if save_path and (is_checkpoint or episode == n_episodes - 1):
                self.save(save_path, episode=episode)

            # n_steps may be None (episode-bounded training); avoid int < None.
            if not render_interval or n_steps is None or steps_done < n_steps:
                self._log_step(episode, action, info)
            self.env.save()
            episode += 1

        # total_reward spans every episode, so average over every step taken.
        mean_reward = total_reward / total_steps_done if total_steps_done else 0.0
        return mean_reward
       

This cell creates the agent and is where training is launched. Plug in the number of timesteps and the number of episodes you want the agent to train for and watch it trade: every time it takes an action, the action is printed out. If you see that an action has resulted in a profit and would like to know the time and price of the currency at the time of the action, call `timeofaction(price)` to see how the agent trades. If you feel confident in its ability to trade, call `restore(filepath_of_model)` on the agent and it will restore the saved model.

In [None]:
from tensortrade.agents import DQNAgent

# Build the agent around the environment and print its policy-network layout.
agent= DQNAgents(env)
agent.policy_network.summary()
# Training is left disabled here; uncomment to train and save models under "agents/".
#agent.train(n_steps=2000, n_episodes=20, save_path="agents/")

Example of looking up the time of an action. You observe the action after it is done; if a profit has been made after the action, you can call `timeofaction(price)` to find the time at which that action was made, and thereby learn how the agent made its profit.
 

In [None]:
# Example lookup: print the candle time associated with a close price of 6295.10.
timeofaction(6295.10)

2018-06-14 00:00:00


In [None]:
#save("\agents")
#restore("\agents")