## Install TensorTrade

In [13]:
# !python3 -m pip install git+https://github.com/tensortrade-org/tensortrade.git

## Setup Data Fetching

In [14]:
import pandas as pd
import tensortrade.env.default as default
import PyQt5
# from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.agents import DQNAgent
import os

%matplotlib qt

In [15]:
# cdd = CryptoDataDownload()

# data = cdd.fetch("Bitstamp", "USD", "BTC", "1h")
# Load the BTC/USDT candle file and split it chronologically: the leading 70%
# of rows are used for training, the remaining rows for evaluation.
data = pd.read_csv("../data/others/BTCUSDT-2h-data.csv")
split_point = len(data) * 0.7   # fractional boundary, echoed below
split_row = int(split_point)    # truncated to a usable row position
train_data = data[:split_row]
test_data = data[split_row:]
print(split_point)
# for i in data["date"]:
#     i = i[:10]
#     t.append(i)
#     print(i)
# data["date"] = t


4687.2


In [16]:
# Preview the first three rows to check the column layout of the loaded CSV.
data.head(3)

Unnamed: 0,date,open,high,low,close,volume,longAccount,shortAccount,longShortRatio,sumOpenInterest
0,2019-10-14 04:00:00,8430.0,8430.0,8137.3,8245.47,10530.762,0.5476,0.4524,1.2104,62477.972
1,2019-10-14 06:00:00,8244.59,8307.38,8239.5,8274.66,3183.442,0.5668,0.4332,1.3084,61210.463
2,2019-10-14 08:00:00,8274.33,8290.0,8243.87,8285.69,2823.762,0.5712,0.4288,1.3321,60669.912


## Create features with the feed module

In [17]:
# def rsi(price: Stream[float], period: float) -> Stream[float]:
#     r = price.diff()
#     upside = r.clamp_min(0).abs()
#     downside = r.clamp_max(0).abs()
#     rs = upside.ewm(alpha=1 / period).mean() / downside.ewm(alpha=1 / period).mean()
#     return 100*(1 - (1 + rs) ** -1)


# def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
#     fm = price.ewm(span=fast, adjust=False).mean()
#     sm = price.ewm(span=slow, adjust=False).mean()
#     md = fm - sm
#     signal = md - md.ewm(span=signal, adjust=False).mean()
#     return signal


# Wrap every column after the leading index column in a named stream so that
# individual series can be picked out by name below.
features = [
    Stream.source(list(train_data[column]), dtype="float").rename(train_data[column].name)
    for column in train_data.columns[1:]
]


def _stream_named(streams, wanted):
    """Select the stream from ``streams`` whose ``name`` equals ``wanted``."""
    return Stream.select(streams, lambda stream: stream.name == wanted)


# Price series.
op = _stream_named(features, "open")
hp = _stream_named(features, "high")
lp = _stream_named(features, "low")
cp = _stream_named(features, "close")

# Positioning / activity series.
oi = _stream_named(features, "sumOpenInterest")
lsur = _stream_named(features, "longShortRatio")
vol = _stream_named(features, "volume")

# Only the percentage change of the close price ("cd") is fed to the agent.
# Disabled alternatives from earlier experiments: the raw open/high/low/close/
# volume streams (float16), pct_change of open/high/low ("od"/"hd"/"ld"),
# of open interest ("oid"), of the long/short ratio ("lsurd"), of volume
# ("vold"), and the rsi(cp, period=14) / macd(cp, fast=10, slow=50, signal=5)
# indicators.
close_change = cp.pct_change().rename("cd")
features = [close_change]

feed = DataFeed(features)
feed.compile()

# Exchange options used by the training exchange (commission=0.005).
exops = ExchangeOptions(commission=0.005)


## Setup Trading Environment

In [18]:
from tensortrade.env.default.actions import BSH
# from tensortrade.env.default.rewards import PBR

# Simulated exchange whose "USD-BTC" price stream is the training close series.
train_price = Stream.source(list(train_data["close"]), dtype="float").rename("USD-BTC")
bitstamp = Exchange("bitstamp", service=execute_order, options=exops)(train_price)

# The portfolio starts with 10000 USD and no BTC.
cash = Wallet(bitstamp, 10000 * USD)
asset = Wallet(bitstamp, 0 * BTC)
portfolio = Portfolio(USD, [cash, asset])

# Streams handed to the renderer: the date column followed by the OHLCV columns.
renderer_streams = [Stream.source(list(train_data["date"])).rename("date")]
for column in ("open", "high", "low", "close", "volume"):
    renderer_streams.append(
        Stream.source(list(train_data[column]), dtype="float8").rename(column)
    )
renderer_feed = DataFeed(renderer_streams)

# Disabled alternatives from earlier experiments:
#   action_scheme="simple"
#   reward_scheme=PBR(price=pbr_price), with
#       pbr_price = Stream.source(list(data["close"]), dtype="float8").rename("close")
#   renderer="matplot" or renderer=default.renderers.PlotlyTradingChart()
train_env = default.create(
    portfolio=portfolio,
    action_scheme=BSH(cash=cash, asset=asset),
    reward_scheme="simple",
    feed=feed,
    renderer_feed=renderer_feed,
    renderer="screen-log",
    window_size=18,
)

In [19]:
# Pull one record from the environment's observer feed to inspect the keys it
# produces (the captured output shows 'internal', 'external' and 'renderer').
train_env.observer.feed.next()

{'internal': {'bitstamp:/USD-BTC': 8245.47,
  'bitstamp:/USD:/free': 10000.0,
  'bitstamp:/USD:/locked': 0.0,
  'bitstamp:/USD:/total': 10000.0,
  'bitstamp:/BTC:/free': 0.0,
  'bitstamp:/BTC:/locked': 0.0,
  'bitstamp:/BTC:/total': 0.0,
  'bitstamp:/BTC:/worth': 0.0,
  'net_worth': 10000.0},
 'external': {'cd': nan},
 'renderer': {'date': '2019-10-14 04:00:00',
  'open': 8430.0,
  'high': 8430.0,
  'low': 8137.3,
  'close': 8245.47,
  'volume': 10530.762}}

In [20]:
# import tensorflow as tf
# def solve_cudnn_error():
#     gpus = tf.config.experimental.list_physical_devices('GPU')
#     if gpus:
#         try:
#             # Currently, memory growth needs to be the same across GPUs
#             for gpu in gpus:
#                 tf.config.experimental.set_memory_growth(gpu, True)
#             logical_gpus = tf.config.experimental.list_logical_devices('GPU')
#             print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
#         except RuntimeError as e:
#             # Memory growth must be set before GPUs have been initialized
#             print(e)
# solve_cudnn_error()

## Setup and Train DQN Agent

In [21]:
import tensorflow as tf
from tensortrade.agents import DQNAgent

train_env.reset()

# Fully connected policy network for the DQN agent. Wider/deeper variants
# (256-unit layers, Dropout of 0.1-0.3) were tried earlier and are disabled.
network = tf.keras.Sequential([
    # The observation is a window of 18 steps of the single "cd" feature, so
    # Flatten yields 18 inputs (window_size * number of features).
    tf.keras.layers.InputLayer(input_shape=train_env.observation_space.shape),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # One output per action (2 for this action scheme).
    # NOTE(review): sigmoid followed by softmax forces the outputs onto a
    # probability simplex, which is unusual for Q-value estimates -- confirm
    # this head is intended before tuning further.
    tf.keras.layers.Dense(train_env.action_space.n, activation="sigmoid"),
    tf.keras.layers.Dense(train_env.action_space.n, activation="softmax"),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
network.summary()

agent = DQNAgent(train_env, policy_network=network)

# Prefer the first GPU, but choose the CPU explicitly when no GPU is visible
# so the cell does not depend on a device that is not there.
train_device = "gpu:0" if tf.config.list_physical_devices("GPU") else "cpu:0"
with tf.device(train_device):
    agent.train(n_episodes=30, n_steps=4800, memory_capacity=10000, render_interval=200)

# agent.save("./")


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 18)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 128)               2432      
_________________________________________________________________
dense_11 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_12 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_13 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 130       
_________________________________________________________________
dense_15 (Dense)             (None, 2)                

  "The `lr` argument is deprecated, use `learning_rate` instead.")


[2021-09-29 21:04:39.047572] Episode: 1/30 Step: 201/4800
[2021-09-29 21:04:44.322398] Episode: 1/30 Step: 401/4800
[2021-09-29 21:04:46.235831] Episode: 1/30 Step: 471/4800
[2021-09-29 21:04:51.311979] Episode: 2/30 Step: 201/4800
[2021-09-29 21:04:56.377122] Episode: 2/30 Step: 401/4800
[2021-09-29 21:05:01.464843] Episode: 2/30 Step: 601/4800
[2021-09-29 21:05:06.581536] Episode: 2/30 Step: 801/4800
[2021-09-29 21:05:11.852426] Episode: 2/30 Step: 1001/4800
[2021-09-29 21:05:16.991588] Episode: 2/30 Step: 1201/4800
[2021-09-29 21:05:22.120745] Episode: 2/30 Step: 1401/4800
[2021-09-29 21:05:24.026176] Episode: 2/30 Step: 1475/4800
[2021-09-29 21:05:28.999301] Episode: 3/30 Step: 201/4800
[2021-09-29 21:05:34.032437] Episode: 3/30 Step: 401/4800
[2021-09-29 21:05:34.842620] Episode: 3/30 Step: 433/4800
[2021-09-29 21:05:39.839750] Episode: 4/30 Step: 201/4800
[2021-09-29 21:05:45.056024] Episode: 4/30 Step: 401/4800
[2021-09-29 21:05:50.262513] Episode: 4/30 Step: 601/4800
[2021-09-2

In [22]:
%matplotlib qt

performance = pd.DataFrame.from_dict(train_env.action_scheme.portfolio.performance, orient='index')
performance.plot()

<AxesSubplot:>

In [23]:
import numpy as np
import matplotlib.pyplot as plt
from tensortrade.env.default.actions import BSH
# from tensortrade.env.default.rewards import PBR

# --- Observation feed built from the held-out rows -------------------------
# Wrap every column after the leading index column in a named stream.
features = []
for c in test_data.columns[1:]:
    s = Stream.source(list(test_data[c]), dtype="float").rename(test_data[c].name)
    features += [s]

op = Stream.select(features, lambda s: s.name == "open")
hp = Stream.select(features, lambda s: s.name == "high")
lp = Stream.select(features, lambda s: s.name == "low")
cp = Stream.select(features, lambda s: s.name == "close")

oi = Stream.select(features, lambda s: s.name == "sumOpenInterest")
lsur = Stream.select(features, lambda s: s.name == "longShortRatio")
vol = Stream.select(features, lambda s: s.name == "volume")

# Same single feature as in training: percentage change of the close ("cd").
# The other candidate features (raw OHLCV, od/hd/ld/oid/lsurd/vold, rsi, macd)
# stay disabled so the observation matches what the network was trained on.
features = [
    cp.pct_change().rename("cd"),
]

feed = DataFeed(features)
feed.compile()

# NOTE(review): training used commission=0.005; evaluation uses 0.001.
# Confirm the difference is intentional.
exops = ExchangeOptions(commission=0.001)

# --- Exchange, wallets and portfolio for evaluation -------------------------
# The exchange must be bound to the same name the wallets reference. It was
# previously assigned to a misspelled name, so the wallets silently attached
# to the training exchange and traded against training prices.
bitstamp = Exchange("bitstamp", service=execute_order, options=exops)(
    Stream.source(list(test_data["close"]), dtype="float").rename("USD-BTC")
)
cash = Wallet(bitstamp, 10000 * USD)
asset = Wallet(bitstamp, 0 * BTC)
portfolio = Portfolio(USD, [
    cash,
    asset
])

# --- Renderer feed: date plus OHLCV columns of the held-out rows ------------
renderer_feed = DataFeed([
    Stream.source(list(test_data["date"])).rename("date"),
    Stream.source(list(test_data["open"]), dtype="float8").rename("open"),
    Stream.source(list(test_data["high"]), dtype="float8").rename("high"),
    Stream.source(list(test_data["low"]), dtype="float8").rename("low"),
    Stream.source(list(test_data["close"]), dtype="float8").rename("close"),
    Stream.source(list(test_data["volume"]), dtype="float8").rename("volume")
])

# Disabled alternatives: action_scheme="simple", reward_scheme=PBR(price=pbr_price),
# renderer="matplot", renderer=default.renderers.PlotlyTradingChart().
test_env = default.create(
    portfolio=portfolio,
    action_scheme=BSH(cash=cash, asset=asset),
    reward_scheme="simple",
    feed=feed,
    renderer_feed=renderer_feed,
    renderer="screen-log",
    window_size=18
)

# --- Roll the trained agent through the evaluation environment --------------
# Step until the environment reports it is done, rendering every step and
# collecting the per-step info objects in `t`.
obs = test_env.reset()
t = []
while True:
    action = agent.get_action(obs)
    obs, rewards, dones, info = test_env.step(action)
    test_env.render()
    t.append(info)
    if dones:
        break

[2021-09-29 21:13:38.731715] Step: 2/
[2021-09-29 21:13:38.735716] Step: 3/
[2021-09-29 21:13:38.738716] Step: 4/
[2021-09-29 21:13:38.740717] Step: 5/
[2021-09-29 21:13:38.743717] Step: 6/
[2021-09-29 21:13:38.746718] Step: 7/
[2021-09-29 21:13:38.749719] Step: 8/
[2021-09-29 21:13:38.751719] Step: 9/
[2021-09-29 21:13:38.754720] Step: 10/
[2021-09-29 21:13:38.757721] Step: 11/
[2021-09-29 21:13:38.759721] Step: 12/
[2021-09-29 21:13:38.762722] Step: 13/
[2021-09-29 21:13:38.764722] Step: 14/
[2021-09-29 21:13:38.767723] Step: 15/
[2021-09-29 21:13:38.770724] Step: 16/
[2021-09-29 21:13:38.772724] Step: 17/
[2021-09-29 21:13:38.775725] Step: 18/
[2021-09-29 21:13:38.778725] Step: 19/
[2021-09-29 21:13:38.781726] Step: 20/
[2021-09-29 21:13:38.783726] Step: 21/
[2021-09-29 21:13:38.786727] Step: 22/
[2021-09-29 21:13:38.789728] Step: 23/
[2021-09-29 21:13:38.791728] Step: 24/
[2021-09-29 21:13:38.794729] Step: 25/
[2021-09-29 21:13:38.796730] Step: 26/
[2021-09-29 21:13:38.799730] Step

In [24]:
# Turn the evaluation portfolio's performance record into a frame (one row per
# dictionary key) and plot every column.
test_history = test_env.action_scheme.portfolio.performance
performance = pd.DataFrame.from_dict(test_history, orient="index")
performance.plot()

<AxesSubplot:>