In [30]:
import os
import sys
sys.path.insert(0, os.path.abspath("./tensortrade"))
import ta
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import numpy as np

from tensortrade.feed.core import Stream, DataFeed, NameSpace
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC
from tensortrade.oms.wallets import Wallet, Portfolio
import tensortrade.env.default as default


from stable_baselines.common.policies import MlpLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C, PPO2

In [2]:
def load_csv(filename):
    data_path = os.path.abspath("./tensortrade/playground/data/")
    df = pd.read_csv(os.path.join(data_path,filename), skiprows=1)
    df.drop(columns=["symbol", "volume_btc"], inplace=True)
    # Fix timestamp form "2019-10-17 09-AM" to "2019-10-17 09-00-00 AM"
    df["date"] = df["date"].str[:14] + "00-00 " + df["date"].str[-2:]
    # Convert the date column type from string to datetime for proper sorting.
    df["date"] = pd.to_datetime(df["date"])
    # Make sure historical prices are sorted chronologically, oldest first.
    df.sort_values(by="date", ascending=True, inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Format timestamps as you want them to appear on the chart buy/sell marks.
    df["date"] = df["date"].dt.strftime("%Y-%m-%d %I:%M %p")

    return df


def log_and_diff(df, cols):
    for col in cols:
        df[col] = np.log(df[col]) - np.log(df[col]).shift(1)

In [24]:
prices = load_csv("Coinbase_BTCUSD_1h.csv")
prices = prices.head(1000)
df = prices
print(df.head())
df.drop(columns=["date"], inplace=True)
df = ta.add_all_ta_features(df, "open", "high", "low", "close", "volume", fillna=True)
df = df + 1 - 2 * df.min()  # Make positive
log_and_diff(df, df.columns)
df.drop(df.index[:2], inplace=True)
prices.drop(prices.index[:2], inplace=True)

                  date     open     high      low    close     volume
0  2017-07-01 11:00 AM  2505.56  2513.38  2495.12  2509.17  287000.32
1  2017-07-01 12:00 PM  2509.17  2512.87  2484.99  2488.43  393142.50
2  2017-07-01 01:00 PM  2488.43  2488.43  2454.40  2454.43  693254.01
3  2017-07-01 02:00 PM  2454.43  2473.93  2450.83  2459.35  712864.80
4  2017-07-01 03:00 PM  2459.35  2475.00  2450.00  2467.83  682105.41
  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [25]:
df.shape

(998, 88)

In [26]:
coinbase = Exchange("coinbase", service=execute_order)(
    Stream.source(prices["close"].tolist(), dtype="float").rename("USD-BTC")
)
portfolio = Portfolio(
    USD,
    [Wallet(coinbase, 10000 * USD), Wallet(coinbase, 0 * BTC)],
)


with NameSpace("coinbase"):
    streams = [Stream.source(df[c].tolist(), dtype="float").rename(c) for c in df.columns]
feed = DataFeed(streams)

# # Screen log

env = default.create(
    portfolio=portfolio,
    action_scheme=default.actions.SimpleOrders(),
    reward_scheme=default.rewards.SimpleProfit(),
    feed=feed,
    renderer="screen-log",  # ScreenLogger used with default settings
    window_size=2,
)
print(env.observation_space)
print(env.action_space)

Box(-inf, inf, (2, 88), float32)
Discrete(21)


In [27]:
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpLstmPolicy
check_env(env)



In [29]:
model = A2C(MlpLstmPolicy, env, verbose=1).learn(total_timesteps=10000)


Wrapping the env in a DummyVecEnv.
----------------------------------
| explained_variance | -4.86e+04 |
| fps                | 16        |
| nupdates           | 1         |
| policy_entropy     | 3.04      |
| total_timesteps    | 5         |
| value_loss         | 0.00314   |
----------------------------------
---------------------------------
| explained_variance | -9.92    |
| fps                | 293      |
| nupdates           | 100      |
| policy_entropy     | 3.04     |
| total_timesteps    | 500      |
| value_loss         | 0.000437 |
---------------------------------
---------------------------------
| explained_variance | -2.72    |
| fps                | 307      |
| nupdates           | 200      |
| policy_entropy     | 3.04     |
| total_timesteps    | 1000     |
| value_loss         | 0.00457  |
---------------------------------
---------------------------------
| explained_variance | -26.7    |
| fps                | 329      |
| nupdates           | 300      |
| pol

In [33]:
model = PPO2(MlpLstmPolicy, env, verbose=1, nminibatches=1).learn(total_timesteps=1000)


Wrapping the env in a DummyVecEnv.


--------------------------------------
| approxkl           | 1.1316652e-05 |
| clipfrac           | 0.0           |
| explained_variance | -2.02         |
| fps                | 30            |
| n_updates          | 1             |
| policy_entropy     | 3.044346      |
| policy_loss        | -0.0030572973 |
| serial_timesteps   | 128           |
| time_elapsed       | 1.31e-05      |
| total_timesteps    | 128           |
| value_loss         | 0.00016125702 |
--------------------------------------
--------------------------------------
| approxkl           | 3.862817e-06  |
| clipfrac           | 0.0           |
| explained_variance | -2.21         |
| fps                | 240           |
| n_updates          | 2             |
| policy_entropy     | 3.0443256     |
| policy_loss        | -0.0010505458 |
| serial_timesteps   | 256           |
| time_elapsed       | 4.19          |
| total_timesteps    | 256           |
| value_loss         | 0.00