## Importing dependencies

In [1]:
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv
from gym_anytrading.datasets import STOCKS_GOOGL

## RL dependencies

In [2]:
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


## Processing dependencies

In [3]:
import numpy as np
import pandas as pnd
from matplotlib import pyplot as plt

In [4]:
df = pnd.read_csv('datasets/AAPL.csv')
df.head()

Unnamed: 0,date,volume,open,close,high,low,adjclose
0,2018-11-02,91263400,209.550003,207.479996,213.649994,205.429993,207.479996
1,2018-11-01,58323200,219.050003,222.220001,222.360001,216.809998,222.220001
2,2018-10-31,38358900,216.880005,218.860001,220.449997,216.619995,218.860001
3,2018-10-30,36660000,211.149994,213.300003,215.179993,209.270004,213.300003
4,2018-10-29,45935500,219.190002,212.240005,219.690002,206.089996,212.240005


In [5]:
df['date'] = pnd.to_datetime(df['date'])
df.dtypes

date        datetime64[ns]
volume               int64
open               float64
close              float64
high               float64
low                float64
adjclose           float64
dtype: object

In [6]:
df.sort_values('date', ascending=True, inplace=True)
df.head()

Unnamed: 0,date,volume,open,close,high,low,adjclose
9555,1980-12-12,117258400,0.513393,0.513393,0.515625,0.513393,0.023186
9554,1980-12-15,43971200,0.488839,0.486607,0.488839,0.486607,0.021977
9553,1980-12-16,26432000,0.453125,0.450893,0.453125,0.450893,0.020364
9552,1980-12-17,21610400,0.462054,0.462054,0.464286,0.462054,0.020868
9551,1980-12-18,18362400,0.475446,0.475446,0.477679,0.475446,0.021473


In [7]:
df.set_index('date', inplace=True)
df.drop(columns=['adjclose'], inplace=True)

In [8]:
df.head()

Unnamed: 0_level_0,volume,open,close,high,low
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-12-12,117258400,0.513393,0.513393,0.515625,0.513393
1980-12-15,43971200,0.488839,0.486607,0.488839,0.486607
1980-12-16,26432000,0.453125,0.450893,0.453125,0.450893
1980-12-17,21610400,0.462054,0.462054,0.464286,0.462054
1980-12-18,18362400,0.475446,0.475446,0.477679,0.475446


In [9]:
def aapl_process_data(df, window_size, frame_bound):
    start = frame_bound[0] - window_size
    end = frame_bound[1]
    prices = df.loc[:, 'low'].to_numpy()[start:end]
    signal_features = df.loc[:, ['close', 'open', 'high', 'low']].to_numpy()[start:end]
    return prices, signal_features


class aapl_stocks(StocksEnv):

    def __init__(self, prices, signal_features, **kwargs):
        self._prices = prices
        self._signal_features = signal_features
        super().__init__(**kwargs)

    def _process_data(self):
        return self._prices, self._signal_features


In [10]:
prices, signal_features = aapl_process_data(df=df, window_size=30, frame_bound=(30, len(df)))
env = aapl_stocks(prices, signal_features, df=df, frame_bound=(30, len(df)), window_size=6)

In [11]:
env.signal_features

array([[  0.51339287,   0.51339287,   0.515625  ,   0.51339287],
       [  0.48660713,   0.4888393 ,   0.4888393 ,   0.48660713],
       [  0.45089287,   0.453125  ,   0.453125  ,   0.45089287],
       ...,
       [218.86000061, 216.88000488, 220.44999695, 216.61999512],
       [222.22000122, 219.05000305, 222.36000061, 216.80999756],
       [207.47999573, 209.55000305, 213.6499939 , 205.42999268]])

In [12]:
env.action_space

Discrete(2)

In [13]:
from finta import TA

df['SMA'] = TA.SMA(df, 12)
df['RSI'] = TA.RSI(df)
df['OBV'] = TA.OBV(df)
df.fillna(0, inplace=True)
df.head(15)

Unnamed: 0_level_0,volume,open,close,high,low,SMA,RSI,OBV
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1980-12-12,117258400,0.513393,0.513393,0.515625,0.513393,0.0,0.0,0.0
1980-12-15,43971200,0.488839,0.486607,0.488839,0.486607,0.0,0.0,-43971200.0
1980-12-16,26432000,0.453125,0.450893,0.453125,0.450893,0.0,0.0,-70403200.0
1980-12-17,21610400,0.462054,0.462054,0.464286,0.462054,0.0,16.554039,-48792800.0
1980-12-18,18362400,0.475446,0.475446,0.477679,0.475446,0.0,31.259631,-30430400.0
1980-12-19,12157600,0.504464,0.504464,0.506696,0.504464,0.0,51.28943,-18272800.0
1980-12-22,9340800,0.529018,0.529018,0.53125,0.529018,0.0,61.509486,-8932000.0
1980-12-23,11737600,0.551339,0.551339,0.553571,0.551339,0.0,68.068523,2805600.0
1980-12-24,12000800,0.580357,0.580357,0.582589,0.580357,0.0,74.219068,14806400.0
1980-12-26,13893600,0.633929,0.633929,0.636161,0.633929,0.0,81.358079,28700000.0


In [20]:
def add_signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:, 'low'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['low', 'volume','SMA', 'RSI', 'OBV']].to_numpy()[start:end]
    return prices, signal_features

class aapl_custom_env(StocksEnv):
    _process_data = add_signals

env2 = aapl_custom_env(df=df, window_size=10, frame_bound=(10,len(df)))

In [21]:
env2.signal_features

array([[ 5.13392866e-01,  1.17258400e+08,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 4.86607134e-01,  4.39712000e+07,  0.00000000e+00,
         0.00000000e+00, -4.39712000e+07],
       [ 4.50892866e-01,  2.64320000e+07,  0.00000000e+00,
         0.00000000e+00, -7.04032000e+07],
       ...,
       [ 2.16619995e+02,  3.83589000e+07,  2.18136667e+02,
         4.97008863e+01,  3.93234619e+10],
       [ 2.16809998e+02,  5.83232000e+07,  2.18142501e+02,
         5.30868059e+01,  3.93817851e+10],
       [ 2.05429993e+02,  9.12634000e+07,  2.17000000e+02,
         4.02775853e+01,  3.92905217e+10]])

In [22]:
env_maker = lambda: env2
env3 = DummyVecEnv([env_maker])

In [23]:
model = A2C('MlpLstmPolicy', env3, verbose=1)
model.learn(total_timesteps=500000)

---------------------------------
| explained_variance | -41.5    |
| fps                | 8        |
| nupdates           | 1        |
| policy_entropy     | 0.693    |
| total_timesteps    | 5        |
| value_loss         | 0.00257  |
---------------------------------
---------------------------------
| explained_variance | -3.52    |
| fps                | 357      |
| nupdates           | 100      |
| policy_entropy     | 0.693    |
| total_timesteps    | 500      |
| value_loss         | 0.000273 |
---------------------------------
---------------------------------
| explained_variance | -0.971   |
| fps                | 412      |
| nupdates           | 200      |
| policy_entropy     | 0.692    |
| total_timesteps    | 1000     |
| value_loss         | 0.000493 |
---------------------------------
---------------------------------
| explained_variance | -0.0707  |
| fps                | 455      |
| nupdates           | 300      |
| policy_entropy     | 0.693    |
| total_timest

KeyboardInterrupt: 

In [19]:
state = env3.reset()
while True:
    action = env.action_space.sample()
    n_state, reward, done, info = env.step(action)
    if done:
        print('info', info)
        break
plt.figure(figsize=(50,20))
plt.cla()
env.render_all()
plt.show()

IndexError: index 9558 is out of bounds for axis 0 with size 9556