<a href="https://colab.research.google.com/github/aCStandke/ReinforcementLearning/blob/main/SecondStockEnivornment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip uninstall tensorflow
!pip install tensorflow==1.13.2
!pip install stable-baselines3[extra]

Found existing installation: tensorflow 1.13.2
Uninstalling tensorflow-1.13.2:
  Would remove:
    /usr/local/bin/freeze_graph
    /usr/local/bin/saved_model_cli
    /usr/local/bin/tensorboard
    /usr/local/bin/tf_upgrade_v2
    /usr/local/bin/tflite_convert
    /usr/local/bin/toco
    /usr/local/bin/toco_from_protos
    /usr/local/lib/python3.7/dist-packages/tensorflow-1.13.2.dist-info/*
    /usr/local/lib/python3.7/dist-packages/tensorflow/*
Proceed (y/n)? y
  Successfully uninstalled tensorflow-1.13.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==1.13.2
  Using cached tensorflow-1.13.2-cp37-cp37m-manylinux1_x86_64.whl (92.7 MB)
Collecting tensorboard<1.14.0,>=1.13.0
  Using cached tensorboard-1.13.1-py3-none-any.whl (3.2 MB)
Installing collected packages: tensorboard, tensorflow
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.10.0
    Uninstalling tensorboard-2.10.0:
     

In [43]:
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np
import json
import datetime as dt
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
import collections
import datetime

In [3]:
# Stock Environment Parameters
MAX_ACCOUNT_BALANCE = 2147483647
MAX_NUM_SHARES = 2147483647
MAX_SHARE_PRICE = 5000
MAX_OPEN_POSITIONS = 5
MAX_STEPS = 20000
TRADING_DAYS = 5
INITIAL_ACCOUNT_BALANCE = 10000

In [97]:
# Stock/ETF Trading Enviornment
class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym"""
    metadata = {'render.modes': ['human']}

    def __init__(self, data):
        super(StockTradingEnv, self).__init__()

        self.data = data
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        # Actions of the format Buy x%, Sell x%, Hold, etc.
        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Prices contains the OHCL values for the last five prices
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(31,), dtype=np.float16)

    def _next_observation(self):
        # Get the stock data points for the last 5 days and scale to between 0-1
        frame = np.array([
                    data.high[0:5],
                    data.low[0:5],
                    data.close[0:5],
                    data.open[0:5],
                    data.volume[0:5]
                    ])

        # Append additional data and scale each value to between 0-1
        obs = np.append(frame, [[
            self.balance / MAX_ACCOUNT_BALANCE,
            self.max_net_worth / MAX_ACCOUNT_BALANCE,
            self.shares_held / MAX_NUM_SHARES,
            self.cost_basis / MAX_SHARE_PRICE,
            self.total_shares_sold / MAX_NUM_SHARES,
            self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE),
        ]])

        return obs

    def _take_action(self, action):
        # Set the current price to a random price within the time step
        current_price = random.uniform(
            self.data.open[self.current_step], self.data.close[self.current_step])

        action_type = action[0]
        amount = action[1]

        if action_type < 1:
            # Buy amount % of balance in shares
            total_possible = int(self.balance / current_price)
            shares_bought = int(total_possible * amount)
            prev_cost = self.cost_basis * self.shares_held
            additional_cost = shares_bought * current_price

            self.balance -= additional_cost
            self.cost_basis = (
                prev_cost + additional_cost) / (self.shares_held + shares_bought)
            self.shares_held += shares_bought

        elif action_type < 2:
            # Sell amount % of shares held
            shares_sold = int(self.shares_held * amount)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_sold += shares_sold
            self.total_sales_value += shares_sold * current_price

        self.net_worth = self.balance + self.shares_held * current_price

        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth

        if self.shares_held == 0:
            self.cost_basis = 0

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        if self.current_step > len(self.data.open[:]) - 6:
            self.current_step = 0

        delay_modifier = (self.current_step / MAX_STEPS)

        reward = self.balance * delay_modifier
        done = self.net_worth <= 0

        obs = self._next_observation()

        return obs, reward, done, {}

    def reset(self):
        # Reset the state of the environment to an initial state
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Set the current step to a random point within the data frame
        self.current_step = random.randint(
            0, len(self.data.open[:]) - 6)

        return self._next_observation()

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(
            f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
        print(
            f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
        print(
            f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
        print(f'Profit: {profit}')


In [8]:
df = pd.read_csv('/content/drive/MyDrive/Datasets/StockMarketData/archive/Data/ETFs/spy.us.txt')
df = df.sort_values('Date')
data=df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]

In [52]:
dt = data['Date'].array
rh = (data['High'].values-data['Open'].values)/data['Open'].values
rl = (data['Low'].values-data['Open'].values)/data['Open'].values
rc = (data['Close'].values-data['Open'].values)/data['Open'].values
ro = (data['Open'].values-data['Close'].values)/data['Close'].values
vol = data['Volume'].values

In [53]:
Data = collections.namedtuple('Data', field_names=['date','high', 'low', 'close', 'open', 'volume'])

In [57]:
data=Data(date=dt,high=rh, low=rl, close=rc, open=ro, volume=vol)

In [61]:
frame = np.array([
    data.date[0:5], 
    data.high[0:5],
    data.low[0:5],
    data.close[0:5],
    data.open[0:5],
    data.volume[0:5]
])

In [62]:
frame

array([['2005-02-25', '2005-02-28', '2005-03-01', '2005-03-02',
        '2005-03-03'],
       [0.011740001908943438, 0.0012316437707248667,
        0.006177532788443316, 0.009599847923201265, 0.001981318992357845],
       [-0.0008590245299225848, -0.0093794410232117, 0.0,
        -0.0010455279916357708, -0.007925275969430977],
       [0.009735611339123894, -0.004452865940312637,
        0.0038015586390420615, 0.003421727972626171,
        -0.003585243890933064],
       [-0.009641743075905191, 0.004472782641796715,
        -0.0037871615224389856, -0.0034100596760443256,
        0.0035981441151405687],
       [70221808, 79695344, 54607412, 73733090, 71286823]], dtype=object)

In [66]:
obs = np.append(frame,[[
      INITIAL_ACCOUNT_BALANCE / MAX_ACCOUNT_BALANCE,
      INITIAL_ACCOUNT_BALANCE / MAX_ACCOUNT_BALANCE,
      0 / MAX_NUM_SHARES,
      0 / MAX_SHARE_PRICE,
      0 / MAX_NUM_SHARES,
      0 / (MAX_NUM_SHARES * MAX_SHARE_PRICE)
        ]])

In [98]:
# The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: StockTradingEnv(data)])
model = PPO2(MlpPolicy, env, verbose=1)

  "Box bound precision lowered by casting to {}".format(self.dtype)


In [99]:
model.learn(total_timesteps=20000)



OverflowError: ignored

In [None]:
# obs = env.reset()
# for i in range(2000):
#   action, _states = model.predict(obs)
#   obs, rewards, done, info = env.step(action)
#   env.render()