<a href="https://colab.research.google.com/github/Tikquuss/RL/blob/master/Stock_Trading_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    %tensorflow_version 1.x
    %matplotlib inline
except Exception:
    pass

TensorFlow 1.x selected.


In [None]:
import random
import json
import gym
from gym import spaces
import pandas as pd
import numpy as np

# ! pip install stable-baselines
import gym
import json
import datetime as dt
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
#from env.StockTradingEnv import StockTradingEnv
import pandas as pd

In [None]:
# Scaling constants used by StockTradingEnv as divisors when building observations.
# 2**31 - 1 == 2147483647.
MAX_ACCOUNT_BALANCE = 2**31 - 1   # scales balance / max net worth; upper reward bound
MAX_NUM_SHARES = 2**31 - 1        # scales volume, shares held and shares sold
MAX_SHARE_PRICE = 5_000           # scales OHLC prices and the cost basis
MAX_OPEN_POSITIONS = 5            # not referenced by the code visible in this notebook
MAX_STEPS = 20_000                # divisor applied to the step counter in the reward

# Cash the agent starts with on every reset().
INITIAL_ACCOUNT_BALANCE = 10_000

In [None]:
class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    The agent holds cash and shares of a single instrument whose price history
    is given by a DataFrame. Each step it may buy, sell or hold; the reward is
    the cash balance scaled by how far the step counter has advanced.

    Account state (balance, shares held, ...) is created by ``reset()``, so
    ``reset()`` must be called before the first ``step()``.
    """
    metadata = {'render.modes': ['human']}

    # Number of consecutive data rows packed into one observation.
    _WINDOW_ROWS = 6

    def __init__(self, df):
        """Store the price data and declare the action/observation spaces.

        Args:
            df: DataFrame with 'Open', 'High', 'Low', 'Close' and 'Volume'
                columns. Rows are addressed with label-based ``.loc`` using
                integer step numbers, so the index is assumed to be the
                integers 0..len(df)-1 in row order.
        """
        super(StockTradingEnv, self).__init__()

        self.df = df
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        # action[0] selects the action type (<1 buy, <2 sell, otherwise hold);
        # action[1] is the fraction of balance/shares to trade.
        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Five market rows (Open, High, Low, Close, Volume), each covering a
        # six-row window of the data, plus one row of six account values.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(6, 6), dtype=np.float16)

    def _next_observation(self):
        """Build the (6, 6) observation for the current step.

        Returns:
            Array whose first five rows are the scaled Open/High/Low/Close/
            Volume values for rows ``current_step`` .. ``current_step + 5``
            and whose last row holds the scaled account state.
        """
        # .loc label slices include both endpoints, so this selects
        # _WINDOW_ROWS rows.
        # NOTE(review): the window runs forward from current_step, i.e. it
        # contains rows after the current one (the original comment said
        # "last 5 days") — confirm whether this look-ahead is intended.
        window = self.df.loc[
            self.current_step: self.current_step + self._WINDOW_ROWS - 1]

        frame = np.array([
            window['Open'].values / MAX_SHARE_PRICE,
            window['High'].values / MAX_SHARE_PRICE,
            window['Low'].values / MAX_SHARE_PRICE,
            window['Close'].values / MAX_SHARE_PRICE,
            window['Volume'].values / MAX_NUM_SHARES,
        ])

        # Append the account state, each value divided by its scaling constant.
        obs = np.append(frame, [[
            self.balance / MAX_ACCOUNT_BALANCE,
            self.max_net_worth / MAX_ACCOUNT_BALANCE,
            self.shares_held / MAX_NUM_SHARES,
            self.cost_basis / MAX_SHARE_PRICE,
            self.total_shares_sold / MAX_NUM_SHARES,
            self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE),
        ]], axis=0)

        return obs

    def _take_action(self, action):
        """Apply a buy/sell/hold action and update the account state.

        Args:
            action: Two-element sequence ``(action_type, amount)``.
                ``action_type < 1`` buys, ``1 <= action_type < 2`` sells and
                anything else holds; ``amount`` is the fraction of the
                affordable shares (buy) or held shares (sell) to trade.
        """
        # Trade at a random price between this step's Open and Close.
        current_price = random.uniform(
            self.df.loc[self.current_step, "Open"], self.df.loc[self.current_step, "Close"])

        action_type = action[0]
        amount = action[1]

        if action_type < 1:
            # Buy amount % of balance in shares
            total_possible = int(self.balance / current_price)
            shares_bought = int(total_possible * amount)
            prev_cost = self.cost_basis * self.shares_held
            additional_cost = shares_bought * current_price
            total_shares = self.shares_held + shares_bought

            self.balance -= additional_cost
            # Buying zero shares while holding none would divide 0 by 0
            # (NaN plus a RuntimeWarning with NumPy scalars); leave the cost
            # basis untouched in that case — it is reset to 0 below anyway.
            if total_shares != 0:
                self.cost_basis = (prev_cost + additional_cost) / total_shares
            self.shares_held = total_shares

        elif action_type < 2:
            # Sell amount % of shares held
            shares_sold = int(self.shares_held * amount)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_sold += shares_sold
            self.total_sales_value += shares_sold * current_price

        self.net_worth = self.balance + self.shares_held * current_price

        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth

        # With no position left there is no meaningful average cost.
        if self.shares_held == 0:
            self.cost_basis = 0

    def step(self, action):
        """Execute one time step within the environment.

        Args:
            action: ``(action_type, amount)`` pair, see ``_take_action``.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is
            ``balance * current_step / MAX_STEPS``, ``done`` is true once net
            worth is no longer positive, and ``info`` is an empty dict.
        """
        self._take_action(action)

        self.current_step += 1

        # Wrap around once a full observation window no longer fits.
        if self.current_step > len(self.df) - self._WINDOW_ROWS:
            self.current_step = 0

        delay_modifier = (self.current_step / MAX_STEPS)

        reward = self.balance * delay_modifier
        done = self.net_worth <= 0

        obs = self._next_observation()

        return obs, reward, done, {}

    def reset(self):
        """Reset the account to its initial state and pick a random start step.

        Returns:
            The observation for the newly chosen starting step.
        """
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Start anywhere that still leaves room for a full observation window.
        self.current_step = random.randint(
            0, len(self.df) - self._WINDOW_ROWS)

        return self._next_observation()

    def render(self, mode='human', close=False):
        """Print the current step, account state and profit to stdout.

        Args:
            mode: Accepted for gym compatibility; not used by this method.
            close: Accepted for gym compatibility; not used by this method.
        """
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(
            f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
        print(
            f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
        print(
            f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
        print(f'Profit: {profit}')

In [None]:
! git clone https://github.com/notadamking/Stock-Trading-Environment

Cloning into 'Stock-Trading-Environment'...
remote: Enumerating objects: 38, done.[K
remote: Total 38 (delta 0), reused 0 (delta 0), pack-reused 38[K
Unpacking objects: 100% (38/38), done.


In [None]:
# Load the AAPL price history and order it by date, then renumber the rows 0..n-1.
# StockTradingEnv slices the frame with label-based .loc[step:step + 5]; sort_values
# keeps the original index labels, so without the reset any reordering done by the
# sort would leave labels that no longer match row positions.
df = (
    pd.read_csv('/content/Stock-Trading-Environment/data/AAPL.csv')
    .sort_values('Date')
    .reset_index(drop=True)
)

In [None]:
# Preview the first five rows to check the columns loaded from the CSV.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume
0,0,1998-01-02,13.63,16.25,13.5,16.25,6411700.0
1,1,1998-01-05,16.5,16.56,15.19,15.88,5820300.0
2,2,1998-01-06,15.94,20.0,14.75,18.94,16182800.0
3,3,1998-01-07,18.81,19.0,17.31,17.5,9300200.0
4,4,1998-01-08,17.44,18.62,16.94,18.19,6910900.0


In [None]:
# The algorithms require a vectorized environment, so the single trading env is
# wrapped in a one-element DummyVecEnv before the PPO2 model is built and trained.
env = DummyVecEnv(env_fns=[lambda: StockTradingEnv(df)])
model = PPO2(policy=MlpPolicy, env=env, verbose=1)
model.learn(total_timesteps=20_000)



---------------------------------------
| approxkl           | 1.6340778e-07  |
| clipfrac           | 0.0            |
| ep_rewmean         | nan            |
| eplenmean          | nan            |
| explained_variance | 6.56e-07       |
| fps                | 321            |
| nupdates           | 2              |
| policy_entropy     | 2.8382025      |
| policy_loss        | -4.9880473e-05 |
| serial_timesteps   | 256            |
| time_elapsed       | 2.62e-06       |
| total_timesteps    | 256            |
| value_loss         | 4126068.2      |
---------------------------------------
---------------------------------------
| approxkl           | 5.4058734e-07  |
| clipfrac           | 0.0            |
| ep_rewmean         | nan            |
| eplenmean          | nan            |
| explained_variance | 5.25e-06       |
| fps                | 633            |
| nupdates           | 3              |
| policy_entropy     | 2.8388908      |
| policy_loss        | -0.00020909111 |


<stable_baselines.ppo2.ppo2.PPO2 at 0x7fd3fa59a278>

In [None]:
# Roll the trained policy forward and print periodic account snapshots.
# render() prints six lines per call, so rendering on every one of the 2,000 steps
# produced 12,000 lines that Colab truncated; a snapshot every RENDER_EVERY steps
# (the final step included) keeps the whole trajectory visible.
N_EVAL_STEPS = 2000
RENDER_EVERY = 100

obs = env.reset()
for i in range(N_EVAL_STEPS):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if (i + 1) % RENDER_EVERY == 0:
        env.render()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Net worth: 14128.273383169842 (Max net worth: 14384.572811020396)
Profit: 4128.273383169842
Step: 3602
Balance: 14128.273383169842
Shares held: 0 (Total sold: 344)
Avg cost for held shares: 0 (Total sales value: 32881.17123509331)
Net worth: 14128.273383169842 (Max net worth: 14384.572811020396)
Profit: 4128.273383169842
Step: 3603
Balance: 14128.273383169842
Shares held: 0 (Total sold: 344)
Avg cost for held shares: 0 (Total sales value: 32881.17123509331)
Net worth: 14128.273383169842 (Max net worth: 14384.572811020396)
Profit: 4128.273383169842
Step: 3604
Balance: 14128.273383169842
Shares held: 0 (Total sold: 344)
Avg cost for held shares: 0 (Total sales value: 32881.17123509331)
Net worth: 14128.273383169842 (Max net worth: 14384.572811020396)
Profit: 4128.273383169842
Step: 3605
Balance: 14128.273383169842
Shares held: 0 (Total sold: 344)
Avg cost for held shares: 0 (Total sales value: 32881.17123509331)
Net worth: 