In [11]:
#Required packaged to run code
import numpy as np
import pandas as pd
import datetime as dt
import random
import json
import gym

from gym import spaces
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import ACKTR

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import style
from mpl_finance import candlestick_ochl as candlestick

In [12]:
#Read df from CSV
df = pd.read_csv('MSFT.csv')
df = df.sort_values('Date')

In [13]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-02-16,245.029999,246.130005,242.919998,243.699997,241.674210,26728500
1,2021-02-17,241.320007,244.309998,240.940002,244.199997,242.727814,21653500
2,2021-02-18,241.800003,243.929993,240.860001,243.789993,242.320282,16925600
3,2021-02-19,243.750000,243.860001,240.179993,240.970001,239.517288,25262600
4,2021-02-22,237.419998,237.929993,232.399994,234.509995,233.096222,36446900
...,...,...,...,...,...,...,...
249,2022-02-09,309.869995,311.929993,307.390015,311.209991,311.209991,31284700
250,2022-02-10,304.040009,309.119995,300.700012,302.380005,302.380005,45386200
251,2022-02-11,303.190002,304.290009,294.220001,295.040009,295.040009,39143900
252,2022-02-14,293.769989,296.760010,291.350006,295.000000,295.000000,36339400


In [14]:
#parameters for graph

VOLUME_CHART_HEIGHT = 0.33

UP_COLOR = '#27A59A'
DOWN_COLOR = '#EF534F'
UP_TEXT_COLOR = '#73D3CC'
DOWN_TEXT_COLOR = '#DC2C27'
def date2num(date):
    converter = mdates.strpdate2num('%Y-%m-%d')
    return converter(date)

#generate graph for trading environment 
class stock_trading_graph:   

    def __init__(self, df, title="MICROSOFT"):
        self.df = df
        self.net_worths = np.zeros(len(df['Date']))   

        # Create a figure on screen and set the title
        fig = plt.figure()
        fig.suptitle(title)

        # Create top subplot for net worth axis
        self.net_worth_ax = plt.subplot2grid(
            (6, 1), (0, 0), rowspan=2, colspan=1)

        # Create bottom subplot for shared price/volume axis
        self.price_ax = plt.subplot2grid(
            (6, 1), (2, 0), rowspan=8, colspan=1, sharex=self.net_worth_ax)

        # Create a new axis for volume which shares its x-axis with price
        self.volume_ax = self.price_ax.twinx()

        # Add padding to make graph easier to view
        plt.subplots_adjust(left=0.11, bottom=0.24,
                            right=0.90, top=0.90, wspace=0.2, hspace=0)

        # Show the graph without blocking the rest of the program
        plt.ion()

    def _render_net_worth(self, step_loc, net_worth, step_range, dates):        
        # Clear the frame rendered last step
        self.net_worth_ax.clear()

        # Plot net worths
        self.net_worth_ax.plot_date(
            dates, self.net_worths[step_range], '-', label='Net Worth')

        # Show legend, which uses the label we defined for the plot above
        self.net_worth_ax.legend()
        legend = self.net_worth_ax.legend(loc=2, ncol=2, prop={'size': 8})
        legend.get_frame().set_alpha(0.4)

        last_date = date2num(self.df['Date'].values[step_loc])
        last_net_worth = self.net_worths[step_loc]

        # Annotate the current net worth on the net worth graph
        self.net_worth_ax.annotate('{0:.2f}'.format(net_worth), (last_date, last_net_worth),
                                   xytext=(last_date, last_net_worth),
                                   bbox=dict(boxstyle='round',
                                             fc='w', ec='k', lw=1),
                                   color="black",
                                   fontsize="small")

        # Add space above and below min/max net worth
        self.net_worth_ax.set_ylim(
            min(self.net_worths[np.nonzero(self.net_worths)]) / 1.25, max(self.net_worths) * 1.25)

    def _render_price(self, step_loc, net_worth, dates, step_range):        
        self.price_ax.clear()

        # Format df for OHCL candlestick graph
        candlesticks = zip(dates,
                           self.df['Open'].values[step_range], self.df['Close'].values[step_range],
                           self.df['High'].values[step_range], self.df['Low'].values[step_range])

        # Plot price using candlestick graph from mpl_finance
        candlestick(self.price_ax, candlesticks, width=1,
                    colorup=UP_COLOR, colordown=DOWN_COLOR)

        last_date = date2num(self.df['Date'].values[step_loc])
        last_close = self.df['Close'].values[step_loc]
        last_high = self.df['High'].values[step_loc]

        # Print the current price to the price axis
        self.price_ax.annotate('{0:.2f}'.format(last_close), (last_date, last_close),
                               xytext=(last_date, last_high),
                               bbox=dict(boxstyle='round',
                                         fc='w', ec='k', lw=1),
                               color="black",
                               fontsize="small")

        # Shift price axis up to give volume chart space
        ylim = self.price_ax.get_ylim()
        self.price_ax.set_ylim(ylim[0] - (ylim[1] - ylim[0])
                               * VOLUME_CHART_HEIGHT, ylim[1])

    def _render_volume(self, step_loc, net_worth, dates, step_range):        
        self.volume_ax.clear()

        volume = np.array(self.df['Volume'].values[step_range])

        pos = self.df['Open'].values[step_range] -             self.df['Close'].values[step_range] < 0
        neg = self.df['Open'].values[step_range] -             self.df['Close'].values[step_range] > 0

        # Color volume bars based on price direction on that date
        self.volume_ax.bar(dates[pos], volume[pos], color=UP_COLOR,
                           alpha=0.4, width=1, align='center')
        self.volume_ax.bar(dates[neg], volume[neg], color=DOWN_COLOR,
                           alpha=0.4, width=1, align='center')

        # Cap volume axis height below price chart and hide ticks
        self.volume_ax.set_ylim(0, max(volume) / VOLUME_CHART_HEIGHT)
        self.volume_ax.yaxis.set_ticks([])

    def _render_trades(self, step_loc, trades, step_range):        
        for trade in trades:
            if trade['step'] in step_range:
                date = date2num(self.df['Date'].values[trade['step']])
                high = self.df['High'].values[trade['step']]
                low = self.df['Low'].values[trade['step']]

                if trade['type'] == 'buy':
                    high_low = low
                    color = UP_TEXT_COLOR
                else:
                    high_low = high
                    color = DOWN_TEXT_COLOR

                total = '{0:.2f}'.format(trade['total'])

                # Print the current price to the price axis
                self.price_ax.annotate(f'${total}', (date, high_low),
                                       xytext=(date, high_low),
                                       color=color,
                                       fontsize=8,
                                       arrowprops=(dict(color=color)))

    def render(self, step_loc, net_worth, trades, window_size=40):        
        self.net_worths[step_loc] = net_worth        
        window_start = max(step_loc - window_size, 0)
        step_range = range(window_start, step_loc + 1)        
        # Format dates as timestamps, necessary for candlestick graph
        dates = np.array([date2num(x)
                          for x in self.df['Date'].values[step_range]])

        self._render_net_worth(step_loc, net_worth, step_range, dates)
        self._render_price(step_loc, net_worth, dates, step_range)
        self._render_volume(step_loc, net_worth, dates, step_range)
        self._render_trades(step_loc, trades, step_range)

        # Format the date ticks to be more easily read
        self.price_ax.set_xticklabels(self.df['Date'].values[step_range], rotation=45,
                                      horizontalalignment='right')

        # Hide duplicate net worth date labels
        plt.setp(self.net_worth_ax.get_xticklabels(), visible=False)

        # Necessary to view frames before they are unrendered
        plt.pause(0.001)

    def close(self):
        plt.close()


In [15]:
#Initial Parameters for Environment
MAXIMUM_ACC_BALANCE = 4163954875
MAXIMUM_SHARE_NUM = 4163954875
MAXIMUM_PRICE_SHARE = 15000
MAXIMUM_OPEN_POS = 5
MAXIMUM_NUM_STEPS = 20000

ACCOUNT_BALANCE_INITIAL = 150000

PAST_FRAME_SIZE = 10
class stock_trading_env(gym.Env):
    metadf = {'render.modes': ['live']}
    visualization = None

    def __init__(self, df):
        super(stock_trading_env, self).__init__()

        self.df = self.adjusted_df(df)
        self.reward_range = (0, MAXIMUM_ACC_BALANCE)

        # Actions of the format Buy, Sell, Hold, the shares.
        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Prices contains the Open, High, Close, Low values for given past days
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(5, PAST_FRAME_SIZE + 2), dtype=np.float16)

    def adjusted_df(self, df):
        adjusted_ratio = df['Adj Close'] / df['Close']
        df['Open'] = df['Open'] *adjusted_ratio
        df['High'] = df['High'] *adjusted_ratio
        df['Low'] = df['Low'] *adjusted_ratio
        df['Close'] = df['Close'] *adjusted_ratio

        return df

    def _next_observation(self):
        frame = np.zeros((5, PAST_FRAME_SIZE + 1))

        # Get the stock df points for the last 5 days and scale to between 0-1
        np.put(frame, [0, 4], [
            self.df.loc[self.current_step_locator: self.current_step_locator +
                        PAST_FRAME_SIZE, 'Open'].values / MAXIMUM_PRICE_SHARE,
            self.df.loc[self.current_step_locator: self.current_step_locator +
                        PAST_FRAME_SIZE, 'High'].values / MAXIMUM_PRICE_SHARE,
            self.df.loc[self.current_step_locator: self.current_step_locator +
                        PAST_FRAME_SIZE, 'Low'].values / MAXIMUM_PRICE_SHARE,
            self.df.loc[self.current_step_locator: self.current_step_locator +
                        PAST_FRAME_SIZE, 'Close'].values / MAXIMUM_PRICE_SHARE,
            self.df.loc[self.current_step_locator: self.current_step_locator +
                        PAST_FRAME_SIZE, 'Volume'].values / MAXIMUM_SHARE_NUM,
        ])

        # Append additional df and scale each value to between 0-1 to make observation
        obs = np.append(frame, [
            [self.balance / MAXIMUM_ACC_BALANCE],
            [self.max_net_worth / MAXIMUM_ACC_BALANCE],
            [self.shares_held / MAXIMUM_SHARE_NUM],
            [self.purchase_analysis / MAXIMUM_PRICE_SHARE],
            [self.total_sales_value / (MAXIMUM_SHARE_NUM * MAXIMUM_PRICE_SHARE)],
        ], axis=1)

        return obs
    
    #This method is used to take action using trained model/agent.
    def _take_action(self, action):
        current_price = random.uniform(
            self.df.loc[self.current_step_locator, "Open"], self.df.loc[self.current_step_locator, "Close"])

        action_type = action[0]
        amount = action[1]

        if action_type < 1:
            # Buy amount % of balance in shares
            available_bal = int(self.balance / current_price)
            purchase_share = int(available_bal * amount)
            last_balance = self.purchase_analysis * self.shares_held
            after_purchase_balance = purchase_share * current_price

            self.balance -= after_purchase_balance
            self.purchase_analysis = (last_balance + after_purchase_balance) / (self.shares_held + purchase_share)
            self.shares_held += purchase_share

            if purchase_share > 0:
                self.trades.append({'step': self.current_step_locator,
                                    'shares': purchase_share, 'total': after_purchase_balance,
                                    'type': "buy"})

        elif action_type < 2:
            # Sell amount % of shares held
            shares_sold = int(self.shares_held * amount)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.number_share_sold += shares_sold
            self.total_sales_value += shares_sold * current_price

            if shares_sold > 0:
                self.trades.append({'step': self.current_step_locator,
                                    'shares': shares_sold, 'total': shares_sold * current_price,
                                    'type': "sell"})

        self.net_worth = self.balance + self.shares_held * current_price

        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth

        if self.shares_held == 0:
            self.purchase_analysis = 0

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step_locator += 1

        delay_modifier = (self.current_step_locator / MAXIMUM_NUM_STEPS)

        reward = self.balance * delay_modifier + self.current_step_locator
        done = self.net_worth <= 0 or self.current_step_locator >= len(
            self.df.loc[:, 'Open'].values)

        obs = self._next_observation()

        return obs, reward, done, {}
    
    #Reset the environment to initial state
    def reset(self):
        # Reset the state of the environment to an initial state
        self.balance = ACCOUNT_BALANCE_INITIAL
        self.net_worth = ACCOUNT_BALANCE_INITIAL
        self.max_net_worth = ACCOUNT_BALANCE_INITIAL
        self.shares_held = 0
        self.purchase_analysis = 0
        self.number_share_sold = 0
        self.total_sales_value = 0
        self.current_step_locator = 0
        self.trades = []

        return self._next_observation()

    #Used to render df to display
    def render(self, mode='live', **kwargs):
        profit = self.net_worth - ACCOUNT_BALANCE_INITIAL

        print(f'Step: {self.current_step_locator}')
        print(f'Balance: {self.balance}')
        print(
            f'Shares held: {self.shares_held} (Total sold: {self.number_share_sold})')
        print(
            f'Avg cost for held shares: {self.purchase_analysis} (Total sales value: {self.total_sales_value})')
        print(
            f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
        print(f'Profit: {profit}')
        
        #Call visualization class to create visualization
        if self.visualization == None:
            self.visualization = stock_trading_graph(self.df)

        if self.current_step_locator > PAST_FRAME_SIZE:
            self.visualization.render(self.current_step_locator, self.net_worth, self.trades, window_size=PAST_FRAME_SIZE)

    def close(self):
        if self.visualization != None:
            self.visualization.close()
            self.visualization = None

In [16]:
# The algorithms require a vectorized environment to run
vec_env = DummyVecEnv([lambda: stock_trading_env(df)])

gym_model = ACKTR(MlpPolicy, vec_env, verbose=1)
gym_model.learn(total_timesteps = 1000)

monitor = vec_env.reset()
for i in range(200):
    action, _states = gym_model.predict(monitor)
    obs, rewards, done, info = vec_env.step(action)
    vec_env.render()





















----------------------------------
| explained_variance | -4.77e-07 |
| fps                | 14        |
| nupdates           | 1         |
| policy_entropy     | 2.84      |
| policy_loss        | 0.238     |
| total_timesteps    | 20        |
| value_loss         | 5.3e+05   |
----------------------------------
Step: 1
Balance: 59497.628868095475
Shares held: 374 (Total sold: 0)
Avg cost for held shares: 241.98494955054687 (Total sales value: 0)
Net worth: 150000.0 (Max net worth: 150000)
Profit: 0.0
Step: 2
Balance: 59497.628868095475
Shares held: 374 (Total sold: 0)
Avg cost for held shares: 241.98494955054687 (Total sales value: 0)
Net worth: 149467.64578696125 (Max net worth: 150000)
Profit: -532.3542130387505
Step: 3
Balance: 59497.628868095475
Shares held: 374 (Total sold: 0)
Avg cost for held shares: 241.98494955054687 (Total sales value: 0)
Net worth: 149561.86464507322 (Max net worth: 150000)
Profit: -438.13535492678056
Step: 4
Balance: 59497.628868095475
Shares held: 374 (T

The strpdate2num class was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use time.strptime or dateutil.parser.parse or datestr2num instead.
  # Remove the CWD from sys.path while we load stuff.


Step: 12
Balance: 53045.328282372626
Shares held: 407 (Total sold: 211)
Avg cost for held shares: 236.83826833323496 (Total sales value: 49687.35127229291)
Net worth: 146784.69496304938 (Max net worth: 150000)
Profit: -3215.3050369506236
Step: 13
Balance: 45382.82211171537
Shares held: 441 (Total sold: 211)
Avg cost for held shares: 235.95392603692488 (Total sales value: 49687.35127229291)
Net worth: 144770.0345017109 (Max net worth: 150000)
Profit: -5229.965498289093
Step: 14
Balance: 97142.73832379103
Shares held: 215 (Total sold: 437)
Avg cost for held shares: 235.95392603692488 (Total sales value: 101447.26748436855)
Net worth: 146383.36657864176 (Max net worth: 150000)
Profit: -3616.633421358245
Step: 15
Balance: 97142.73832379103
Shares held: 215 (Total sold: 437)
Avg cost for held shares: 235.95392603692488 (Total sales value: 101447.26748436855)
Net worth: 146247.30463562912 (Max net worth: 150000)
Profit: -3752.695364370884
Step: 16
Balance: 142.9414535776741
Shares held: 633 