In [1]:
import numpy as np
import pandas as pd
import pickle
import gym
from google.colab import files
import tensorflow as tf
from tensorflow import keras

# https://github.com/openai/gym/tree/master/gym/spaces

In [1]:
# files.upload()

In [3]:
# Inputs for the simulation: the saved Keras model file, then the two CSV
# tables (read in the same order as before: "Call_data.csv", then "SPY.csv").
Call_model = tf.keras.models.load_model('2.5Call_LR0.0001.h5')
Call_data, SPY = map(pd.read_csv, ("Call_data.csv", "SPY.csv"))

In [2]:
# Load the two pickled scaler objects. The simulation cell further down calls
# scalerX.transform(...) and scalery.inverse_transform(...), so both names must
# exist for the notebook to run on a fresh kernel.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle
# files that you produced yourself.
with open("scalerX.pickle", "rb") as pickle_in:
    scalerX = pickle.load(pickle_in)
with open("scalery.pickle", "rb") as pickle_in:
    scalery = pickle.load(pickle_in)

In [5]:
class OptionsTradingEnv(gym.Env):
    """
    An Options trading environment for OpenAI gym.

    Call options are bought at their ask price and settled at intrinsic value
    ``max(0, underlying close - strike)``: always on the step whose date equals
    the contract's 'END_DATE', and otherwise with probability ``act_rate`` on
    any step where the intrinsic value exceeds the ask price that was paid.

    Notes
    -----
    * ``reset`` must be called before ``step`` or ``render``; it creates the
      balance, step counter and contract list.
    * ``step`` returns a 3-tuple ``(observation, net_worth, done)``. The second
      element is the account net worth, not a per-step reward.
    * The net worth is computed before the purchases of the same step are
      executed, so it reflects newly bought contracts only from the next step.
    """
    # - human: render to the current display or terminal and return nothing.
    # Usually for human consumption.
    metadata = {'render.modes': ['human']}

    def __init__(self, df1, df2, initial_balance=None, act_rate=None):
        """
        Parameters
        ----------
        df1 : pandas.DataFrame
            Options chain. Columns read by this class: 'START_DATE',
            'END_DATE', 'SKRIKE' and 'ASK'.
        df2 : pandas.DataFrame
            Underlying asset series. Columns read: 'Date' and 'Close'.
        initial_balance : number, optional
            Starting cash. When None (default) the module-level
            ``INITIAL_ACCOUNT_BALANCE`` is read on every ``reset``.
        act_rate : float, optional
            Probability of settling a profitable contract before expiry. When
            None (default) the module-level ``ACT_RATE`` is read on every
            ``reset``.
        """
        super(OptionsTradingEnv, self).__init__()
        self.df = df1
        self.contract_days = len(df1['START_DATE'].unique())
        self.days = df2['Date'].values
        self.underlying_asset_price = df2['Close'].values
        self._initial_balance = initial_balance
        self._act_rate = act_rate

    # private method
    def _next_observation(self):
        """Return the option-chain rows whose 'START_DATE' equals the current day.

        The rows are also cached on ``self.observation`` for ``_take_action``.
        Returns None once ``current_step`` is past the last available day.
        """
        if self.current_step >= len(self.days):
            return None

        # Get the Options chain
        todays_chain = self.df[self.df['START_DATE'] == self.days[self.current_step]]
        self.observation = todays_chain
        return todays_chain

    def _take_action(self, action):
        """Buy the contracts at the given positions of the cached observation.

        ``action`` is a sequence of positional (``iloc``) row indices. Each
        contract is bought at its ask price, in order, as long as the balance
        covers it; unaffordable contracts are skipped.
        """
        if len(action) == 0:
            return

        chain = self.observation
        spot = self.underlying_asset_price[self.current_step]
        for row in action:
            # Assume the bought price is the ask price
            ask = chain['ASK'].iloc[row]
            if self.balance >= ask:
                strike = chain['SKRIKE'].iloc[row]
                self.Bought_contracts.append({
                    'START_DATE': chain['START_DATE'].iloc[row],
                    'END_DATE': chain['END_DATE'].iloc[row],
                    'SKRIKE': strike,
                    'ASK': ask,
                    # Floored at zero so it agrees with the valuation in step().
                    'OPTIONS_VALUE': max(0, spot - strike),
                })
                self.balance -= ask

    def step(self, action):
        """Advance one day: settle held contracts, then buy the requested ones.

        Parameters
        ----------
        action : sequence of int
            Positional row indices into the current observation to buy.

        Returns
        -------
        (observation, net_worth, done)
            ``observation`` is the next day's option chain, ``[]`` when the
            episode ends, or None when there is no data for the next day.
            ``done`` is True when the net worth is <= 0, when
            ``current_step >= contract_days`` with no contracts left, or when
            the price series has been exhausted.
        """
        # Past the last available day there is no price or date to look up.
        # Any contract still held is settled at the last known close instead of
        # indexing beyond the series.
        out_of_data = self.current_step >= len(self.days)
        settled = set()

        if self.Bought_contracts:
            last_idx = len(self.underlying_asset_price) - 1
            spot = self.underlying_asset_price[min(self.current_step, last_idx)]
            today = None if out_of_data else self.days[self.current_step]
            for i, contract in enumerate(self.Bought_contracts):
                # Profit for call options
                profit = max(0, spot - contract['SKRIKE'])
                contract['OPTIONS_VALUE'] = profit
                if out_of_data or contract['END_DATE'] == today:
                    # This is at the expiration date (or the data ran out)
                    self.balance += profit
                    settled.add(i)
                elif profit - contract['ASK'] > 0:
                    # Exercise the contract with probability ACT_RATE
                    if np.random.binomial(n=1, p=self.act_rate) == 1:
                        self.balance += profit
                        settled.add(i)

        # Delete all the exercised contracts
        self.Bought_contracts = [
            contract for i, contract in enumerate(self.Bought_contracts) if i not in settled
        ]
        # Calculate the net worth
        self.net_worth = self.balance + np.sum(
            [contract['OPTIONS_VALUE'] for contract in self.Bought_contracts]
        )

        done = self.net_worth <= 0

        # Execute one time step within the environment
        if not out_of_data:
            self._take_action(action)
        self.current_step += 1

        finished = self.current_step >= self.contract_days and len(self.Bought_contracts) == 0
        if out_of_data or finished:
            done = True
            return [], self.net_worth, done
        return self._next_observation(), self.net_worth, done

    def reset(self):
        """Reset the state of the environment and return the first observation.

        The starting balance and early-exercise probability come from the
        constructor arguments when given, otherwise from the module-level
        ``INITIAL_ACCOUNT_BALANCE`` and ``ACT_RATE`` at the time of the call.
        """
        if self._initial_balance is None:
            self.balance = INITIAL_ACCOUNT_BALANCE
        else:
            self.balance = self._initial_balance
        self.net_worth = self.balance
        self.act_rate = ACT_RATE if self._act_rate is None else self._act_rate

        # Set the current step to 0
        self.current_step = 0
        self.Bought_contracts = []
        self.observation = None

        return self._next_observation()

    def render(self, mode='human', show=False):
        """Print the current step, close price, held contracts, balance and net worth.

        ``mode`` and ``show`` are accepted but not used.
        """
        # Render the environment to the screen
        print('The current step is', self.current_step)
        n_prices = len(self.underlying_asset_price)
        if n_prices > 0:
            # After the final step current_step can equal the series length;
            # clamp so the last known close is shown instead of raising.
            print('Todays clos price is', self.underlying_asset_price[min(self.current_step, n_prices - 1)])
        print('Hold contracts', self.Bought_contracts)
        print('The balance is', self.balance)
        print('The current net worth is', self.net_worth)
        print('-----------------------------------------------------------------------------------')

In [6]:
INITIAL_ACCOUNT_BALANCE = 1000
Features = ['UNDERLYING', 'SKRIKE', 'MATURITY', 'DELTA', 'BID', 'ASK', 'IMPLIED_VOL', 'LIQUIDITY', 'INTEREST_RATE']
N_EPISODES = 20          # simulated episodes per exercise rate
MAX_BUYS_PER_STEP = 5    # at most this many contracts are bought per day
SEED = 0

# The environment draws its early-exercise decisions from NumPy's global RNG;
# seeding it makes the reported means and standard deviations reproducible.
np.random.seed(SEED)

# (predicted price - ask) for each step index. The observation for a given step
# is the same in every episode, so the model only needs to be evaluated once
# per step instead of once per step per episode per exercise rate.
price_diff_by_step = {}

profit_list = []
# ACT_RATE is deliberately a module-level name: Env.reset() reads it.
for ACT_RATE in np.arange(0.05, 1, 0.05):
    profit = []
    for _ in range(N_EPISODES):
        Env = OptionsTradingEnv(Call_data, SPY)
        cur_state = Env.reset()
        done = False
        while not done:
            # Env.render()
            has_chain = cur_state is not None and len(cur_state) > 0
            if Env.current_step < Env.contract_days and has_chain:
                step_idx = Env.current_step
                if step_idx not in price_diff_by_step:
                    # Predict the call options prices
                    X = scalerX.transform(cur_state[Features].values)
                    Options_price_pred = scalery.inverse_transform(Call_model.predict(X))
                    price_diff_by_step[step_idx] = Options_price_pred.reshape(-1) - cur_state['ASK'].values
                price_diff = price_diff_by_step[step_idx]
                # Buy undervalued call options: the most undervalued first,
                # capped at MAX_BUYS_PER_STEP and never a non-undervalued one.
                n_undervalued = int((price_diff > 0).sum())
                action = np.argsort(price_diff)[::-1][:min(MAX_BUYS_PER_STEP, n_undervalued)]
            else:
                # Nothing to buy: past the buying window, or no chain for this day.
                action = []
            cur_state, NETWORTH, done = Env.step(action)
        profit.append(Env.net_worth - INITIAL_ACCOUNT_BALANCE)
    profit_list.append((np.mean(profit), np.std(profit)))

In [7]:
# Report the mean and standard deviation of episode profit per exercise rate.
exercise_rates = np.arange(0.05, 1, 0.05)
report_line = 'When the exercise rate is {}, the mean profit is {}, with a std of {}.'
for position, rate in enumerate(exercise_rates):
    stats = profit_list[position]  # (mean, std) tuple for this rate
    print(report_line.format(np.round(rate, 2),
                             np.round(stats[0], 2),
                             np.round(stats[1], 2)))

When the exercise rate is 0.05, the mean profit is -9.08, with a std of 6.63.
When the exercise rate is 0.1, the mean profit is -15.26, with a std of 7.07.
When the exercise rate is 0.15, the mean profit is -18.94, with a std of 6.82.
When the exercise rate is 0.2, the mean profit is -23.98, with a std of 6.34.
When the exercise rate is 0.25, the mean profit is -26.97, with a std of 5.61.
When the exercise rate is 0.3, the mean profit is -32.24, with a std of 3.78.
When the exercise rate is 0.35, the mean profit is -35.47, with a std of 4.41.
When the exercise rate is 0.4, the mean profit is -37.01, with a std of 5.94.
When the exercise rate is 0.45, the mean profit is -37.91, with a std of 4.4.
When the exercise rate is 0.5, the mean profit is -39.37, with a std of 3.4.
When the exercise rate is 0.55, the mean profit is -40.03, with a std of 2.98.
When the exercise rate is 0.6, the mean profit is -40.79, with a std of 3.16.
When the exercise rate is 0.65, the mean profit is -42.38, wi

In [8]:
# Baseline: buy up to 5 randomly chosen contracts per day, 20 episodes per
# exercise rate. ACT_RATE is deliberately a module-level name: Env.reset()
# reads it.
profit_list = []
for ACT_RATE in np.arange(0.05, 1, 0.05):
    profit = []
    for _ in range(20):
        Env = OptionsTradingEnv(Call_data, SPY)
        cur_state = Env.reset()
        done = False
        while not done:
            n_available = 0 if cur_state is None else len(cur_state)
            if Env.current_step < Env.contract_days and n_available > 0:
                # Sampling without replacement cannot draw more rows than
                # exist, so cap the sample size on days with fewer than 5
                # contracts.
                action = np.random.choice(np.arange(n_available), size = min(5, n_available), replace = False)
            else:
                action = []
            cur_state, NETWORTH, done = Env.step(action)
            # Env.render()
        profit.append(Env.net_worth - INITIAL_ACCOUNT_BALANCE)
    profit_list.append((np.mean(profit), np.std(profit)))
# profit_list

In [9]:
# Report the mean and standard deviation of episode profit per exercise rate.
exercise_rates = np.arange(0.05, 1, 0.05)
report_line = 'When the exercise rate is {}, the mean profit is {}, with a std of {}.'
for position, rate in enumerate(exercise_rates):
    stats = profit_list[position]  # (mean, std) tuple for this rate
    print(report_line.format(np.round(rate, 2),
                             np.round(stats[0], 2),
                             np.round(stats[1], 2)))

When the exercise rate is 0.05, the mean profit is -477.12, with a std of 64.9.
When the exercise rate is 0.1, the mean profit is -459.02, with a std of 28.94.
When the exercise rate is 0.15, the mean profit is -478.82, with a std of 50.71.
When the exercise rate is 0.2, the mean profit is -465.51, with a std of 52.89.
When the exercise rate is 0.25, the mean profit is -478.32, with a std of 57.76.
When the exercise rate is 0.3, the mean profit is -479.64, with a std of 55.33.
When the exercise rate is 0.35, the mean profit is -486.89, with a std of 44.67.
When the exercise rate is 0.4, the mean profit is -461.39, with a std of 63.22.
When the exercise rate is 0.45, the mean profit is -471.95, with a std of 41.21.
When the exercise rate is 0.5, the mean profit is -466.5, with a std of 60.04.
When the exercise rate is 0.55, the mean profit is -474.95, with a std of 58.15.
When the exercise rate is 0.6, the mean profit is -496.55, with a std of 61.55.
When the exercise rate is 0.65, the 