# RL Market Environment Implementation
The main function of an RL environment is to provide the training data, i.e. 4-element tuples (s, a, r, s'), by interacting with agents.

A series of these tuples constitutes the training data for an RL agent.

In [1]:
import sys
sys.path.append('/Users/cheng/Google Drive/PhD/Research/Portfolio Selection via TBN/codes/')
from module.backtesting import *
from module.environment_new import *
import gym

In [3]:
class market_environment(vectorized_backtesting):
    '''
    Gym-style market environment built on top of ``vectorized_backtesting``.

    The agent's action is a shrinkage intensity. Each call to ``step``
    evaluates that action for the current year, then moves on to the next
    year. The episode ends once the last year of ``year_range`` has been
    processed.
    '''

    def __init__(self, year_range):
        '''
        Set up the action/observation spaces and the episode bookkeeping.

        Args:
            year_range: indexable sequence of years; the first entry is the
                        starting year and the last entry is the final year
                        of an episode.
        '''
        super().__init__()
        stock_num = self.stock_price.shape[1]
        # Continuous action: a single shrinkage intensity in [0, 1].
        self.action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32)
        # Discrete alternative; it is not referenced anywhere else in this class.
        self.action_space_discrete = gym.spaces.Discrete(100)
        # One row holding one value per stock plus one portfolio-level value.
        self.observation_space = gym.spaces.Box(low=-100, high=100, shape=(1, stock_num + 1), dtype=np.float32)
        self.seed()
        self.done = False
        self.action = None
        self.reward_type = 'sharpe_ratio'
        self.year_range = year_range
        self.year = year_range[0]

    def seed(self):
        '''
        Placeholder: nothing is seeded.
        '''
        pass

    def reset(self):
        '''
        Rewind the episode to the first year of ``year_range``.

        Clears the ``done`` flag and the stored action. Returns nothing
        (no initial observation is produced here).
        '''
        self.year = self.year_range[0]
        self.done = False
        self.action = None

    def step(self, action):
        '''
        Core function in environment. Take action as input, and respond to agent.
        Args:
            action: np.array
                    shrinkage intensity
        Returns:
            A list [state, reward, done]:
            state: np.array
            reward: float
            done: Bool
        Raises:
            AssertionError: if the episode is still running and ``action``
                            is not contained in ``self.action_space``.
        '''
        if self.done:
            # should never reach this point
            print("EPISODE DONE!!!")
        else:
            # Raised explicitly (rather than via `assert`) so that the check
            # is not stripped when Python runs with -O. The exception type is
            # kept as AssertionError so existing callers see the same error.
            if not self.action_space.contains(action):
                raise AssertionError(
                    "action {!r} is outside the action space {!r}".format(action, self.action_space)
                )
            self.action = action
            self.state = self.get_state(action)
            self.reward = self.get_reward()
            self.done = self.is_done()
            # The year advances only after state, reward and done have been
            # computed for the current year.
            self.year += 1
        # A state outside the observation space is reported but not treated
        # as fatal; a plain `if` keeps the report alive under -O as well.
        if not self.observation_space.contains(self.state):
            print("INVALID STATE", self.state)
        return [self.state, self.reward, self.done]

    def get_state(self, action):
        '''
        Build the env's state for the current year.
        Args:
            action: a number (shrinkage intensity). It is not read directly
                    in this method; the value stored in ``self.action`` by
                    ``step`` is the one ``get_portfolio`` uses.
        Return:
            state - the stock mean returns followed by the portfolio mean
                    return, reshaped to a single row. ``None`` (after
                    printing a notice) when the episode is already done.
        '''
        if not self.done:
            portfolio_mean_return = self.get_portfolio_mean_return(self.year, self.year)
            stocks_returns = self.get_stock_mean_returns(self.year)
            state = np.append(stocks_returns, portfolio_mean_return).reshape(1, -1)
            return state
        else:
            print('The end of period\n')

    def get_portfolio(self, year):
        '''
        Build the portfolio for ``year`` from a shrunk covariance matrix.

        The covariance entry of the previous year (``year - 1``) is shrunk
        towards the identity matrix with intensity ``self.action`` and the
        result is handed to ``get_GMVP``.
        Args:
            year: the year the portfolio is built for
        Return:
            portfolio - whatever ``get_GMVP`` returns for the shrunk matrix
        '''
        covariance = self.covariance_aggregate.loc[year - 1].values
        # Size the identity target from the covariance matrix itself instead
        # of hard-coding the number of assets (previously fixed at 23).
        shrink_target = np.identity(covariance.shape[0])
        covariance_shrunk = self.get_shrank_cov(covariance_matrix=covariance,
                                                shrink_target=shrink_target,
                                                a=self.action)
        portfolio = self.get_GMVP(covariance_matrix=covariance_shrunk)
        return portfolio

    def is_done(self):
        '''
        Check whether the agent has arrived at the endpoint of the epoch,
        i.e. the current year equals the last entry of ``year_range``.
        Updates and returns ``self.done``.
        '''
        # bool() keeps `done` a plain Python bool even when the comparison
        # yields a NumPy boolean.
        self.done = bool(self.year == self.year_range[-1])
        return self.done

    def get_reward(self):
        '''
        Map ``self.reward_type`` to the reward function and evaluate it.
        An unknown ``reward_type`` raises ``KeyError``.
        '''
        options = {#'excess_return' : self.excess_return,
                   #'log_return' : self.log_return,
                   #'moving_average' : self.moving_average,
                   'sharpe_ratio': self.get_sharpe_ratio,
                  }

        reward = options[self.reward_type]()
        return reward

# Test

In [7]:
# Episode years 2000, 2001 and 2002 (the stop value 2003 is exclusive).
year_range = np.arange(2000, 2003, 1)
env = market_environment(year_range=year_range)
# Manual overrides, left disabled:
#env.year = 2000
#env.action = 0.3

In [8]:
# Rewind the environment to the first year of year_range and clear its done flag and stored action.
env.reset()

In [4]:
# Shrinkage intensity 1.0, the upper bound of env.action_space.
# The array is built as float32 to match the dtype declared by the action
# space, so the membership check in step() does not rely on an integer array
# being accepted by a float32 Box.
action = np.array([1.0], dtype=np.float32)
env.step(action)

[array([[-0.77891805,  0.27538629,  0.09018561,  0.55473796,  0.09526784,
         -0.11049116,  0.02016256,  0.10128134, -0.22888538, -0.12259082,
          0.17466226,  0.13547645, -0.0910565 ,  0.27295561,  0.40582724,
         -0.82923405,  0.27705591, -0.19094159,  0.56494237,  0.92530383,
         -0.11271183,  0.44684607, -0.13042818,  0.07586234]]),
 0.09188408504593096,
 False]