<a href="https://colab.research.google.com/github/HEYanTao/AssetLocationSimulation/blob/main/AssetLocationTestEnv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get update && apt-get install ffmpeg freeglut3-dev xvfb  # For visualization
!pip install "stable-baselines3[extra]>=2.0.0a4"

In [None]:
import stable_baselines3
stable_baselines3.__version__
import gym
import json
import datetime as dt
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
import random
class AssetLocationEnvironment(gym.Env):
    """Asset-allocation simulation environment for OpenAI gym.

    Every ``step`` simulates one year (``annual_trading_days`` daily returns)
    for seven instruments, in this column order: US equity, euro equity,
    emerging-market equity, treasury bond, corporate bond, gold, cash.
    The agent's action is a vector of non-negative portfolio weights, which is
    normalised to sum to one.  Daily returns are drawn from independent normal
    distributions whose annualised mean/std depend on the current market
    regime (0 = Recovery, 1 = Development, 2 = Mature, 3 = Crisis).

    Two reward schemes are available through ``reward_type``:

    * ``'goal'``   -- 100.0 at the final step if the balance reached
      ``self.goal``, otherwise 0.0 everywhere.
    * anything else (default ``'sharpe'``) -- mean over std of the daily
      return difference between the agent's portfolio and a fixed,
      near-equal-weight benchmark portfolio.
    """
    metadata = {'render.modes': ['human']}  # render() prints plain text for a human reader

    _N_ASSETS = 7     # number of instruments / width of the return matrix
    _START_AGE = 40   # investor age at the start of every episode
    _END_AGE = 80     # the episode terminates once this age is reached

    # Attribute-name prefixes of the regime-dependent assets, in column order.
    # Cash (last column) is handled separately: its parameters never change.
    _ASSETS = ('us_equity', 'euro_equity', 'em_equity',
               'treasury_bond', 'corporate_bond', 'gold')

    # Annualised (mean, std) per asset (ordered as in _ASSETS) for each regime.
    _REGIME_PARAMS = {
        # 0 = Recovery
        0: ((0.15, 0.20), (0.10, 0.20), (0.25, 0.30),
            (-0.01, 0.02), (0.05, 0.07), (-0.10, 0.15)),
        # 1 = Development
        1: ((0.12, 0.15), (0.08, 0.12), (0.20, 0.20),
            (0.01, 0.02), (0.03, 0.05), (0.01, 0.02)),
        # 2 = Mature
        2: ((0.06, 0.10), (0.08, 0.10), (0.10, 0.20),
            (0.01, 0.02), (0.03, 0.06), (-0.02, 0.02)),
        # 3 = Crisis
        3: ((-0.20, 0.30), (-0.20, 0.30), (-0.40, 0.50),
            (0.10, 0.05), (-0.10, 0.20), (0.20, 0.15)),
    }

    def __init__(self, reward_type='sharpe'):
        """Create the environment.

        Args:
            reward_type: ``'goal'`` for the terminal goal-reaching reward; any
                other value (default ``'sharpe'``) selects the
                benchmark-relative mean/std reward.
        """
        # Zero-argument super(): the original referenced a non-existent class
        # name here, which raised NameError on instantiation.
        super().__init__()

        self.market_cycle = random.randint(0, 3)  # current market regime
        # NOTE(review): the 'sharpe' reward can be negative, so this range is
        # not an accurate bound for that reward type.
        self.reward_range = (0, 100)

        # Placeholders; the real values are filled in by _get_market_behavior().
        for asset in self._ASSETS:
            setattr(self, f'{asset}_mean', 0)
            setattr(self, f'{asset}_std', 0)
        self.state1_cash_mean = 0.005  # cash has 0.5% mean annual return
        self.state1_cash_sd = 0.0      # cash has 0% standard deviation

        # Set mean and std of the assets based on the current market cycle
        # (this call may also transition the regime once).
        self._get_market_behavior()

        self.annual_trading_days = 252  # 252 trading days per year
        # Despite its name, current_step stores the latest simulated
        # (days x assets) daily-return matrix.
        self.current_step = self._simulate_returns()

        self.reward_type = reward_type  # 'goal' or 'sharpe'
        self._reset_episode_state()

        # Continuous action space: one raw weight in [0, 1] per instrument
        # (US equity, euro equity, EM equity, treasury bond, corporate bond,
        # gold, cash).
        self.action_space = gym.spaces.Box(
            low=np.array([0] * self._N_ASSETS),
            high=np.array([1] * self._N_ASSETS),
            dtype=np.float16)

        # NOTE(review): observations are produced as float64 while 'market' is
        # declared float16 -- confirm whether the declared dtype should change.
        observation_spaces = {
            # one row per trading day, one column per instrument (daily returns)
            'market': gym.spaces.Box(
                low=-1, high=1,
                shape=(self.annual_trading_days, self._N_ASSETS),
                dtype=np.float16),
            'balance': gym.spaces.Box(low=0, high=100, shape=(1,), dtype=np.float64),
        }
        self.observation_space = gym.spaces.Dict(observation_spaces)

    def _reset_episode_state(self):
        """Restore the per-episode investor state to its starting values."""
        self.age = self._START_AGE
        self.goal = 10  # target market value for the 'goal' reward
        self.balance = np.array([1.0], dtype=np.float64)  # start with 1 in market value
        self.benchmark_balance = self.balance.copy()
        # start with an all-cash portfolio
        self.port_weights = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])

    def _simulate_returns(self):
        """Draw one year of daily returns for all instruments.

        Each column gets its own independent standard-normal shocks, scaled to
        a daily mean of ``annual_mean / days`` and a daily std of
        ``annual_std / sqrt(days)``.

        Returns:
            ``(annual_trading_days, 7)`` float64 array of daily returns.
        """
        days = self.annual_trading_days
        annual_means = np.array(
            [getattr(self, f'{asset}_mean') for asset in self._ASSETS]
            + [self.state1_cash_mean], dtype=np.float64)
        annual_stds = np.array(
            [getattr(self, f'{asset}_std') for asset in self._ASSETS]
            + [self.state1_cash_sd], dtype=np.float64)

        shocks = np.random.normal(0, 1, (days, self._N_ASSETS))
        # Broadcasting applies each column's own mean/std to its own shocks.
        # (The original reused the already-scaled EM-equity column as the
        # noise source for the bond, gold and cash columns.)
        return annual_means / days + shocks * (annual_stds / np.sqrt(days))

    def _next_observation(self, observed_market_regime=None):
        """Advance the market and build the observation dict.

        Args:
            observed_market_regime: if given, force this regime instead of
                sampling a regime transition.

        Returns:
            Dict with ``'market'`` (the freshly simulated daily-return matrix,
            also stored in ``self.current_step``) and ``'balance'`` (the
            current ``self.balance``).
        """
        self._get_market_behavior(observed_market_regime)  # determine market behavior

        return_sim = self._simulate_returns()
        self.current_step = return_sim

        obs = {
            'market': return_sim,
            'balance': self.balance
        }

        return obs

    def _take_action(self, action):
        """Store the (already normalised) action as the portfolio weights."""
        self.port_weights = action  # action space is the portfolio allocation
        # If we consider transaction cost or tax, it should be implemented here

    def _portfolio_softmax(self, x):
        """Normalise raw weights so they sum to one.

        Despite the name this is a plain sum-normalisation, not a softmax.
        Falls back to an all-cash portfolio when the weights do not have a
        positive, finite sum.

        Args:
            x: array-like of raw weights, one per instrument.

        Returns:
            float64 array of weights summing to one.
        """
        weights = np.asarray(x, dtype=np.float64)
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            return np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
        return weights / total

    def step(self, action):
        """Execute one time step (one simulated year) within the environment.

        Args:
            action: raw portfolio weights, one per instrument.

        Returns:
            Tuple ``(obs, reward, done, info)``; ``info`` holds the agent and
            benchmark balances, the applied weights, and the market regime
            after and before this step.
        """
        benchmark_portfolio = np.array([0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.16])

        temp_old_market_cycle = self.market_cycle
        # allocate portfolio weights based on the agent's action
        self._take_action(self._portfolio_softmax(action))
        obs = self._next_observation()  # determine next market state

        self.age += 1  # age increases by one year

        # Yearly gross return per asset = 1 + sum of its daily returns.
        asset_growth = self.current_step.sum(axis=0) + 1
        temp_return = np.sum(asset_growth * self.port_weights)
        temp_benchmark_return = np.sum(asset_growth * benchmark_portfolio)
        self.balance = self.balance * temp_return  # market value update
        self.benchmark_balance = self.benchmark_balance * temp_benchmark_return
        # The observation was built before the balance update; refresh it so
        # the agent sees the balance that results from this step.
        obs['balance'] = self.balance

        # Reaching the end age terminates the episode (could be replaced by a
        # mortality table).
        done = self.age >= self._END_AGE

        # reward assignment
        if self.reward_type == 'goal':
            # Single terminal reward for reaching the goal; intermediate steps
            # carry no reward.
            if done and bool(np.all(self.balance >= self.goal)):
                reward = 100.0
            else:
                reward = 0.0
        else:
            # Beat a simple strategy: mean over std of the daily return
            # difference versus the benchmark portfolio.
            active_daily = ((self.current_step * self.port_weights).sum(axis=1)
                            - (self.current_step * benchmark_portfolio).sum(axis=1))
            temp_sd = np.std(active_daily)
            temp_rtn = np.mean(active_daily)
            if temp_sd > 0.0000001:
                reward = float(temp_rtn / temp_sd)
            else:
                reward = 0.0  # no measurable deviation from the benchmark

        return obs, reward, done, {'balances':[self.balance,self.benchmark_balance],'portfolio':self.port_weights,'market_state':self.market_cycle,'last_market_state':temp_old_market_cycle}

    def reset(self):
        """Reset the environment to a random regime and the initial state.

        Returns:
            The first observation dict.
        """
        self.market_cycle = random.randint(0, 3)  # current market cycle
        self._reset_episode_state()
        # _next_observation() may transition the regime once and then
        # simulates the return matrix stored in self.current_step.
        return self._next_observation()

    def render(self, mode='human', close=False):
        """Print the current environment state to stdout."""
        print(f'Current Step: {self.current_step.shape}')
        print(f'Balance: {self.balance}')
        print(f'Age: {self.age}')
        print(f'Position: {self.port_weights}')
        print(f'Market State: {self.market_cycle}')

    def custom_reset(self,observed_market_regime = 0):
        """Reset the environment with a caller-chosen market regime.

        Args:
            observed_market_regime: regime to start from (0-3).

        Returns:
            The first observation dict, simulated under that regime.
        """
        self.market_cycle = observed_market_regime
        self._reset_episode_state()
        return self._next_observation(observed_market_regime)

    def _get_market_behavior(self, observed_market_regime = None):
        """Update the market regime and the per-asset return parameters.

        Market cycle info:
            0 = Recovery,
            1 = Development
            2 = Mature
            3 = Crisis

        Args:
            observed_market_regime: if ``None``, sample a possible transition
                to the next regime in the cycle; otherwise adopt the given
                regime directly.
        """
        if observed_market_regime is None:
            proba = random.randint(1, 10)
            # determine market state after probability of transition
            if self.market_cycle == 0:
                if proba > 4:  # transition to Development
                    self.market_cycle = 1
            elif self.market_cycle == 1:
                if proba <= 4:  # transition to Mature
                    self.market_cycle = 2
            elif self.market_cycle == 2:
                if proba <= 3:  # transition to Crisis
                    self.market_cycle = 3
            elif self.market_cycle == 3:
                if proba > 2:  # transition to Recovery
                    self.market_cycle = 0
        else:
            self.market_cycle = observed_market_regime

        # Determine market behavior.  An unknown regime leaves the previous
        # parameters untouched, as the original if/elif chain did.
        regime_params = self._REGIME_PARAMS.get(self.market_cycle)
        if regime_params is not None:
            for asset, (mean, std) in zip(self._ASSETS, regime_params):
                setattr(self, f'{asset}_mean', mean)
                setattr(self, f'{asset}_std', std)