In [6]:
import os
import time
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
import gym
import pandas as pd
from pathlib import Path
import sys
import random
from joblib import dump,load
import datetime
import pandas_datareader.data as wb

In [96]:
class PortfolioBuffer():
    '''
    Sliding look-back window over the history of several assets plus cash.

    The data is stored as one array of shape (assets + 1, periods, features):
    row 0 is a synthetic CASH series made of ones, rows 1.. are the supplied
    assets in the order given. Column 0 of every asset is used as its price.

    Attributes:
        names: dict mapping row index -> name (0 is always 'CASH').
        data: the stacked array described above.
        shape: shape of `data`.
        window: number of periods returned by each batch.
        pointer: exclusive end index of the current window.
        length: number of periods available.
    '''
    def __init__(self,assets_names_list,assets_data_list,window):
        '''
        assets_names_list: sequence with one name per asset
        assets_data_list: sequence of equally shaped 2-D arrays (periods, features); a stacked ndarray is accepted too
        window: number of periods in each batch, between 1 and the number of periods

        Raises Exception when the inputs are empty, inconsistent in shape or
        length, or when the window does not fit in the data.
        '''
        # Materialise as a list of arrays first: `[ones] + ndarray` would be a
        # broadcast addition instead of prepending the cash row.
        assets = [np.asarray(asset) for asset in assets_data_list]
        if len(assets) == 0:
            raise Exception('assets_data_list must contain at least one asset')
        asset_shape = assets[0].shape
        for asset in assets:
            if asset.shape != asset_shape:
                raise Exception('Data must be of the same size')
        if len(assets) != len(assets_names_list):
            raise Exception('The length of assets_names_list is different than the amount of assets in assets_data_list')
        # Names come from assets_names_list, shifted by one because of CASH.
        self.names = {0:'CASH'}
        for index,name in enumerate(assets_names_list):
            self.names[index+1] = name
        self.data = np.array([np.ones(shape=asset_shape)] + assets)
        self.shape = self.data.shape
        self.length = self.shape[1]
        if not 1 <= window <= self.length:
            raise Exception('window must be between 1 and the number of periods in the data')
        self.pointer = window
        self.window = window
        self.batch_cache = None
    
    def get_batch(self):
        '''
        returns the current window, shape (assets + 1, window, features), with every
        asset divided by its own latest price in the window (column 0 of the last row)
        '''
        # Compare against None explicitly: the truth value of a cached array is ambiguous.
        if self.batch_cache is None:
            start = self.pointer - self.window
            window_data = self.data[:, start:self.pointer, :]
            latest_price = self.data[:, self.pointer-1, 0]
            self.batch_cache = window_data / latest_price[:, None, None]
        return self.batch_cache
    
    def get_next_batch(self):
        '''
        moves the window one period forward and returns the new batch

        Raises IndexError when the window already ends at the last period; the
        pointer is left untouched in that case.
        '''
        if self.pointer >= self.length:
            raise IndexError('No more data available in the buffer')
        self.pointer += 1
        self.batch_cache = None
        return self.get_batch()
    
    def get_current_price(self,index):
        '''
        index: row of the asset (0 is cash)
        returns the un-normalised price of that asset at the end of the current window
        '''
        return self.data[index][self.pointer-1][0]
    
    def reset(self):
        '''
        rewinds the window to the beginning of the data
        '''
        self.pointer = self.window
        # The cached batch belongs to the old pointer position.
        self.batch_cache = None
    
    
        

In [87]:
# Keep only the adjusted close, high and low columns of each ticker, in that order.
google = pd.read_csv('GOOG.csv').loc[:, ['Adj Close','High','Low']]
xom = pd.read_csv('XOM.csv').loc[:, ['Adj Close','High','Low']]
# NOTE(review): XOM is listed twice, which is what gives the stacked array a
# leading dimension of 3 - presumably a stand-in for a third asset; confirm.
assets_data_list = [frame.to_numpy() for frame in (google, xom, xom)]
data = np.array(assets_data_list)
data.shape


(3, 58, 3)

In [None]:
class PortfolioEnvironment(gym.Env):
    '''
    Gym environment that re-balances a portfolio over a fixed price history.

    An action is a vector of target weights: position 0 is cash and the
    remaining positions follow the order of assets_names_list. An observation
    is the buffer's current look-back window, shape (m, n, f). The portfolio
    value is tracked relative to a starting value of 1.0.
    '''
    def __init__(self,assets_names_list,assets_data_list,fee,initial_capital=100000,look_back_window=50,max_steps=200):
        '''
        assets_names_list: list with the ticker of each security
        assets_data_list: list of pandas dataframes with the data of each security, must have the same length as assets_names_list and the first column of each dataframe must have the price of the asset
        fee: percentage of operating fee, with decimal, ie 0.1 is equal to 10% fee
        initial_capital: amount of cash at the beginning (stored; the value tracked by this class is normalised to 1.0)
        look_back_window: amount of periods to look back while executing a step, at least 2
        max_steps: maximum number of possible steps

        Raises ValueError when look_back_window is smaller than 2.
        '''
        super().__init__()
        # A price relative needs two consecutive prices inside the window.
        if look_back_window < 2:
            raise ValueError('look_back_window must be at least 2')
        # Pass a plain list of arrays: the buffer prepends the cash series with
        # list concatenation, which would turn into an element-wise addition
        # if it were handed a stacked ndarray.
        assets_arrays = [asset.to_numpy() for asset in assets_data_list]
        self.buffer = PortfolioBuffer(assets_names_list,assets_arrays,look_back_window)
        self.fee = fee
        self.f = self.buffer.shape[2]   # features per period
        self.n = look_back_window       # periods per observation
        self.m = self.buffer.shape[0]   # assets, cash included
        self.max_steps = max_steps
        self.current_step = 0
        self.initial_capital = initial_capital
        
        self.action_space = gym.spaces.Box(low=0.0,high=2.0,shape=(self.m,),dtype=np.float16)
        # Observations are laid out (m, n, f) and are prices divided by the
        # latest price in the window, so they are not bounded above by 1.
        self.observation_space = gym.spaces.Box(low=0,high=np.inf,shape=(self.m,self.n,self.f),dtype=np.float16)
        
        # Start fully invested in cash.
        self.weights = np.array([1.0]+[0.0]*(self.m-1))
        self.portfolio_value = 1.0
        
    def _buy(self,index,price,amount):
        raise NotImplementedError

    def _sell(self,index,price,amount):
        raise NotImplementedError
        
    def _price_relative_vector(self):
        '''
        returns a matrix with the division of each assets value by the previous one,
        one row per pair of consecutive periods in the window and one column per asset
        '''
        # Take the price tensor, keep only column 0 (the price) of every asset
        # for every period and transpose it to (periods, assets).
        prices = self.buffer.get_batch()[:,:,0].T
        # Current price over previous price.
        return prices[1:]/prices[:-1]
        
    def _weights_at_end_of_period(self):
        '''
        returns a vector with the weights of the portfolio after the new prices but before taking any action
        '''
        # Use the latest known price change.
        y = self._price_relative_vector()[-1]
        return np.multiply(y,self.weights)/np.dot(y,self.weights)
    
    def _operation_cost(self,weights):
        '''
        weights: vector with the new weights provided by the actor
        returns a scalar value with the cost of doing the buy/sell operations needed to get to those weights
        '''
        weights = np.asarray(weights,dtype=float)
        # Cash (position 0) is left out of the traded amount.
        w_prime = self._weights_at_end_of_period()[1:]
        return self.fee * np.sum(np.abs(weights[1:]-w_prime))
    
    def _transaction_remainder_factor(self,weights=None):
        '''
        weights: vector with the target weights, defaults to the current weights
        returns the fraction of the portfolio value left after paying the operation cost,
        1 - cost, the same factor applied in _portfolio_value_after_operation
        '''
        # NOTE(review): uses the first-order form 1 - cost; confirm this is the
        # intended definition of the remainder factor.
        target = self.weights if weights is None else weights
        return 1.0 - self._operation_cost(target)
    
    def _portfolio_value_after_operation(self,weights):
        '''
        weights: vector with the new weights provided by the actor
        returns a scalar with the new value of the portfolio after doing the buy/sell operations needed to get to those weights
        '''
        mu = self._transaction_remainder_factor(weights)
        p0 = self.portfolio_value
        y = self._price_relative_vector()[-1]
        w = self.weights
        return p0 * mu * np.dot(y, w)
    
    def _portfolio_return_rate(self):
        '''
        returns the rate of return of the current weights over the latest price change, net of the remainder factor
        '''
        mu = self._transaction_remainder_factor()
        y = self._price_relative_vector()[-1]
        w = self.weights
        return np.dot(mu*y,w) - 1
    
    def _portfolio_log_return_rate(self):
        '''
        returns the natural logarithm of the gross return used in _portfolio_return_rate
        '''
        mu = self._transaction_remainder_factor()
        y = self._price_relative_vector()[-1]
        w = self.weights
        return np.log(np.dot(mu*y,w))

    def _next_observation(self):
        '''
        moves the buffer one period forward and returns the new window
        '''
        return self.buffer.get_next_batch()

    def _calculate_reward(self):
        '''
        returns the reward of the step; currently a constant placeholder
        '''
        reward = -1
        return reward
    
    def step(self, action):
        '''
        action: vector with the target weights, cash first
        returns the tuple (observation, reward, done, info)
        '''
        action = np.asarray(action,dtype=float)
        # Value is computed with the weights held during the last period,
        # before they are replaced by the action.
        p1 = self._portfolio_value_after_operation(action)
        self.weights = action
        
        reward = self._calculate_reward()
        
        self.portfolio_value = p1
        self.current_step += 1
        # Only advance while there is data left; otherwise repeat the last window.
        if self.buffer.pointer < self.buffer.length:
            obs = self._next_observation()
        else:
            obs = self.buffer.get_batch()
        # The episode ends when the window reaches the last period or the step budget is spent.
        done = self.buffer.pointer >= self.buffer.length or self.current_step >= self.max_steps
        info = {}
        
        return obs, reward, done, info
    
    def reset(self):
        '''
        restores the initial state and returns the first observation
        '''
        self.weights = np.array([1.0]+[0.0]*(self.m-1))
        self.portfolio_value = 1.0
        self.current_step = 0
        self.buffer.reset()
        # Drop any cached window so the observation is rebuilt from the rewound pointer.
        self.buffer.batch_cache = None
        return self.buffer.get_batch()
    
    def render(self, mode='human'):
        pass
        
        