In [60]:
import sys 
sys.path.append("../")

import pandas as pd
import numpy as np
from IPython.display import display

#from tensorflow.keras.models import Model, Sequential
#from tensorflow.keras.layers import Dense, CuDNNLSTM, Activation, Dropout
#from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import MDS, TSNE

from pgtaa.config import *
from pgtaa.core.utils import *

In [None]:
data_gen = TimeseriesGenerator(data, targets,
                               length=10, sampling_rate=2,
                               batch_size=2)

In [2]:
df = pd.read_csv(TRAIN_CSV, index_col=0, parse_dates=True)
df.iloc[:, :8] = df.iloc[:,:8].pct_change(1)
df.dropna(inplace=True)
df.head(3)

Unnamed: 0,iShares Russell 1000 Value ETF (IWD),iShares Russell 1000 Growth ETF (IWF),iShares Russell 2000 Growth ETF (IWO),iShares Russell 2000 Value ETF (IWN),iShares MSCI EAFE ETF (EFA),iShares TIPS Bond ETF (TIP),SPDR Gold Trust (GLD),Vanguard REIT ETF (VNQ),USD/EUR,CNY/USD,...,Civilian Unemployment Ratet,Consumer Price Index for All Urban Consumers: All Items,Industrial Production Index,Federal Debt: Total Public Debt as Percent of Gross Domestic Product,Personal Consumption Expenditures,Effective Federal Funds Rate,Primary Credit Rate,Interest Rates Discount Rate for United States,10-Year Treasury Constant Maturity Rate,All Employees: Total Nonfarm Payrolls
2005-01-10,0.004013,0.00625,0.009982,0.01,0.004509,0.000668,0.002629,-0.003177,1.3109,8.2765,...,5.327273,191.818182,98.971655,60.919206,8536.6602,2.34,3.25,3.318182,4.29,132864.545455
2005-01-11,-0.006302,-0.006832,-0.011296,-0.009085,-0.001923,0.000953,0.006198,-0.010124,1.3161,8.2765,...,5.331818,191.854545,99.001864,60.915506,8538.9444,2.35,3.25,3.329545,4.26,132875.636364
2005-01-12,0.00232,0.004378,0.001428,0.001921,0.008031,0.003523,0.00924,-0.004734,1.3281,8.2765,...,5.336364,191.890909,99.032073,60.911805,8541.2286,2.36,3.25,3.340909,4.25,132886.727273


In [3]:
train = StandardScaler().fit_transform(df.values)

In [4]:
pca = PCA(n_components=1)
pcc = pca.fit_transform(np.cov(train[:,:8].T))
pcc

array([[-0.70890834],
       [-0.69316197],
       [-0.66428075],
       [-0.69967173],
       [-0.54147367],
       [ 2.2091984 ],
       [ 1.53419518],
       [-0.43589713]])

In [5]:
t = df.iloc[:,:8]

In [6]:
class PortfolioEnv(object):
    """
    Creates a portfolio environment, similar to the TensorForce or Gym environment:
    Gym: https://github.com/openai/gym/blob/522c2c532293399920743265d9bc761ed18eadb3/gym/core.py
    TensorForce: https://github.com/reinforceio/tensorforce/blob/master/tensorforce/environments/environment.py
    """
    def __init__(
            self,
            data: np.ndarray,
            assets: list,
            nb_assets: int=10,
            horizon: int=20,
            action_space: str='unbounded',
            window_size: int=100,
            portfolio_value: float=1000.0,
            risk_aversion: float=1.0,
            num_actions: int=11,
            cost_buying: float=0.025,
            cost_selling: float=0.025,
            cost_fix: float=0.0,
            predictor=None,
            optimized: bool=True,
            action_type: str='signal_softmax',
            scaler=None,
            standardize: bool=True,
            action_bounds: tuple=(0.0, 1.0),
            discrete_states: bool=False,
            state_labels: tuple=(50,),
            episodes: int=100,
            epochs: int=200,
            random_starts: bool=True,
    ):

        """
        Args:
            :param data: (object) environment data
            :param assets: (list) asset names
            :param horizon: (int) investment horizon -> max episode time steps
            :param window_size: (int) sequence length of the data used for doing prediction
                                and estimating variance, covariance etc
            :param portfolio_value: (int or float) initial portfolio value
            :param risk_aversion: (int or float) constant risk aversion of investor/agent
            :param cost_selling, cost_buying: (float) relative cost of selling and buying assets
            :param cost_fix: (float) costs for being allowed to trade on each time step
            :param predictor: (str) h5 file with prediction model (hyper-)parameters
            :param optimized: (bool) for using either optimized or naive weights
            :param action_type: (str) how to change weights based on actions:
                                'signal', 'signal_softmax', 'direct', 'direct_softmax', 'clipping'
            :param action_space: (str) specifies action space -> 'bounded', 'unbounded' or 'discrete'
            :param num_actions: (int) number of possible action values for each action
                                if action space is discrete
            :param scaler: (object) scaler object
            :param standardize: (bool) use normalization or standardization for state scaling
            :param action_bounds: (tuple) upper and lower bound for continuous actions
            :param discrete_states: (bool) true for discrete state space
            :param state_labels: (tuple) number of labels for state discretization per state row
        """
        # build logger
        #self.logger = get_logger(filename='tmp/env.log', logger_name='EnvLogger')
        self.data = data
        self.horizon = horizon
        self.window_size = window_size
        self.risk_aversion = risk_aversion
        self.init_portfolio_value = portfolio_value
        self.optimized = optimized
        self.action_type = action_type
        self.action_space = action_space
        self.num_actions = num_actions
        self.action_bounds = action_bounds
        self.discrete_states = discrete_states
        self.state_labels = state_labels
        self.standardize = standardize,
        self.episodes = episodes
        self.epochs = epochs

        self.step = 0
        self.episode = 0

        try:
            if assets is None:
                self.asset_names = ['A' + str(i + 1) for i in range(nb_assets)]
            else:
                self.asset_names = assets
            if nb_assets is None:
                self.nb_assets = len(self.asset_names)
            else:
                self.nb_assets = nb_assets
            if len(self.asset_names) != nb_assets:
                self.asset_names = ['A' + str(i + 1) for i in range(nb_assets)]
        except Exception as e:
            self.logger.error(e)

        # build portfolio object
        self.portfolio = Portfolio(
            portfolio_value=self.init_portfolio_value,
            risk_aversion=risk_aversion,
            fix_cost=cost_fix,
            cost_selling=cost_selling,
            cost_buying=cost_buying,
            action_type=action_type)

        # build data object
        self.data_env = DataEnv(
            data,
            assets,
            horizon=horizon,
            window_size=window_size,
            scaler=scaler,
            predictor=predictor,
            standardize=standardize,
            random_starts=random_starts,
            episodes=episodes,
            epochs=epochs
        )

        # reset epoch
        self.reset_epoch()

    def __str__(self):
        return str(self.__class__.__name__)

    def _step(self, action):

        # update current portfolio based on agent action
        new_weights, cost, portfolio_value = self.portfolio.update(action)
        self.episode_costs += cost

        # see PortfolioEnv.execute() for explanation
        info = self.data_env.get_window(episode_step=self.step)
        self._update(info)

        # make a step forward
        reward, weights, new_portfolio_value = self.portfolio.get_next_step(self.asset_returns[-1], self.covariance)

        self.episode_reward += reward

        info = portfolio_info(weights=new_weights,
                              old_weights=self.weights,
                              new_weights=weights,
                              init_weights=self.init_weights,
                              asset_returns=self.asset_returns[-1],
                              predictions=self.prediction,
                              portfolio_value=portfolio_value,
                              new_portfolio_value=new_portfolio_value,
                              old_portfolio_value=self.portfolio_value,
                              portfolio_return=new_portfolio_value / portfolio_value - 1,
                              portfolio_variance=self.portfolio.variance,
                              sharpe_ratio=self.portfolio.sharpe,
                              transaction_costs=cost)

        # update portfolio values
        self.weights = new_weights
        self.portfolio_value = new_portfolio_value
        self.portfolio_variance = self.portfolio.variance

        # true if episode has finished
        done = bool(self.step >= self.horizon)

        # update state
        self.state = np.concatenate((np.reshape(self.weights, (1, self.nb_assets)), self.__state), axis=0)

        # discretize the state array if selected
        if self.discrete_states:
            state = self._state_discretization()
        else:
            state = self.state

        # flatten the state array and reduce state size -> returns 1d array
        state = get_flatten(state)

        self.step += 1

        # should not be happening when using a runner
        if self.step > self.horizon:
            self.reset()

        return env_step(state, reward, done, info=info)

    def _reset(self):

        # is being called after finishing an episode -> get a new episode
        self.episode += 1
        self.step = 0
        self.episode_costs = 0
        self.episode_reward = 0

        # get new episode start -> get new data observations
        self.data_env.reset()
        info = self.data_env.get_window()

        # update some variables for easier access
        self._update(info)

        # get a new episode start
        self.entry_point = self.data_env.episode_start

        self.logger.debug(f'{20 * "#"} Episode {self.episode} {50 * "#"}')
        self.logger.debug(f'Episode entry point:{self.entry_point}')

        # estimate initial weights for the assets (optimized or naive)
        self.init_weights = self._get_init_weights(self.asset_returns, self.covariance, optimized=self.optimized)
        self.weights = self.init_weights

        # reset the portfolio
        self.portfolio.reset(covariance=self.covariance, weights=self.weights)
        self.portfolio_value = self.portfolio.init_portfolio_value
        self.portfolio_variance = self.portfolio.variance

        self.logger.debug(f'Initial weights for episode {self.episode}:\n{self.weights}')

        # returns the initial episode environment state
        self.state = np.concatenate((np.reshape(self.weights, (1, self.nb_assets)), self.__state), axis=0)

        # discretize the state array if selected
        if self.discrete_states:
            state = self._state_discretization()
        else:
            state = self.state

        # flatten the array -> returns a 1d array
        state = get_flatten(state)
        return state

    def _get_init_weights(self, asset_returns, covariance, optimized=True):
        """
        Args:
            :param asset_returns: (object) past asset returns
            :param covariance: (object) covariance matrix
            :param optimized: (bool) for doing optimization
        :return: semi-optimized weights based on past mean variance or naive 1/n weights
        """
        return WeightOptimize(asset_returns, covariance, nb_assets=self.nb_assets,
                              risk_aversion=self.risk_aversion).optimize_weights(method='SLSQP') \
            if optimized else [1/self.nb_assets for _ in range(self.nb_assets)]

    def _update(self, info):
        self.__state = info.state
        self.window = info.window
        self.asset_mean = info.mean
        self.asset_variance = info.variance
        self.asset_returns = info.asset_returns
        self.correlation = info.correlation
        self.covariance = info.covariance
        self.prediction = info.prediction

    def close(self):
        """
        Close environment. No other method calls possible afterwards.
        Currently not implemented.
        """
        pass

    def reset_epoch(self):
        # is called at the start for each new epoch
        self.episode = 0
        return self._reset()

    def reset(self):
        # is called after an episode has finished
        self.logger.debug(f'Episode {self.episode} has finished.'
                          f'\nCumulative Reward:{self.episode_reward}'
                          f'\nPortfolio Value: {self.portfolio_value}'
                          f'\nPortfolio Return: {self.portfolio.portfolio_return}'
                          f'\nPortfolio Variance: {self.portfolio.variance}'
                          f'\nSharpe Ration: {self.portfolio.sharpe}\n')
        return self._reset()

    def execute(self, action):
        """
        Args:
            :param action: (list) agent actions to execute
        """
        return self._step(action)

    def seed(self, seed=None):
        return np.random.seed(seed)

    def _state_discretization(self, nb_labels=(50,)):
        if len(nb_labels) == 1:
            nb_labels = [nb_labels[0] for _ in range(self.state.shape[0])]

        # quantil based state discretization
        # TODO: some issues with tensorforce agent NNs (expects float32 gets int32...), maybe wait for an update
        _state = np.array([pd.qcut(self.state[i], nb_labels[i], labels=False, duplicates='drop')
                           for i in range(self.state.shape[0])], dtype='int32')
        return _state

    def env_spec(self):
        # returns a dict of environment configurations
        return dict(
            data_shape=self.data.shape,
            epochs=self.epochs,
            episodes=self.episodes,
            horizon=self.horizon,
            window_size=self.window_size,
            portfolio_value=self.init_portfolio_value,
            risk_aversion=self.risk_aversion,
            optimized_weights=self.optimized,
            action_type=self.action_type,
            action_space=self.action_space,
            discrete_states=self.discrete_states,
            standardize=self.standardize,
            num_actions=self.num_actions
        )

    @property
    def states(self):
        """
        Return the state space.
        Returns: dict of state properties (shape and type).
        => weight + scaled mean + scaled variance + scaled predictions + correlation
        """

        if self.discrete_states:
            # returns discrete state space shape
            return dict(shape=(0.5 * self.nb_assets * (self.nb_assets + 7),), type='int')
        else:
            # returns continuous state space shape
            return dict(shape=(0.5 * self.nb_assets * (self.nb_assets + 7),), type='float')

    @property
    def actions(self):
        """
        Return the action space.
        Returns: dict of action properties (continuous, number of actions)
        """
        # discrete action space -> categorical distribution
        if self.action_space == 'discrete':
            return dict(shape=(self.nb_assets,), num_actions=self.num_actions, type='int')

        # continuous action space with upper and lower bounds -> beta distribution
        elif self.action_space == 'bounded':
            return dict(shape=(self.nb_assets,), type='float',
                        min_value=self.action_bounds[0], max_value=self.action_bounds[1])

        # unbounded continuous action space (default) -> gaussian distribution
        else:
            return dict(shape=(self.nb_assets,), type='float')


class Env(object):

    def _step(self, action):

        # update current portfolio based on agent action
        new_weights, cost, portfolio_value = self.portfolio.update(action)
        benchmark_weights = self.weigths
        self.episode_costs += cost

        # see PortfolioEnv.execute() for explanation
        info = self.data_env.get_window(episode_step=self.step)
        self._update(info)

        # make a step forward
        reward, weights, new_portfolio_value = self.portfolio.get_next_step(self.asset_returns[-1], self.covariance)

        self.episode_reward += reward

        info = portfolio_info(weights=new_weights,
                              old_weights=self.weights,
                              new_weights=weights,
                              init_weights=self.init_weights,
                              asset_returns=self.asset_returns[-1],
                              predictions=self.prediction,
                              portfolio_value=portfolio_value,
                              new_portfolio_value=new_portfolio_value,
                              old_portfolio_value=self.portfolio_value,
                              portfolio_return=new_portfolio_value / portfolio_value - 1,
                              portfolio_variance=self.portfolio.variance,
                              sharpe_ratio=self.portfolio.sharpe,
                              transaction_costs=cost)

        # update portfolio values
        self.weights = new_weights
        self.portfolio_value = new_portfolio_value
        self.portfolio_variance = self.portfolio.variance

        # update state
        self.state = np.concatenate((np.reshape(self.weights, (1, self.nb_assets)), self.__state), axis=0)

        # flatten the state array and reduce state size -> returns 1d array
        state = get_flatten(state)

        # estimate benchmark = buy and hold value
        benchmark = pass

        self.step += 1

        # should not be happening when using a runner
        if self.step > self.horizon:
            self.reset()

        return env_step(state, reward, benchmark, info=info)

    def execute(self, action):
        """
        Args:
            :param action: (list) agent actions to execute
        """
        return self._step(action)


    @property
    def action(self):
        return self.nb_assets

    @property
    def state(self):
        # => weight + scaled mean + scaled variance + scaled predictions + correlation
        return 0.5 * self.nb_assets * (self.nb_assets + 7)

SyntaxError: invalid syntax (<ipython-input-6-fccf455c3824>, line 385)

In [109]:
class Env:
    def __init__(self, data: np.ndarray, seed: int):
        self.data = data
        np.random.seed(seed)
        
    def observation(self):
        NotImplementedError
        
    def step(self, action: np.ndarray):
        reward, info = self._step(action)
        return reward, info
    
    def reset(self):
        return self._reset()
    
    def _step(self, action):
        NotImplementedError
    
    def _reset(self):
        NotImplementedError
        
    def __str__(self):
        return str(self.__class__.__name__)
    
    def __len__(self):
        return len(self.data)
    
    @property
    def action_space(self):
        NotImplementedError
    
    @property
    def state_space(self):
        NotImplementedError
        

class PortfolioEnv(Env):
    def __init__(
        self, 
        data: np.ndarray,
        nb_assets: int = 8,
        episodes: int = 100,
        horizon: int = 30,
        window_size: int = 100,
        portfolio_value: float = 1000.,
        risk_aversion: float = 1.,
        costs: float = 0.025,
        seed: int = 42
    ):
        super(PortfolioEnv, self).__init__(data, seed)
        self.nb_assets = nb_assets
        #self.horizon = horizon
        #self.window_size = window_size
        self.portfolio_value = portfolio_value
        self.risk_aversion = risk_aversion
        self.costs = costs
        self.dl = DataLoader(data, nb_assets, episodes, horizon, window_size)
        
    def observation(self):
        weights, var_covar, returns, mean, areturn = ()
        
    @classmethod
    def from_config_spec(cls, data, mode="train"):
        import pgtaa.config as cfg  
        if mode == "train":
            episodes = cfg.TRAIN_EPISODES
        else:
            episodes = cfg.TEST_EPISODES
        return cls(data, cfg.NB_ASSETS, episodes, cfg.HORIZON, cfg.WINDOW_SIZE, 
                   cfg.PORTFOLIO_INIT_VALUE, cfg.RISK_AVERSION, cfg.COSTS, cfg.SEED)
        
    @property
    def action_space(self):
        return self.nb_assets,
    
    @property
    def state_space(self):
        return int(0.5 * self.nb_assets * (self.nb_assets + 7)),
    
    
class DataLoader:
    def __init__(self, 
                 data: np.ndarray, 
                 nb_assets: int, 
                 episodes: int,
                 horizon: int, 
                 window_size: int
                ):
        
        self.data = data
        self.horizon = horizon
        self.nb_assets =nb_assets
        self.episodes = episodes
        self.window_size = window_size
        
    def init_batches():
        
    def get_batch(self):
        print(self.episodes)

In [110]:
data = read_data(TRAIN_CSV)
p = PortfolioEnv.from_config_spec(data)

The size of the state space can be calculated by $\frac{1}{2}n(n+7)$.