In [None]:
from __future__ import absolute_import, print_function, division
import tflearn
import tensorflow as tf
import numpy as np
from pgportfolio.constants import *
import pgportfolio.learn.network as network

# 1. Trading Agent
Class to implement a policy gradient trading agent. This agent is in charge of finding a policy which maximizes the mean of the cumulative reward function of the portfolio. Since the agent is a NN, the CNN class defined in NeuralNetwork.ipynb is instantiated in order to create the agent. Then, by calling the method _build_network, the output of the NN (weight vector $\vec{w}$, or action) is computed by the layers specified in a json file which is fed into the constructor of the agent.

This class computes the portfolio features and trains the NN based on those features.


## 1.1 Portfolio features:
The portfolio features are the tensors that characterize the portfolio:

* Relative price tensor $Y_t$: Composed of the relative price vectors of the 3 features (future_price vector = closing price($v_t$)/opening price($v_{t-1}$)). Its shape is $[Batches, f, m]$, where f is the number of features and m the number of non-cash assets.

* Relative price vector ($y_t$): Relative prices of the closing price (feature 0). Shape $[Batches, 1+m]$. It is a rank 2 tensor, and it can be seen as a vector (rank 1 tensor) for each batch ($n_b$ samples/periods).

* Future_weight_vec ($w'_t$): is the portfolio weight vector at the end of the trading period. It is given by:
$$w'_t = \frac{\vec{y}_t \odot \vec{w}_{t-1}}{\vec{y}_t \cdot \vec{w}_{t-1}} = \frac{\vec{y}_t \odot \vec{w}_{t-1}}{\sum_{i=0}^m y_{t,i}\cdot w_{t-1,i}};\; \mathrm{Shape}\; [Batches, 1+m]$$

* Logarithmic rate of return ($r_t$) or immediate reward: $\log{\mu_t y_t \cdot w_{t-1}}$. Shape $[Batches, 1]$

* Portfolio value vector (__pv_vector): Portfolio value for each batch (the value of the portfolio after computing the action with n_b samples)
    - $[Batch]$ rank 1 tensor (vector):  There is a value per batch.
    - Portfolio value ($P_f$): is the value of the portfolio after $\Delta t = t_f-t_0$ periods:
$$P_{t_f} = P_0 \exp \left( \sum _{t=1} ^{t_f + 1} r_t \right) = P_0 \prod _{t=1} ^{t_f+1} \mu_t \vec{y}_t \cdot \vec{w}_{t-1}; \; \mathrm{Shape}\; []\; \mathrm{(scalar)}$$

* Cumulative reward function ($R$): is what is going to be maximized. It is given by the average of the logarithmic cumulated return
$$R(s_1, a_1, \dots, s_{t_f}, a_{t_f}, s_{t_f+1}) = \frac{1}{t_f}\log \left(\frac{P_f}{P_0}\right) = \frac{1}{t_f}\sum _{t=1}^{t_f+1}\log (\mu_t\vec{y}_t\cdot \vec{w}_{t-1}) = \frac{1}{t_f}\sum_{t=1}^{t_f+1}r_t; \; \mathrm{Shape}\; [Batches,1]$$


## 1.2 Training the trading agent aka NN

1. Creation of the agent object which is the instance of the CNN class. 
2. Feed the batch input tensor, the relative price tensor $Y_t$, the previous weight vector, and the number of batches ($N_b$) into the NN. 
3. The policy network will be trained against $N_b$ randomly chosen mini-batches from this set of n previous periods. Each mini-batch contains $n_b$ samples/periods of the data.
4. A batch starting with period $t_b \leq t_0 - n_b$ is picked with a geometrically distributed probability (ReplayBuffer.ipynb).
5.  It is important that prices inside a batch are in time-order.
6. __set_loss_function: Sets the loss function, also called the objective function. It is the function that the agent minimizes in order to update the parameters. This function is minus the reward, because the agent aims to maximize the reward.
7. init_train: Define which is going to be the optimizer used to minimize the loss (train_step.)
8. train: calls evaluate_tensors, checking that the tensors fed into the NN have no nan value, and trains the NN with the previously defined optimizer.
9. decide_by_history: Once the NN outputs the action, this function runs the computational graph defined in the constructor, returning each of its values (features of the portfolio) updated.


In [1]:
class DPG_LogReturn(object):
    """Policy-gradient trading agent trained on the logarithmic return.

    The agent wraps a ``network.CNN`` whose output is the portfolio weight
    vector (the action). On top of that output the constructor builds the
    TensorFlow graph for the portfolio features (relative prices, drifted
    weights, per-sample portfolio value, log returns, Sharpe ratio), the loss
    selected in ``config["training"]["loss_function"]`` and the optimizer
    step selected in ``config["training"]["training_method"]``.
    """

    def __init__(self, config, restore_dir=None, device="cpu"):
        """Build the graph and either restore or initialise the variables.

        :param config: nested dict with the "input", "layers", "trading" and
            "training" sections read below
        :param restore_dir: checkpoint path handed to ``tf.train.Saver.restore``;
            when falsy, all global variables are freshly initialised instead
        :param device: forwarded unchanged to ``network.CNN``
        """
        self.__config = config
        # Number of traded assets; cash is not counted here.
        self.__asset_number = config["input"]["asset_number"]

        # The CNN produces the weight vector according to config["layers"].
        self.__net = network.CNN(config["input"]["feature_number"],  # features (f)
                                 self.__asset_number,                # non-cash assets (m)
                                 config["input"]["window_size"],     # periods per asset (n)
                                 config["layers"],                   # layer description
                                 device=device)

        # Counts optimizer steps; it drives the learning-rate decay.
        self.__global_step = tf.Variable(0, trainable=False)
        self.__train_operation = None

        # Relative price tensor Y for the last period: [batch, f, m].
        self.__Y = tf.placeholder(
            tf.float32,
            shape=[None, self.__config["input"]["feature_number"], self.__asset_number])

        # Relative price vector y: feature 0 of Y with a leading column of
        # ones for the cash asset, giving [batch, 1 + m].
        self.__y = tf.concat([tf.ones([self.__net.input_num, 1]), self.__Y[:, 0, :]], 1)

        # w': the weights after the price movement, renormalised to sum to one.
        weighted_prices = self.__y * self.__net.output
        self.__future_weight_vec = weighted_prices / tf.reduce_sum(weighted_prices, axis=1)[:, None]

        # tf.assert_equal(tf.reduce_sum(self.__future_weight_vec, axis=1), tf.constant(1.0))

        # Cost ratio paid when w' is reallocated into the next weights.
        self.__commission_ratio = self.__config["trading"]["trading_consumption"]

        # Per-sample portfolio value change: (w . y) scaled by the transaction
        # remainder factor mu. __pure_pc yields one mu per sample except the
        # first, so a single 1 is prepended (no cost charged on sample 0).
        self.__pv_vector = tf.reduce_sum(self.__net.output * self.__y, axis=1) * \
            tf.concat([tf.ones(1), self.__pure_pc()], axis=0)

        # Mean log return ignoring transaction costs.
        self.__log_mean_free = tf.reduce_mean(
            tf.log(tf.reduce_sum(self.__net.output * self.__y, axis=1)))

        # Product of all per-sample value changes, and their mean.
        self.__portfolio_value = tf.reduce_prod(self.__pv_vector)
        self.__mean = tf.reduce_mean(self.__pv_vector)
        # Mean log return including transaction costs (eq. 22).
        self.__log_mean = tf.reduce_mean(tf.log(self.__pv_vector))

        # Performance metrics.
        self.__standard_deviation = tf.sqrt(
            tf.reduce_mean((self.__pv_vector - self.__mean) ** 2))
        self.__sharp_ratio = (self.__mean - 1) / self.__standard_deviation

        # Loss and optimizer step.
        training = self.__config["training"]
        self.__loss = self.__set_loss_function()
        self.__train_operation = self.init_train(learning_rate=training["learning_rate"],
                                                 decay_steps=training["decay_steps"],
                                                 decay_rate=training["decay_rate"],
                                                 training_method=training["training_method"])
        self.__saver = tf.train.Saver()
        if restore_dir:
            self.__saver.restore(self.__net.session, restore_dir)
        else:
            self.__net.session.run(tf.global_variables_initializer())

    ## Getters
    @property
    def session(self):
        return self.__net.session

    @property
    def pv_vector(self):
        return self.__pv_vector

    @property
    def standard_deviation(self):
        return self.__standard_deviation

    @property
    def portfolio_weights(self):
        return self.__net.output

    @property
    def sharp_ratio(self):
        return self.__sharp_ratio

    @property
    def log_mean(self):
        return self.__log_mean

    @property
    def log_mean_free(self):
        return self.__log_mean_free

    @property
    def portfolio_value(self):
        return self.__portfolio_value

    @property
    def loss(self):
        return self.__loss

    @property
    def layers_dict(self):
        return self.__net.layers_dict

    def recycle(self):
        """Reset the default graph and close the network's session."""
        tf.reset_default_graph()
        self.__net.session.close()

    def __set_loss_function(self):
        """Build the loss tensor named in config["training"]["loss_function"].

        Names that are not recognised fall back to ``loss_function5``. Any
        tensors registered under ``tf.GraphKeys.REGULARIZATION_LOSSES`` are
        added to the selected loss.

        :return: scalar loss tensor to be minimised
        """
        def free_log_return():
            # Mean log return without transaction costs.
            return tf.reduce_mean(
                tf.log(tf.reduce_sum(self.__net.output * self.__y, axis=1)))

        def weight_penalty():
            # Grows as any single weight approaches 1; scaled by LAMBDA.
            return LAMBDA * tf.reduce_mean(
                tf.reduce_sum(-tf.log(1 + 1e-6 - self.__net.output), axis=1))

        # Minus the cost-free mean log return.
        def loss_function4():
            return -free_log_return()

        # Same, plus the weight penalty.
        def loss_function5():
            return -free_log_return() + weight_penalty()

        # Minus the mean log of the cost-adjusted portfolio value.
        def loss_function6():
            return -tf.reduce_mean(tf.log(self.pv_vector))

        # Same, plus the weight penalty.
        def loss_function7():
            return -tf.reduce_mean(tf.log(self.pv_vector)) + weight_penalty()

        # Charges the commission on the change from the previous weights.
        def with_last_w():
            gross = tf.reduce_sum(self.__net.output * self.__y, axis=1)
            cost = tf.reduce_sum(
                tf.abs(self.__net.output[:, 1:] - self.__net.previous_w) * self.__commission_ratio,
                axis=1)
            return -tf.reduce_mean(tf.log(gross - cost))

        builders = {
            "loss_function4": loss_function4,
            "loss_function5": loss_function5,
            "loss_function6": loss_function6,
            "loss_function7": loss_function7,
            "loss_function8": with_last_w,
        }
        build_loss = builders.get(self.__config["training"]["loss_function"], loss_function5)

        loss_tensor = build_loss()
        for regularization_loss in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
            loss_tensor += regularization_loss
        return loss_tensor

    def init_train(self, learning_rate, decay_steps, decay_rate, training_method):
        """Create the optimizer step that minimises the loss.

        The learning rate decays exponentially (staircase) with the global
        step, which the returned operation increments on every run.

        :param learning_rate: initial learning rate
        :param decay_steps: number of steps between two decays
        :param decay_rate: multiplicative decay factor
        :param training_method: 'GradientDescent', 'Adam' or 'RMSProp'
        :return: the training operation
        :raises ValueError: if ``training_method`` is not one of the above
        """
        optimizers = {
            'GradientDescent': tf.train.GradientDescentOptimizer,
            'Adam': tf.train.AdamOptimizer,
            'RMSProp': tf.train.RMSPropOptimizer,
        }
        if training_method not in optimizers:
            raise ValueError("unsupported training_method {!r}; expected one of {}".format(
                training_method, sorted(optimizers)))

        learning_rate = tf.train.exponential_decay(learning_rate, self.__global_step,
                                                   decay_steps, decay_rate, staircase=True)
        return optimizers[training_method](learning_rate).minimize(
            self.__loss, global_step=self.__global_step)

    def train(self, x, y, last_w, setw):
        """Run one optimizer step on the given batch (see evaluate_tensors)."""
        tflearn.is_training(True, self.__net.session)
        self.evaluate_tensors(x, y, last_w, setw, [self.__train_operation])

    def evaluate_tensors(self, x, y, last_w, setw, tensors):
        """Evaluate ``tensors`` on a batch and store the resulting weights.

        :param x: input tensor
        :param y: relative price tensor fed into the Y placeholder
        :param last_w: previous weights, used for the transaction costs
        :param setw: a function, pass the output w to it to fill the PVM
        :param tensors: tensors/operations to evaluate in the same run
        :return: the evaluated values of ``tensors``, in the same order
        """
        tensors = list(tensors)
        # The network output is always evaluated as well and placed last, so
        # that the freshly computed weights can be handed to setw.
        tensors.append(self.__net.output)
        assert not np.any(np.isnan(x))
        assert not np.any(np.isnan(y))
        assert not np.any(np.isnan(last_w)), "the last_w is {}".format(last_w)
        results = self.__net.session.run(tensors, feed_dict={
            self.__net.input_tensor: x,
            self.__Y: y,
            self.__net.previous_w: last_w,
            self.__net.input_num: x.shape[0]})

        # results[-1] is the network output appended above (not the batch
        # count); column 0 is the cash weight and is not stored.
        setw(results[-1][:, 1:])
        return results[:-1]

    def save_model(self, path):
        """Save the variables to ``path`` (path including the file name)."""
        self.__saver.save(self.__net.session, path)

    def __pure_pc(self):
        """Transaction remainder factor mu between consecutive samples.

        Row i compares the drifted weights w' of sample i with the action of
        sample i + 1, so the result has one entry fewer than the batch.
        Only the non-cash weights enter the cost (eqs. 15-17 of the paper).
        An iterative refinement of mu used to be sketched here as disabled
        code; only the direct formula below was ever active.

        :return: rank-1 tensor of length input_num - 1
        """
        c = self.__commission_ratio
        # The batch size lives on the network as a placeholder; this class has
        # no `input_num` attribute of its own.
        batch_size = self.__net.input_num
        w_t = self.__future_weight_vec[:batch_size - 1]  # drifted weights, all but the last sample
        # NOTE(review): uses `output`, the weight tensor read everywhere else
        # in this class, instead of the former `self.__net.action`.
        w_t1 = self.__net.output[1:batch_size]           # actions, all but the first sample
        mu = 1 - tf.reduce_sum(tf.abs(w_t1[:, 1:] - w_t[:, 1:]), axis=1) * c
        return mu

    def decide_by_history(self, history, last_w):
        """Return the weight vector the network outputs for one history.

        :param history: numpy array; a leading batch axis of size 1 is added
        :param last_w: previous weight vector including cash; the cash entry
            is dropped before feeding the network
        :return: the squeezed network output
        """
        assert isinstance(history, np.ndarray),\
            "the history should be a numpy array, not %s" % type(history)
        assert not np.any(np.isnan(last_w))
        assert not np.any(np.isnan(history))
        tflearn.is_training(False, self.session)
        history = history[np.newaxis, :, :, :]
        feed = {self.__net.input_tensor: history,
                self.__net.previous_w: last_w[np.newaxis, 1:],
                self.__net.input_num: 1}
        return np.squeeze(self.session.run(self.__net.output, feed_dict=feed))

# 2. Trading Agent evaluating simple return
The differences with the previous trading agent are:
* This one uses the simple return instead of the logarithmic one.
* This one uses $\frac{\text{Open}_t}{\text{Open}_{t-1}}$ as the change in the prices, so the change in the portfolio value $P_t$ is given by:
$$P_t = P_{t-1} \cdot \vec{y}_t\cdot \vec{w}_{t-1}$$

In [1]:
class DPG_SimpleReturn(object):  
    """Draft of a trading agent that is meant to use the simple return.

    NOTE(review): this class is unfinished and cannot be imported as written:
    the constructor contains an incomplete ``def reward`` statement (a syntax
    error), refers to several names that are never defined in the visible
    code, and ``train`` is defined twice (the second definition wins). The
    individual problems are flagged inline below; the code itself is left
    untouched.
    """
    def __init__(self, config, restore_dir=None, device="cpu"):

        self.__config = config  # Read the config file where training parameters are defined
        self.__asset_number = config["input"]["asset_number"]  # Number of assets to be traded (does not include cash)
        
        ## Create the CNN object in order to train the net. It outputs the weight vector depending on config["layers"]
        self.__net = network.CNN(config["input"]["feature_number"],  # feature number (f)
                                 self.__asset_number,                # rows: number of non cash assets in the portfolio (m)
                                 config["input"]["window_size"],     # cols: number of previous periods FOR A SPECIFIC ASSET (n)
                                 config["layers"],                   # dict of layers
                                 device=device)
        
          # parameters
        # NOTE(review): `trading_cost` and `interest_rate` are neither parameters of
        # __init__ nor assigned anywhere in the visible code; unless the star import
        # of pgportfolio.constants provides them, these lines raise NameError.
        self.trading_cost = trading_cost
        self.interest_rate = interest_rate        
                
        self.constant_return = tf.constant(1+self.interest_rate, shape=[1, 1])
        # NOTE(review): `constant_return` is used as a bare name although it was stored
        # on self one line above, and `shape_X_t` is only assigned further down.
        self.cash_return = tf.tile(constant_return, tf.stack([shape_X_t, 1]))
        
        # variable of the cash bias
        # NOTE(review): `cash_bias_init` is not defined in the visible code.
        bias = tf.get_variable('cash_bias', shape=[1, 1, 1, 1], initializer=tf.constant_initializer(cash_bias_init))
        # shape of the tensor == batchsize
        # NOTE(review): `self.X_t` is never assigned in the visible code.
        shape_X_t = tf.shape(self.X_t)[0]
        # trick to get a "tensor size" for the cash bias
        self.cash_bias = tf.tile(bias, tf.stack([shape_X_t, 1, 1, 1]))
        
        self.__commission_ratio = self.__config["trading"]["trading_consumption"]
        
         # portfolio value at the previous time step
        self.pf_value_previous = tf.placeholder(tf.float32, [None, 1])
        # vector of Open(t)/Open(t-1)
        # NOTE(review): `self.m` is never assigned in the visible code; presumably the
        # number of non-cash assets (self.__asset_number) was intended -- confirm.
        self.dailyReturn_t = tf.placeholder(tf.float32, [None, self.m])  # Relative price tensor for each feature
        
        # NOTE(review): incomplete statement (no parameter list, colon or body); this
        # is a SyntaxError that prevents the whole file from being compiled.
        def reward

        # NOTE(review): `cash_return` is used as a bare name; only self.cash_return
        # is assigned above.
        y_t = tf.concat([cash_return, self.dailyReturn_t], axis=1)       # Relative price tensor for each feature

        # NOTE(review): `self.action` and `self.W_previous` are never assigned in the
        # visible code.
        Vprime_t = self.action * self.pf_value_previous     # Value at the end of the period  t before reallocating
        Vprevious = self.W_previous*self.pf_value_previous  # Value at the beginning of the period

        constant = tf.constant(1.0, shape=[1])

        # Trading cost: trading_cost times the L1 norm (along axis 1) of the change in value
        cost = self.trading_cost * tf.norm(Vprime_t-Vprevious, ord=1, axis=1)*constant
        cost = tf.expand_dims(cost, 1)

        # NOTE(review): bare `m` is not defined in the visible code.
        zero = tf.constant(np.array([0.0]*m).reshape(1, m), shape=[1, m], dtype=tf.float32)

        # The cost is placed in the first column only (zeros for the other m columns)
        vec_zero = tf.tile(zero, tf.stack([shape_X_t, 1]))
        vec_cost = tf.concat([cost, vec_zero], axis=1)

        Vsecond_t = Vprime_t - vec_cost

        V_t = tf.multiply(Vsecond_t, y_t)
        # Compute the portfolio value
        self.portfolioValue = tf.norm(V_t, ord=1)
        # Simple return, not logarithmic one
        self.instantaneous_reward = (self.portfolioValue-self.pf_value_previous)/self.pf_value_previous
        
        ## Train the NN
        # NOTE(review): inside the class this name is mangled to
        # self._DPG_SimpleReturn__set_loss_function, but the class defines no such
        # method; the only `__set_loss_function` is the local function defined below,
        # after this call, and it returns nothing.
        self.__loss = self.__set_loss_function()
        
        
        # NOTE(review): local function nested inside __init__ (takes `self` but is not
        # a method); it is never called in the visible code.
        def rewardEquiweighted(self):
            y_t = tf.concat([self.cash_return, self.dailyReturn_t], axis=1)
            
            # Equal weight for every asset including cash
            w_eq = np.array(np.array([1/(self.__asset_number+1)]*(self.__asset_number+1)))
            V_eq = w_eq*self.pf_value_previous
            V_eq_second = tf.multiply(V_eq, y_t)

            self.portfolioValue_eq = tf.norm(V_eq_second, ord=1)

            self.instantaneous_reward_eq = (self.portfolioValue_eq-self.pf_value_previous)/self.pf_value_previous
            
            return self.instantaneous_reward_eq
        
        # NOTE(review): local function nested inside __init__; it is never called in
        # the visible code, has no return statement, and reads `w_eq` and
        # `ratio_regul`, which are not defined in its scope.
        def __set_loss_function():
            

                
            with tf.variable_scope("Reward_Equiweighted"):
                constant_return = tf.constant(
                    1+self.interest_rate, shape=[1, 1])
                cash_return = tf.tile(
                    constant_return, tf.stack([shape_X_t, 1]))
                y_t = tf.concat(
                    [cash_return, self.dailyReturn_t], axis=1)
  

                V_eq = w_eq*self.pf_value_previous
                V_eq_second = tf.multiply(V_eq, y_t)
        
                self.portfolioValue_eq = tf.norm(V_eq_second, ord=1)
            
                self.instantaneous_reward_eq = (
                    self.portfolioValue_eq-self.pf_value_previous)/self.pf_value_previous
                
            with tf.variable_scope("Max_weight"):
                self.max_weight = tf.reduce_max(self.action)
                print(self.max_weight.shape)

                
            with tf.variable_scope("Reward_adjusted"):
                
                self.adjested_reward = self.instantaneous_reward - self.instantaneous_reward_eq - ratio_regul*self.max_weight
                
        #objective function 
        #maximize reward over the batch 
        # min(-r) = max(r)
        # NOTE(review): `optimizer` and `sess` are not defined in the visible code, and
        # self.adjested_reward is only assigned inside the local function above, which
        # is never called.
        self.train_op = optimizer.minimize(-self.adjested_reward)
        
        # some bookkeeping
        self.optimizer = optimizer
        self.sess = sess

        
    def compute_W(self, X_t_, W_previous_):
        """
        This function returns the action the agent takes 
        given the input tensor and the W_previous
        
        It is a vector of weight

        """
        return self.sess.run(tf.squeeze(self.action), feed_dict={self.X_t: X_t_, self.W_previous: W_previous_})

    
    # NOTE(review): this `train` is replaced by the second `train` defined further
    # down in the class body, so it is unreachable through the class.
    def train(self, X_t_, W_previous_, pf_value_previous_, dailyReturn_t_):
        """
        This function trains the neural network
        maximizing the reward 
        the input is a batch of the differents values
        """
        self.sess.run(self.train_op, feed_dict={self.X_t: X_t_,
                                                self.W_previous: W_previous_,
                                                self.pf_value_previous: pf_value_previous_,
                                                self.dailyReturn_t: dailyReturn_t_})
        
        
        # NOTE(review): everything from here to the end of this method mirrors the
        # graph construction of DPG_LogReturn.__init__ but, by indentation, is part of
        # `train`. It reads `restore_dir`, which is not a parameter of this method,
        # and calls self.__pure_pc(), which this class does not define.
        ## Keep track of global steps during training
        self.__global_step = tf.Variable(0, trainable=False)
        self.__train_operation = None
        
        ## Compute portfolio features:
        
        # FUTURE RELATIVE PRICE RANK 3 TENSOR Y: tensor for the last period (t)
        self.__Y = tf.placeholder(tf.float32, shape=[None, self.__config["input"]["feature_number"], self.__asset_number])
        
        # FUTURE RELATIVE PRICE RANK 2 TENSOR y: relative prices considering just the closing prices (feature 0) at t
        self.__y = tf.concat([tf.ones([self.__net.input_num, 1]), self.__Y[:, 0, :]], 1)
        
        # WEIGHT VECTOR: Computing w' at the end of each batch
        self.__future_weight_vec = (self.__y * self.__net.output) / tf.reduce_sum(self.__y * self.__net.output, axis=1)[:, None]
                    
        # tf.assert_equal(tf.reduce_sum(self.__future_weight_vec, axis=1), tf.constant(1.0))
        
        # COMMISSIONS:
        # At the end of period t, the agent has to reallocate w'_t (future_weight_vec) into w_t
        # The reallocation of the assets has a cost (transaction cost)
        self.__commission_ratio = self.__config["trading"]["trading_consumption"]
        
        # PORTFOLIO VALUE VECTOR:
        # __pv_vector: portfolio value for each batch (the value of the portfolio after computing the action with n_b samples)
        # The operations are computed over all the periods in a batch 
        self.__pv_vector = tf.reduce_sum(self.__net.output * self.__y, reduction_indices=[1]) *\
                           (tf.concat([tf.ones(1), self.__pure_pc()], axis=0))
        
        # LOG MEAN FREE: cumulated return without considering transaction costs 
        # dot product of weight vector of the batches and the future price of the batches
        # The dimension is reduced along the asset axis (1), obtaining one value for each batch
        self.__log_mean_free = tf.reduce_mean(tf.log(tf.reduce_sum(self.__net.output * self.__y, reduction_indices=[1])))
        
        # PORTFOLIO VALUE: 
        # __portfolio_value: result of multiplying all the elements of __pv_vector (values for each batch)
        # where each element is the portfolio value for a batch
        self.__portfolio_value = tf.reduce_prod(self.__pv_vector)
        self.__mean = tf.reduce_mean(self.__pv_vector)              # Mean of the portfolio value vector (through all the batches)
        self.__log_mean = tf.reduce_mean(tf.log(self.__pv_vector))  # Cumulated return (eq 22)
        
        ## Evaluate performance
        self.__standard_deviation = tf.sqrt(tf.reduce_mean((self.__pv_vector - self.__mean) ** 2))
        self.__sharp_ratio = (self.__mean - 1) / self.__standard_deviation
        
        ## Train the NN
        self.__loss = self.__set_loss_function()
        self.__train_operation = self.init_train(learning_rate=self.__config["training"]["learning_rate"],
                                                 decay_steps=self.__config["training"]["decay_steps"],
                                                 decay_rate=self.__config["training"]["decay_rate"],
                                                 training_method=self.__config["training"]["training_method"])
        self.__saver = tf.train.Saver()
        if restore_dir:
            self.__saver.restore(self.__net.session, restore_dir)
        else:
            self.__net.session.run(tf.global_variables_initializer())

    ## Getters 
    # NOTE(review): most of the private attributes returned below are only assigned
    # inside the first (overridden) `train` method above.
    @property
    def session(self):
        return self.__net.session

    @property
    def pv_vector(self):
        return self.__pv_vector

    @property
    def standard_deviation(self):
        return self.__standard_deviation

    @property
    def portfolio_weights(self):
        return self.__net.output

    @property
    def sharp_ratio(self):
        return self.__sharp_ratio

    @property
    def log_mean(self):
        return self.__log_mean

    @property
    def log_mean_free(self):
        return self.__log_mean_free

    @property
    def portfolio_value(self):
        return self.__portfolio_value

    @property
    def loss(self):
        return self.__loss

    @property
    def layers_dict(self):
        return self.__net.layers_dict

    def recycle(self):
        # Resets the default graph and closes the session of the network
        tf.reset_default_graph()
        self.__net.session.close()

        
   
    
    # Define the optimizer operation (train_step)
    def init_train(self, learning_rate, decay_steps, decay_rate, training_method):
        # Staircase exponential decay of the learning rate driven by the global step
        learning_rate = tf.train.exponential_decay(learning_rate, self.__global_step,
                                                   decay_steps, decay_rate, staircase=True)
        if training_method == 'GradientDescent':
            train_step = tf.train.GradientDescentOptimizer(learning_rate).\
                         minimize(self.__loss, global_step=self.__global_step)
        elif training_method == 'Adam':
            train_step = tf.train.AdamOptimizer(learning_rate).\
                         minimize(self.__loss, global_step=self.__global_step)
        elif training_method == 'RMSProp':
            train_step = tf.train.RMSPropOptimizer(learning_rate).\
                         minimize(self.__loss, global_step=self.__global_step)
        else:
            # Any other training_method is rejected (no message is attached)
            raise ValueError()
        return train_step

    # Train the NN using the previously defined train_operation
    # NOTE(review): this definition overrides the earlier `train` of this class.
    def train(self, x, y, last_w, setw):
        tflearn.is_training(True, self.__net.session)
        self.evaluate_tensors(x, y, last_w, setw, [self.__train_operation])

        
    # Before feeding the tensor into the NN, checks that there is no nan values
    def evaluate_tensors(self, x, y, last_w, setw, tensors):
        """
        :param x: input tensor
        :param y: relative price tensor fed into the Y placeholder
        :param last_w: previous weight to consider transaction costs
        :param setw: a function, pass the output w to it to fill the PVM
        :param tensors: other tensors so as to update its values when running the training (see TrainTrader.ipynb)
        :return: the evaluated values of ``tensors`` (the network output, which is
            evaluated as an extra last element, is not part of the return value)
        """
        tensors = list(tensors)
        tensors.append(self.__net.output)
        assert not np.any(np.isnan(x))
        assert not np.any(np.isnan(y))
        assert not np.any(np.isnan(last_w)), "the last_w is {}".format(last_w)
        # The session which is run by this function is the session of the NN class, so it computes the operations defined
        # in that class which are related to the tensors given in the feed_dict.
        results = self.__net.session.run(tensors, feed_dict={
                                         self.__net.input_tensor: x,          # Enters the net
                                          self.__Y: y,
                                          self.__net.previous_w: last_w,      # Input of EIIE_Output_WithW together with x
                                          self.__net.input_num: x.shape[0]})  # Num batches in the dataset
        
        # The last element of results is the network output appended to `tensors` above
        # (not the number of batches). All of its columns but the first one are passed
        # to setw; the first column is described above as the cash weight.
        setw(results[-1][:, 1:])   
        return results[:-1]

    
    # Save the variables path including file name
    def save_model(self, path):
        self.__saver.save(self.__net.session, path)

        
   
    
    # Adds a leading axis of size 1 to the history, feeds it to the network with the
    # previous weights (first entry dropped) and returns the squeezed network output
    def decide_by_history(self, history, last_w):
        assert isinstance(history, np.ndarray),\
            "the history should be a numpy array, not %s" % type(history)
        assert not np.any(np.isnan(last_w))
        assert not np.any(np.isnan(history))
        tflearn.is_training(False, self.session)
        history = history[np.newaxis, :, :, :]
        # The session which is run by this function is the session of this class, so computes the operations defined
        # in this class which are related to the tensors given in the feed_dict
        return np.squeeze(self.session.run(self.__net.output, feed_dict={self.__net.input_tensor: history,
                                                                         self.__net.previous_w: last_w[np.newaxis, 1:],
                                                                         self.__net.input_num: 1}))
        
      