# Portfolio vector memory

Memory stack storing the agent's actions (the weight vectors of the portfolio at each batch), so that the previous weight vector can be taken into account when computing the current action.

In [1]:
import numpy as np
class PVM:
    """Portfolio vector memory.

    Stores one portfolio weight vector per time step so that the weights
    chosen at a previous step can be read back when computing the next
    action. Columns of ``memory`` are indexed by time step.
    """

    def __init__(self, m, sample_bias, total_steps, batch_size, w_init):
        """Fill the memory with ``total_steps`` copies of ``w_init``.

        Args:
            m: Not used by this class; kept so existing callers keep working.
            sample_bias: Success probability ``p`` of the geometric
                distribution used by ``get_random_index``.
            total_steps: Number of time steps (columns) held in the memory.
            batch_size: Length of a training batch; used to keep sampled
                start indices far enough from the end of the memory.
            w_init: Initial weight vector copied into every time step.
        """
        weights = np.asarray(w_init)
        # An integer/boolean w_init (e.g. [1, 0, 0]) would yield an integer
        # memory, and update() would then silently truncate fractional
        # weights on assignment. Promote to float; float inputs keep their
        # dtype untouched.
        if weights.dtype.kind in "biu":
            weights = weights.astype(float)

        # For a 1-D w_init this is a (len(w_init), total_steps) array:
        # one column per time step.
        self.memory = np.transpose(np.array([weights] * total_steps))
        self.sample_bias = sample_bias
        self.total_steps = total_steps
        self.batch_size = batch_size

    def get_W(self, t):
        """Return the weight vector stored at time step ``t``.

        The result is a NumPy view into the memory, not a copy.
        """
        return self.memory[:, t]

    def update(self, t, w):
        """Overwrite the weight vector stored at time step ``t`` with ``w``."""
        self.memory[:, t] = w

    def get_random_index(self):
        """Sample a start index for a training batch.

        The index is ``total_steps - batch_size + 1 - z`` where ``z`` is drawn
        from a geometric distribution with ``p = sample_bias``, so start
        positions close to the end of the memory are the most likely.
        Negative candidates are rejected and redrawn.

        Returns:
            An index ``tb`` with ``0 <= tb <= total_steps - batch_size``.

        Raises:
            ValueError: If ``batch_size`` exceeds ``total_steps``; no valid
                start index exists and the rejection loop would never end.
        """
        latest_start = self.total_steps - self.batch_size
        if latest_start < 0:
            raise ValueError(
                "batch_size ({}) must not exceed total_steps ({})".format(
                    self.batch_size, self.total_steps
                )
            )

        while True:
            z = np.random.geometric(p=self.sample_bias)
            # z >= 1, so tb never exceeds latest_start and a full batch
            # starting at tb stays inside the memory.
            tb = latest_start + 1 - z
            if tb >= 0:
                return tb
            