Purpose is to develop the processor objects.  Main reason for using an object is so that we can keep state (ie keep track of averages etc)

In [1]:
import numpy as np

In [2]:
class SimpleStandardizer(object):
    def __init__(self):
        pass

    def transform(self, data):
        """
        The SimpleStandardizer transforms the data using the mean and standard
        deviation across the batch it is processing
        """
        assert len(data.shape) == 2
        return (data - data.mean(axis=0)) / data.std(axis=0)


In [3]:
standardizer = SimpleStandardizer()

In [4]:
#  first try an array with a single sample
#  in energy_py all arrays have the shape (num_samples, array_length)
#  with array_length using being the observation array length or the number of actions etc
state = np.array([1,2,3]).reshape(1, -1)
state.shape

(1, 3)

In [5]:
processed = standardizer.transform(state)

  # This is added back by InteractiveShellApp.init_path()


In [29]:
#  now lets try an array with multiple observations
states = np.array([[1,2,3],[4,5,6], [7,8,9], [0,0,0]]).reshape(4,3)
states

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [0, 0, 0]])

In [30]:
_means = states.mean(axis=0)
print(_means, _means.shape)

[ 3.    3.75  4.5 ] (3,)


In [31]:
_std = states.std(axis=0)
print(_std, _std.shape)

[ 2.73861279  3.03108891  3.35410197] (3,)


In [32]:
_proc = (states - _means) / _std
_proc

array([[-0.73029674, -0.57735027, -0.4472136 ],
       [ 0.36514837,  0.41239305,  0.4472136 ],
       [ 1.46059349,  1.40213637,  1.34164079],
       [-1.09544512, -1.23717915, -1.34164079]])

In [33]:
#  now we try to use the object
standardizer = SimpleStandardizer()
processed_states = standardizer.transform(states)
processed_states

array([[-0.73029674, -0.57735027, -0.4472136 ],
       [ 0.36514837,  0.41239305,  0.4472136 ],
       [ 1.46059349,  1.40213637,  1.34164079],
       [-1.09544512, -1.23717915, -1.34164079]])

In [34]:
processed_states.mean(axis=0)

array([  0.00000000e+00,   5.55111512e-17,   0.00000000e+00])

In [35]:
processed_states.std(axis=0)

array([ 1.,  1.,  1.])

Now we try a more complex class that keeps track of statistics

In [40]:
shape = (3)
history = [states[0], states[1], states[2], states[3]]
history = np.array(history)
processed_states = (history - history.mean(axis=0)) / history.std(axis=0)
processed_states


array([[-0.73029674, -0.57735027, -0.4472136 ],
       [ 0.36514837,  0.41239305,  0.4472136 ],
       [ 1.46059349,  1.40213637,  1.34164079],
       [-1.09544512, -1.23717915, -1.34164079]])

In [44]:
class Standardizer(object):
    """
    We rely on the input shape being (n_samples, state_dim) 
    """
    def __init__(self):
        #  use a list to hold all data tthis processor has seen
        self.history = []

    def transform(self, data, use_history=True):
        """
        The SimpleStandardizer transforms the data using the mean and standard
        deviation across the batch it is processing
        """
        #  check that our data is 
        assert len(data.shape) == 2
        #  add the data we are processing onto our history list
        self.history.append(data)
        if use_history:
            #  create an array from the list then reshape to (num_samples, dim)
            #  taking advantage of energy_py states/actions being this shape (always len 2)
            history = np.array(self.history).reshape(-1, data.shape[1])
            means, stdevs = history.mean(axis=0), history.std(axis=0)
        else:
            means, stdevs = data.mean(axis=0), data.std(axis=0)
        
        return (data - means) / stdevs

In [55]:
std = Standardizer()

In [62]:
proc = std.transform(data=states, use_history=False)
proc

array([[-0.73029674, -0.57735027, -0.4472136 ],
       [ 0.36514837,  0.41239305,  0.4472136 ],
       [ 1.46059349,  1.40213637,  1.34164079],
       [-1.09544512, -1.23717915, -1.34164079]])

In [61]:
proc = std.transform(data=states)
proc

array([[-0.20412415,  0.04389513,  0.22941573],
       [ 1.02062073,  1.0973782 ,  1.14707867],
       [ 2.2453656 ,  2.15086128,  2.0647416 ],
       [-0.61237244, -0.65842692, -0.6882472 ]])

In [60]:
new_data = np.zeros(proc.shape)
proc = std.transform(data=new_data)
proc

array([[-0.52827054, -0.56493268, -0.58834841],
       [-0.52827054, -0.56493268, -0.58834841],
       [-0.52827054, -0.56493268, -0.58834841],
       [-0.52827054, -0.56493268, -0.58834841]])

In [63]:
proc = std.transform(data=states, use_history=False)
proc

array([[-0.73029674, -0.57735027, -0.4472136 ],
       [ 0.36514837,  0.41239305,  0.4472136 ],
       [ 1.46059349,  1.40213637,  1.34164079],
       [-1.09544512, -1.23717915, -1.34164079]])

Now we can try the normalizer

In [65]:
class Normalizer(object):
    """
    We rely on the input shape being (n_samples, state_dim) 
    """
    def __init__(self):
        #  use a list to hold all data tthis processor has seen
        self.history = []

    def transform(self, data):
        """
        The SimpleStandardizer transforms the data using the mean and standard
        deviation across the batch it is processing
        """
        assert len(data.shape) == 2
        #  add the data we are processing onto our history list
        self.history.append(data)
        #  create an array from the list then reshape to (num_samples, dim)
        #  taking advantage of energy_py states/actions being this shape (always len 2)
        history = np.array(self.history).reshape(-1, data.shape[1])
        
        return (data - history.mean(axis=0)) / history.std(axis=0)

In [69]:
norm = Normalizer()

In [79]:
proc = norm.transform(states)
proc

array([[-0.3099937 , -0.0838591 ,  0.08804509],
       [ 0.85248268,  0.92245011,  0.968496  ],
       [ 2.01495907,  1.92875932,  1.8489469 ],
       [-0.69748583, -0.75473191, -0.79240582]])

In [80]:
proc = norm.transform(new_data)
proc

array([[-0.65079137, -0.7016704 , -0.73484692],
       [-0.65079137, -0.7016704 , -0.73484692],
       [-0.65079137, -0.7016704 , -0.73484692],
       [-0.65079137, -0.7016704 , -0.73484692]])