In [1]:
import itertools

import numpy as np

from energy_py.main.scripts.spaces import Continuous_Space

In [2]:
#  the action space is a list with one space object per action dimension
action_space = [
    Continuous_Space(1, 10),
    Continuous_Space(20, 25),
]

In [3]:
#  a single observation: one row of three float features, shape (1, 3)
observation = np.array([[9.0, 9.0, 9.0]])
observation

array([[ 9.,  9.,  9.]])

In [4]:
#  case one: build a single state_action from a randomly sampled action
#  (the path taken when an e-greedy method acts randomly)
random_action = [dimension.sample() for dimension in action_space]
random_action

[5.712030456757384, 23.49548851576284]

In [5]:
state_action = np.append(observation, random_action)
state_action

array([  9.        ,   9.        ,   9.        ,   5.71203046,  23.49548852])

In [6]:
def make_state_action(action_space, observation):
    """
    Samples one value from every space in action_space and joins the
    samples onto the end of the flattened observation

    args
        action_space : iterable of objects exposing a sample() method
        observation  : array-like observation

    returns
        1-D numpy array holding the observation values followed by the samples
    """
    sampled = []
    for dimension in action_space:
        sampled.append(dimension.sample())

    #  np.append with no axis flattens both inputs before joining them
    return np.append(observation, sampled)

In [7]:
sa = make_state_action(action_space, observation)
sa

array([  9.        ,   9.        ,   9.        ,   6.07788035,  21.25399576])

In [8]:
#  case two: acting greedily
#  a single observation is paired with each of the n candidate actions,
#  giving n state_actions
discrete_action_spaces = [list(dimension.discretize())
                          for dimension in action_space]
discrete_action_spaces

[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
 [20.0, 21.0, 22.0, 23.0, 24.0, 25.0]]

In [9]:
#  cartesian product: every pairing of one value per action dimension
all_combs = list(itertools.product(*discrete_action_spaces))
all_combs

[(1.0, 20.0),
 (1.0, 21.0),
 (1.0, 22.0),
 (1.0, 23.0),
 (1.0, 24.0),
 (1.0, 25.0),
 (2.0, 20.0),
 (2.0, 21.0),
 (2.0, 22.0),
 (2.0, 23.0),
 (2.0, 24.0),
 (2.0, 25.0),
 (3.0, 20.0),
 (3.0, 21.0),
 (3.0, 22.0),
 (3.0, 23.0),
 (3.0, 24.0),
 (3.0, 25.0),
 (4.0, 20.0),
 (4.0, 21.0),
 (4.0, 22.0),
 (4.0, 23.0),
 (4.0, 24.0),
 (4.0, 25.0),
 (5.0, 20.0),
 (5.0, 21.0),
 (5.0, 22.0),
 (5.0, 23.0),
 (5.0, 24.0),
 (5.0, 25.0),
 (6.0, 20.0),
 (6.0, 21.0),
 (6.0, 22.0),
 (6.0, 23.0),
 (6.0, 24.0),
 (6.0, 25.0),
 (7.0, 20.0),
 (7.0, 21.0),
 (7.0, 22.0),
 (7.0, 23.0),
 (7.0, 24.0),
 (7.0, 25.0),
 (8.0, 20.0),
 (8.0, 21.0),
 (8.0, 22.0),
 (8.0, 23.0),
 (8.0, 24.0),
 (8.0, 25.0),
 (9.0, 20.0),
 (9.0, 21.0),
 (9.0, 22.0),
 (9.0, 23.0),
 (9.0, 24.0),
 (9.0, 25.0),
 (10.0, 20.0),
 (10.0, 21.0),
 (10.0, 22.0),
 (10.0, 23.0),
 (10.0, 24.0),
 (10.0, 25.0)]

In [10]:
all_combs = np.array(all_combs)
all_combs

array([[  1.,  20.],
       [  1.,  21.],
       [  1.,  22.],
       [  1.,  23.],
       [  1.,  24.],
       [  1.,  25.],
       [  2.,  20.],
       [  2.,  21.],
       [  2.,  22.],
       [  2.,  23.],
       [  2.,  24.],
       [  2.,  25.],
       [  3.,  20.],
       [  3.,  21.],
       [  3.,  22.],
       [  3.,  23.],
       [  3.,  24.],
       [  3.,  25.],
       [  4.,  20.],
       [  4.,  21.],
       [  4.,  22.],
       [  4.,  23.],
       [  4.,  24.],
       [  4.,  25.],
       [  5.,  20.],
       [  5.,  21.],
       [  5.,  22.],
       [  5.,  23.],
       [  5.,  24.],
       [  5.,  25.],
       [  6.,  20.],
       [  6.,  21.],
       [  6.,  22.],
       [  6.,  23.],
       [  6.,  24.],
       [  6.,  25.],
       [  7.,  20.],
       [  7.,  21.],
       [  7.,  22.],
       [  7.,  23.],
       [  7.,  24.],
       [  7.,  25.],
       [  8.,  20.],
       [  8.,  21.],
       [  8.,  22.],
       [  8.,  23.],
       [  8.,  24.],
       [  8.,

In [11]:
all_combs.shape

(60, 2)

In [12]:
observation.shape

(1, 3)

In [13]:
#  stack one copy of the single-row observation per action combination
#  (observation has shape (1, 3), so the result is (n_combinations, 3))
observations = np.tile(observation, (all_combs.shape[0], 1))
observations

array([[ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.,  9.],
       [ 9.,  9.

In [14]:
observations.shape

(60, 3)

In [20]:
#  we have all possible actions
#  we now need to append the observation onto the front of each array

state_actions = np.concatenate([observations, all_combs], axis=1)
state_actions

array([[  9.,   9.,   9.,   1.,  20.],
       [  9.,   9.,   9.,   1.,  21.],
       [  9.,   9.,   9.,   1.,  22.],
       [  9.,   9.,   9.,   1.,  23.],
       [  9.,   9.,   9.,   1.,  24.],
       [  9.,   9.,   9.,   1.,  25.],
       [  9.,   9.,   9.,   2.,  20.],
       [  9.,   9.,   9.,   2.,  21.],
       [  9.,   9.,   9.,   2.,  22.],
       [  9.,   9.,   9.,   2.,  23.],
       [  9.,   9.,   9.,   2.,  24.],
       [  9.,   9.,   9.,   2.,  25.],
       [  9.,   9.,   9.,   3.,  20.],
       [  9.,   9.,   9.,   3.,  21.],
       [  9.,   9.,   9.,   3.,  22.],
       [  9.,   9.,   9.,   3.,  23.],
       [  9.,   9.,   9.,   3.,  24.],
       [  9.,   9.,   9.,   3.,  25.],
       [  9.,   9.,   9.,   4.,  20.],
       [  9.,   9.,   9.,   4.,  21.],
       [  9.,   9.,   9.,   4.,  22.],
       [  9.,   9.,   9.,   4.,  23.],
       [  9.,   9.,   9.,   4.,  24.],
       [  9.,   9.,   9.,   4.,  25.],
       [  9.,   9.,   9.,   5.,  20.],
       [  9.,   9.,   9.,

In [21]:
#  discretize every dimension of the action space
disc_action_spaces = [list(dimension.discretize()) for dimension in action_space]
#  enumerate the cartesian product of the discretized dimensions, one row each
actions = np.array(list(itertools.product(*disc_action_spaces)))
#  scaling of each action is not performed in this cell

In [22]:
actions

array([[  1.,  20.],
       [  1.,  21.],
       [  1.,  22.],
       [  1.,  23.],
       [  1.,  24.],
       [  1.,  25.],
       [  2.,  20.],
       [  2.,  21.],
       [  2.,  22.],
       [  2.,  23.],
       [  2.,  24.],
       [  2.,  25.],
       [  3.,  20.],
       [  3.,  21.],
       [  3.,  22.],
       [  3.,  23.],
       [  3.,  24.],
       [  3.,  25.],
       [  4.,  20.],
       [  4.,  21.],
       [  4.,  22.],
       [  4.,  23.],
       [  4.,  24.],
       [  4.,  25.],
       [  5.,  20.],
       [  5.,  21.],
       [  5.,  22.],
       [  5.,  23.],
       [  5.,  24.],
       [  5.,  25.],
       [  6.,  20.],
       [  6.,  21.],
       [  6.,  22.],
       [  6.,  23.],
       [  6.,  24.],
       [  6.,  25.],
       [  7.,  20.],
       [  7.,  21.],
       [  7.,  22.],
       [  7.,  23.],
       [  7.,  24.],
       [  7.,  25.],
       [  8.,  20.],
       [  8.,  21.],
       [  8.,  22.],
       [  8.,  23.],
       [  8.,  24.],
       [  8.,

In [41]:
import itertools

def normalize(value, low, high):
    """
    Generic helper function
    Normalizes a value using a given lower & upper bound
    (low maps to 0, high maps to 1)

    args
        value : scalar or numpy array to normalize
        low   : scalar lower bound
        high  : scalar upper bound

    returns
        numpy array with the same shape as value (0-d for a scalar value)
    """
    #  catch the constant value case - there is no range to divide by
    #  return float zeros shaped like the input, so that an array input
    #  keeps its shape instead of collapsing to a single integer 0
    if low == high:
        return np.zeros_like(value, dtype=float)

    max_range = high - low
    return np.array((value - low) / max_range)

def scale_array(array, space, normalize):
    """
    Helper function for make_machine_experience()
    Uses the space & a given function to scale an array

    Used to scale the observation and action

    can probably move this into a parent

    args
        array     : iterable of values, one per space object
        space     : iterable of space objects - each exposes a type attribute
                    ('continuous' or 'discrete'); continuous spaces also
                    expose low & high
        normalize : callable(value, low, high) used on continuous values
                    (there is no default - it must be passed in)

    returns
        1-D numpy array of the scaled values, in input order

    raises
        AssertionError if a discrete value is not 0 or 1, or if a space
        has an unknown type
    """
    #  empty numpy array
    scaled_array = np.array([])

    #  iterate across the array values & corresponding space object
    #  zip_longest pads the shorter input with None - presumably chosen so
    #  that a length mismatch is not silently truncated as zip would do
    for value, spc in itertools.zip_longest(array, space):
        if spc.type == 'continuous':
            #  normalize continuous variables
            scaled = normalize(value, spc.low, spc.high)
        elif spc.type == 'discrete':
            #  discrete values should already be dummies, so pass through
            assert (value == 0) or (value == 1)
            #  without this assignment `scaled` would be unbound on the
            #  first iteration or stale from the previous iteration
            scaled = value
        else:
            #  explicit raise so the guard also fires when asserts are off
            raise AssertionError('unknown space type {}'.format(spc.type))

        #  appending the scaled value onto the scaled array
        scaled_array = np.append(scaled_array, scaled).reshape(-1)

    return scaled_array

In [42]:
for a in actions:
    print(a)

[  1.  20.]
[  1.  21.]
[  1.  22.]
[  1.  23.]
[  1.  24.]
[  1.  25.]
[  2.  20.]
[  2.  21.]
[  2.  22.]
[  2.  23.]
[  2.  24.]
[  2.  25.]
[  3.  20.]
[  3.  21.]
[  3.  22.]
[  3.  23.]
[  3.  24.]
[  3.  25.]
[  4.  20.]
[  4.  21.]
[  4.  22.]
[  4.  23.]
[  4.  24.]
[  4.  25.]
[  5.  20.]
[  5.  21.]
[  5.  22.]
[  5.  23.]
[  5.  24.]
[  5.  25.]
[  6.  20.]
[  6.  21.]
[  6.  22.]
[  6.  23.]
[  6.  24.]
[  6.  25.]
[  7.  20.]
[  7.  21.]
[  7.  22.]
[  7.  23.]
[  7.  24.]
[  7.  25.]
[  8.  20.]
[  8.  21.]
[  8.  22.]
[  8.  23.]
[  8.  24.]
[  8.  25.]
[  9.  20.]
[  9.  21.]
[  9.  22.]
[  9.  23.]
[  9.  24.]
[  9.  25.]
[ 10.  20.]
[ 10.  21.]
[ 10.  22.]
[ 10.  23.]
[ 10.  24.]
[ 10.  25.]


In [50]:
#  scale every candidate action, collecting the scaled copies in order
scaled_actions =[]
#  NOTE(review): the enumerate index i is never read inside the loop
#  act & scaled keep their final values after the loop - later cells
#  display them
for i, act in enumerate(actions):
    scaled = scale_array(act, action_space, normalize)
    #  show the raw action followed by its scaled counterpart
    print(act)
    print(scaled)
    scaled_actions.append(scaled)
    

[  1.  20.]
[ 0.  0.]
[  1.  21.]
[ 0.   0.2]
[  1.  22.]
[ 0.   0.4]
[  1.  23.]
[ 0.   0.6]
[  1.  24.]
[ 0.   0.8]
[  1.  25.]
[ 0.  1.]
[  2.  20.]
[ 0.11111111  0.        ]
[  2.  21.]
[ 0.11111111  0.2       ]
[  2.  22.]
[ 0.11111111  0.4       ]
[  2.  23.]
[ 0.11111111  0.6       ]
[  2.  24.]
[ 0.11111111  0.8       ]
[  2.  25.]
[ 0.11111111  1.        ]
[  3.  20.]
[ 0.22222222  0.        ]
[  3.  21.]
[ 0.22222222  0.2       ]
[  3.  22.]
[ 0.22222222  0.4       ]
[  3.  23.]
[ 0.22222222  0.6       ]
[  3.  24.]
[ 0.22222222  0.8       ]
[  3.  25.]
[ 0.22222222  1.        ]
[  4.  20.]
[ 0.33333333  0.        ]
[  4.  21.]
[ 0.33333333  0.2       ]
[  4.  22.]
[ 0.33333333  0.4       ]
[  4.  23.]
[ 0.33333333  0.6       ]
[  4.  24.]
[ 0.33333333  0.8       ]
[  4.  25.]
[ 0.33333333  1.        ]
[  5.  20.]
[ 0.44444444  0.        ]
[  5.  21.]
[ 0.44444444  0.2       ]
[  5.  22.]
[ 0.44444444  0.4       ]
[  5.  23.]
[ 0.44444444  0.6       ]
[  5.  24.]
[ 0.44444444

In [51]:
scaled

array([ 1.,  1.])

In [52]:
act

array([ 10.,  25.])

In [55]:
all_acts = np.array(scaled_actions)

In [56]:
all_acts.shape

(60, 2)

In [57]:
#  discretize each action dimension
disc_action_spaces = [list(dimension.discretize()) for dimension in action_space]

#  scale each member of the cartesian product of the discretized dimensions
scaled_actions = [
    scale_array(combination, action_space, normalize)
    for combination in itertools.product(*disc_action_spaces)
]

In [58]:
scaled_actions

[array([ 0.,  0.]),
 array([ 0. ,  0.2]),
 array([ 0. ,  0.4]),
 array([ 0. ,  0.6]),
 array([ 0. ,  0.8]),
 array([ 0.,  1.]),
 array([ 0.11111111,  0.        ]),
 array([ 0.11111111,  0.2       ]),
 array([ 0.11111111,  0.4       ]),
 array([ 0.11111111,  0.6       ]),
 array([ 0.11111111,  0.8       ]),
 array([ 0.11111111,  1.        ]),
 array([ 0.22222222,  0.        ]),
 array([ 0.22222222,  0.2       ]),
 array([ 0.22222222,  0.4       ]),
 array([ 0.22222222,  0.6       ]),
 array([ 0.22222222,  0.8       ]),
 array([ 0.22222222,  1.        ]),
 array([ 0.33333333,  0.        ]),
 array([ 0.33333333,  0.2       ]),
 array([ 0.33333333,  0.4       ]),
 array([ 0.33333333,  0.6       ]),
 array([ 0.33333333,  0.8       ]),
 array([ 0.33333333,  1.        ]),
 array([ 0.44444444,  0.        ]),
 array([ 0.44444444,  0.2       ]),
 array([ 0.44444444,  0.4       ]),
 array([ 0.44444444,  0.6       ]),
 array([ 0.44444444,  0.8       ]),
 array([ 0.44444444,  1.        ]),
 array([