In [50]:
import numpy as np

### Testing how to create sorted policies
Goal: Find the right way to create action indices ('policies') that correspond to the sorted features.  
Motivation: Convert actions into so-called 'sorted policies' that the Q-model can interpret.

In [103]:
# Test fixtures: the action vocabulary, one feature row per sample,
# and the action regarded as ideal for each of those rows.
ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']

# One row per sample, four feature values per row.
test_features = np.array([
    [0, 2, 0, 1],
    [1, 1, 0, 2],
    [0, 0, 2, 0],
    [2, 1, 0, 1],
    [1, 2, 0, 0],
    [2, 0, 0, 0],
])

# ideal_actions[k] is the ideal action for test_features[k].
ideal_actions = [
    'RIGHT',
    'LEFT',
    'DOWN',
    'UP',
    'RIGHT',
    'UP',
]

In [104]:
# Sort every feature row in ascending order ...
sorted_features = np.sort(test_features, axis=1)

# ... and record, per row, the permutation that produces that order.
# kind="stable" pins the order of tied feature values (the lower original
# index comes first). NumPy's default sort kind is not stable, so without it
# the tie order - and every policy index derived from `sorting_indices` -
# is not guaranteed to be the same on every NumPy build.
sorting_indices = np.argsort(test_features, axis=1, kind="stable")

In [108]:
# Side-by-side view: each original row, its sorted version and the permutation used.
print("Original features  Sorted features  Sorting indices")
wide_gap, narrow_gap = ' ' * 8, ' ' * 6  # padding that aligns the columns with the header
for original_row, sorted_row, index_row in zip(test_features, sorted_features, sorting_indices):
    print(original_row, wide_gap, sorted_row, narrow_gap, index_row)

Original features  Sorted features  Sorting indices
[0 2 0 1]          [0 0 1 2]        [0 2 3 1]
[1 1 0 2]          [0 1 1 2]        [2 0 1 3]
[0 0 2 0]          [0 0 0 2]        [0 1 3 2]
[2 1 0 1]          [0 1 1 2]        [2 1 3 0]
[1 2 0 0]          [0 0 1 2]        [2 3 0 1]
[2 0 0 0]          [0 0 0 2]        [1 2 3 0]


In [54]:
# Test if sort is random:
# run argsort twice on the same, unchanged features and compare the results,
# repeated N_REPEATS times.
# The two results live in cell-local names so that this check does not
# overwrite the notebook-level `sorting_indices` used by the other cells.
N_REPEATS = 100
equal = np.empty(N_REPEATS, dtype=bool)  # one flag per repetition
for trial in range(N_REPEATS):
    first_pass = np.argsort(test_features, axis=1)
    second_pass = np.argsort(test_features, axis=1)
    equal[trial] = np.array_equal(first_pass, second_pass)
np.all(equal)
# If True then not random

True

In [109]:
# Original ideal actions and policies
# A 'policy' entry is the position of the ideal action within ACTIONS.
policy = np.array(list(map(ACTIONS.index, ideal_actions)))

print("features  policy  actions")
for feature_row, action_index, action_name in zip(test_features, policy, ideal_actions):
    print(feature_row, f"{action_index:<7}", action_name)

features  policy  actions
[0 2 0 1] 1       RIGHT
[1 1 0 2] 3       LEFT
[0 0 2 0] 2       DOWN
[2 1 0 1] 0       UP
[1 2 0 0] 1       RIGHT
[2 0 0 0] 0       UP


In [None]:
# Does it work to just sort the policy with the sorting indices?
# For every row: find the position at which the original action index
# appears inside that row's sorting indices (plain Python list lookup).
sorted_policy = np.array([
    list(index_row).index(action_index)
    for index_row, action_index in zip(sorting_indices, policy)
])
# Label obtained by reading the sorted position as an index into ACTIONS.
sorted_actions = [ACTIONS[position] for position in sorted_policy]

print("sorted features  sorted policy  'sorted actions'")
for sorted_row, position, label in zip(sorted_features, sorted_policy, sorted_actions):
    print(sorted_row, ' ' * 6, f"{position:<14}", label)

print("Yes, but code's a bit ugly.")

sorted features  sorted policy  'sorted actions'
[0 0 1 2]        3              LEFT
[0 1 1 2]        3              LEFT
[0 0 0 2]        3              LEFT
[0 1 1 2]        3              LEFT
[0 0 1 2]        3              LEFT
[0 0 0 2]        3              LEFT
Yes, but code's a bit ugly.


In [112]:
# Rediscovering the numpy way:
# same lookup as the list-based version, but via np.where on each row of
# sorting indices; [0][0] takes the first matching position.
sorted_policy = np.array([
    np.where(index_row == action_index)[0][0]
    for index_row, action_index in zip(sorting_indices, policy)
])
# Label obtained by reading the sorted position as an index into ACTIONS.
sorted_actions = [ACTIONS[position] for position in sorted_policy]

print("sorted features  sorted policy  'sorted actions'")
for sorted_row, position, label in zip(sorted_features, sorted_policy, sorted_actions):
    print(sorted_row, ' ' * 6, f"{position:<14}", label)

print("Hmm, code is not much more elegant.")

sorted features  sorted policy  'sorted actions'
[0 0 1 2]        3              LEFT
[0 1 1 2]        3              LEFT
[0 0 0 2]        3              LEFT
[0 1 1 2]        3              LEFT
[0 0 1 2]        3              LEFT
[0 0 0 2]        3              LEFT
Hmm, code is not much more elegant.


Deciding on performance:

In [101]:
# Time the NumPy variant: np.where lookup of one policy entry (row 4) in its row of sorting indices.
%timeit np.where(sorting_indices[4] == policy[4])[0][0]

3.13 µs ± 88.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [102]:
# Time the pure-Python variant for the same row: convert the row to a list, then use list.index.
%timeit list(sorting_indices[4]).index(policy[4])

1.17 µs ± 6.75 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


=> Converting the row to a Python list and calling `.index` is roughly 2.7 times faster than `np.where` here (1.17 µs vs. 3.13 µs).

### Testing how the backtransformation from sorted to original policies should be done

#### First test: transforming the sorted features back to the original features

In [57]:
# Does backtransformation work like test_features == sorted_features[sorting_indices]?
# Attempt 1: index each sorted row with its own sorting indices.
print("restored  test")
restored_features = np.empty_like(test_features)
for row, (sorted_row, index_row, original_row) in enumerate(
        zip(sorted_features, sorting_indices, test_features)):
    restored_features[row] = sorted_row[index_row]
    print(restored_features[row], original_row)
print("Same:", (restored_features == test_features).all())
# If False then it doesn't work like that

restored  test
[0 1 2 0] [0 2 0 1]
[1 0 1 2] [1 1 0 2]
[0 0 2 0] [0 0 2 0]
[1 1 2 0] [2 1 0 1]
[1 2 0 0] [1 2 0 0]
[0 0 2 0] [2 0 0 0]
Same: False


In [58]:
# Does backtransformation work like test_features == sorting_indices[sorted_features]?
# Attempt 2: the other way round - index each row of sorting indices with the sorted feature values.
print("restored  test")
restored_features = np.empty_like(test_features)
for row, original_row in enumerate(test_features):
    index_row = sorting_indices[row]
    restored_features[row] = index_row[sorted_features[row]]
    print(restored_features[row], original_row)
print("Same:", (restored_features == test_features).all())
# If False then it doesn't work like that

restored  test
[0 0 2 3] [0 2 0 1]
[2 0 0 1] [1 1 0 2]
[0 0 0 3] [0 0 2 0]
[2 1 1 3] [2 1 0 1]
[2 2 3 0] [1 2 0 0]
[1 1 1 3] [2 0 0 0]
Same: False


In [59]:
# Does backtransformation work like this?
# Attempt 3: argsort the sorting indices themselves (row-wise, along the last axis)
# and use the result to index the sorted rows.
backtrafo_indices = np.argsort(sorting_indices, axis=-1)

print("restored  test")
restored_features = np.empty_like(test_features)
for row, (sorted_row, inverse_row, original_row) in enumerate(
        zip(sorted_features, backtrafo_indices, test_features)):
    restored_features[row] = sorted_row[inverse_row]
    print(restored_features[row], original_row)
print("Same:", (restored_features == test_features).all())
# If True then it does work like that

restored  test
[0 2 0 1] [0 2 0 1]
[1 1 0 2] [1 1 0 2]
[0 0 2 0] [0 0 2 0]
[2 1 0 1] [2 1 0 1]
[1 2 0 0] [1 2 0 0]
[2 0 0 0] [2 0 0 0]
Same: True


#### Is this transferable to the backtransformation of sorted policies?

In [125]:
# Does it work with backtrafo_indices?
# Look up each sorted policy entry in the matching row of backtrafo_indices
# and compare the result with the original policy.
print("policy  restored policy")
restored_policy = np.empty_like(policy)
for row, (expected, inverse_row, position) in enumerate(
        zip(policy, backtrafo_indices, sorted_policy)):
    restored_policy[row] = inverse_row[position]
    print(expected, ' ' * 5, restored_policy[row])
print("Same:", (policy == restored_policy).all())
# If True then it does work like that

policy  restored policy
1       2
3       3
2       2
0       2
1       1
0       2
Same: False


In [126]:
# Does it work with sorting_indices?
# Look up each sorted policy entry in the matching row of sorting_indices
# and compare the result with the original policy.
print("policy  restored policy")
restored_policy = np.empty_like(policy)
for row, (expected, index_row, position) in enumerate(
        zip(policy, sorting_indices, sorted_policy)):
    restored_policy[row] = index_row[position]
    print(expected, ' ' * 5, restored_policy[row])
print("Same:", (policy == restored_policy).all())
# If True then it does work like that

policy  restored policy
1       1
3       3
2       2
0       0
1       1
0       0
Same: True


### Other Tests

In [129]:
# Test if sorting with sorting_indices also works:
# indexing an original row with its sorting indices should give the sorted row.
for original_row, index_row, sorted_row in zip(test_features, sorting_indices, sorted_features):
    print(np.all(sorted_row == original_row[index_row]))

True
True
True
True
True
True
