In [1]:
import copy

import numpy as np
from mdptoolbox import mdp

In [2]:
items = ['empty', 'white', 'blue', 'red']

In [3]:
from itertools import product
def all_repeat():
    """
    Computes all permutations of items with repetitions

    -> returns ['empty', 'empty', 'empty', 'empty'], ['empty', 'empty', 'empty', 'red'], ..., ['empty', 'white', 'blue', 'red']

    :return: permutations of items with repetitions
    """
    results = []
    for c in product(items, repeat = 4):
        comb = [[c[0], c[1]], [c[2], c[3]]]
        results.append(comb)
    return results

In [4]:
item_combinations = all_repeat()
print(item_combinations)

[[['empty', 'empty'], ['empty', 'empty']], [['empty', 'empty'], ['empty', 'white']], [['empty', 'empty'], ['empty', 'blue']], [['empty', 'empty'], ['empty', 'red']], [['empty', 'empty'], ['white', 'empty']], [['empty', 'empty'], ['white', 'white']], [['empty', 'empty'], ['white', 'blue']], [['empty', 'empty'], ['white', 'red']], [['empty', 'empty'], ['blue', 'empty']], [['empty', 'empty'], ['blue', 'white']], [['empty', 'empty'], ['blue', 'blue']], [['empty', 'empty'], ['blue', 'red']], [['empty', 'empty'], ['red', 'empty']], [['empty', 'empty'], ['red', 'white']], [['empty', 'empty'], ['red', 'blue']], [['empty', 'empty'], ['red', 'red']], [['empty', 'white'], ['empty', 'empty']], [['empty', 'white'], ['empty', 'white']], [['empty', 'white'], ['empty', 'blue']], [['empty', 'white'], ['empty', 'red']], [['empty', 'white'], ['white', 'empty']], [['empty', 'white'], ['white', 'white']], [['empty', 'white'], ['white', 'blue']], [['empty', 'white'], ['white', 'red']], [['empty', 'white'], 

In [5]:
def neighboured_fields(field_from: tuple, field_to: tuple) -> bool:
    """
    Indicates if the input fields are horizontally or vertically neighboured.

    :param field_from: coordinates of from-field.
    :param field_to: coordinates of to-field.
    :return: True if the fields are neighboured, False otherwise.
    """
    return np.linalg.norm(np.subtract(field_from, field_to)) <= 1

In [6]:
def num_key_neighbours(comb: list, key: str, field: tuple) -> int:
    result = 0
    y = field[0]
    x = field[1]

    for step in [[0, 1], [1, 0], [0, -1], [-1, 0]]:
        y += step[0]
        x += step[1]

        if 1 >= y >= 0 and 1 >= x >= 0 and comb[y][x] == key:
            result += 1

        y = field[0]
        x = field[1]

    return result



In [7]:
def impossible_store_transition(key: str, from_comb: list, to_comb: list, field: tuple):
    """
    Indicates if this store transition is impossible.

    Example:
        from-state: ['red']  ->  ['empty' 'blue']
                                 ['white' 'empty']

        to-state: ['empty' 'blue']
                  ['white' 'empty'] -> ['blue']

        is impossible, because 'red' needs to be stored somewhere.

    :param key: key of the item to be stored in the warehouse (in example: 'red').
    :param from_comb: from-state without the key as list (item to be stored, in example: [['empty', 'blue']
                                                                                          ['white', 'empty']]).
    :param to_comb: to-state without the key as list (item to be stored/restored, in example: [['empty', 'blue']
                                                                                               ['white', 'empty']]).
    :param field: coordinates the robot stands at.
    :return: True if the transition is impossible, False otherwise.
    """
    from_comb_temp = copy.deepcopy(from_comb)
    y = field[0]
    x = field[1]

    if from_comb_temp[y][x] == 'empty':
        from_comb_temp[y][x] = key

        if from_comb_temp == to_comb:
            return False
    return True

In [8]:
def impossible_restore_transition(key: str, from_comb: list, to_comb: list, field: tuple):
    """
    Indicates if this restore transition is impossible.

    Example:
        from-state: ['empty' 'blue']
                    ['white' 'empty'] -> ['blue']

        to-state: ['empty' 'blue']
                  ['white' 'empty'] -> ['white']

        is impossible, because some 'blue' needs to be restored.

    :param key: key of the item to be restored in the warehouse (in example: 'blue').
    :param from_comb: from-state without the key as list (item to be restored, in example: [['empty', 'blue']
                                                                                            ['white', 'empty']]).
    :param to_comb: to-state without the key as list (item to be stored/restored, in example: [['empty', 'blue']
                                                                                               ['white', 'empty']]).
    :param field: coordinates the robot stands at.
    :return: True if the transition is impossible, False otherwise.
    """
    from_comb_temp = copy.deepcopy(from_comb)
    y = field[0]
    x = field[1]

    if from_comb_temp[y][x] == key:
        from_comb_temp[y][x] = 'empty'

        if from_comb_temp == to_comb:
            return False
    return True


In [9]:
def store_transition_probability(from_comb: list, p_to: int, field: tuple):
    """
    Probability to transition from the from-state to the to-state via storing.

    Example:
        from-state: ['red']  ->  ['empty' 'blue']
                                 ['white' 'empty']

        to-state: ['red' 'blue']
                  ['white' 'empty'] -> ['blue']

        returns (1 / 2) * prob_blue
    :param from_comb: from-state without the key (item to be stored) as list (in example: [['empty', 'blue']
                                                                                           ['white', 'empty']]).
    :param p_to: probability of the store/restore operation in the to-state (in example: prob_blue).
    :param field: coordinates the robot stands at.
    :return: Transition probability of storing.
    """
    num_free_fields = num_key_neighbours(from_comb, 'empty', field)
    if num_free_fields == 0:
        return 0

    return p_to / (2 * num_free_fields)

In [10]:
def restore_transition_probability(key_from: str, from_comb: list, p_to: int, field: tuple):
    """
    Probability to transition from the from-state to the to-state via restoring.

    Example:
        from-state: ['red' 'blue']
                    ['white' 'empty'] -> ['red']

        to-state: ['empty' 'blue']
                  ['white' 'empty'] -> ['blue']

        returns (1 / 2) * prob_blue

    :param key_from: key (item to be restored) of from-state (in example: 'red').
    :param from_comb: from-state without the key (item to be restored) as list (in example: [['red', 'blue']
                                                                                             ['white', 'empty']]).
    :param p_to: probability of the store/restore operation in the to-state (in example: prob_blue).
    :param field: coordinates the robot stands at.
    :return: Transition probability of storing.
    """
    num_free_fields = num_key_neighbours(from_comb, key_from, field)
    if num_free_fields == 0:
        return 0

    return p_to / (2 * num_free_fields)

In [11]:
def get_store_transitions(key_from: str, from_comb: list, p_to: float, field: tuple) -> list:
    """
    Returns a list of all store transition probabilities with fixed from-state and fixed stored/restored item in the to-state.

    :param key_from: key of the item to be stored in the warehouse.
    :param from_comb: from-state without the key as list (item to be stored)
    :param p_to: probability of the store/restore operation in the to-state.
    :param field: coordinates the robot stands at.
    :return: List of store transition probabilities.
    """
    if from_comb[field[0]][field[1]] != 'empty':

    result = []
    for item in item_combinations:
        if impossible_store_transition(key_from, from_comb, item, field):
            result.append(0)
            continue

        result.append(1 / 6)
        # result.append(store_transition_probability(item, p_to, field))

    return result

In [12]:
def get_restore_transitions(key_from: str, from_comb: list, p_to: float, field: tuple) -> list:
    """
    Returns a list of all restore transition probabilities with fixed from-state and fixed stored/restored item in the to-state.

    :param key_from: key of the item to be restored in the warehouse.
    :param from_comb: from-state without the key as list (item to be stored)
    :param p_to: probability of the store/restore operation in the to-state.
    :param field: coordinates the robot stands at.
    :return: List of restore transition probabilities.
    """
    result = []
    for item in item_combinations:
        if impossible_restore_transition(key_from, from_comb, item, field):
            result.append(0)
            continue

        result.append(1 / 6)
        # result.append(restore_transition_probability(key_from, item, p_to, field))

    return result

In [47]:
def init_transitions(w, b, r) -> np.array:
    """
    Computes the transition probability matrix.

    :param w: probability that a white item is store/restored.
    :param b: probability that a blue item is store/restored.
    :param r: probability that a red item is store/restored.
    :return: transition probability matrix.
    """
    probabilities = {'white': w, 'blue': b, 'red': r}
    fields = [(0, 0), (0, 1), (1, 0), (1, 1)]

    result = []
    for field in fields:
        matrix = []
        for key_row, p_row in probabilities.items():
            for comb_row in item_combinations:
                line = []
                for key_column, p_column in probabilities.items():
                    store_transitions = get_store_transitions(key_from=key_row, from_comb=comb_row, p_to=p_column, field=field)

                    line += store_transitions + store_transitions

            for comb_row in item_combinations:
                line = []
                for key_column, p_column in probabilities.items():
                    restore_transitions = get_restore_transitions(key_from=key_row, from_comb=comb_row, p_to=p_column, field=field)

                    line += restore_transitions + restore_transitions

                matrix.append(line)

        result.append(matrix)

    return np.array(result)


In [46]:
# define store/restore probabilities for an item of a specific color (white, blue, red):
white = 0.2
blue = 0.7
red = 0.1

transitions_test = init_transitions(white, blue, red)
neu = np.sum(transitions_test, axis=2)

KeyboardInterrupt: 

In [34]:
print(item_combinations[64])
print(neu)

KeyboardInterrupt: 

In [20]:
import unittest
from numpy import testing

class TestTransition(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        cls.transitions = init_transitions(white, blue, red)

    def test_transition_matrix_shape(self):
        num_actions = 4
        num_states = np.power(4, 4) * 2 * 3
        self.assertEqual((num_actions, num_states, num_states), self.transitions.shape)

    def test_transition_matrix_sum_one(self):
        transitions_summed = np.sum(self.transitions, axis=2)
        testing.assert_array_equal(transitions_summed, np.ones_like(transitions_summed))

unittest.main(argv=[''], verbosity=2, exit=False)


test_impossible_store_transition_False (__main__.TestTransitionSubMethods) ... FAIL
test_impossible_store_transition_True (__main__.TestTransitionSubMethods) ... FAIL

FAIL: test_impossible_store_transition_False (__main__.TestTransitionSubMethods)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-16-14f55ad1c974>", line 8, in test_impossible_store_transition_False
    self.assertFalse(result)
AssertionError: True is not false

FAIL: test_impossible_store_transition_True (__main__.TestTransitionSubMethods)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-16-14f55ad1c974>", line 15, in test_impossible_store_transition_True
    self.assertTrue(result)
AssertionError: False is not true

----------------------------------------------------------------------
Ran 2 tests in 0.003s

FAILED (failures=2)


<unittest.main.TestProgram at 0x19d765d3790>

test_transition_matrix_shape (__main__.TestTransition) ... ok
test_transition_matrix_sum_one (__main__.TestTransition) ... FAIL
test_impossible_store_transition_False (__main__.TestTransitionSubMethods) ... FAIL
test_impossible_store_transition_True (__main__.TestTransitionSubMethods) ... FAIL

FAIL: test_transition_matrix_sum_one (__main__.TestTransition)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-20-a560f891b2f6>", line 17, in test_transition_matrix_sum_one
    testing.assert_array_equal(transitions_summed, np.ones_like(transitions_summed))
  File "c:\users\thorb\pycharmprojects\saki\ex4\venv\lib\site-packages\numpy\testing\_private\utils.py", line 934, in assert_array_equal
    assert_array_compare(operator.__eq__, x, y, err_msg=err_msg,
  File "c:\users\thorb\pycharmprojects\saki\ex4\venv\lib\site-packages\numpy\testing\_private\utils.py", line 844, in assert_array_compare
    raise AssertionError

<unittest.main.TestProgram at 0x19d762b2f10>

In [16]:
class TestTransitionSubMethods(unittest.TestCase):

    def test_impossible_store_transition_False(self):
        from_comb = [['empty', 'blue'], ['white', 'empty']]
        to_comb = [['red', 'blue'], ['white', 'empty']]
        field = (1, 0)
        result = impossible_store_transition('red', from_comb, to_comb, field)
        self.assertFalse(result)

    def test_impossible_store_transition_True(self):
        from_comb = [['empty', 'blue'], ['white', 'empty']]
        to_comb = [['red', 'blue'], ['white', 'empty']]
        field = (0, 0)
        result = impossible_store_transition('red', from_comb, to_comb, field)
        self.assertTrue(result)

#unittest.main(argv=[''], verbosity=2, exit=False)

In [17]:
process = mdp.PolicyIteration(transitions, reward, discount=0.9)

NameError: name 'transitions' is not defined

In [None]:
process.run()
process.V

In [None]:
import mdptoolbox.example
P, R = mdptoolbox.example.forest(is_sparse=True)
P[0].toarray()