In [1]:
import numpy as np

---
## Defining the warehouse model
Adding rewards and punishments

<img src="https://github.com/LarsTinnefeld/mTSP_warehouse_picking/blob/main/wh_grid.PNG?raw=true" width="600" height="600">

In [2]:
# State space: a 3-dimensional array with one entry per
# (row, column, movement) combination, i.e. every location on the
# 2-dimensional WH footprint paired with each of the 4 movements

# WH footprint (rows, columns)
wh_rows, wh_cols = 15, 15

# Operator movements; the list position is the movement's index:
# 0 = up, 1 = right, 2 = down, 3 = left
movements = [
    'up',
    'right',
    'down',
    'left',
]

# Every entry starts at zero
wh_states = np.zeros(shape=(wh_rows, wh_cols, len(movements)))

In [3]:
# Show the freshly created (all-zero) state array
wh_states

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        

In [4]:
# Start with every cell at -100; the path cells are switched to -1 afterwards
rewards = np.full(shape=(wh_rows, wh_cols), fill_value=-100)

In [5]:
# Defining aisles: for each warehouse row (dict key), the list of column
# numbers that belong to an aisle.
# The cell that writes these into `rewards` indexes with `column - 1`, so an
# entry of 0 becomes index -1, i.e. the last column.
# NOTE(review): confirm that using 0 for the last column is intended.
aisles = {
    0: list(range(1, 16)),
    1: list(range(16)),
    **{row: [0, 1, 4, 7, 10, 13] for row in range(2, 7)},
    7: list(range(16)),
    **{row: [0, 1, 4, 7, 10, 13] for row in range(8, 13)},
    13: list(range(16)),
    14: list(range(1, 16)),
}

In [6]:
# Show the row -> aisle-column mapping
aisles

{0: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 1: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 2: [0, 1, 4, 7, 10, 13],
 3: [0, 1, 4, 7, 10, 13],
 4: [0, 1, 4, 7, 10, 13],
 5: [0, 1, 4, 7, 10, 13],
 6: [0, 1, 4, 7, 10, 13],
 7: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 8: [0, 1, 4, 7, 10, 13],
 9: [0, 1, 4, 7, 10, 13],
 10: [0, 1, 4, 7, 10, 13],
 11: [0, 1, 4, 7, 10, 13],
 12: [0, 1, 4, 7, 10, 13],
 13: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 14: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}

In [42]:
# Adding aisles: every listed cell gets a reward of -1
for row, cols in aisles.items():
    # Entries are shifted by one before indexing; an entry of 0 therefore
    # becomes -1, which NumPy resolves to the last column of the row.
    rewards[row, [col - 1 for col in cols]] = -1

In [43]:
# Show the reward grid after the aisle cells were set to -1
rewards

array([[  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1],
       [  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1],
       [  -1, -100, -100,   -1, -100, -100,   -1, -100, -100,   -1, -100,
        -100,   -1, -100,   -1],
       [  -1, -100, -100,   -1, -100,

In [44]:
# Adding start- and end position:
# cell (0, 0) gets reward 0 and cell (14, 0) gets reward 100
rewards[0, 0], rewards[14, 0] = 0, 100

The warehouse is now set up.

---
## Generating the virtual operator
I will start with a simple example:
- One operator
- Random starting point
- No pick orders
- Learn to find the optimal path