In [1]:
import gym
from gym import spaces
import numpy as np 
import simpy 
import random
import pandas as pd

## Notes

OpenAI Gym apparently provides several types of spaces (six are tried below: Box, Discrete, Dict, MultiDiscrete, MultiBinary, and Tuple). I will play around with them and decide which one makes sense for my problem.

## Spaces in OpenAI Gym

### Box

In [2]:
# Box: an n-dimensional range with per-component bounds; here two integer
# components, each bounded to [0, 1].
# `np.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
space_box = gym.spaces.Box(low=np.array([0, 0]), high=np.array([1, 1]), dtype=int)
space_box.sample()

array([0, 0])

### Discrete

In [24]:
space_discrete = gym.spaces.Discrete(500)

In [34]:
space_discrete.sample()

101

### Dict

In [5]:
# Dict space: every key maps to its own sub-space, and a sample holds one value
# per key (see the OrderedDict output below, one entry per machine).
space_dict = gym.spaces.Dict({"Machine1":gym.spaces.Discrete(2),"Machine2":gym.spaces.Discrete(2)})
space_dict.sample()

OrderedDict([('Machine1', 1), ('Machine2', 0)])

### Multi Discrete

In [6]:
space_multiDiscrete = gym.spaces.MultiDiscrete([2,2,2])

In [7]:
space_multiDiscrete.sample()

array([0, 0, 0])

### Multi Binary

In [8]:
space_multiBinary = gym.spaces.MultiBinary(3)

In [9]:
space_multiBinary.sample()

array([0, 1, 1], dtype=int8)

### Tuple

#### Tuple with Discrete

In [96]:
test_dict = dict()

In [130]:
observation_space = gym.spaces.Tuple((gym.spaces.Discrete(3),gym.spaces.Discrete(3),gym.spaces.Discrete(3)))

In [131]:
observation_space.sample()

(1, 1, 2)

In [151]:
test_dict[(observation_space.sample())] = 'hi'

In [152]:
for i in test_dict:
    print(i)

(1, 0, 1)
(1, 1, 1)
(0, 1, 1)
(0, 0, 1)
(0, 1, 0)
(1, 0, 0)
(0, 0, 0)
(1, 1, 0)
(2, 0, 1)
(0, 2, 0)
(1, 2, 1)
(2, 2, 1)
(2, 0, 0)
(2, 1, 0)
(1, 0, 2)
(1, 2, 0)


#### Tuple with MultiDiscrete - doesn't work

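Note: 
- A single MultiDiscrete object apparently cannot be put in a tuple; we have to include multiple of them. The most likely reason is Python syntax rather than Gym: `(x)` without a trailing comma is just `x`, not a one-element tuple, so `gym.spaces.Tuple((gym.spaces.MultiDiscrete([2,2,2]),))` (note the trailing comma) should work (TODO: verify).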

In [85]:
observation_space = gym.spaces.Tuple((gym.spaces.Discrete(3),gym.spaces.Discrete(3)))

In [89]:
observation_space.sample()

(1, 2)

In [67]:
observation_space = gym.spaces.Tuple((gym.spaces.MultiDiscrete([2,2,2]),gym.spaces.MultiDiscrete([9,2,2])))

In [81]:
observation_space.sample()

(array([1, 1, 1]), array([5, 1, 1]))

In [90]:
# Try to use a sampled observation as a dictionary key.
# NOTE(review): the recorded output below, (2, 2), looks like it came from the
# Discrete-based Tuple (execution counts are out of order). With the
# MultiDiscrete-based Tuple defined just above, a sample is a tuple of NumPy
# arrays, which presumably cannot be hashed as a dict key -- TODO confirm by
# re-running the notebook top to bottom.
test_dict = dict()
test_dict[(observation_space.sample())] = 'hi'

In [91]:
for i in test_dict:
    print(i)

(2, 2)


## Different action spaces

Exploring which kind of OpenAI Gym space fits each of the different kinds of action spaces I might need.

### Binary action space

I think I should use the `MultiBinary` action space

### Multi Discrete action space

I should use `MultiDiscrete` action space in this case

## Editing state space    

In [75]:
from gym.utils import seeding

In [77]:
np_random, seed = seeding.np_random(None)

In [79]:
np_random

RandomState(MT19937) at 0x119EE3258

Basically, they are asking for a random value between -0.6 and -0.4.

In [89]:
state = np.array([np_random.uniform(low=-0.6, high=-0.4), 0])

In [139]:
type(state)

numpy.ndarray

How does the output relate to the observation space definition?

In [94]:
# Physical constants for the example environment
# (presumably taken from Gym's MountainCar -- TODO confirm).
min_position = -1.2
max_position = 0.6
max_speed = 0.07
goal_position = 0.5
goal_velocity = 10
force = 0.001
gravity = 0.0025

# Per-dimension bounds of the observation: index 0 is position, index 1 is speed.
low = np.array([min_position, -max_speed], dtype=np.float32)
high = np.array([max_position, max_speed], dtype=np.float32)

viewer = None

# Three discrete actions; observations are float32 vectors bounded by low/high.
action_space = spaces.Discrete(3)
observation_space = spaces.Box(low, high, dtype=np.float32)

In [95]:
print(observation_space)

Box(-1.2000000476837158, 0.6000000238418579, (2,), float32)


In [138]:
type(observation_space.sample())

numpy.ndarray

How does the hospital simulation do it?

In [147]:
states = dict()
var1_dict = dict()

In [148]:
var1_dict['pos'] = 3
var1_dict['vel'] = 6


In [149]:
states['var1'] = var1_dict

In [150]:
states['var2'] = 5

In [151]:
# Flatten the state dictionary to its values, in insertion order.
list(states.values())

[{'pos': 3, 'vel': 6}, 5]

## Formulating the state space 

In [70]:
states = dict()

In [71]:
states['current_time'] = 3 # current simulation time
states['MC1_queue1_due'] = 20 # Due date of the first product in the queue at machine 1
states['MC1_queue1_type'] = 1 # Product type of the first product in the queue at machine 1
states['MC1_prod_type'] = 0 # Product type of the product inside machine 1

In [72]:
# The observation vector is simply the state values, in insertion order.
list(states.values())

[3, 20, 1, 0]

# Exploring the Taxi Environment

In [85]:
# Note: gym is also imported in the first cell of the notebook.
import gym

# NOTE(review): `.env` presumably unwraps the object returned by gym.make --
# confirm against the Gym version in use.
env = gym.make("Taxi-v3").env

# Draw the current taxi grid as text (see the output below).
env.render()

+---------+
|[34;1mR[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| :[43m [0m|[35mB[0m: |
+---------+



In [86]:
env.observation_space

Discrete(500)

In [87]:
# Take one randomly sampled action. Per the output below, step() returns a
# 4-tuple (presumably observation, reward, done flag, info dict -- TODO confirm
# for this Gym version).
action = env.action_space.sample()
env.step(action)

(443, -10, False, {'prob': 1.0})

## The encoding function (Multi Discrete to Discrete)

In [None]:
states['MC1_queue1_type'] = prod_req_1.prod_type # Product type of the first product in the queue at machine 1
            states['time left'] = MC1_queue1_due - self.envSimpy.now # +ve = good / -ve = bad
        else:
            #states['MC1_queue1_due'] = None
            states['MC1_queue1_type'] = None 
            states['time left'] = None
        
        # Getting details of the product inside the machine
        if sim.machine.users != []: #if there is someone using the machine
            states['MC1_prod_type']

Imagine that the possible time-due values lie between -200 and 200.

Now I have to encode them into 20 slots.

How do I do that?

In [128]:
def time_encode(time):
    """Map a time value onto a coarse slot index, 20 time units per slot.

    The body had been commented out, which left the ``def`` without a body (a
    syntax error). It is restored here so that it matches the recorded call
    ``time_encode(200) -> 9.0``.

    Args:
        time: Numeric time value (scalar or NumPy array).

    Returns:
        ``floor((time - 1) / 20)`` as a NumPy float (or float array).
        The result is not clipped: 200 maps to 9.0 and -200 maps to -11.0,
        so inputs in [-200, 200] span 21 slot values, not 20.
    """
    # Subtracting 1 makes exact multiples of 20 fall into the lower slot,
    # e.g. 20 -> 0 and 21 -> 1.
    time_encoded = (time - 1) / 20
    return np.floor(time_encoded)

In [148]:
time_encode(200)

9.0

Using `np.digitize`:

In [140]:
for i in range(-2,2,1):
    print(i)

-2
-1
0
1


In [145]:
np.array(range(-2,2,1))

array([-2, -1,  0,  1])

In [150]:
# Sample values to bucket.
x = np.array([-1, 0.2, 0, -1.0, 1.6])
# Alternative, non-uniform edges tried earlier:
#bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
# Integer bin edges -2, -1, 0, 1.
bins = np.arange(-2, 2)
# For each value, the index of the bin it falls into (0 would mean "below the first edge").
inds = np.digitize(x, bins)

In [147]:
inds

array([2, 3, 3, 2, 4])

In [113]:
def encode(MC1_queue1_type, MC1_queue1_timeleft, MC1_prod_type):
    """Pack three state components into a single index.

    Mixed-radix packing: the queued product type is the most significant
    digit, followed by the time left (radix 200) and the in-machine product
    type (radix 2). The result equals
    ``(MC1_queue1_type * 200 + MC1_queue1_timeleft) * 2 + MC1_prod_type``.

    NOTE(review): the index is only unique if the time left lies in [0, 200)
    and the in-machine product type is 0 or 1 -- confirm against the caller.
    """
    code = MC1_queue1_type
    # Shift by each component's radix, then add that component as the next digit.
    for radix, digit in ((200, MC1_queue1_timeleft), (2, MC1_prod_type)):
        code *= radix
        code += digit
    return code

In [159]:
def encode(obs):
    """Pack a two-component observation into a single index.

    The first component is the high digit and the second is the low digit
    (radix 2), so the result equals ``obs[0] * 2 + obs[1]``.

    Note: this definition reuses the name ``encode`` and therefore replaces
    any earlier function of the same name in the notebook.
    """
    head = obs[0]
    head *= 2
    tail = obs[1]
    head += tail
    return head

In [162]:
obs = [0,0]
encode(obs)

0

In [187]:
# Range of raw values expected by encode_new.
min_val = -200
max_val = 200


def encode_new(obs):
    """Rescale ``obs`` from [min_val, max_val] onto [0, 10] and round up.

    Returns a NumPy float: ``min_val`` maps to 0.0, ``max_val`` maps to 10.0,
    and anything strictly in between is rounded up to the next whole number.
    Inputs outside the range are not clipped.
    """
    span = max_val - min_val
    fraction = (obs - min_val) / span
    return np.ceil(fraction * 10)


In [197]:
encode_new(161)

10.0