In [1]:
import gym
import numpy as np
import queueing_network

In [2]:
# Parameters that are bundled into `config1` and handed to the environment.
# Their exact meaning is defined by the queueing_network package.
alpha = 0.8
nu = np.array((1.3, 1.2, 1.1, 1.0))
c = np.arange(3, 7) / 3  # [1, 4/3, 5/3, 2]
N = 10  # also the upper bound of every state coordinate swept by get_value

In [3]:
# Configuration mapping passed to gym.make; the state starts at four zeros.
config1 = dict(
    nu=nu,
    alpha=alpha,
    c=c,
    N=N,
    starting_state=np.zeros(4, dtype=int),
)

In [4]:
# Build the environment with id 'queueing_network-v0' from the queueing_network
# module, forwarding config1 as the `config` keyword argument.
# NOTE(review): presumably importing queueing_network registers the id - confirm.
env = gym.make('queueing_network:queueing_network-v0', config = config1)

In [5]:
# Sanity check: show the environment's current state and the spaces it declares.
print(env.state)
print(env.action_space)
print(env.observation_space)

[0 0 0 0]
MultiBinary(4)
MultiDiscrete(4)


In [38]:
# Step once with the all-zero action from whatever state the env currently holds.
# NOTE(review): this cell's execution count is out of sequence, so the first
# printed transition depends on earlier kernel state, not on a fresh environment.
print(env.step([0, 0, 0, 0]))
print('***')
# Overwrite the state directly (no reset call), then step with an action whose
# only non-zero entry is the second one.
env.state = np.array([1, 1, 1, 1])
print(env.step([0, 1, 0, 0]))

(array([1., 1., 1., 1.]), -1.111111111111111, False, {})
***
(array([1., 0., 2., 1.]), -1.111111111111111, False, {})


In [7]:
# Draw one random action from the action space (printed above as MultiBinary(4)).
env.action_space.sample()

array([0, 0, 1, 1], dtype=int8)

In [41]:
import copy

def get_value(env, eps=0.1, n_max=None):
    """Estimate a running-average reward under random actions for every grid state.

    For each state ``(i_1, i_2, i_3, i_4)`` with every coordinate in
    ``0..n_max`` the environment is forced into that state by assigning
    ``env.state`` and is then stepped with actions drawn from
    ``env.action_space.sample()``. After each step the running average of the
    rewards seen so far is updated; the rollout stops as soon as two
    consecutive running averages differ by at most ``eps``, and the last
    running average is stored for that state.

    Parameters
    ----------
    env : object
        Must expose a writable ``state`` attribute, ``action_space.sample()``
        and ``step(action)`` returning a 4-tuple whose second item is the
        reward.
    eps : float, default 0.1
        Non-negative stopping tolerance on the change of the running average.
    n_max : int, optional
        Largest value of each state coordinate. When omitted, the
        notebook-level ``N`` is used, which is what the original one-argument
        call did.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_max + 1,) * 4`` holding the estimate per state.

    Raises
    ------
    ValueError
        If ``eps`` is negative (the stopping rule could never be satisfied).

    Notes
    -----
    * The ``done`` flag returned by ``env.step`` is ignored and the
      environment is never reset; on return ``env.state`` is whatever the
      last rollout left behind.
    * The stopping rule only compares consecutive running averages, so a
      rollout may stop after very few steps and the estimate is noisy.
    """
    if eps < 0:
        raise ValueError("eps must be non-negative")
    if n_max is None:
        n_max = N  # fall back to the notebook-level grid size

    value = np.zeros((n_max + 1,) * 4)
    # np.ndindex walks the grid with the last coordinate varying fastest,
    # i.e. in the same order as four nested range() loops.
    for state in np.ndindex(*value.shape):
        env.state = np.array(state)
        total_reward = 0
        steps = 0
        average = 0
        # At least one step is always taken, whatever the tolerance is.
        while True:
            previous = average
            _, reward, _, _ = env.step(env.action_space.sample())
            total_reward += reward
            steps += 1
            average = total_reward / steps
            if abs(average - previous) <= eps:
                break
        value[state] = average
    return value
    


In [42]:
# Per-state running-average reward under randomly sampled actions, as estimated
# by get_value. Note that `alpha` plays no part in this estimate: it is a plain
# average of observed rewards, not a discounted value function.
get_value(env)

array([[[[  0.        ,  -0.37037037,  -0.74074074, ...,  -3.05555556,
           -3.33333333,  -3.7037037 ],
         [ -0.40123457,  -0.67901235,  -1.04938272, ...,  -3.27160494,
           -3.7345679 ,  -4.01234568],
         [ -0.61728395,  -0.98765432,  -1.38888889, ...,  -3.58024691,
           -3.95061728,  -4.32098765],
         ...,
         [ -2.4691358 ,  -2.93209877,  -3.20987654, ...,  -5.43209877,
           -5.89506173,  -6.17283951],
         [ -2.77777778,  -3.14814815,  -3.51851852, ...,  -5.74074074,
           -6.11111111,  -6.48148148],
         [ -3.08641975,  -3.20987654,  -3.58024691, ...,  -5.80246914,
           -6.41975309,  -6.79012346]],

        [[ -0.27777778,  -0.61728395,  -1.08024691, ...,  -3.20987654,
           -3.58024691,  -3.95061728],
         [ -0.55555556,  -0.95679012,  -1.32716049, ...,  -3.54938272,
           -3.88888889,  -4.25925926],
         [ -0.95679012,  -1.2345679 ,  -1.69753086, ...,  -3.91975309,
           -4.22839506,  -4.56790

In [62]:
# evaluate random policy

def evaluate(env, numiters):
    """Repeat ``get_value`` several times and summarise the spread per state.

    Parameters
    ----------
    env : object
        Environment forwarded unchanged to ``get_value(env)``.
    numiters : int
        Number of independent ``get_value`` runs to collect.

    Returns
    -------
    mean : numpy.ndarray
        Shape ``(N + 1,) * 4``; per-state mean of the ``numiters`` estimates.
    std : numpy.ndarray
        Same shape; per-state standard deviation of the estimates, computed
        with NumPy's default ``ddof=0``.
    """
    grid_shape = (N + 1,) * 4
    # One slot along the last axis per repetition.
    value = np.zeros(grid_shape + (numiters,))
    for i in range(numiters):
        value[..., i] = get_value(env)

    # Reduce over the repetition axis in one call each instead of visiting
    # every state in a Python loop.
    mean = np.mean(value, axis=-1)
    std = np.std(value, axis=-1, dtype=np.float64)
    return mean, std
        


In [69]:
# Run the random-action estimate 10 times and keep the per-state mean and
# standard deviation across those runs.
mean, std = evaluate(env, 10)

In [70]:
# Per-state mean of the 10 get_value estimates.
mean

array([[[[  0.        ,  -0.39814815,  -0.75925926, ...,  -2.9382716 ,
           -3.30864198,  -3.66358025],
         [ -0.33024691,  -0.69753086,  -1.02160494, ...,  -3.27160494,
           -3.66975309,  -3.97839506],
         [ -0.62962963,  -0.96296296,  -1.33641975, ...,  -3.57716049,
           -3.9372428 ,  -4.28703704],
         ...,
         [ -2.48765432,  -2.85493827,  -3.18055556, ...,  -5.41975309,
           -5.7962963 ,  -6.13271605],
         [ -2.79012346,  -3.12345679,  -3.54938272, ...,  -5.7191358 ,
           -6.11419753,  -6.49074074],
         [ -3.12037037,  -3.47839506,  -3.85802469, ...,  -6.01851852,
           -6.4382716 ,  -6.79938272]],

        [[ -0.26851852,  -0.62037037,  -0.97530864, ...,  -3.23765432,
           -3.62345679,  -3.92283951],
         [ -0.57098765,  -0.87860082,  -1.28395062, ...,  -3.53395062,
           -3.86604938,  -4.25925926],
         [ -0.88271605,  -1.25617284,  -1.63580247, ...,  -3.77983539,
           -4.1882716 ,  -4.54526

In [71]:
# Per-state standard deviation of the 10 get_value estimates.
std

array([[[[0.00000000e+00, 4.24312564e-02, 3.70370370e-02, ...,
          7.40740741e-02, 7.40740741e-02, 1.06961256e-01],
         [3.66492040e-02, 3.70370370e-02, 9.90062650e-02, ...,
          4.44089210e-16, 3.50549898e-02, 9.60764347e-02],
         [2.82875043e-02, 7.40740741e-02, 7.56645103e-02, ...,
          8.88653089e-02, 7.61664363e-02, 9.60764347e-02],
         ...,
         [3.70370370e-02, 2.84553841e-02, 1.32113548e-01, ...,
          8.30470620e-02, 8.81657831e-02, 1.06961256e-01],
         [2.82875043e-02, 7.40740741e-02, 4.14086662e-02, ...,
          7.56645103e-02, 9.25925926e-03, 2.77777778e-02],
         [4.01234568e-02, 9.95818877e-02, 4.14086662e-02, ...,
          9.76011624e-02, 3.70370370e-02, 2.77777778e-02]],

        [[3.66492040e-02, 9.25925926e-03, 8.30470620e-02, ...,
          4.24312564e-02, 4.18662345e-02, 1.15028674e-01],
         [2.84553841e-02, 1.30313696e-01, 8.30470620e-02, ...,
          7.71604938e-02, 2.06509852e-01, 7.17219138e-02],
        