In [1]:
import os
from copy import deepcopy

from tqdm import tqdm
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

from Environment import Environment, make_one_hot, give_mapping
from Networks import UserActor, AsstActor, CentralizedCritic

In [2]:
def give_prev_steps(prev_steps_assist, steps):
    """Return the padding rows used to pre-fill the assistant's step history.

    The incoming ``prev_steps_assist`` value is ignored: a brand-new list of
    ``steps - 1`` independent rows ``[0, 0, 0, 0, -1, -1]`` is always built
    and returned (an empty list when ``steps`` is 1 or less).
    """
    padding = []
    for _ in range(steps - 1):
        # A fresh literal per iteration keeps the rows from aliasing each other.
        padding.append([0, 0, 0, 0, -1, -1])
    return padding

In [3]:
class Agent:
    """Actor-critic trainer for a user policy and an assistant policy on one Environment.

    Per step, the user actor picks one of 4 actions from the observation
    ``[current location pair, destination pair]``. The assistant actor then
    picks one of 4 actions from a window of ``memory_len`` rows (each row is
    the one-hot user action followed by the location it was taken from) plus
    the environment cell mapping. Each actor has its own critic; both critics
    receive ``[ob_user, ob_assist, env_cell_mapping, assistant one-hot action]``.

    NOTE(review): every loss below is driven by ``reward_user``; the
    ``reward_assist`` value returned by ``Environment.step`` is never used.
    Confirm that a shared reward is intended.

    NOTE(review): each optimizer instance is applied to two different models.
    Some newer Keras optimizer implementations expect a single variable set
    per optimizer -- verify on the TensorFlow version in use.
    """

    def __init__(self):
        """Build the four networks, both optimizers, the loss and the environment."""
        # Number of rows in the assistant's observation window.
        self.memory_len = 6
        self.user_actor = UserActor()
        self.asst_actor = AsstActor(self.memory_len)
        self.user_critic = CentralizedCritic(self.memory_len)
        self.asst_critic = CentralizedCritic(self.memory_len)
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras versions reject.
        self.optimizer_actors = tf.keras.optimizers.Adam(learning_rate=0.0001)
        self.optimizer_critic = tf.keras.optimizers.Adam(learning_rate=0.0002)
        self.huber_loss = tf.keras.losses.Huber()

        # Discount factor used when turning rewards into returns.
        self.gamma = 0.90
        self.env = Environment()
        self.env.cells = np.array([[0, 0], [1, 1], [0, 1], [1, 0], [0.5, 0.3], [0.5, 0.7]])
        # Add a leading and a trailing axis of size 1 to the mapping
        # (presumably batch and channel axes -- confirm against the Networks module).
        self.env_cell_mapping = give_mapping(self.env.cells)
        self.env_cell_mapping = self.env_cell_mapping[np.newaxis, :, :, np.newaxis]
        # Not referenced inside this class; kept so external readers of the attribute still work.
        self.eps = 10e-6

    @staticmethod
    def _discounted_returns(rewards, gamma):
        """Return the discounted return ``G_t = r_t + gamma * G_{t+1}`` for every step."""
        returns = []
        running = 0
        for reward in reversed(rewards):
            running = reward + gamma * running
            returns.append(running)
        returns.reverse()
        return returns

    @staticmethod
    def _anneal_max_steps(epoch, max_steps):
        """Return the per-episode step budget to use after ``epoch``.

        On multiples of 1000 up to and including 10000 the budget drops by 2
        (never below 10); past 10000 it drops by 1 on multiples of 2000
        (never below 6). On every other epoch it is returned unchanged.
        """
        if epoch % 1000 != 0:
            return max_steps
        if epoch <= 10000:
            return max(max_steps - 2, 10)
        if epoch % 2000 == 0:
            return max(max_steps - 1, 6)
        return max_steps

    def _assistant_observation(self, user_action, curr_loc, prev_steps_assist):
        """Append the newest row to the history window and add a leading axis of size 1."""
        # List concatenation: one-hot user action followed by the location
        # (assumes make_one_hot returns a list -- TODO confirm).
        newest_row = [make_one_hot(user_action, 4) + curr_loc]
        return np.array(prev_steps_assist + newest_row)[np.newaxis]

    def _save_models(self):
        """Write the four networks to their HDF5 checkpoint files in the working directory."""
        for model, path in (
            (self.user_actor.model, 'user.h5'),
            (self.asst_actor.model, 'asst.h5'),
            (self.user_critic.model, 'user_critic.h5'),
            (self.asst_critic.model, 'asst_critic.h5'),
        ):
            tf.keras.models.save_model(model, path)

    def learn(self, epochs=100000, max_steps=40):
        """Train all four networks, one sampled episode per epoch.

        Args:
            epochs: Number of episodes to run.
            max_steps: Initial cap on steps per episode; it is lowered over
                time by ``_anneal_max_steps``.

        Raises:
            ValueError: If ``max_steps`` is smaller than 1 (an empty episode
                has no loss to differentiate).

        Side effects: prints the running reward every 100 epochs, prints the
        number of finished episodes and saves checkpoints when that count is
        at least the best seen so far, and runs ``trial()`` every 200 epochs.
        """
        if max_steps < 1:
            raise ValueError("max_steps must be at least 1")

        env = self.env
        running_reward = 0
        reached = 0
        best = 0

        for epoch in tqdm(range(epochs)):
            user_log_probs = []
            asst_log_probs = []
            user_values = []
            asst_values = []
            rewards_history = []

            done = False
            episode_reward = 0
            start, dest = env.give_start_dest()
            ob_user = [start[0], start[1], dest[0], dest[1]]
            prev_steps_assist = give_prev_steps([], self.memory_len)
            step = 0

            # Persistent because four separate gradients are taken from this one tape.
            with tf.GradientTape(persistent=True) as tape:
                while not done and step < max_steps:
                    curr_loc = ob_user[:2]
                    target_loc = ob_user[2:4]
                    step += 1

                    ob_user = np.array(ob_user)[np.newaxis]
                    user_probs = self.user_actor.model(ob_user)
                    user_action = np.random.choice(4, p=np.squeeze(user_probs))
                    user_log_probs.append(tf.math.log(user_probs[0, user_action]))

                    ob_assist = self._assistant_observation(user_action, curr_loc, prev_steps_assist)
                    asst_probs = self.asst_actor.model([ob_assist, self.env_cell_mapping])
                    asst_action = np.random.choice(4, p=np.squeeze(asst_probs))
                    asst_log_probs.append(tf.math.log(asst_probs[0, asst_action]))

                    asst_output_one_hot = np.array(make_one_hot(asst_action, 4))[np.newaxis]
                    critic_inputs = [ob_user, ob_assist, self.env_cell_mapping, asst_output_one_hot]
                    user_values.append(self.user_critic.model(critic_inputs))
                    asst_values.append(self.asst_critic.model(critic_inputs))

                    # The environment receives asst_action + 1 (presumably a
                    # 1-based code -- confirm against Environment.step).
                    new_loc, reward_user, _reward_assist, done = env.step(
                        user_action, asst_action + 1, target_loc, curr_loc)

                    ob_user = new_loc[:] + target_loc
                    # Slide the window: drop the oldest row, keep the rest as plain lists.
                    prev_steps_assist = np.squeeze(ob_assist).tolist()[1:]

                    rewards_history.append(reward_user)
                    episode_reward += reward_user

                    if done:
                        reached += 1

                # Exponential moving average of the episode reward (weight 0.05).
                running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward

                returns = self._discounted_returns(rewards_history, self.gamma)

                user_losses = []
                asst_losses = []
                user_critic_losses = []
                asst_critic_losses = []
                for log_prob_user, log_prob_asst, val_user, val_asst, ret in zip(
                        user_log_probs, asst_log_probs, user_values, asst_values, returns):
                    # Actor loss: negative log-probability weighted by (return - critic value).
                    user_losses.append(-log_prob_user * (ret - val_user))
                    asst_losses.append(-log_prob_asst * (ret - val_asst))
                    # Critic loss: Huber distance between the predicted value and the return.
                    user_critic_losses.append(
                        self.huber_loss(tf.expand_dims(val_user, 0), tf.expand_dims(ret, 0)))
                    asst_critic_losses.append(
                        self.huber_loss(tf.expand_dims(val_asst, 0), tf.expand_dims(ret, 0)))

                user_loss = sum(user_losses)
                asst_loss = sum(asst_losses)
                user_critic_loss = sum(user_critic_losses)
                asst_critic_loss = sum(asst_critic_losses)

            updates = (
                (user_loss, self.user_actor.model, self.optimizer_actors),
                (asst_loss, self.asst_actor.model, self.optimizer_actors),
                (user_critic_loss, self.user_critic.model, self.optimizer_critic),
                (asst_critic_loss, self.asst_critic.model, self.optimizer_critic),
            )
            for loss, model, optimizer in updates:
                grads = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # Drop the persistent tape explicitly so the resources it holds are released.
            del tape

            if epoch % 100 == 0:
                print(running_reward)

            if epoch and epoch % 100 == 0:
                print(reached)
                if reached >= best:
                    best = reached
                    self._save_models()

                reached = 0

                if epoch % 200 == 0:
                    self.trial()
                    max_steps = self._anneal_max_steps(epoch, max_steps)

    def trial(self, max_steps=10):
        """Run one greedy (argmax) episode, printing each observation and the assistant's probabilities.

        Args:
            max_steps: Cap on the number of steps in the episode.

        Returns:
            The sum of ``reward_user`` collected during the episode.
        """
        env = self.env
        done = False
        episode_reward = 0
        start, dest = env.give_start_dest()
        ob_user = [start[0], start[1], dest[0], dest[1]]
        prev_steps_assist = give_prev_steps([], self.memory_len)
        step = 0

        while not done and step < max_steps:
            curr_loc = ob_user[:2]
            target_loc = ob_user[2:4]
            step += 1
            print(ob_user)

            ob_user = np.array(ob_user)[np.newaxis]
            user_probs = self.user_actor.model(ob_user)
            user_action = np.argmax(np.squeeze(user_probs))

            ob_assist = self._assistant_observation(user_action, curr_loc, prev_steps_assist)
            asst_probs = self.asst_actor.model([ob_assist, self.env_cell_mapping])
            print(asst_probs)
            asst_action = np.argmax(np.squeeze(asst_probs))

            new_loc, reward_user, _reward_assist, done = env.step(
                user_action, asst_action + 1, target_loc, curr_loc)

            ob_user = new_loc[:] + target_loc
            prev_steps_assist = np.squeeze(ob_assist).tolist()[1:]
            episode_reward += reward_user

        return episode_reward




In [4]:
# Instantiate the trainer; Agent.__init__ builds the four networks, both optimizers and the environment.
agent = Agent()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None, 2)]          0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_1 (Te [(None, 2)]          0           input_1[0][0]                    
__________________________________________________________________________________________________
subtract (Subtract)             (None, 2)            0           tf_op_layer_strided_slice[0][0]  
                                                                 tf_op_layer_strided_sl

Model: "functional_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice_4 (Te [(None, 2)]          0           input_8[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_5 (Te [(None, 2)]          0           input_8[0][0]                    
__________________________________________________________________________________________________
input_10 (InputLayer)           [(None, 11, 11, 1)]  0                                            
_______________________________________________________________________________________

In [5]:
# Resume from the checkpoints written by Agent.learn(), but only when all of
# them exist, so the notebook still runs top-to-bottom on a fresh checkout.
CHECKPOINTS = {
    'user_actor': 'user.h5',
    'user_critic': 'user_critic.h5',
    'asst_actor': 'asst.h5',
    'asst_critic': 'asst_critic.h5',
}
missing_checkpoints = [path for path in CHECKPOINTS.values() if not os.path.exists(path)]
if missing_checkpoints:
    print('Checkpoint file(s) not found, keeping the freshly initialised networks:', missing_checkpoints)
else:
    for attr_name, path in CHECKPOINTS.items():
        getattr(agent, attr_name).model = tf.keras.models.load_model(path)



In [None]:
# Start training (long-running). Progress is reported through tqdm plus the periodic prints inside Agent.learn.
agent.learn()

  0%|                                                                             | 1/50000 [00:01<24:49:27,  1.79s/it]

0.45
1
[0.5, 0.2, 0.5, 0.3]
tf.Tensor([[1.7136050e-05 3.2712461e-04 4.5490921e-02 9.5416474e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.5, 0.3]
tf.Tensor([[7.3341413e-05 8.5770356e-04 1.5459463e-01 8.4447426e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.5, 0.3]
tf.Tensor([[0.00565339 0.00794925 0.8386865  0.14771077]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.5, 0.3]
tf.Tensor([[0.78679764 0.03370312 0.1764149  0.00308435]], shape=(1, 4), dtype=float32)
[0.5, 0.4, 0.5, 0.3]
tf.Tensor([[0.8144103  0.03006248 0.15306559 0.00246164]], shape=(1, 4), dtype=float32)


  0%|▏                                                                           | 101/50000 [00:30<4:43:45,  2.93it/s]

6.423996349419676
97


  0%|▎                                                                           | 201/50000 [01:00<4:33:34,  3.03it/s]

6.183978349447237
94
[0.6, 0.7, 0.5, 0.7]
tf.Tensor([[1.4370766e-05 2.7980792e-04 5.2383099e-02 9.4732273e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.7, 0.5, 0.7]
tf.Tensor([[8.2657774e-05 8.2378363e-04 1.8370704e-01 8.1538653e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.5, 0.7]
tf.Tensor([[0.003141   0.00435883 0.8592871  0.13321312]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.5, 0.7]
tf.Tensor([[0.6806828  0.03242884 0.2828191  0.00406931]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.5, 0.7]
tf.Tensor([[0.63198644 0.03616557 0.32667017 0.00517784]], shape=(1, 4), dtype=float32)


  1%|▍                                                                           | 301/50000 [01:28<3:43:52,  3.70it/s]

5.63598149589614
94


  1%|▌                                                                           | 401/50000 [01:57<3:54:49,  3.52it/s]

5.995673326337752
96
[0.3, 0.5, 0.0, 1.0]
tf.Tensor([[1.3032876e-05 2.8676621e-04 4.0505614e-02 9.5919454e-01]], shape=(1, 4), dtype=float32)
[0.3, 0.9, 0.0, 1.0]
tf.Tensor([[4.7237947e-05 6.6155317e-04 1.2367963e-01 8.7561160e-01]], shape=(1, 4), dtype=float32)
[0.0, 0.9, 0.0, 1.0]
tf.Tensor([[0.00257267 0.00548117 0.7872344  0.2047117 ]], shape=(1, 4), dtype=float32)


  1%|▊                                                                           | 501/50000 [02:26<4:44:39,  2.90it/s]

6.136269237359994
97


  1%|▉                                                                           | 601/50000 [02:54<3:30:40,  3.91it/s]

6.399794859281093
96
[0.0, 0.9, 0.5, 0.3]
tf.Tensor([[8.6151049e-06 2.0512415e-04 3.4125842e-02 9.6566039e-01]], shape=(1, 4), dtype=float32)
[0.0, 0.5, 0.5, 0.3]
tf.Tensor([[2.8775263e-05 4.6219624e-04 8.1837252e-02 9.1767186e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.5, 0.3]
tf.Tensor([[0.00089114 0.00331085 0.65603614 0.33976185]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.5, 0.3]
tf.Tensor([[0.443354   0.04272919 0.5049878  0.00892903]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.5, 0.3]
tf.Tensor([[0.39430696 0.03909872 0.5580722  0.0085221 ]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.5, 0.3]
tf.Tensor([[0.5424393  0.04202941 0.40874994 0.0067814 ]], shape=(1, 4), dtype=float32)
[0.4, 0.3, 0.5, 0.3]
tf.Tensor([[0.4802938  0.04174106 0.4700308  0.00793434]], shape=(1, 4), dtype=float32)


  1%|█                                                                           | 701/50000 [03:24<3:14:23,  4.23it/s]

5.425504027915975
87


  2%|█▏                                                                          | 801/50000 [03:52<5:08:30,  2.66it/s]

4.256149921996609
88
[0.8, 0.3, 0.0, 1.0]
tf.Tensor([[8.4187413e-06 2.3335258e-04 3.6594521e-02 9.6316361e-01]], shape=(1, 4), dtype=float32)
[0.8, 0.7, 0.0, 1.0]
tf.Tensor([[2.8110002e-05 5.1475799e-04 1.1307668e-01 8.8638043e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.0, 1.0]
tf.Tensor([[0.00075672 0.00294126 0.7290528  0.26724926]], shape=(1, 4), dtype=float32)
[0.1, 0.7, 0.0, 1.0]
tf.Tensor([[0.37891272 0.03948717 0.5719449  0.0096553 ]], shape=(1, 4), dtype=float32)
[0.1, 1.0, 0.0, 1.0]
tf.Tensor([[0.30643588 0.03388141 0.6491903  0.01049236]], shape=(1, 4), dtype=float32)


  2%|█▎                                                                          | 901/50000 [04:20<3:14:12,  4.21it/s]

5.008356273588801
85


  2%|█▌                                                                         | 1002/50000 [04:46<4:29:57,  3.03it/s]

4.970837107623987
91
[0.1, 0.9, 0.5, 0.3]
tf.Tensor([[1.1217893e-05 2.9291044e-04 5.7156876e-02 9.4253898e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.5, 0.5, 0.3]
tf.Tensor([[4.363774e-05 7.070353e-04 1.488518e-01 8.503975e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.5, 0.3]
tf.Tensor([[0.00109169 0.00390113 0.78885055 0.20615669]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.5, 0.3]
tf.Tensor([[0.4783724  0.05007333 0.4632573  0.008297  ]], shape=(1, 4), dtype=float32)


  2%|█▋                                                                         | 1101/50000 [05:16<6:32:29,  2.08it/s]

4.4193420779230985
91


  2%|█▊                                                                         | 1201/50000 [05:53<5:56:33,  2.28it/s]

5.881134864125527
94
[0.1, 0.3, 0.0, 1.0]
tf.Tensor([[1.940365e-05 4.360175e-04 8.044294e-02 9.191016e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.7, 0.0, 1.0]
tf.Tensor([[9.0925139e-05 1.1492809e-03 2.3322460e-01 7.6553518e-01]], shape=(1, 4), dtype=float32)
[0.1, 1.0, 0.0, 1.0]
tf.Tensor([[0.00133256 0.00431439 0.781223   0.21313   ]], shape=(1, 4), dtype=float32)


  3%|█▉                                                                         | 1301/50000 [06:29<6:15:36,  2.16it/s]

6.11820340963909
95


  3%|██                                                                         | 1401/50000 [07:03<5:10:56,  2.60it/s]

5.642698014432099
95
[0.6, 0.2, 1.0, 1.0]
tf.Tensor([[3.8748141e-05 6.4422947e-04 1.8995056e-01 8.0936646e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 1.0, 1.0]
tf.Tensor([[2.2547269e-04 1.6314149e-03 5.6031007e-01 4.3783307e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.9, 1.0, 1.0]
tf.Tensor([[0.00399985 0.00499558 0.92933446 0.06167011]], shape=(1, 4), dtype=float32)
[0.9, 0.9, 1.0, 1.0]
tf.Tensor([[0.6413272  0.04082758 0.31339633 0.00444885]], shape=(1, 4), dtype=float32)
[0.9, 1.0, 1.0, 1.0]
tf.Tensor([[0.6562338  0.03698175 0.30304667 0.00373784]], shape=(1, 4), dtype=float32)


  3%|██▎                                                                        | 1501/50000 [07:41<5:39:21,  2.38it/s]

4.903251292823162
90


  3%|██▍                                                                        | 1601/50000 [08:16<3:52:49,  3.46it/s]

6.573156340142563
95
[0.6, 0.7, 0.5, 0.3]
tf.Tensor([[1.7468479e-05 3.6822038e-04 1.0262487e-01 8.9698941e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.3, 0.5, 0.3]
tf.Tensor([[9.1563779e-05 9.7668939e-04 3.4675917e-01 6.5217263e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.3, 0.5, 0.3]
tf.Tensor([[0.00222157 0.00415658 0.88188833 0.11173354]], shape=(1, 4), dtype=float32)


  3%|██▌                                                                        | 1701/50000 [08:50<4:15:11,  3.15it/s]

5.337344264332586
95


  4%|██▋                                                                        | 1801/50000 [09:27<4:10:00,  3.21it/s]

6.17984453872395
93
[0.3, 0.0, 1.0, 0.0]
tf.Tensor([[1.816197e-05 3.718488e-04 8.357247e-02 9.160375e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 1.0, 0.0]
tf.Tensor([[5.2589792e-05 7.3197373e-04 1.8725419e-01 8.1196117e-01]], shape=(1, 4), dtype=float32)


  4%|██▊                                                                        | 1901/50000 [10:04<5:31:58,  2.41it/s]

6.0373809622736045
98


  4%|███                                                                        | 2000/50000 [10:39<4:27:37,  2.99it/s]

6.508499437543764
95
[0.3, 0.1, 1.0, 1.0]
tf.Tensor([[2.92737750e-05 5.12033817e-04 1.22713484e-01 8.76745224e-01]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 1.0, 1.0]
tf.Tensor([[1.0409672e-04 1.1009526e-03 2.9167235e-01 7.0712256e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 1.0, 1.0]
tf.Tensor([[0.00263121 0.00522592 0.8637069  0.12843603]], shape=(1, 4), dtype=float32)
[0.7, 0.8, 1.0, 1.0]
tf.Tensor([[0.6249058  0.04598834 0.32308996 0.0060159 ]], shape=(1, 4), dtype=float32)
[0.8, 0.8, 1.0, 1.0]
tf.Tensor([[0.67317027 0.04164905 0.28043044 0.00475027]], shape=(1, 4), dtype=float32)
[0.9, 0.8, 1.0, 1.0]
tf.Tensor([[0.7589195  0.03823742 0.19938396 0.00345911]], shape=(1, 4), dtype=float32)
[0.9, 0.9, 1.0, 1.0]
tf.Tensor([[0.7408117  0.03641798 0.219435   0.00333535]], shape=(1, 4), dtype=float32)
[1.0, 0.9, 1.0, 1.0]
tf.Tensor([[0.781098   0.03521002 0.18080524 0.00288675]], shape=(1, 4), dtype=float32)

  4%|███                                                                        | 2001/50000 [10:39<4:32:42,  2.93it/s]




  4%|███▏                                                                       | 2100/50000 [11:15<4:02:36,  3.29it/s]

6.902985484150509
98


  4%|███▎                                                                       | 2202/50000 [11:53<4:15:47,  3.11it/s]

5.984142773595483
92
[0.9, 0.8, 0.5, 0.7]
tf.Tensor([[2.3314318e-05 4.2569023e-04 1.3046402e-01 8.6908692e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 0.5, 0.7]
tf.Tensor([[1.3761707e-04 1.1748234e-03 4.5596239e-01 5.4272515e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.4, 0.5, 0.7]
tf.Tensor([[0.00634532 0.00569011 0.93517774 0.05278682]], shape=(1, 4), dtype=float32)


  5%|███▍                                                                       | 2301/50000 [12:48<4:27:54,  2.97it/s]

5.417420056641987
90


  5%|███▌                                                                       | 2401/50000 [13:30<4:06:20,  3.22it/s]

6.042792927776148
95
[0.5, 0.3, 0.0, 0.0]
tf.Tensor([[2.05933320e-05 4.07168438e-04 1.11889884e-01 8.87682378e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.3, 0.0, 0.0]
tf.Tensor([[6.5087661e-05 8.3077600e-04 2.6465681e-01 7.3444736e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.0, 0.0, 0.0]
tf.Tensor([[7.4048690e-04 2.7677934e-03 7.7312768e-01 2.2336398e-01]], shape=(1, 4), dtype=float32)


  5%|███▊                                                                       | 2501/50000 [14:22<7:11:20,  1.84it/s]

4.537102097581506
89


  5%|███▉                                                                       | 2601/50000 [15:02<5:04:57,  2.59it/s]

5.511446310657089
88
[0.0, 0.5, 1.0, 1.0]
tf.Tensor([[1.4319169e-05 3.1640308e-04 6.6056013e-02 9.3361318e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 1.0, 1.0]
tf.Tensor([[4.1769319e-05 6.3692214e-04 1.4464723e-01 8.5467404e-01]], shape=(1, 4), dtype=float32)
[0.8, 0.5, 1.0, 1.0]
tf.Tensor([[0.00087061 0.00350552 0.6754607  0.32016313]], shape=(1, 4), dtype=float32)
[0.8, 0.8, 1.0, 1.0]
tf.Tensor([[0.44708392 0.04692856 0.4960603  0.00992724]], shape=(1, 4), dtype=float32)
[0.8, 1.0, 1.0, 1.0]
tf.Tensor([[0.6480101  0.04062112 0.30620965 0.00515918]], shape=(1, 4), dtype=float32)
[0.9, 1.0, 1.0, 1.0]
tf.Tensor([[0.729502   0.03631851 0.23046497 0.00371445]], shape=(1, 4), dtype=float32)


  5%|████                                                                       | 2701/50000 [15:39<6:12:51,  2.11it/s]

5.470130970673889
94


  6%|████▏                                                                      | 2801/50000 [16:20<4:09:31,  3.15it/s]

6.559846747215834
92
[0.6, 0.3, 0.5, 0.7]
tf.Tensor([[2.4373168e-05 4.6226705e-04 1.0828754e-01 8.9122581e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.5, 0.7]
tf.Tensor([[8.9455716e-05 9.8339794e-04 3.0624008e-01 6.9268703e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.7, 0.5, 0.7]
tf.Tensor([[0.00230566 0.00438154 0.8656298  0.12768304]], shape=(1, 4), dtype=float32)


  6%|████▎                                                                      | 2901/50000 [16:57<5:39:14,  2.31it/s]

6.0225494815648695
94


  6%|████▌                                                                      | 3001/50000 [17:36<4:43:07,  2.77it/s]

5.836716455500226
93
[0.8, 0.5, 0.5, 0.7]
tf.Tensor([[2.0160598e-05 4.2028993e-04 1.1540179e-01 8.8415772e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.5, 0.7]
tf.Tensor([[1.2649222e-04 1.2232934e-03 3.6846638e-01 6.3018388e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.00337084 0.00467401 0.91452736 0.07742789]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.7759556  0.03347284 0.18744119 0.00313035]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.5, 0.7]
tf.Tensor([[0.7422948  0.04024782 0.21320936 0.00424805]], shape=(1, 4), dtype=float32)


  6%|████▋                                                                      | 3101/50000 [18:27<5:28:56,  2.38it/s]

5.820399003638214
93


  6%|████▊                                                                      | 3201/50000 [19:09<5:10:17,  2.51it/s]

5.668146555786746
93
[0.6, 0.6, 0.5, 0.7]
tf.Tensor([[1.4403883e-05 3.4117926e-04 7.5677834e-02 9.2396665e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.6, 0.5, 0.7]
tf.Tensor([[5.9521568e-05 8.2667702e-04 1.9812980e-01 8.0098397e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 0.5, 0.7]
tf.Tensor([[0.00120105 0.00355842 0.80201226 0.19322832]], shape=(1, 4), dtype=float32)
[0.3, 0.6, 0.5, 0.7]
tf.Tensor([[0.6571807  0.03928408 0.2982664  0.00526879]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.55449677 0.0449698  0.39284506 0.0076884 ]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.5, 0.7]
tf.Tensor([[0.6655894  0.04421463 0.2844092  0.00578678]], shape=(1, 4), dtype=float32)


  7%|████▉                                                                      | 3301/50000 [19:54<5:15:16,  2.47it/s]

5.0398019310816595
92


  7%|█████                                                                      | 3401/50000 [20:33<6:00:03,  2.16it/s]

5.558335266842726
94
[0.3, 0.4, 0.0, 1.0]
tf.Tensor([[1.7785334e-05 3.9527472e-04 7.6254494e-02 9.2333239e-01]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.0, 1.0]
tf.Tensor([[5.0937782e-05 7.6660194e-04 1.8530458e-01 8.1387794e-01]], shape=(1, 4), dtype=float32)
[0.0, 0.8, 0.0, 1.0]
tf.Tensor([[0.00145024 0.00431233 0.77498865 0.21924885]], shape=(1, 4), dtype=float32)


  7%|█████▎                                                                     | 3501/50000 [21:10<4:26:22,  2.91it/s]

6.1191979022589535
91


  7%|█████▍                                                                     | 3601/50000 [21:47<4:28:49,  2.88it/s]

5.266992165969983
93
[0.1, 0.3, 0.5, 0.3]
tf.Tensor([[1.2223490e-05 2.9713311e-04 5.8810793e-02 9.4087982e-01]], shape=(1, 4), dtype=float32)


  7%|█████▌                                                                     | 3701/50000 [22:26<6:01:47,  2.13it/s]

4.926645371932264
87


  8%|█████▋                                                                     | 3801/50000 [23:03<5:50:23,  2.20it/s]

5.890400294289945
90
[0.1, 0.4, 1.0, 1.0]
tf.Tensor([[9.7389820e-06 2.7500340e-04 5.8189522e-02 9.4152570e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.4, 1.0, 1.0]
tf.Tensor([[3.4325596e-05 6.3914689e-04 1.4264397e-01 8.5668254e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 1.0, 1.0]
tf.Tensor([[5.3914404e-04 2.9887005e-03 6.1905295e-01 3.7741920e-01]], shape=(1, 4), dtype=float32)
[0.8, 0.8, 1.0, 1.0]
tf.Tensor([[0.3756759  0.04522427 0.5674854  0.01161441]], shape=(1, 4), dtype=float32)
[0.8, 1.0, 1.0, 1.0]
tf.Tensor([[0.59342194 0.04104379 0.35946095 0.00607332]], shape=(1, 4), dtype=float32)
[0.9, 1.0, 1.0, 1.0]
tf.Tensor([[0.5504137  0.03910963 0.40431154 0.00616512]], shape=(1, 4), dtype=float32)


  8%|█████▊                                                                     | 3901/50000 [23:41<4:58:52,  2.57it/s]

6.250036019472507
95


  8%|██████                                                                     | 4001/50000 [24:19<7:24:17,  1.73it/s]

5.268409510955999
93
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[1.74776851e-05 4.23367834e-04 1.10649176e-01 8.88909996e-01]], shape=(1, 4), dtype=float32)
[0.4, 1.0, 0.5, 0.7]
tf.Tensor([[6.402526e-05 9.423449e-04 3.167510e-01 6.822426e-01]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.00265391 0.00468727 0.9059581  0.0867007 ]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.78271985 0.03198719 0.18245149 0.00284151]], shape=(1, 4), dtype=float32)
[0.4, 0.8, 0.5, 0.7]
tf.Tensor([[0.76448345 0.03344863 0.19881628 0.00325161]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 0.5, 0.7]
tf.Tensor([[0.7450464  0.03343613 0.21828118 0.00323638]], shape=(1, 4), dtype=float32)


  8%|██████▏                                                                    | 4101/50000 [25:00<4:10:02,  3.06it/s]

5.909760450901096
89


  8%|██████▎                                                                    | 4201/50000 [25:44<5:22:20,  2.37it/s]

5.622288080102583
88
[0.9, 0.1, 0.5, 0.7]
tf.Tensor([[1.5530737e-05 4.2289449e-04 1.0232617e-01 8.9723539e-01]], shape=(1, 4), dtype=float32)
[0.9, 0.5, 0.5, 0.7]
tf.Tensor([[5.9131569e-05 9.3473942e-04 3.1329796e-01 6.8570811e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.5, 0.7]
tf.Tensor([[0.00218335 0.00438483 0.90066487 0.09276696]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 0.5, 0.7]
tf.Tensor([[0.7709488  0.03327521 0.19294153 0.00283442]], shape=(1, 4), dtype=float32)


  9%|██████▍                                                                    | 4301/50000 [26:21<4:51:03,  2.62it/s]

5.4143277691834415
91


  9%|██████▌                                                                    | 4401/50000 [27:00<6:19:38,  2.00it/s]

5.52075485301381
88
[0.6, 0.2, 1.0, 1.0]
tf.Tensor([[1.7302922e-05 4.3742196e-04 1.0508882e-01 8.9445639e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 1.0, 1.0]
tf.Tensor([[7.7743665e-05 1.0969329e-03 2.9995289e-01 6.9887245e-01]], shape=(1, 4), dtype=float32)
[0.6, 1.0, 1.0, 1.0]
tf.Tensor([[0.00162455 0.00427517 0.8591944  0.13490592]], shape=(1, 4), dtype=float32)
[0.9, 1.0, 1.0, 1.0]
tf.Tensor([[0.68504804 0.03766036 0.27253297 0.00475855]], shape=(1, 4), dtype=float32)


  9%|██████▊                                                                    | 4501/50000 [27:37<3:25:10,  3.70it/s]

5.721854584800806
90


  9%|██████▉                                                                    | 4601/50000 [28:16<5:44:27,  2.20it/s]

4.524728490800009
86
[0.9, 0.5, 0.0, 1.0]
tf.Tensor([[2.0948992e-05 5.0384644e-04 1.8556835e-01 8.1390685e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.0, 1.0]
tf.Tensor([[1.2405061e-04 1.2883088e-03 5.2097201e-01 4.7761562e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 0.0, 1.0]
tf.Tensor([[0.0019459  0.00328318 0.9279067  0.06686419]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.0, 1.0]
tf.Tensor([[0.67809033 0.03345679 0.28435788 0.00409495]], shape=(1, 4), dtype=float32)
[0.1, 0.8, 0.0, 1.0]
tf.Tensor([[0.5819353  0.04005494 0.37169388 0.00631586]], shape=(1, 4), dtype=float32)
[0.1, 0.9, 0.0, 1.0]
tf.Tensor([[0.511326   0.03639404 0.44559634 0.00668356]], shape=(1, 4), dtype=float32)
[0.0, 0.9, 0.0, 1.0]
tf.Tensor([[0.47215283 0.04130096 0.47823465 0.00831157]], shape=(1, 4), dtype=float32)


  9%|███████                                                                    | 4701/50000 [28:53<4:27:06,  2.83it/s]

6.319646150227867
89


 10%|███████▏                                                                   | 4801/50000 [29:29<5:21:20,  2.34it/s]

5.458376540150059
90
[0.7, 0.7, 1.0, 0.0]
tf.Tensor([[2.2396385e-05 5.3637137e-04 2.0200993e-01 7.9743129e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.3, 1.0, 0.0]
tf.Tensor([[9.3487935e-05 1.1416802e-03 4.8919058e-01 5.0957429e-01]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 1.0, 0.0]
tf.Tensor([[0.00187453 0.003136   0.94438154 0.05060789]], shape=(1, 4), dtype=float32)


 10%|███████▎                                                                   | 4901/50000 [30:07<4:28:56,  2.79it/s]

5.081729516973508
87


 10%|███████▌                                                                   | 5001/50000 [30:45<5:30:21,  2.27it/s]

4.257768798818991
86
[0.2, 0.7, 0.5, 0.3]
tf.Tensor([[2.8563210e-05 6.5556448e-04 2.5576407e-01 7.4355179e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.3, 0.5, 0.3]
tf.Tensor([[1.0980243e-04 1.2833178e-03 5.3185987e-01 4.6674696e-01]], shape=(1, 4), dtype=float32)


 10%|███████▋                                                                   | 5102/50000 [31:24<3:51:40,  3.23it/s]

3.350948851748103
84


 10%|███████▊                                                                   | 5201/50000 [32:04<5:40:49,  2.19it/s]

4.190010821588713
85
[0.6, 0.3, 0.0, 1.0]
tf.Tensor([[3.2590327e-05 7.2475575e-04 3.1349310e-01 6.8574959e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.0, 1.0]
tf.Tensor([[9.7959361e-05 1.1109703e-03 6.6011876e-01 3.3867231e-01]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.0, 1.0]
tf.Tensor([[0.00149102 0.00276228 0.9507652  0.04498148]], shape=(1, 4), dtype=float32)
[0.3, 1.0, 0.0, 1.0]
tf.Tensor([[0.32956734 0.03032309 0.6334233  0.00668626]], shape=(1, 4), dtype=float32)


 11%|███████▉                                                                   | 5301/50000 [32:40<4:50:36,  2.56it/s]

5.878950047343044
80


 11%|████████                                                                   | 5401/50000 [33:19<5:31:24,  2.24it/s]

3.195412657981705
71
[0.6, 0.7, 0.0, 1.0]
tf.Tensor([[2.6425905e-05 6.4742181e-04 4.2709473e-01 5.7223147e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.7, 0.0, 1.0]
tf.Tensor([[1.0799680e-04 1.0885664e-03 7.7451557e-01 2.2428776e-01]], shape=(1, 4), dtype=float32)
[0.2, 1.0, 0.0, 1.0]
tf.Tensor([[5.9811480e-04 1.6492555e-03 9.5450729e-01 4.3245278e-02]], shape=(1, 4), dtype=float32)


 11%|████████▎                                                                  | 5501/50000 [34:16<5:48:33,  2.13it/s]

3.2913145552453815
71


 11%|████████▍                                                                  | 5601/50000 [34:58<4:29:20,  2.75it/s]

3.2502353527889873
67
[0.9, 0.6, 0.0, 1.0]
tf.Tensor([[1.8191478e-05 5.3298124e-04 4.1443533e-01 5.8501357e-01]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.0, 1.0]
tf.Tensor([[5.2324391e-05 7.4815220e-04 7.4499005e-01 2.5420952e-01]], shape=(1, 4), dtype=float32)
[0.2, 0.6, 0.0, 1.0]
tf.Tensor([[4.8457840e-04 1.4990981e-03 9.5601970e-01 4.1996583e-02]], shape=(1, 4), dtype=float32)
[0.2, 0.9, 0.0, 1.0]
tf.Tensor([[0.07329159 0.01506859 0.9036809  0.00795884]], shape=(1, 4), dtype=float32)
[0.0, 0.9, 0.0, 1.0]
tf.Tensor([[0.06227841 0.01618212 0.9114647  0.01007474]], shape=(1, 4), dtype=float32)


 11%|████████▌                                                                  | 5701/50000 [35:39<4:08:13,  2.97it/s]

3.80761937567752
71


 12%|████████▋                                                                  | 5801/50000 [36:15<4:53:04,  2.51it/s]

4.515596554537509
78
[0.1, 0.6, 0.0, 1.0]
tf.Tensor([[1.3873542e-05 5.1351555e-04 2.6181927e-01 7.3765337e-01]], shape=(1, 4), dtype=float32)
[0.1, 1.0, 0.0, 1.0]
tf.Tensor([[2.7810049e-05 7.1930693e-04 4.6587116e-01 5.3338176e-01]], shape=(1, 4), dtype=float32)


 12%|████████▊                                                                  | 5901/50000 [36:54<3:46:45,  3.24it/s]

4.558881977410344
72


 12%|█████████                                                                  | 6001/50000 [37:32<6:06:47,  2.00it/s]

4.071767699692206
73
[0.6, 0.0, 0.5, 0.3]
tf.Tensor([[1.4620608e-05 5.4168381e-04 3.4407461e-01 6.5536904e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.5, 0.3]
tf.Tensor([[2.7565420e-05 7.1513484e-04 6.0192007e-01 3.9733720e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.5, 0.3]
tf.Tensor([[2.2337311e-04 1.4764604e-03 9.2723572e-01 7.1064413e-02]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.5, 0.3]
tf.Tensor([[0.02639148 0.01149043 0.95268404 0.00943409]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.5, 0.3]
tf.Tensor([[0.03608496 0.01408555 0.9399959  0.0098336 ]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.5, 0.3]
tf.Tensor([[0.03025072 0.01229621 0.9482802  0.00917278]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.5, 0.3]
tf.Tensor([[0.03608496 0.01408555 0.9399959  0.0098336 ]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.5, 0.3]
tf.Tensor([[0.03025072 0.01229621 0.9482802  0.00917278]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.5, 0.3]
tf.Tensor([[0.03608496 0.01408555 0.9399959  0.009

 12%|█████████▏                                                                 | 6101/50000 [38:10<4:50:27,  2.52it/s]

2.4638000560694895
64


 12%|█████████▎                                                                 | 6201/50000 [38:46<4:18:52,  2.82it/s]

2.6075596814650113
66
[0.1, 0.0, 0.5, 0.7]
tf.Tensor([[2.1606264e-05 5.4758420e-04 6.4690012e-01 3.5253069e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.3, 0.5, 0.7]
tf.Tensor([[4.9922019e-05 6.9314556e-04 8.4162199e-01 1.5763499e-01]], shape=(1, 4), dtype=float32)
[0.1, 0.6, 0.5, 0.7]
tf.Tensor([[1.7046949e-04 9.8509842e-04 9.5222682e-01 4.6617560e-02]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.00969194 0.00604259 0.9738845  0.01038101]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.01678313 0.00704725 0.96922785 0.00694186]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.02524672 0.00869233 0.959604   0.00645696]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.07443251 0.01372362 0.9075813  0.00426257]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.08346239 0.01521941 0.89660084 0.00471742]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.07443251 0.01372362 0.9075813  0.00

 13%|█████████▍                                                                 | 6301/50000 [39:24<4:29:41,  2.70it/s]

1.6114581138250184
65


 13%|█████████▌                                                                 | 6401/50000 [40:01<5:07:11,  2.37it/s]

3.202312359361203
69
[0.6, 0.5, 0.5, 0.3]
tf.Tensor([[1.3225466e-05 3.7501470e-04 7.4123418e-01 2.5837761e-01]], shape=(1, 4), dtype=float32)
[0.6, 0.2, 0.5, 0.3]
tf.Tensor([[3.5078730e-05 4.5256002e-04 9.1655123e-01 8.2961082e-02]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.3]
tf.Tensor([[1.6092695e-04 5.9388694e-04 9.8403370e-01 1.5211389e-02]], shape=(1, 4), dtype=float32)
[0.6, 0.2, 0.5, 0.3]
tf.Tensor([[0.02282301 0.00714149 0.965424   0.00461145]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.3]
tf.Tensor([[0.01913481 0.0061729  0.9704901  0.0042022 ]], shape=(1, 4), dtype=float32)
[0.6, 0.2, 0.5, 0.3]
tf.Tensor([[0.02282301 0.00714149 0.965424   0.00461145]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.3]
tf.Tensor([[0.01913481 0.0061729  0.9704901  0.0042022 ]], shape=(1, 4), dtype=float32)
[0.6, 0.2, 0.5, 0.3]
tf.Tensor([[0.02282301 0.00714149 0.965424   0.00461145]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.3]
tf.Tensor([[0.01913481 0.0061729  0.9704901  0.004

 13%|█████████▊                                                                 | 6501/50000 [40:39<4:51:05,  2.49it/s]

3.0677616081677166
70


 13%|█████████▉                                                                 | 6601/50000 [41:16<4:22:54,  2.75it/s]

1.7292520779299188
61
[0.7, 0.0, 0.5, 0.7]
tf.Tensor([[1.7542778e-05 3.7179844e-04 8.1407219e-01 1.8553840e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.3, 0.5, 0.7]
tf.Tensor([[3.7599271e-05 4.0517206e-04 9.3998611e-01 5.9571195e-02]], shape=(1, 4), dtype=float32)
[0.7, 0.6, 0.5, 0.7]
tf.Tensor([[1.5105920e-04 5.2041002e-04 9.8357624e-01 1.5752288e-02]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.01402522 0.00487946 0.9762607  0.00483468]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.01751131 0.00495166 0.9738167  0.0037203 ]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.02241521 0.00597049 0.9675905  0.00402385]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.02909403 0.00701473 0.96019846 0.00369274]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.03265835 0.00780219 0.95549446 0.00404496]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.5, 0.7]
tf.Tensor([[0.02909403 0.00701473 0.96019846 0.00

 13%|██████████                                                                 | 6701/50000 [41:53<3:51:11,  3.12it/s]

2.178751788180612
56


 14%|██████████▏                                                                | 6801/50000 [42:30<4:00:24,  2.99it/s]

3.64878778402507
68
[0.7, 0.3, 1.0, 1.0]
tf.Tensor([[1.4897828e-05 4.2304996e-04 6.5002078e-01 3.4954125e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.6, 1.0, 1.0]
tf.Tensor([[3.3437915e-05 5.0116115e-04 8.7053800e-01 1.2892747e-01]], shape=(1, 4), dtype=float32)
[0.7, 0.9, 1.0, 1.0]
tf.Tensor([[1.4717055e-04 7.2401797e-04 9.6744812e-01 3.1680658e-02]], shape=(1, 4), dtype=float32)
[1.0, 0.9, 1.0, 1.0]
tf.Tensor([[0.0123168  0.00524227 0.97585547 0.00658537]], shape=(1, 4), dtype=float32)


 14%|██████████▏                                                                | 6828/50000 [42:39<4:03:03,  2.96it/s]

In [None]:
# Call trial() on the `agent` object created in an earlier cell.
# NOTE(review): trial() is not defined in the visible part of this notebook —
# presumably it runs the trained actors without further learning; TODO confirm.
# This cell carries no execution count, so no output was recorded for it.
agent.trial()