In [1]:
import os
from copy import deepcopy

from tqdm import tqdm
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

from Environment import Environment, make_one_hot, give_mapping
from Networks import UserActor, AsstActor, CentralizedCritic

In [2]:
def give_prev_steps(prev_steps_assist, steps):
    """Build a fresh padding history for the assistant's observation window.

    Args:
        prev_steps_assist: Accepted for call-site compatibility; its value is
            never read and the passed object is not modified.
        steps: Window length. ``steps - 1`` padding rows are produced, so a
            value of 1 or less yields an empty list.

    Returns:
        A new list of ``steps - 1`` independent rows, each equal to
        ``[0, 0, 0, 0, -1, -1]``.
    """
    padding_row = (0, 0, 0, 0, -1, -1)
    # list(...) gives every row its own list object, so editing one row
    # never leaks into the others.
    return [list(padding_row) for _ in range(steps - 1)]

In [3]:
class Agent:
    """Jointly trains a user actor and an assistant actor with a centralized critic.

    Each training episode is rolled out inside a ``tf.GradientTape``; at the end
    of the episode both actors are updated with a policy-gradient loss weighted
    by ``return - critic_value`` and the critic is regressed onto the discounted
    returns with a Huber loss.
    """

    def __init__(self):
        """Create the networks, optimizers and the environment used for training."""
        self.user_actor = UserActor()
        self.asst_actor = AsstActor()
        self.critic = CentralizedCritic()
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated.
        self.optimizer_actors = tf.keras.optimizers.Adam(learning_rate=0.0001)
        self.optimizer_critic = tf.keras.optimizers.Adam(learning_rate=0.0002)
        self.huber_loss = tf.keras.losses.Huber()
        # Number of rows in the assistant's observation window
        # (memory_len - 1 past rows plus the current one).
        self.memory_len = 4

        self.gamma = 0.90  # discount factor used for the episode returns
        self.env = Environment()
        # Replace the environment's cells with a fixed set of six 2-D points.
        self.env.cells = np.array([[0.7, 0.1], [0.1, 0.1], [0.5, 0.7], [0.6, 0.2], [0.7, 0.4], [0.2, 0.9]])
        # Add a leading and a trailing singleton axis to the mapping
        # (presumably batch and channel axes for the networks - TODO confirm).
        cell_mapping = give_mapping(self.env.cells)
        self.env_cell_mapping = cell_mapping[np.newaxis, :, :, np.newaxis]
        self.eps = 10e-6  # not referenced anywhere else in this class

    def _assistant_observation(self, prev_steps_assist, user_action, curr_loc):
        """Append the current step to the assistant history and add a batch axis.

        Args:
            prev_steps_assist: List of earlier rows (each a 6-element list).
            user_action: Index of the action chosen by the user actor.
            curr_loc: Two-element list with the current location.

        Returns:
            ``np.ndarray`` holding the previous rows followed by the new row
            ``one_hot(user_action) + curr_loc``, with a leading axis of size 1.
        """
        action_user_one_hot = make_one_hot(user_action, 4)
        window = prev_steps_assist + [action_user_one_hot + curr_loc]
        return np.array(window)[np.newaxis]

    def _discounted_returns(self, rewards):
        """Return the discounted return ``G_t`` for every step of an episode.

        Args:
            rewards: Per-step rewards in chronological order.

        Returns:
            List of the same length where entry ``t`` equals
            ``rewards[t] + gamma * rewards[t + 1] + gamma**2 * rewards[t + 2] + ...``.
        """
        returns = []
        discounted_sum = 0
        # Accumulate from the last step backwards, then restore time order.
        for reward in reversed(rewards):
            discounted_sum = reward + self.gamma * discounted_sum
            returns.append(discounted_sum)
        returns.reverse()
        return returns

    def learn(self, epochs=10000, max_steps=40):
        """Run the training loop.

        Args:
            epochs: Number of training episodes (default matches the original
                hard-coded 10000).
            max_steps: Maximum number of environment steps per episode
                (default matches the original hard-coded 40).

        Side effects:
            Every 100 epochs prints the running reward; for epoch > 0 it also
            prints how many episodes finished with ``done`` in the last window
            and saves the three models to ``user.h5``, ``asst.h5`` and
            ``critic.h5`` whenever that count is at least the best seen so far.
            A greedy ``trial`` is run on epochs 100, 300, 500, ...
        """
        env = self.env
        running_reward = 0
        reached = 0
        best = 0

        for epoch in tqdm(range(epochs)):
            user_action_probs_history = []
            asst_action_probs_history = []
            critic_value_history = []
            rewards_history = []

            done = False
            step = 0
            episode_reward = 0
            start, dest = env.give_start_dest()
            ob_user = [start[0], start[1], dest[0], dest[1]]
            prev_steps_assist = give_prev_steps([], self.memory_len)

            # Persistent because three separate gradient computations are taken
            # from the same recorded episode.
            with tf.GradientTape(persistent=True) as tape:
                while not done and step < max_steps:
                    step += 1
                    curr_loc = ob_user[:2]
                    target_loc = ob_user[2:4]
                    ob_user = np.array(ob_user)[np.newaxis]

                    # User actor: sample an action and record its log-probability.
                    user_probs = self.user_actor.model(ob_user)
                    user_action = np.random.choice(4, p=np.squeeze(user_probs))
                    user_action_probs_history.append(tf.math.log(user_probs[0, user_action]))

                    # Assistant actor: sees the user's action, the current
                    # location and the previous rows of its window.
                    ob_assist = self._assistant_observation(prev_steps_assist, user_action, curr_loc)
                    asst_probs = self.asst_actor.model([ob_assist, self.env_cell_mapping])
                    asst_action = np.random.choice(4, p=np.squeeze(asst_probs))
                    asst_action_probs_history.append(tf.math.log(asst_probs[0, asst_action]))

                    asst_output_one_hot = np.array(make_one_hot(asst_action, 4))[np.newaxis]

                    critic_value = self.critic.model(
                        [ob_user, ob_assist, self.env_cell_mapping, asst_output_one_hot]
                    )
                    critic_value_history.append(critic_value)

                    # The environment receives asst_action + 1 (presumably its
                    # assistant actions are 1-based - TODO confirm in Environment).
                    new_loc, reward_user, reward_assist, done = env.step(
                        user_action, asst_action + 1, target_loc, curr_loc
                    )

                    # Next user observation: new location followed by the target.
                    ob_user = new_loc[:] + target_loc
                    # Drop the oldest row so the window keeps memory_len rows.
                    prev_steps_assist = np.squeeze(ob_assist).tolist()[1:]

                    rewards_history.append(reward_user)
                    episode_reward += reward_user

                    if done:
                        reached += 1

                returns = self._discounted_returns(rewards_history)

                user_losses = []
                asst_losses = []
                critic_losses = []
                for log_prob_user, log_prob_asst, val, ret in zip(
                    user_action_probs_history, asst_action_probs_history, critic_value_history, returns
                ):
                    # Both actors are weighted by the same advantage-like term.
                    diff = ret - val
                    user_losses.append(-log_prob_user * diff)
                    asst_losses.append(-log_prob_asst * diff)
                    critic_losses.append(self.huber_loss(tf.expand_dims(val, 0), tf.expand_dims(ret, 0)))

                user_loss = sum(user_losses)
                asst_loss = sum(asst_losses)
                critic_loss = sum(critic_losses)

            # Exponential moving average of the episode reward.
            running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward

            user_vars = list(self.user_actor.model.trainable_variables)
            asst_vars = list(self.asst_actor.model.trainable_variables)
            critic_vars = list(self.critic.model.trainable_variables)

            user_grads = tape.gradient(user_loss, user_vars)
            asst_grads = tape.gradient(asst_loss, asst_vars)
            critic_grads = tape.gradient(critic_loss, critic_vars)
            # Release the resources held by the persistent tape right away.
            del tape

            # Both actors share one Adam instance, so apply their gradients in a
            # single call: the optimizer then sees one consistent variable set
            # and advances its step counter once per episode instead of twice.
            self.optimizer_actors.apply_gradients(
                zip(list(user_grads) + list(asst_grads), user_vars + asst_vars)
            )
            self.optimizer_critic.apply_gradients(zip(critic_grads, critic_vars))

            if epoch % 100 == 0:
                print(running_reward)

            if epoch and epoch % 100 == 0:
                print(reached)
                if reached >= best:
                    best = reached
                    tf.keras.models.save_model(self.user_actor.model, 'user.h5')
                    tf.keras.models.save_model(self.asst_actor.model, 'asst.h5')
                    tf.keras.models.save_model(self.critic.model, 'critic.h5')
                reached = 0

                # Multiples of 100 that are not multiples of 200,
                # i.e. epochs 100, 300, 500, ...
                if epoch % 200 != 0:
                    self.trial()

    def trial(self, max_steps=20):
        """Roll out one greedy (argmax) episode and print its trajectory.

        Args:
            max_steps: Maximum number of environment steps (default matches the
                original hard-coded 20).

        Returns:
            The sum of the user rewards collected during the episode.

        Side effects:
            Prints the user observation and the assistant's action
            probabilities at every step.
        """
        env = self.env
        done = False
        step = 0
        episode_reward = 0
        start, dest = env.give_start_dest()
        ob_user = [start[0], start[1], dest[0], dest[1]]
        prev_steps_assist = give_prev_steps([], self.memory_len)

        while not done and step < max_steps:
            step += 1
            curr_loc = ob_user[:2]
            target_loc = ob_user[2:4]
            print(ob_user)

            ob_user = np.array(ob_user)[np.newaxis]
            user_probs = self.user_actor.model(ob_user)
            user_action = np.argmax(np.squeeze(user_probs))

            ob_assist = self._assistant_observation(prev_steps_assist, user_action, curr_loc)

            asst_probs = self.asst_actor.model([ob_assist, self.env_cell_mapping])
            print(asst_probs)
            asst_action = np.argmax(np.squeeze(asst_probs))
            new_loc, reward_user, reward_assist, done = env.step(
                user_action, asst_action + 1, target_loc, curr_loc
            )

            ob_user = new_loc[:] + target_loc
            prev_steps_assist = np.squeeze(ob_assist).tolist()[1:]
            episode_reward += reward_user

        return episode_reward




In [4]:
# Build the agent (networks, optimizers, environment); the model summary shown
# in this cell's output is printed during construction.
agent = Agent()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None, 2)]          0           input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_1 (Te [(None, 2)]          0           input_1[0][0]                    
__________________________________________________________________________________________________
subtract (Subtract)             (None, 2)            0           tf_op_layer_strided_slice[0][0]  
                                                                 tf_op_layer_strided_sl

In [5]:
# Run the training loop. Every 100 epochs it prints the running reward and the
# number of episodes that ended with `done`; greedy trial rollouts are printed
# in between.
agent.learn()

  0%|                                                                             | 1/10000 [00:04<13:40:42,  4.92s/it]

-2.0


  1%|▊                                                                           | 100/10000 [03:19<6:21:29,  2.31s/it]

-37.49327097516518
15
[0.8, 0.0, 0.6, 0.2]
tf.Tensor([[0.18986392 0.29892808 0.2663672  0.24484079]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.18842435 0.2995277  0.25923106 0.2528169 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.1873081  0.30114833 0.25204197 0.25950158]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.18759032 0.30375895 0.24658625 0.26206455]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float3

  1%|▊                                                                           | 101/10000 [03:21<6:39:07,  2.42s/it]


[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.19079363 0.30227187 0.24208172 0.26485276]], shape=(1, 4), dtype=float32)


  2%|█▌                                                                          | 201/10000 [06:51<5:23:27,  1.98s/it]

-35.97049159348055
12


  3%|██▎                                                                         | 301/10000 [10:07<5:50:14,  2.17s/it]

-35.49214107690794
17
[0.0, 0.9, 0.2, 0.9]
tf.Tensor([[0.20995502 0.27653995 0.24737939 0.26612565]], shape=(1, 4), dtype=float32)


  4%|███                                                                         | 401/10000 [13:47<7:03:00,  2.64s/it]

-37.50275126392293
12


  5%|███▊                                                                        | 500/10000 [17:35<6:04:18,  2.30s/it]

-34.837018191786576
16
[0.0, 0.5, 0.7, 0.1]
tf.Tensor([[0.23849665 0.24340251 0.21001232 0.30808854]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.7, 0.1]
tf.Tensor([[0.23905069 0.24118504 0.19990687 0.3198574 ]], shape=(1, 4), dtype=float32)
[0.8, 0.5, 0.7, 0.1]
tf.Tensor([[0.24043088 0.24118371 0.18842506 0.32996035]], shape=(1, 4), dtype=float32)
[1.0, 0.5, 0.7, 0.1]
tf.Tensor([[0.25798836 0.22985561 0.16878627 0.34336978]], shape=(1, 4), dtype=float32)
[1.0, 0.1, 0.7, 0.1]
tf.Tensor([[0.26922005 0.22439195 0.15838279 0.34800515]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.27348903 0.22243786 0.15253283 0.3515403 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2740605  0.22202376 0.14954613 0.35436958]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.27284527 0.22276255 0.15072134 0.3536709 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float

  5%|███▊                                                                        | 501/10000 [17:37<6:17:09,  2.38s/it]

tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.7, 0.1]
tf.Tensor([[0.2726093  0.222894   0.150944   0.35355267]], shape=(1, 4), dtype=float32)


  6%|████▌                                                                       | 601/10000 [21:28<6:55:56,  2.66s/it]

-35.208542079446225
12


  7%|█████▎                                                                      | 700/10000 [25:37<5:48:15,  2.25s/it]

-33.68147404869147
18
[0.4, 0.2, 0.7, 0.4]
tf.Tensor([[0.27236396 0.22067578 0.17109503 0.33586526]], shape=(1, 4), dtype=float32)
[0.8, 0.2, 0.7, 0.4]
tf.Tensor([[0.28294662 0.21302882 0.15579407 0.3482305 ]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.29509026 0.20502251 0.1410467  0.3588405 ]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.30385306 0.19877636 0.13028267 0.36708793]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3084647  0.19571958 0.12658523 0.3692305 ]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float3

  7%|█████▎                                                                      | 701/10000 [25:40<6:23:19,  2.47s/it]


[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.3100389  0.19462843 0.12538464 0.36994806]], shape=(1, 4), dtype=float32)


  8%|██████                                                                      | 801/10000 [29:22<5:53:24,  2.31s/it]

-34.862767386433134
21


  9%|██████▊                                                                     | 900/10000 [33:25<5:42:18,  2.26s/it]

-34.40814688276347
16
[0.8, 0.0, 0.5, 0.7]
tf.Tensor([[0.28272593 0.21243656 0.16492987 0.3399077 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.29455695 0.20473795 0.14973927 0.3509659 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.3048916  0.19810036 0.13793094 0.35907716]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.3116275  0.19310655 0.12952793 0.36573806]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float3

  9%|██████▊                                                                     | 901/10000 [33:28<6:13:04,  2.46s/it]


[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.31296793 0.19211291 0.12837976 0.36653936]], shape=(1, 4), dtype=float32)


 10%|███████▌                                                                   | 1001/10000 [37:35<4:06:49,  1.65s/it]

-32.44079517260361
19


 11%|████████▎                                                                  | 1100/10000 [40:55<4:28:38,  1.81s/it]

-28.928363572672783
17
[0.5, 0.3, 0.2, 0.9]
tf.Tensor([[0.29462188 0.20548227 0.15410456 0.34579128]], shape=(1, 4), dtype=float32)
[0.9, 0.3, 0.2, 0.9]
tf.Tensor([[0.30764762 0.19682476 0.13970444 0.35582322]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.3198204  0.18882    0.12743989 0.3639197 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33067656 0.18140373 0.11775099 0.37016872]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33453998 0.17866904 0.11471147 0.37207955]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float

 11%|████████▎                                                                  | 1101/10000 [40:55<3:23:01,  1.37s/it]


[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.2, 0.9]
tf.Tensor([[0.33528996 0.178119   0.11411612 0.3724749 ]], shape=(1, 4), dtype=float32)


 12%|█████████                                                                  | 1200/10000 [44:34<4:37:39,  1.89s/it]

-31.71424480550614
21


 13%|█████████▊                                                                 | 1300/10000 [48:31<5:37:35,  2.33s/it]

-34.32225068820223
22
[0.5, 0.9, 0.6, 0.2]
tf.Tensor([[0.33672783 0.18189426 0.12719138 0.3541865 ]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.6, 0.2]
tf.Tensor([[0.34412977 0.17936862 0.12154066 0.35496095]], shape=(1, 4), dtype=float32)
[0.9, 0.5, 0.6, 0.2]
tf.Tensor([[0.36123803 0.16564332 0.10489921 0.36821947]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.6, 0.2]
tf.Tensor([[0.3742254  0.1575606  0.094123   0.37409106]], shape=(1, 4), dtype=float32)
[0.9, 0.0, 0.6, 0.2]
tf.Tensor([[0.37041143 0.16525857 0.09982531 0.36450478]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.3737008  0.15908265 0.09427223 0.37294433]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]


 13%|█████████▊                                                                 | 1301/10000 [48:34<5:59:50,  2.48s/it]

tf.Tensor([[0.37313357 0.15921535 0.09430986 0.37334126]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37415877 0.15832481 0.09322621 0.37429017]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632552 0.15607339 0.08967471 0.3779264 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.6, 0.2]
tf.Tensor([[0.37632

 14%|██████████▌                                                                | 1401/10000 [53:03<4:56:24,  2.07s/it]

-31.12675973869293
19


 15%|███████████▎                                                               | 1500/10000 [56:46<4:50:54,  2.05s/it]

-30.159642623938346
25
[0.6, 0.5, 0.7, 0.4]
tf.Tensor([[0.34796035 0.17661262 0.12411094 0.35131606]], shape=(1, 4), dtype=float32)
[1.0, 0.5, 0.7, 0.4]
tf.Tensor([[0.37482506 0.15671459 0.1002861  0.36817428]], shape=(1, 4), dtype=float32)
[1.0, 0.4, 0.7, 0.4]
tf.Tensor([[0.39842245 0.14212035 0.08347113 0.37598604]], shape=(1, 4), dtype=float32)
[1.0, 0.3, 0.7, 0.4]
tf.Tensor([[0.4190622  0.13071622 0.07148491 0.3787367 ]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4238905  0.13362812 0.07190452 0.37057695]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.41757238 0.13860467 0.07725138 0.36657155]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.41179925 0.14266329 0.08208323 0.3634542 ]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)


 15%|███████████▎                                                               | 1501/10000 [56:48<5:09:23,  2.18s/it]

[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4]
tf.Tensor([[0.4039043  0.14639246 0.08635636 0.36334693]], shape=(1, 4), dtype=float32)
[1.0, 0.2, 0.7, 0.4

 16%|███████████▋                                                             | 1601/10000 [1:00:06<4:27:05,  1.91s/it]

-36.48251848029395
17


 17%|████████████▍                                                            | 1700/10000 [1:03:46<5:05:20,  2.21s/it]

-37.37264786342496
12
[0.4, 0.6, 0.1, 0.1]
tf.Tensor([[0.34637693 0.17467049 0.12377647 0.35517606]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.1, 0.1]
tf.Tensor([[0.36215997 0.1626531  0.10870758 0.36647934]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.37593046 0.1532718  0.09752842 0.3732693 ]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38903713 0.14530164 0.08838592 0.37727532]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.3848867  0.14791659 0.09084477 0.37635195]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float3

 17%|████████████▍                                                            | 1701/10000 [1:03:49<5:20:01,  2.31s/it]


[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.1, 0.1]
tf.Tensor([[0.38356373 0.14878128 0.09165834 0.37599656]], shape=(1, 4), dtype=float32)


 18%|█████████████▏                                                           | 1801/10000 [1:07:26<5:07:59,  2.25s/it]

-34.36795344656025
17


 19%|█████████████▊                                                           | 1900/10000 [1:10:58<6:26:44,  2.86s/it]

-33.4010099781039
20
[0.8, 0.7, 0.1, 0.1]
tf.Tensor([[0.35312817 0.1822361  0.12205275 0.34258297]], shape=(1, 4), dtype=float32)
[0.8, 0.6, 0.1, 0.1]
tf.Tensor([[0.37871772 0.1671452  0.10209422 0.3520429 ]], shape=(1, 4), dtype=float32)
[0.8, 0.5, 0.1, 0.1]
tf.Tensor([[0.40075907 0.15508786 0.08792709 0.356226  ]], shape=(1, 4), dtype=float32)
[0.8, 0.4, 0.1, 0.1]
tf.Tensor([[0.41927314 0.14572127 0.07778863 0.357217  ]], shape=(1, 4), dtype=float32)
[0.8, 0.3, 0.1, 0.1]
tf.Tensor([[0.41648337 0.1473106  0.07909863 0.35710737]], shape=(1, 4), dtype=float32)
[0.8, 0.2, 0.1, 0.1]
tf.Tensor([[0.41377905 0.14879204 0.08034999 0.35707894]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.1, 0.1]
tf.Tensor([[0.41114703 0.15020113 0.08157551 0.35707644]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40855515 0.15154754 0.08279541 0.35710195]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40669835 0.15251465 0.08369703 0.35709   ]], shape=(1, 4), dtype=float32

 19%|█████████████▉                                                           | 1901/10000 [1:11:00<6:17:21,  2.80s/it]


[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40496743 0.15344709 0.08455861 0.35702685]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40496743 0.15344709 0.08455861 0.35702685]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40496743 0.15344709 0.08455861 0.35702685]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.1, 0.1]
tf.Tensor([[0.40496743 0.15344709 0.08455861 0.35702685]], shape=(1, 4), dtype=float32)


 20%|██████████████▌                                                          | 2001/10000 [1:14:28<4:12:53,  1.90s/it]

-31.125793210827407
24


 21%|███████████████▎                                                         | 2100/10000 [1:17:25<3:39:56,  1.67s/it]

-29.348592225005465
36
[0.7, 0.7, 0.1, 0.1]
tf.Tensor([[0.37815297 0.17649227 0.1061562  0.33919853]], shape=(1, 4), dtype=float32)
[0.7, 0.6, 0.1, 0.1]
tf.Tensor([[0.40715745 0.16093098 0.08722686 0.34468466]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.1, 0.1]
tf.Tensor([[0.43237895 0.14755878 0.07382604 0.3462363 ]], shape=(1, 4), dtype=float32)
[0.7, 0.4, 0.1, 0.1]
tf.Tensor([[0.45381466 0.13670178 0.06424905 0.3452345 ]], shape=(1, 4), dtype=float32)
[0.7, 0.3, 0.1, 0.1]
tf.Tensor([[0.45042244 0.13862444 0.06564815 0.34530503]], shape=(1, 4), dtype=float32)
[0.7, 0.2, 0.1, 0.1]
tf.Tensor([[0.44703352 0.14055145 0.06706271 0.34535232]], shape=(1, 4), dtype=float32)
[0.7, 0.1, 0.1, 0.1]
tf.Tensor([[0.4436429  0.14240307 0.06847134 0.34548265]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]


 21%|███████████████▎                                                         | 2101/10000 [1:17:28<4:25:11,  2.01s/it]

tf.Tensor([[0.44024122 0.14419389 0.0698852  0.34567967]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.4377594  0.14547876 0.07093419 0.34582773]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.4362066  0.14628075 0.07159846 0.34591419]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545923 0.14667282 0.07192195 0.345946  ]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.1, 0.1]
tf.Tensor([[0.43545

 22%|████████████████                                                         | 2201/10000 [1:20:41<4:10:05,  1.92s/it]

-30.383272957606522
22


 23%|████████████████▊                                                        | 2301/10000 [1:24:08<3:44:33,  1.75s/it]

-28.136422657844054
26
[0.0, 0.1, 0.1, 0.1]
tf.Tensor([[0.34108397 0.19776313 0.13472164 0.32643133]], shape=(1, 4), dtype=float32)


 24%|█████████████████▌                                                       | 2401/10000 [1:27:28<4:32:55,  2.15s/it]

-33.81654357269623
25


 25%|██████████████████▎                                                      | 2500/10000 [1:30:39<4:37:36,  2.22s/it]

-33.21730862270207
31
[0.5, 0.8, 0.1, 0.1]
tf.Tensor([[0.3863187  0.17774542 0.10655313 0.32938275]], shape=(1, 4), dtype=float32)
[0.5, 0.7, 0.1, 0.1]
tf.Tensor([[0.41410992 0.16442774 0.08916938 0.33229303]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.1, 0.1]
tf.Tensor([[0.44059306 0.15151861 0.07566316 0.3322251 ]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.1, 0.1]
tf.Tensor([[0.46264943 0.14110148 0.066111   0.33013812]], shape=(1, 4), dtype=float32)
[0.5, 0.4, 0.1, 0.1]
tf.Tensor([[0.45928827 0.1429185  0.06749738 0.3302959 ]], shape=(1, 4), dtype=float32)
[0.5, 0.3, 0.1, 0.1]
tf.Tensor([[0.45592868 0.14472888 0.06890365 0.33043885]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.1, 0.1]
tf.Tensor([[0.43616867 0.15742478 0.07998715 0.32641938]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.1, 0.1]
tf.Tensor([[0.43038672 0.15918206 0.0812503  0.32918093]], shape=(1, 4), dtype=float32)
[0.4, 0.1, 0.1, 0.1]
tf.Tensor([[0.41168398 0.1710115  0.09264641 0.3246581 ]], shape=(1, 4), dtype=float3

 25%|██████████████████▎                                                      | 2501/10000 [1:30:42<4:58:47,  2.39s/it]


[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.41466457 0.16676246 0.08907748 0.32949543]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.4280801  0.15920992 0.08241821 0.33029172]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.42715195 0.15968782 0.08286414 0.330296  ]], shape=(1, 4), dtype=float32)


 26%|██████████████████▉                                                      | 2601/10000 [1:34:11<3:26:56,  1.68s/it]

-28.90407195766516
25


 27%|███████████████████▋                                                     | 2701/10000 [1:37:28<3:50:15,  1.89s/it]

-31.751271047173176
25
[0.2, 0.9, 0.7, 0.1]
tf.Tensor([[0.35802522 0.1907808  0.12595943 0.32523456]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.7, 0.1]
tf.Tensor([[0.37076035 0.18952122 0.11943267 0.32028574]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.7, 0.1]
tf.Tensor([[0.39278555 0.17809747 0.10390327 0.32521373]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.7, 0.1]
tf.Tensor([[0.4137024  0.16789676 0.09197257 0.32642826]], shape=(1, 4), dtype=float32)
[0.3, 0.6, 0.7, 0.1]
tf.Tensor([[0.4054536  0.17644887 0.0995302  0.31856734]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.7, 0.1]
tf.Tensor([[0.41108868 0.16981329 0.0937674  0.3253307 ]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.7, 0.1]
tf.Tensor([[0.40238526 0.17866273 0.1015751  0.31737688]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.7, 0.1]
tf.Tensor([[0.4019953  0.17533672 0.09979585 0.3228721 ]], shape=(1, 4), dtype=float32)
[0.5, 0.4, 0.7, 0.1]
tf.Tensor([[0.41061455 0.16960464 0.09323747 0.32654336]], shape=(1, 4), dtype=float

 28%|████████████████████▍                                                    | 2801/10000 [1:40:27<3:35:13,  1.79s/it]

-29.032828006466872
29


 29%|█████████████████████▏                                                   | 2900/10000 [1:43:35<4:07:25,  2.09s/it]

-30.946821127285244
37
[0.6, 0.0, 0.5, 0.7]
tf.Tensor([[0.33326316 0.22000898 0.15214173 0.29458606]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.5, 0.7]
tf.Tensor([[0.34690443 0.216027   0.14070949 0.29635912]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.5, 0.7]
tf.Tensor([[0.36173993 0.21138145 0.13078971 0.29608887]], shape=(1, 4), dtype=float32)
[0.9, 0.0, 0.5, 0.7]
tf.Tensor([[0.3762232  0.20622797 0.12201867 0.29553017]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38123515 0.20367724 0.11900534 0.29608223]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.3848397  0.20176832 0.11695588 0.29643616]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38690326 0.20076501 0.11584622 0.2964855 ]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float

 29%|█████████████████████▏                                                   | 2901/10000 [1:43:38<4:40:56,  2.37s/it]


[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float32)
[1.0, 0.0, 0.5, 0.7]
tf.Tensor([[0.38788596 0.20026949 0.11532992 0.29651466]], shape=(1, 4), dtype=float32)


 30%|█████████████████████▉                                                   | 3001/10000 [1:46:35<3:19:21,  1.71s/it]

-25.36822436166749
37


 31%|██████████████████████▋                                                  | 3101/10000 [1:49:24<3:28:12,  1.81s/it]

-25.733782566660242
34
[0.6, 0.9, 0.7, 0.4]
tf.Tensor([[0.35437837 0.22079523 0.14038785 0.2844386 ]], shape=(1, 4), dtype=float32)
[0.6, 0.8, 0.7, 0.4]
tf.Tensor([[0.37774393 0.21266536 0.1223232  0.28726757]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.7, 0.4]
tf.Tensor([[0.39954755 0.20436208 0.10882474 0.28726572]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 0.7, 0.4]
tf.Tensor([[0.4177876  0.19659726 0.09848492 0.28713024]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.7, 0.4]
tf.Tensor([[0.41397494 0.20212008 0.10429375 0.27961126]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.7, 0.4]
tf.Tensor([[0.409636   0.20107208 0.10522896 0.28406298]], shape=(1, 4), dtype=float32)


 32%|███████████████████████▎                                                 | 3201/10000 [1:52:34<3:47:21,  2.01s/it]

-31.49215231580601
29


 33%|████████████████████████                                                 | 3301/10000 [1:55:42<3:04:18,  1.65s/it]

-29.80887459508691
32
[0.5, 0.9, 0.5, 0.7]
tf.Tensor([[0.37570715 0.20654808 0.13280153 0.28494325]], shape=(1, 4), dtype=float32)
[0.5, 0.8, 0.5, 0.7]
tf.Tensor([[0.4087897  0.19364984 0.11107296 0.28648746]], shape=(1, 4), dtype=float32)


 34%|████████████████████████▊                                                | 3401/10000 [1:58:28<3:38:33,  1.99s/it]

-29.019242069266568
42


 35%|█████████████████████████▌                                               | 3500/10000 [2:01:34<3:57:31,  2.19s/it]

-31.167148443673014
30
[0.5, 0.7, 0.2, 0.9]
tf.Tensor([[0.36189187 0.21876393 0.14877428 0.27056992]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.37399015 0.21797529 0.13976033 0.26827425]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.38930774 0.21676603 0.12890759 0.2650186 ]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.39945194 0.21375494 0.12283459 0.2639586 ]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.4056349  0.21332027 0.11984863 0.26119617]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.40912187 0.208971   0.11942311 0.26248398]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.4056349  0.21332027 0.11984863 0.26119617]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.40912187 0.208971   0.11942311 0.26248398]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.4056349  0.21332027 0.11984863 0.26119617]], shape=(1, 4), dtype=float

 35%|█████████████████████████▌                                               | 3501/10000 [2:01:36<4:12:56,  2.34s/it]


[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.4056349  0.21332027 0.11984863 0.26119617]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.40912187 0.208971   0.11942311 0.26248398]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.4056349  0.21332027 0.11984863 0.26119617]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.40912187 0.208971   0.11942311 0.26248398]], shape=(1, 4), dtype=float32)


 36%|██████████████████████████▎                                              | 3601/10000 [2:04:45<3:40:37,  2.07s/it]

-27.88892185836729
36


 37%|███████████████████████████                                              | 3701/10000 [2:07:46<3:17:19,  1.88s/it]

-29.763000862623716
36
[0.4, 0.7, 0.7, 0.4]
tf.Tensor([[0.37717056 0.20846231 0.1419653  0.27240184]], shape=(1, 4), dtype=float32)
[0.5, 0.7, 0.7, 0.4]
tf.Tensor([[0.4114916  0.19205013 0.11653651 0.27992174]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.7, 0.4]
tf.Tensor([[0.43478835 0.19019651 0.10645739 0.2685577 ]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 0.7, 0.4]
tf.Tensor([[0.46146014 0.17575279 0.09020617 0.27258086]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.7, 0.4]
tf.Tensor([[0.4658492  0.17784865 0.09048989 0.26581225]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.7, 0.4]
tf.Tensor([[0.4608955  0.17630613 0.09021499 0.2725834 ]], shape=(1, 4), dtype=float32)


 38%|███████████████████████████▋                                             | 3801/10000 [2:10:31<3:41:11,  2.14s/it]

-25.342229711686247
45


 39%|████████████████████████████▍                                            | 3900/10000 [2:13:21<2:26:51,  1.44s/it]

-20.88741192027423
45
[0.6, 0.1, 0.2, 0.9]
tf.Tensor([[0.41190627 0.19723745 0.12988605 0.26097026]], shape=(1, 4), dtype=float32)
[0.7, 0.1, 0.2, 0.9]
tf.Tensor([[0.4285622  0.19791278 0.120107   0.25341806]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.44516248 0.19653125 0.11138966 0.24691656]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.4547731  0.1909284  0.10579526 0.24850322]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.4573618  0.19326094 0.10468088 0.2446964 ]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.45194262 0.19406757 0.10564283 0.24834701]], shape=(1, 4), dtype=float32)

 39%|████████████████████████████▍                                            | 3901/10000 [2:13:22<2:07:01,  1.25s/it]


[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.45194262 0.19406757 0.10564283 0.24834701]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.45194262 0.19406757 0.10564283 0.24834701]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.45194262 0.19406757 0.10564283 0.24834701]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.9]
tf.Tensor([[0.45194262 0.19406757 0.10564283 0.24834701]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.2, 0.9]
tf.Tensor([[0.45206112 0.18976612 0.10529324 0.25287956]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.2, 0.

 40%|█████████████████████████████▏                                           | 4001/10000 [2:16:19<2:30:14,  1.50s/it]

-27.402832589869156
40


 41%|█████████████████████████████▉                                           | 4100/10000 [2:19:04<2:19:28,  1.42s/it]

-24.988933917956032
43
[0.9, 0.5, 0.5, 0.7]
tf.Tensor([[0.4061784  0.19771366 0.12507942 0.27102852]], shape=(1, 4), dtype=float32)
[0.8, 0.5, 0.5, 0.7]
tf.Tensor([[0.42042568 0.19571547 0.1157983  0.26806056]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.5, 0.7]
tf.Tensor([[0.43179706 0.19443189 0.10922561 0.26454544]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.7]
tf.Tensor([[0.44942176 0.19306006 0.10120944 0.2563087 ]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.5, 0.7]
tf.Tensor([[0.4470812  0.19169131 0.1028114  0.25841615]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.7]
tf.Tensor([[0.45499802 0.19230933 0.10057522 0.25211746]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.5, 0.7]
tf.Tensor([[0.4551916  0.18891498 0.1006543  0.25523907]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.7]
tf.Tensor([[0.45499802 0.19230933 0.10057522 0.25211746]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.5, 0.7]
tf.Tensor([[0.4551916  0.18891498 0.1006543  0.25523907]], shape=(1, 4), dtype=float

 41%|█████████████████████████████▉                                           | 4101/10000 [2:19:06<2:21:46,  1.44s/it]


[0.6, 0.5, 0.5, 0.7]
tf.Tensor([[0.45499802 0.19230933 0.10057522 0.25211746]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.5, 0.7]
tf.Tensor([[0.4551916  0.18891498 0.1006543  0.25523907]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.5, 0.7]
tf.Tensor([[0.45499802 0.19230933 0.10057522 0.25211746]], shape=(1, 4), dtype=float32)


 42%|██████████████████████████████▋                                          | 4201/10000 [2:21:57<2:43:26,  1.69s/it]

-25.574620991797183
42


 43%|███████████████████████████████▍                                         | 4300/10000 [2:24:45<3:03:22,  1.93s/it]

-22.764559971847376
49
[0.4, 0.1, 0.7, 0.4]
tf.Tensor([[0.4271496  0.19623454 0.13303268 0.24358325]], shape=(1, 4), dtype=float32)
[0.5, 0.1, 0.7, 0.4]
tf.Tensor([[0.44019678 0.19943596 0.12687464 0.23349255]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.7, 0.4]
tf.Tensor([[0.45361677 0.2006681  0.12123694 0.22447826]], shape=(1, 4), dtype=float32)
[0.7, 0.1, 0.7, 0.4]
tf.Tensor([[0.46637642 0.20113647 0.11625171 0.2162354 ]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.47152048 0.1991173  0.11382871 0.21553345]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47298336 0.19062518 0.11081302 0.22557843]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.47527865 0.19362457 0.11013481 0.22096197]], shape=(1, 4), dtype=float32)

 43%|███████████████████████████████▍                                         | 4301/10000 [2:24:48<3:19:43,  2.10s/it]


[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47097704 0.18857771 0.109414   0.23103128]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.4703492  0.19361569 0.11030169 0.22573346]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47097704 0.18857771 0.109414   0.23103128]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.4703492  0.19361569 0.11030169 0.22573346]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47097704 0.18857771 0.109414   0.23103128]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.4703492  0.19361569 0.11030169 0.22573346]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47097704 0.18857771 0.109414   0.23103128]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.4]
tf.Tensor([[0.4703492  0.19361569 0.11030169 0.22573346]], shape=(1, 4), dtype=float32)
[0.9, 0.1, 0.7, 0.4]
tf.Tensor([[0.47097704 0.18857771 0.109414   0.23103128]], shape=(1, 4), dtype=float32)
[0.8, 0.1, 0.7, 0.

 44%|████████████████████████████████▏                                        | 4401/10000 [2:27:24<3:08:20,  2.02s/it]

-27.76126303899089
50


 45%|████████████████████████████████▊                                        | 4501/10000 [2:29:51<2:12:31,  1.45s/it]

-24.52311409043982
60
[0.8, 0.9, 0.2, 0.9]
tf.Tensor([[0.44779244 0.19662176 0.12587193 0.2297139 ]], shape=(1, 4), dtype=float32)
[0.7, 0.9, 0.2, 0.9]
tf.Tensor([[0.4674087  0.19330628 0.11572878 0.2235563 ]], shape=(1, 4), dtype=float32)
[0.6, 0.9, 0.2, 0.9]
tf.Tensor([[0.48144394 0.19127902 0.10835002 0.21892697]], shape=(1, 4), dtype=float32)
[0.5, 0.9, 0.2, 0.9]
tf.Tensor([[0.48997688 0.1915727  0.10462086 0.21382959]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.2, 0.9]
tf.Tensor([[0.48489383 0.1934019  0.10705018 0.21465407]], shape=(1, 4), dtype=float32)
[0.3, 0.9, 0.2, 0.9]
tf.Tensor([[0.48055762 0.1950704  0.10936642 0.21500556]], shape=(1, 4), dtype=float32)


 46%|█████████████████████████████████▌                                       | 4601/10000 [2:32:25<2:11:01,  1.46s/it]

-19.24054454918547
58


 47%|██████████████████████████████████▎                                      | 4700/10000 [2:35:09<1:50:13,  1.25s/it]

-20.187792457366616
60
[0.3, 0.0, 0.2, 0.9]
tf.Tensor([[0.49942744 0.1928516  0.11491829 0.19280268]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.2, 0.9]
tf.Tensor([[0.51759064 0.19580893 0.10895421 0.17764619]], shape=(1, 4), dtype=float32)
[0.5, 0.0, 0.2, 0.9]
tf.Tensor([[0.5363634  0.19565022 0.1028439  0.16514248]], shape=(1, 4), dtype=float32)
[0.6, 0.0, 0.2, 0.9]
tf.Tensor([[0.5476582  0.19358991 0.09726188 0.16148998]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.2, 0.9]
tf.Tensor([[0.5429611  0.19342983 0.0969338  0.16667531]], shape=(1, 4), dtype=float32)
[0.6, 0.2, 0.2, 0.9]
tf.Tensor([[0.53437376 0.19509628 0.09808193 0.17244805]], shape=(1, 4), dtype=float32)
[0.6, 0.3, 0.2, 0.9]
tf.Tensor([[0.523647   0.19778377 0.10017498 0.17839426]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.2, 0.9]
tf.Tensor([[0.52702165 0.19634217 0.09884549 0.17779066]], shape=(1, 4), dtype=float32)

 47%|██████████████████████████████████▎                                      | 4701/10000 [2:35:12<2:23:28,  1.62s/it]


[0.6, 0.5, 0.2, 0.9]
tf.Tensor([[0.5219503  0.19445352 0.10091367 0.18268253]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.2, 0.9]
tf.Tensor([[0.52558035 0.19530287 0.09895967 0.1801571 ]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.2, 0.9]
tf.Tensor([[0.5187338  0.19462048 0.10193753 0.18470822]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.2, 0.9]
tf.Tensor([[0.5206346  0.19609328 0.10027616 0.18299598]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.2, 0.9]
tf.Tensor([[0.516543   0.19492738 0.10284895 0.18568067]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.5193213  0.1958735  0.10078296 0.1840222 ]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.5165541  0.19440806 0.10303096 0.18600687]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.5314248  0.19401035 0.09933126 0.17523357]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.53208756 0.1909992  0.09979817 0.17711511]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.

 48%|███████████████████████████████████                                      | 4800/10000 [2:37:25<1:53:32,  1.31s/it]

-13.702504076349177
71


 49%|███████████████████████████████████▊                                     | 4901/10000 [2:39:37<1:17:06,  1.10it/s]

-15.097160988355043
67
[0.4, 0.3, 0.6, 0.2]
tf.Tensor([[0.53086764 0.18388234 0.10278161 0.18246843]], shape=(1, 4), dtype=float32)
[0.5, 0.3, 0.6, 0.2]
tf.Tensor([[0.5875433  0.16374606 0.08116182 0.16754879]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.6, 0.2]
tf.Tensor([[0.6283226  0.1564087  0.07142892 0.14383975]], shape=(1, 4), dtype=float32)


 50%|████████████████████████████████████▌                                    | 5001/10000 [2:42:00<2:29:04,  1.79s/it]

-18.737280252286062
68


 51%|█████████████████████████████████████▏                                   | 5100/10000 [2:44:18<1:26:31,  1.06s/it]

-13.01555955621363
74
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.57111025 0.18085909 0.0895285  0.15850212]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.59392023 0.17411305 0.08210879 0.14985788]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6292884  0.16971862 0.07174971 0.1292432 ]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)

 51%|█████████████████████████████████████▏                                   | 5101/10000 [2:44:21<1:55:17,  1.41s/it]


[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6401115  0.16646229 0.06885198 0.12457425]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.6376382  0.17100799 0.06904485 0.12230897]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.

 52%|█████████████████████████████████████▉                                   | 5200/10000 [2:46:17<1:39:19,  1.24s/it]

-11.899867866601033
79


 53%|██████████████████████████████████████▋                                  | 5301/10000 [2:48:15<1:56:56,  1.49s/it]

-15.974469581940713
83
[0.4, 0.3, 0.1, 0.1]
tf.Tensor([[0.59461445 0.1739478  0.0790011  0.15243664]], shape=(1, 4), dtype=float32)
[0.3, 0.3, 0.1, 0.1]
tf.Tensor([[0.6120732  0.17127874 0.07378022 0.14286785]], shape=(1, 4), dtype=float32)
[0.2, 0.3, 0.1, 0.1]
tf.Tensor([[0.68029666 0.14516985 0.05331629 0.1212172 ]], shape=(1, 4), dtype=float32)
[0.2, 0.2, 0.1, 0.1]
tf.Tensor([[0.6992267  0.13925311 0.04947979 0.11204035]], shape=(1, 4), dtype=float32)
[0.1, 0.2, 0.1, 0.1]
tf.Tensor([[0.7366855  0.12388282 0.03985672 0.09957489]], shape=(1, 4), dtype=float32)


 54%|███████████████████████████████████████▍                                 | 5401/10000 [2:50:11<1:31:44,  1.20s/it]

-13.341949117222889
83


 55%|████████████████████████████████████████▏                                | 5500/10000 [2:52:10<1:12:53,  1.03it/s]

-10.095369866476233
85
[0.3, 0.0, 0.2, 0.9]
tf.Tensor([[0.56349    0.1947849  0.09206793 0.14965723]], shape=(1, 4), dtype=float32)
[0.3, 0.1, 0.2, 0.9]
tf.Tensor([[0.5897056  0.19015731 0.08245198 0.1376852 ]], shape=(1, 4), dtype=float32)
[0.3, 0.2, 0.2, 0.9]
tf.Tensor([[0.6187513  0.18311095 0.07294593 0.12519176]], shape=(1, 4), dtype=float32)
[0.3, 0.3, 0.2, 0.9]
tf.Tensor([[0.645327   0.17711757 0.06513899 0.11241646]], shape=(1, 4), dtype=float32)
[0.3, 0.4, 0.2, 0.9]
tf.Tensor([[0.6518103  0.17450458 0.06328406 0.11040108]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 0.2, 0.9]
tf.Tensor([[0.6583681  0.17183055 0.06140204 0.10839929]], shape=(1, 4), dtype=float32)

 55%|█████████████████████████████████████████▎                                 | 5501/10000 [2:52:10<57:56,  1.29it/s]


[0.3, 0.6, 0.2, 0.9]
tf.Tensor([[0.6650761  0.16909531 0.05948346 0.10634512]], shape=(1, 4), dtype=float32)
[0.3, 0.7, 0.2, 0.9]
tf.Tensor([[0.6722521  0.16621192 0.05745963 0.10407633]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.6723368  0.16280828 0.05874396 0.10611086]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.67629117 0.16425934 0.05667765 0.10277183]], shape=(1, 4), dtype=float32)


 56%|████████████████████████████████████████▉                                | 5601/10000 [2:53:52<1:05:50,  1.11it/s]

-5.651081773119594
93


 57%|█████████████████████████████████████████▌                               | 5701/10000 [2:55:35<1:05:38,  1.09it/s]

-6.866721368118853
89
[0.2, 0.8, 0.1, 0.1]
tf.Tensor([[0.6254893  0.19246736 0.07278632 0.10925709]], shape=(1, 4), dtype=float32)
[0.2, 0.7, 0.1, 0.1]
tf.Tensor([[0.69851685 0.16926634 0.0507937  0.08142321]], shape=(1, 4), dtype=float32)
[0.2, 0.6, 0.1, 0.1]
tf.Tensor([[0.74891603 0.15306021 0.03742322 0.06060056]], shape=(1, 4), dtype=float32)
[0.2, 0.5, 0.1, 0.1]
tf.Tensor([[0.7886649  0.13720194 0.02828304 0.04585015]], shape=(1, 4), dtype=float32)
[0.2, 0.4, 0.1, 0.1]
tf.Tensor([[0.78673327 0.13771814 0.02884825 0.04670034]], shape=(1, 4), dtype=float32)
[0.2, 0.3, 0.1, 0.1]
tf.Tensor([[0.78482145 0.13822497 0.02941749 0.04753611]], shape=(1, 4), dtype=float32)
[0.2, 0.2, 0.1, 0.1]
tf.Tensor([[0.76233464 0.14608328 0.03605651 0.05552556]], shape=(1, 4), dtype=float32)
[0.1, 0.2, 0.1, 0.1]
tf.Tensor([[0.7530103  0.15231018 0.03743605 0.05724354]], shape=(1, 4), dtype=float32)


 58%|███████████████████████████████████████████▌                               | 5801/10000 [2:56:59<44:47,  1.56it/s]

-3.6501034988511076
92


 59%|████████████████████████████████████████████▎                              | 5900/10000 [2:58:26<51:26,  1.33it/s]

-4.2815030050965
93


 59%|████████████████████████████████████████████▎                              | 5901/10000 [2:58:27<51:55,  1.32it/s]

[0.0, 0.6, 0.2, 0.9]
tf.Tensor([[0.5812222  0.214471   0.08424895 0.12005791]], shape=(1, 4), dtype=float32)
[0.0, 0.7, 0.2, 0.9]
tf.Tensor([[0.65066123 0.19443476 0.06396761 0.09093639]], shape=(1, 4), dtype=float32)
[0.1, 0.7, 0.2, 0.9]
tf.Tensor([[0.69504315 0.18002485 0.05061334 0.07431864]], shape=(1, 4), dtype=float32)
[0.1, 0.8, 0.2, 0.9]
tf.Tensor([[0.7410052  0.1655309  0.03921727 0.05424656]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.7528558  0.16065595 0.03582611 0.05066219]], shape=(1, 4), dtype=float32)


 60%|███████████████████████████████████████████▊                             | 6001/10000 [2:59:45<1:16:50,  1.15s/it]

-5.668152342321671
99


 61%|████████████████████████████████████████████▌                            | 6101/10000 [3:01:05<1:01:33,  1.06it/s]

-4.209927664481727
95
[0.1, 0.4, 0.1, 0.1]
tf.Tensor([[0.6223935  0.21044198 0.06945881 0.09770572]], shape=(1, 4), dtype=float32)
[0.1, 0.3, 0.1, 0.1]
tf.Tensor([[0.72200346 0.16957332 0.04209899 0.06632424]], shape=(1, 4), dtype=float32)
[0.1, 0.2, 0.1, 0.1]
tf.Tensor([[0.78889185 0.14160672 0.02651072 0.04299076]], shape=(1, 4), dtype=float32)


 62%|██████████████████████████████████████████████▌                            | 6201/10000 [3:02:38<23:38,  2.68it/s]

-2.4394431058737247
96


 63%|███████████████████████████████████████████████▎                           | 6301/10000 [3:03:51<41:18,  1.49it/s]

-1.867245704691085
97
[0.2, 0.0, 0.6, 0.2]
tf.Tensor([[0.60437363 0.22110602 0.08096084 0.09355952]], shape=(1, 4), dtype=float32)
[0.3, 0.0, 0.6, 0.2]
tf.Tensor([[0.68194723 0.19419393 0.05744507 0.06641387]], shape=(1, 4), dtype=float32)
[0.4, 0.0, 0.6, 0.2]
tf.Tensor([[0.75060314 0.16612013 0.03918108 0.04409568]], shape=(1, 4), dtype=float32)
[0.5, 0.0, 0.6, 0.2]
tf.Tensor([[0.78905886 0.14812171 0.02926687 0.0335525 ]], shape=(1, 4), dtype=float32)
[0.5, 0.1, 0.6, 0.2]
tf.Tensor([[0.7930274  0.14661035 0.0284139  0.03194832]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.78447765 0.15065716 0.02967803 0.03518713]], shape=(1, 4), dtype=float32)


 64%|████████████████████████████████████████████████                           | 6401/10000 [3:05:06<50:49,  1.18it/s]

-2.1987870513864882
97


 65%|████████████████████████████████████████████████▊                          | 6500/10000 [3:06:18<49:55,  1.17it/s]

-1.8810117369110304
99
[0.0, 0.2, 0.5, 0.7]
tf.Tensor([[0.6173148  0.21249308 0.07784545 0.09234665]], shape=(1, 4), dtype=float32)
[0.1, 0.2, 0.5, 0.7]
tf.Tensor([[0.6751901  0.19045079 0.05924375 0.07511535]], shape=(1, 4), dtype=float32)
[0.1, 0.3, 0.5, 0.7]
tf.Tensor([[0.75895137 0.15565975 0.038036   0.04735297]], shape=(1, 4), dtype=float32)
[0.2, 0.3, 0.5, 0.7]
tf.Tensor([[0.80456525 0.13409276 0.02688972 0.03445227]], shape=(1, 4), dtype=float32)
[0.2, 0.4, 0.5, 0.7]
tf.Tensor([[0.80435604 0.13480613 0.02704207 0.0337958 ]], shape=(1, 4), dtype=float32)
[0.3, 0.4, 0.5, 0.7]
tf.Tensor([[0.8170599  0.128168   0.02400771 0.03076444]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 0.5, 0.7]
tf.Tensor([[0.8169984  0.12859984 0.0241686  0.03023309]], shape=(1, 4), dtype=float32)
[0.4, 0.5, 0.5, 0.7]
tf.Tensor([[0.82799953 0.12288062 0.02156892 0.02755095]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.8285273  0.1226587  0.02169544 0.02711867]], shape=(1, 4), dtype=float

 65%|████████████████████████████████████████████████▊                          | 6501/10000 [3:06:19<44:57,  1.30it/s]

tf.Tensor([[0.83787465 0.11783177 0.01949221 0.02480135]], shape=(1, 4), dtype=float32)


 66%|█████████████████████████████████████████████████▌                         | 6601/10000 [3:07:19<44:02,  1.29it/s]

0.4014967530539628
98


 67%|██████████████████████████████████████████████████▎                        | 6701/10000 [3:08:19<41:21,  1.33it/s]

-1.1199635308711429
100
[0.2, 0.3, 0.5, 0.7]
tf.Tensor([[0.5429215  0.23357339 0.10350049 0.12000459]], shape=(1, 4), dtype=float32)
[0.2, 0.4, 0.5, 0.7]
tf.Tensor([[0.62626064 0.20508839 0.0746882  0.09396275]], shape=(1, 4), dtype=float32)
[0.2, 0.5, 0.5, 0.7]
tf.Tensor([[0.7491865  0.15523101 0.04183713 0.0537453 ]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 0.5, 0.7]
tf.Tensor([[0.81919366 0.12132272 0.0253998  0.03408382]], shape=(1, 4), dtype=float32)
[0.3, 0.6, 0.5, 0.7]
tf.Tensor([[0.84967226 0.10475895 0.01983899 0.02572978]], shape=(1, 4), dtype=float32)
[0.4, 0.6, 0.5, 0.7]
tf.Tensor([[0.8726496  0.09197871 0.01563479 0.0197369 ]], shape=(1, 4), dtype=float32)
[0.5, 0.6, 0.5, 0.7]
tf.Tensor([[0.8626065  0.09816384 0.01698153 0.02224805]], shape=(1, 4), dtype=float32)


 68%|███████████████████████████████████████████████████                        | 6801/10000 [3:09:16<38:29,  1.38it/s]

1.0060782276234914
100


 69%|███████████████████████████████████████████████████▊                       | 6901/10000 [3:10:13<40:00,  1.29it/s]

0.36398710864983463
100
[0.7, 0.8, 0.7, 0.4]
tf.Tensor([[0.50925374 0.26158246 0.11700723 0.11215656]], shape=(1, 4), dtype=float32)
[0.7, 0.7, 0.7, 0.4]
tf.Tensor([[0.70546025 0.18232639 0.05204794 0.06016538]], shape=(1, 4), dtype=float32)
[0.7, 0.6, 0.7, 0.4]
tf.Tensor([[0.83754843 0.11537175 0.02059704 0.0264828 ]], shape=(1, 4), dtype=float32)
[0.7, 0.5, 0.7, 0.4]
tf.Tensor([[0.8998925  0.07759002 0.00961801 0.01289949]], shape=(1, 4), dtype=float32)


 70%|████████████████████████████████████████████████████▌                      | 7001/10000 [3:11:11<28:26,  1.76it/s]

2.1629487619108185
99


 71%|█████████████████████████████████████████████████████▎                     | 7100/10000 [3:12:15<41:42,  1.16it/s]

-0.6670958608837445
98
[0.9, 0.9, 0.1, 0.1]
tf.Tensor([[0.44361928 0.29363266 0.14453098 0.11821707]], shape=(1, 4), dtype=float32)
[0.9, 0.8, 0.1, 0.1]
tf.Tensor([[0.6109989  0.23289491 0.08096585 0.0751403 ]], shape=(1, 4), dtype=float32)
[0.8, 0.8, 0.1, 0.1]
tf.Tensor([[0.78594875 0.15222259 0.03050664 0.03132198]], shape=(1, 4), dtype=float32)
[0.8, 0.7, 0.1, 0.1]
tf.Tensor([[0.8581644  0.10838853 0.01640664 0.01704041]], shape=(1, 4), dtype=float32)
[0.7, 0.7, 0.1, 0.1]
tf.Tensor([[0.844891   0.11846016 0.01795307 0.01869568]], shape=(1, 4), dtype=float32)
[0.7, 0.6, 0.1, 0.1]
tf.Tensor([[0.8522113  0.11165847 0.01773371 0.01839648]], shape=(1, 4), dtype=float32)
[0.6, 0.6, 0.1, 0.1]
tf.Tensor([[0.83850014 0.1217773  0.01942921 0.02029328]], shape=(1, 4), dtype=float32)
[0.6, 0.5, 0.1, 0.1]
tf.Tensor([[0.84572625 0.1150765  0.01924231 0.01995501]], shape=(1, 4), dtype=float32)
[0.5, 0.5, 0.1, 0.1]
tf.Tensor([[0.83129543 0.12541465 0.02114053 0.02214937]], shape=(1, 4), dtype=float

 71%|█████████████████████████████████████████████████████▎                     | 7101/10000 [3:12:16<40:41,  1.19it/s]


[0.2, 0.1, 0.1, 0.1]
tf.Tensor([[0.81345356 0.13088839 0.0271839  0.02847419]], shape=(1, 4), dtype=float32)


 72%|██████████████████████████████████████████████████████                     | 7201/10000 [3:13:14<30:50,  1.51it/s]

1.0705036789434554
99


 73%|██████████████████████████████████████████████████████▊                    | 7300/10000 [3:14:11<27:19,  1.65it/s]

1.0696548308973846
100


 73%|██████████████████████████████████████████████████████▊                    | 7301/10000 [3:14:12<28:30,  1.58it/s]

[0.4, 0.8, 0.2, 0.9]
tf.Tensor([[0.29884627 0.3285769  0.22895795 0.14361888]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.42062962 0.30236843 0.16090806 0.11609384]], shape=(1, 4), dtype=float32)


 74%|███████████████████████████████████████████████████████▌                   | 7401/10000 [3:15:06<34:28,  1.26it/s]

1.3730364309027172
100


 75%|████████████████████████████████████████████████████████▎                  | 7501/10000 [3:15:54<18:59,  2.19it/s]

1.6312877023647743
100
[0.8, 0.7, 0.7, 0.4]
tf.Tensor([[0.2738638  0.34401384 0.24868587 0.13343653]], shape=(1, 4), dtype=float32)
[0.8, 0.5, 0.7, 0.4]
tf.Tensor([[0.5170234  0.27866447 0.12091172 0.0834004 ]], shape=(1, 4), dtype=float32)
[0.8, 0.4, 0.7, 0.4]
tf.Tensor([[0.7351439  0.17917635 0.04736881 0.03831097]], shape=(1, 4), dtype=float32)


 76%|█████████████████████████████████████████████████████████                  | 7601/10000 [3:16:41<19:24,  2.06it/s]

2.2811993518352365
100


 77%|█████████████████████████████████████████████████████████▊                 | 7701/10000 [3:17:28<21:47,  1.76it/s]

2.935823126691351
100
[0.4, 0.2, 0.1, 0.1]
tf.Tensor([[0.1769396  0.35806713 0.31680325 0.14819002]], shape=(1, 4), dtype=float32)
[0.2, 0.2, 0.1, 0.1]
tf.Tensor([[0.3167785  0.33818552 0.21747304 0.12756296]], shape=(1, 4), dtype=float32)
[0.2, 0.0, 0.1, 0.1]
tf.Tensor([[0.5337836  0.2674593  0.11417721 0.08457987]], shape=(1, 4), dtype=float32)
[0.1, 0.0, 0.1, 0.1]
tf.Tensor([[0.7460459  0.17257051 0.04439855 0.03698512]], shape=(1, 4), dtype=float32)


 78%|██████████████████████████████████████████████████████████▌                | 7801/10000 [3:18:19<24:38,  1.49it/s]

1.5252316411696114
100


 79%|███████████████████████████████████████████████████████████▎               | 7901/10000 [3:19:05<21:58,  1.59it/s]

2.097134443788903
100
[0.3, 0.8, 0.6, 0.2]
tf.Tensor([[0.13492906 0.35594305 0.36289585 0.146232  ]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 0.6, 0.2]
tf.Tensor([[0.2746836  0.36459345 0.24318098 0.11754199]], shape=(1, 4), dtype=float32)
[0.3, 0.3, 0.6, 0.2]
tf.Tensor([[0.56773865 0.28352407 0.09645182 0.05228544]], shape=(1, 4), dtype=float32)
[0.4, 0.3, 0.6, 0.2]
tf.Tensor([[0.80909115 0.15184623 0.02484871 0.01421392]], shape=(1, 4), dtype=float32)
[0.5, 0.3, 0.6, 0.2]
tf.Tensor([[0.79024106 0.16527125 0.02818715 0.01630056]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.6, 0.2]
tf.Tensor([[0.7736703  0.17592695 0.03273551 0.01766719]], shape=(1, 4), dtype=float32)


 80%|████████████████████████████████████████████████████████████               | 8001/10000 [3:19:54<09:18,  3.58it/s]

2.54445381581578
99


 81%|████████████████████████████████████████████████████████████▊              | 8101/10000 [3:20:41<17:35,  1.80it/s]

3.11604528188995
100
[0.1, 0.7, 0.5, 0.7]
tf.Tensor([[0.11065163 0.35468674 0.39516205 0.13949962]], shape=(1, 4), dtype=float32)
[0.4, 0.7, 0.5, 0.7]
tf.Tensor([[0.1980787  0.37161535 0.3079978  0.12230814]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.5, 0.7]
tf.Tensor([[0.46416402 0.32003123 0.1419943  0.07381051]], shape=(1, 4), dtype=float32)


 82%|█████████████████████████████████████████████████████████████▌             | 8201/10000 [3:21:26<15:14,  1.97it/s]

3.3243321954925484
100


 83%|██████████████████████████████████████████████████████████████▎            | 8301/10000 [3:22:16<17:18,  1.64it/s]

2.8063992105297895
100
[0.9, 0.9, 0.2, 0.9]
tf.Tensor([[0.09804106 0.34285194 0.41977754 0.13932942]], shape=(1, 4), dtype=float32)
[0.6, 0.9, 0.2, 0.9]
tf.Tensor([[0.15549228 0.35796538 0.34774843 0.13879395]], shape=(1, 4), dtype=float32)
[0.4, 0.9, 0.2, 0.9]
tf.Tensor([[0.41081142 0.32760495 0.16583994 0.09574376]], shape=(1, 4), dtype=float32)
[0.3, 0.9, 0.2, 0.9]
tf.Tensor([[0.7700321  0.16653067 0.03491056 0.02852663]], shape=(1, 4), dtype=float32)


 84%|███████████████████████████████████████████████████████████████            | 8401/10000 [3:22:56<13:13,  2.02it/s]

4.244127894816128
100


 85%|███████████████████████████████████████████████████████████████▊           | 8501/10000 [3:23:42<10:06,  2.47it/s]

4.10049208265708
100
[0.8, 0.6, 0.7, 0.1]
tf.Tensor([[0.07158025 0.33226028 0.46183842 0.13432097]], shape=(1, 4), dtype=float32)
[0.8, 0.3, 0.7, 0.1]
tf.Tensor([[0.12674408 0.35695538 0.3863124  0.12998813]], shape=(1, 4), dtype=float32)
[0.8, 0.0, 0.7, 0.1]
tf.Tensor([[0.47536546 0.3072977  0.1386837  0.07865316]], shape=(1, 4), dtype=float32)
[0.7, 0.0, 0.7, 0.1]
tf.Tensor([[0.8684124  0.10486812 0.01468856 0.01203099]], shape=(1, 4), dtype=float32)


 86%|████████████████████████████████████████████████████████████████▌          | 8601/10000 [3:24:27<14:24,  1.62it/s]

2.3695456336577054
100


 87%|█████████████████████████████████████████████████████████████████▎         | 8700/10000 [3:25:11<10:41,  2.03it/s]

3.651146809743416
100
[0.9, 0.7, 0.1, 0.1]
tf.Tensor([[0.05515496 0.3316143  0.49539128 0.11783946]], shape=(1, 4), dtype=float32)
[0.6, 0.7, 0.1, 0.1]
tf.Tensor([[0.07344518 0.33704144 0.46644154 0.1230718 ]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.1, 0.1]
tf.Tensor([[0.28601682 0.36465457 0.24729317 0.10203543]], shape=(1, 4), dtype=float32)
[0.4, 0.4, 0.1, 0.1]
tf.Tensor([[0.8187159  0.13976637 0.02458522 0.01693252]], shape=(1, 4), dtype=float32)
[0.4, 0.3, 0.1, 0.1]
tf.Tensor([[0.82329494 0.13321155 0.02538734 0.01810618]], shape=(1, 4), dtype=float32)
[0.3, 0.3, 0.1, 0.1]
tf.Tensor([[0.7728614  0.16740003 0.03605652 0.02368207]], shape=(1, 4), dtype=float32)
[0.3, 0.2, 0.1, 0.1]
tf.Tensor([[0.7878888  0.15443479 0.03415339 0.02352302]], shape=(1, 4), dtype=float32)
[0.2, 0.2, 0.1, 0.1]
tf.Tensor([[0.74081403 0.18512805 0.04514633 0.02891161]], shape=(1, 4), dtype=float32)
[0.2, 0.1, 0.1, 0.1]


 87%|█████████████████████████████████████████████████████████████████▎         | 8701/10000 [3:25:11<10:22,  2.09it/s]

tf.Tensor([[0.7631352  0.16812424 0.04101924 0.02772128]], shape=(1, 4), dtype=float32)


 88%|██████████████████████████████████████████████████████████████████         | 8801/10000 [3:25:58<06:59,  2.86it/s]

3.4430875665990968
100


 89%|██████████████████████████████████████████████████████████████████▊        | 8901/10000 [3:26:40<07:20,  2.49it/s]

3.8264168146196953
100
[0.7, 0.2, 0.6, 0.2]
tf.Tensor([[0.0413103  0.31103882 0.543312   0.10433882]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.6, 0.2]
tf.Tensor([[0.05524544 0.3238431  0.50646985 0.11444171]], shape=(1, 4), dtype=float32)
[0.7, 0.2, 0.6, 0.2]
tf.Tensor([[0.1443309  0.3721649  0.37386835 0.10963586]], shape=(1, 4), dtype=float32)
[0.4, 0.2, 0.6, 0.2]
tf.Tensor([[0.72620815 0.19876654 0.05004648 0.02497889]], shape=(1, 4), dtype=float32)
[0.5, 0.2, 0.6, 0.2]
tf.Tensor([[0.75244856 0.18446428 0.04281622 0.02027099]], shape=(1, 4), dtype=float32)


 90%|███████████████████████████████████████████████████████████████████▌       | 9001/10000 [3:27:23<08:29,  1.96it/s]

2.8325997224090402
100


 91%|████████████████████████████████████████████████████████████████████▎      | 9101/10000 [3:28:05<05:59,  2.50it/s]

3.8802628593371833
100
[0.4, 0.7, 0.1, 0.1]
tf.Tensor([[0.02823104 0.2825974  0.5956834  0.0934882 ]], shape=(1, 4), dtype=float32)
[0.4, 0.4, 0.1, 0.1]
tf.Tensor([[0.0429085  0.29954052 0.55177915 0.10577184]], shape=(1, 4), dtype=float32)
[0.1, 0.4, 0.1, 0.1]
tf.Tensor([[0.0841673  0.3352256  0.47840413 0.10220301]], shape=(1, 4), dtype=float32)


 92%|█████████████████████████████████████████████████████████████████████      | 9201/10000 [3:28:53<07:55,  1.68it/s]

2.4013215610471432
99


 93%|█████████████████████████████████████████████████████████████████████▊     | 9301/10000 [3:29:39<05:38,  2.06it/s]

3.016279726806402
100
[0.4, 0.7, 0.7, 0.4]
tf.Tensor([[0.02571296 0.2731437  0.6133188  0.08782447]], shape=(1, 4), dtype=float32)
[0.7, 0.7, 0.7, 0.4]
tf.Tensor([[0.03101257 0.28080747 0.5948688  0.0933112 ]], shape=(1, 4), dtype=float32)


 94%|██████████████████████████████████████████████████████████████████████▌    | 9400/10000 [3:30:28<04:29,  2.23it/s]

2.995764841815749
100


 95%|███████████████████████████████████████████████████████████████████████▎   | 9501/10000 [3:31:29<04:41,  1.77it/s]

1.9491451568471176
100
[0.7, 0.3, 0.1, 0.1]
tf.Tensor([[0.0192521  0.2567615  0.6531954  0.07079102]], shape=(1, 4), dtype=float32)
[0.4, 0.3, 0.1, 0.1]
tf.Tensor([[0.02848057 0.27369788 0.6145402  0.0832813 ]], shape=(1, 4), dtype=float32)
[0.1, 0.3, 0.1, 0.1]
tf.Tensor([[0.04491468 0.30479032 0.56308806 0.08720694]], shape=(1, 4), dtype=float32)
[0.1, 0.0, 0.1, 0.1]
tf.Tensor([[0.38588282 0.37112114 0.1957373  0.0472587 ]], shape=(1, 4), dtype=float32)


 96%|████████████████████████████████████████████████████████████████████████   | 9600/10000 [3:32:24<04:20,  1.53it/s]

-0.33433559178366723
100


 97%|████████████████████████████████████████████████████████████████████████▊  | 9701/10000 [3:33:23<03:45,  1.33it/s]

-0.21075767434868214
100
[0.2, 0.5, 0.2, 0.9]
tf.Tensor([[0.01073484 0.21830864 0.7184237  0.05253278]], shape=(1, 4), dtype=float32)
[0.2, 0.8, 0.2, 0.9]
tf.Tensor([[0.01506078 0.23312251 0.69108987 0.0607268 ]], shape=(1, 4), dtype=float32)
[0.2, 1.0, 0.2, 0.9]
tf.Tensor([[0.02033024 0.24872714 0.66935146 0.06159122]], shape=(1, 4), dtype=float32)
[0.2, 0.7, 0.2, 0.9]
tf.Tensor([[0.19783883 0.4035843  0.35063604 0.04794081]], shape=(1, 4), dtype=float32)


 98%|█████████████████████████████████████████████████████████████████████████▌ | 9801/10000 [3:34:29<02:34,  1.29it/s]

-1.871527848345127
99


 99%|██████████████████████████████████████████████████████████████████████████▎| 9900/10000 [3:35:47<01:03,  1.58it/s]

-4.017943176209828
94
[0.6, 0.7, 0.6, 0.2]
tf.Tensor([[0.00528828 0.17147344 0.7860138  0.03722442]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.00639358 0.18035056 0.77308106 0.04017481]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.01326304 0.21159863 0.7242547  0.05088358]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.1278641  0.3461463  0.4854215  0.04056813]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.14036207 0.34577325 0.46725863 0.04660609]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.11982757 0.34137014 0.4935509  0.0452515 ]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.14036207 0.34577325 0.46725863 0.04660609]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.11982757 0.34137014 0.4935509  0.0452515 ]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.14036207 0.34577325 0.46725863 0.04660609]], shape=(1, 4), dtype=float3

 99%|██████████████████████████████████████████████████████████████████████████▎| 9901/10000 [3:35:48<01:00,  1.65it/s]


[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.11982757 0.34137014 0.4935509  0.0452515 ]], shape=(1, 4), dtype=float32)
[0.6, 0.1, 0.6, 0.2]
tf.Tensor([[0.14036207 0.34577325 0.46725863 0.04660609]], shape=(1, 4), dtype=float32)
[0.6, 0.4, 0.6, 0.2]
tf.Tensor([[0.11982757 0.34137014 0.4935509  0.0452515 ]], shape=(1, 4), dtype=float32)


100%|██████████████████████████████████████████████████████████████████████████| 10000/10000 [3:37:20<00:00,  1.30s/it]


In [6]:
# Invoke trial() on `agent` (created in an earlier cell; presumably an Agent
# instance that has already been through learn() -- TODO confirm).
# NOTE(review): in the saved output of this cell, each printed 4-element list
# looks like [x, y, dest_x, dest_y] and each (1, 4) tensor like an action
# distribution -- confirm against trial(). The printed lists end up
# alternating between two values, so the rollout appears to be stuck
# oscillating rather than reaching the destination; verify.
agent.trial()

[0.3, 0.2, 0.2, 0.9]
tf.Tensor([[0.00582913 0.17895536 0.7793471  0.03586836]], shape=(1, 4), dtype=float32)
[0.3, 0.5, 0.2, 0.9]
tf.Tensor([[0.00809922 0.19222458 0.7576188  0.04205743]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.0160022  0.2217195  0.7092075  0.05307089]], shape=(1, 4), dtype=float32)
[0.0, 0.8, 0.2, 0.9]
tf.Tensor([[0.0756771  0.30847886 0.57068694 0.04515709]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.10498974 0.32770896 0.5189384  0.04836291]], shape=(1, 4), dtype=float32)
[0.0, 0.8, 0.2, 0.9]
tf.Tensor([[0.11071416 0.3404957  0.5080102  0.04077994]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.11487425 0.33421937 0.5067677  0.04413863]], shape=(1, 4), dtype=float32)
[0.0, 0.8, 0.2, 0.9]
tf.Tensor([[0.11071416 0.3404957  0.5080102  0.04077994]], shape=(1, 4), dtype=float32)
[0.3, 0.8, 0.2, 0.9]
tf.Tensor([[0.11487425 0.33421937 0.5067677  0.04413863]], shape=(1, 4), dtype=float32)
[0.0, 0.8, 0.2, 0.9