In [1]:
from sac import SAC
from replay_memory import ReplayMemory
import gym
from gym import spaces
from addict import Dict
import numpy as np
import itertools
from scipy.special import softmax
import random
from tqdm import tqdm

In [2]:
WARM_UP = 0
MEMORY_SIZE = 10000
ACT_THRESHOLD = 0
BATCH_SIZE = 64
UPDATES_PER_STEP = 1
PRINT_FREQ = 1000
TEST_EPISODES = 1000
MIN_REQUIRED_UPDATES = 0

In [3]:
num_inputs = 4
num_hidden = 4
num_outputs = 1
bandwidth = 3

In [4]:
env = gym.make('CartPole-v0')
env.action_space.shape = (1,)
env.action_space.high = np.array([1])
env.action_space.low = np.array([0])

In [5]:
input_layer_action_space = gym.spaces.Box(low=0, high=1, shape=(num_hidden + bandwidth,))
hidden_layer_action_space = gym.spaces.Box(low=0, high=1, shape=(num_outputs + bandwidth,))
output_layer_action_space = env.action_space



In [6]:
args = Dict()
args.gamma = 0.99
args.tau = 1
args.alpha = 0.0
args.policy = 'Gaussian'
args.target_update_interval = 400
args.automatic_entropy_tuning = False
args.cuda = False
args.hidden_size = 256
args.lr = 0.003

In [7]:
input_layer = [SAC(1, input_layer_action_space, args) for _ in range(num_inputs)]
input_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_inputs)]
input_updates = [0 for _ in range(num_inputs)]
hidden_layer = [SAC(bandwidth, hidden_layer_action_space, args) for _ in range(num_hidden)]
hidden_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_hidden)]
hidden_updates = [0 for _ in range(num_hidden)]
output_layer = [SAC(bandwidth, output_layer_action_space, args) for _ in range(num_outputs)]
output_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_outputs)]
output_updates = [0 for _ in range(num_outputs)]

In [8]:
def eval_layers(input_state):
    isa = [tuple() for _ in range(num_inputs)]
    hsa = [tuple() for _ in range(num_hidden)]
    osa = [tuple() for _ in range(num_outputs)]
    input_actions = [agent.select_action(input_state[i:i+1]) for i, agent in enumerate(input_layer)]
    for i, input_action in enumerate(input_actions):
        input_action[:num_hidden] = softmax(input_action[:num_hidden])
        if max(input_action[:num_hidden]) > ACT_THRESHOLD:
            isa[i] = (input_state[i:i+1], input_action, True)
            input_actions[i] = (np.argmax(input_action[:num_hidden]), input_action[num_hidden:])
        else:
            isa[i] = (input_state[i:i+1], input_action, False)
            input_actions[i] = tuple()
    hidden_state = [tuple() for _ in range(num_hidden)]
    for input_action in input_actions:
        try:
            hidden_i, hidden_msg = input_action
        except:
            continue
        try:
            hidden_state[hidden_i] += hidden_msg
        except:
            hidden_state[hidden_i] = hidden_msg
    hidden_actions = []
    for i, _hidden_state in enumerate(hidden_state):
        if len(_hidden_state):
            hidden_action = hidden_layer[i].select_action(softmax(hidden_state[i])) 
            if hidden_action[0] > ACT_THRESHOLD:
                hsa[i] = (_hidden_state, hidden_action, True)
                hidden_actions.append((0, hidden_action[1:]))
            else:
                hsa[i] = (_hidden_state, hidden_action, False)
                hidden_actions.append(tuple())
    output_state = [tuple() for _ in range(num_outputs)]
    for hidden_action in hidden_actions:
        try:
            output_i, output_msg = hidden_action
        except:
            continue
        try:
            output_state[output_i] += output_msg
        except:
            output_state[output_i] = output_msg
    output_actions = [agent.select_action(softmax(output_state[i])) for i, agent in enumerate(output_layer) if len(output_state[i])]
    if output_actions:
        osa = [(output_state[0], output_actions[0], True)]
    else:
        osa = [tuple()]
    inner_activations = Dict()
    inner_activations.isa = isa
    inner_activations.osa = osa
    inner_activations.hsa = hsa
    try:
        if output_actions[0] > 0.5:
            return 1, inner_activations
        else:
            return 0, inner_activations
    except:
        return random.randint(0, 1), inner_activations

In [9]:
def sample_layers(input_state):
    isa = [tuple() for _ in range(num_inputs)]
    hsa = [tuple() for _ in range(num_hidden)]
    osa = [tuple() for _ in range(num_outputs)]
    input_actions = [input_layer_action_space.sample() for _ in input_layer]
    for i, input_action in enumerate(input_actions):
        input_action[:num_hidden] = softmax(input_action[:num_hidden])
        if max(input_action[:num_hidden]) > ACT_THRESHOLD:
            isa[i] = (input_state[i:i+1], input_action, True)
            input_actions[i] = (np.argmax(input_action[:num_hidden]), input_action[num_hidden:])
        else:
            isa[i] = (input_state[i:i+1], input_action, False)
            input_actions[i] = tuple()
    hidden_state = [tuple() for _ in range(num_hidden)]
    for input_action in input_actions:
        try:
            hidden_i, hidden_msg = input_action
        except:
            continue
        try:
            hidden_state[hidden_i] += hidden_msg
        except:
            hidden_state[hidden_i] = hidden_msg
    hidden_actions = []
    for i, _hidden_state in enumerate(hidden_state):
        if len(_hidden_state):
            hidden_action = hidden_layer_action_space.sample()
            if hidden_action[0] > ACT_THRESHOLD:
                hidden_actions.append((0, hidden_action[1:]))
                hsa[i] = (_hidden_state, hidden_action, True)
            else:
                hidden_actions.append(tuple())
                hsa[i] = (_hidden_state, hidden_action, False)
    output_state = [tuple() for _ in range(num_outputs)]
    for hidden_action in hidden_actions:
        try:
            output_i, output_msg = hidden_action
        except:
            continue
        try:
            output_state[output_i] += output_msg
        except:
            output_state[output_i] = output_msg
    output_actions = [np.array([output_layer_action_space.sample()]) for i, _ in enumerate(output_layer) if len(output_state[i])]
    osa = [(output_state[0], output_actions[0], True)]
    inner_activations = Dict()
    inner_activations.isa = isa
    inner_activations.osa = osa
    inner_activations.hsa = hsa
    try:
        if output_actions[0] > 0.5:
            return 1, inner_activations
        else:
            return 0, inner_activations
    except:
        return random.randint(0, 1), inner_activations

In [10]:
def push_memory(inner_activations, reward, next_state, mask):
    isa = inner_activations.isa
    hsa = inner_activations.hsa
    osa = inner_activations.osa
    for i, ((state, action, flag), mem) in enumerate(zip(isa, input_memory)):
        _next_state = next_state[i:i+1]
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    next_action, next_inner_activations = eval_layers(next_state)
    for hsa, mem, next_hsa in zip(hsa, hidden_memory, next_inner_activations.hsa):
        try:
            state, action, flag = hsa
            _next_state, _, _ = next_hsa
        except:
            continue
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    for osa, mem, next_osa in zip(osa, output_memory, next_inner_activations.osa):
        try:
            state, action, flag = osa
            _next_state, _, _ = next_osa
        except:
            continue
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    return next_action, next_inner_activations

In [11]:
def norm_stats(stats):
    for k in stats:
        stats[k] /= stats.cnt
    return stats
def average_stats(lst):
    avg = Dict()
    for stats in lst:
        for k in stats:
            avg[k] += stats[k]
        avg.cnt += 1
    return norm_stats(avg)
def get_avg_loss(train_stats):
    try:
        input_loss = (train_stats.input.critic_1_loss + train_stats.input.critic_2_loss)/2
    except:
        input_loss = float('nan')
    try:
        hidden_loss = (train_stats.hidden.critic_1_loss + train_stats.hidden.critic_2_loss)/2
    except:
        hidden_loss = float('nan')
    try:
        output_loss = (train_stats.output.critic_1_loss + train_stats.output.critic_2_loss)/2
    except:
        output_loss = float('nan')
    return input_loss, hidden_loss, output_loss

In [None]:
total_steps = 0
last_total_steps = 0
total_reward = 0.0
act_stats = Dict()
act_stats.input = np.array([0.0 for _ in input_layer])
act_stats.hidden = np.array([0.0 for _ in hidden_layer])
act_stats.output = np.array([0.0 for _ in output_layer])
for i_episode in itertools.count(1):
    episode_reward = 0
    episode_steps = 0
    done = False
    state = env.reset()
    inner_activations = {}
    action = None
    train_stats = Dict()
    train_stats.input = []
    train_stats.hidden = []
    train_stats.output = []
    while not done:
        if WARM_UP > total_steps:
            action, inner_activations = sample_layers(state)  # Sample random action
        else:
            if total_steps == WARM_UP:
                print('--WARM UP ENDED--'*3)
            if not inner_activations:
                action, inner_activations = eval_layers(state)  # Sample action from policy
        for i, isa in enumerate(inner_activations.isa):
            if len(isa):
                act_stats.input[i] += 1
        for i, hsa in enumerate(inner_activations.hsa):
            if len(hsa):
                act_stats.hidden[i] += 1
        for i, osa in enumerate(inner_activations.osa):
            if len(osa):
                act_stats.output[i] += 1

        # Number of updates per step in environment
        for i in range(UPDATES_PER_STEP):
            # Update parameters of all the networks
            for j, (agent, memory, updates) in enumerate(zip(input_layer, input_memory, input_updates)):
                input_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.isa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    input_stats.critic_1_loss += critic_1_loss
                    input_stats.critic_2_loss += critic_2_loss
                    input_stats.policy_loss += policy_loss
                    input_stats.ent_loss += ent_loss
                    input_stats.alpha += alpha
                    input_stats.cnt += 1
                    input_updates[j] += 1
                norm_stats(input_stats)
                train_stats.input.append(input_stats)
            for j, (agent, memory, updates) in enumerate(zip(hidden_layer, hidden_memory, hidden_updates)):
                hidden_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.hsa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    hidden_stats.critic_1_loss += critic_1_loss
                    hidden_stats.critic_2_loss += critic_2_loss
                    hidden_stats.policy_loss += policy_loss
                    hidden_stats.ent_loss += ent_loss
                    hidden_stats.alpha += alpha
                    hidden_stats.cnt += 1
                    hidden_updates[j] += 1
                norm_stats(hidden_stats)
                train_stats.hidden.append(hidden_stats)
            for j, (agent, memory, updates) in enumerate(zip(output_layer, output_memory, output_updates)):
                output_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.osa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    output_stats.critic_1_loss += critic_1_loss
                    output_stats.critic_2_loss += critic_2_loss
                    output_stats.policy_loss += policy_loss
                    output_stats.ent_loss += ent_loss
                    output_stats.alpha += alpha
                    output_stats.cnt += 1
                    output_updates[j] += 1
                norm_stats(output_stats)
                train_stats.output.append(output_stats)

        next_state, reward, done, _ = env.step(action) # Step
        episode_steps += 1
        total_steps += 1
        episode_reward += reward

        # Ignore the "done" signal if it comes from hitting the time horizon.
        # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
        mask = 1 if episode_steps == env._max_episode_steps else float(not done)
        action, inner_activations = push_memory(inner_activations, reward, next_state, mask)

        state = next_state
    total_reward += episode_reward
    train_stats.input = average_stats(train_stats.input)
    train_stats.hidden = average_stats(train_stats.hidden)
    train_stats.output = average_stats(train_stats.output)
    loss = get_avg_loss(train_stats)
    print("Episode: {}, episode steps: {}, avg. reward: {}, loss_i: {}, loss_h: {}, loss_o: {}".format(i_episode, episode_steps, round(total_reward/((i_episode % PRINT_FREQ) or PRINT_FREQ), 2), *list(map(lambda x : round(x, 3), loss))))
    if i_episode % args.target_update_interval == 0 and total_steps > 0:
        ptr = -1
        input_update_map = np.array(input_updates) >= MIN_REQUIRED_UPDATES
        hidden_update_map = np.array(hidden_updates) >= MIN_REQUIRED_UPDATES
        output_update_map = np.array(output_updates) >= MIN_REQUIRED_UPDATES
        for j, agent, memory, updates in zip(list(range(num_inputs)) + list(range(num_hidden)) + list(range(num_outputs)), input_layer + hidden_layer + output_layer, input_memory + hidden_memory + output_memory, input_updates + hidden_updates + output_updates):
            if j == 0:
                ptr += 1
            if updates >= MIN_REQUIRED_UPDATES:
                agent.update_target()
                memory.empty()
                if ptr == 0:
                    input_updates[j] = 0
                elif ptr == 1:
                    hidden_updates[j] = 0
                else:
                    output_updates[j] = 0
        print('--TARGET UPDATED--'*3)
        print('i: %s' % input_update_map)
        print('h: %s' % hidden_update_map)
        print('o: %s' % output_update_map)
        print('-'*80)
    if i_episode % PRINT_FREQ == 0:
        act_stats.input /= total_steps - last_total_steps
        act_stats.hidden /= total_steps - last_total_steps
        act_stats.output /= total_steps - last_total_steps
        last_total_steps = total_steps
        total_reward = 0
        print("-"*80)
        print('Train episodes: %d' % PRINT_FREQ)
        print('-'*80)
        print('LOSS')
        print('i: %s' % str(train_stats.input))
        print('h: %s' % str(train_stats.hidden))
        print('o: %s' % str(train_stats.output))
        print('ACTIVATIONS')
        print('i: %s' % str(list(map(lambda x : round(x, 2), act_stats.input))))
        print('h: %s' % str(list(map(lambda x : round(x, 2), act_stats.hidden))))
        print('o: %s' % str(list(map(lambda x : round(x, 2), act_stats.output))))
        print('UPDATES')
        print('i: %s' % input_updates)
        print('h: %s' % hidden_updates)
        print('o: %s' % output_updates)
        print('MEMORY')
        print('i: %s' % str(list(map(len, input_memory))))
        print('h: %s' % str(list(map(len, hidden_memory))))
        print('o: %s' % str(list(map(len, output_memory))))
        act_stats.input = np.array([0.0 for _ in input_layer])
        act_stats.hidden = np.array([0.0 for _ in hidden_layer])
        act_stats.output = np.array([0.0 for _ in output_layer])
        print('\n')
        avg_reward = 0.
        max_reward = 0.
        for _  in tqdm(range(TEST_EPISODES)):
            state = env.reset()
            episode_reward = 0
            done = False
            while not done:
                action, _ = eval_layers(state)

                next_state, reward, done, _ = env.step(action)
                episode_reward += reward


                state = next_state
            avg_reward += episode_reward
            max_reward = max(episode_reward, max_reward)
        avg_reward /= TEST_EPISODES

        print("-"*80)
        print("Test Episodes: {}, Avg. Reward: {}, Max. Reward: {}".format(TEST_EPISODES, round(avg_reward, 2), round(max_reward, 2)))
        print("-"*80)

Episode: 1, episode steps: 16, avg. reward: 16.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2, episode steps: 24, avg. reward: 20.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 3, episode steps: 54, avg. reward: 31.33, loss_i: 61.477, loss_h: nan, loss_o: 6.253
Episode: 4, episode steps: 20, avg. reward: 28.5, loss_i: 0.081, loss_h: nan, loss_o: 0.03
Episode: 5, episode steps: 33, avg. reward: 29.4, loss_i: 0.034, loss_h: nan, loss_o: 0.028
Episode: 6, episode steps: 33, avg. reward: 30.0, loss_i: 0.02, loss_h: nan, loss_o: 0.025
Episode: 7, episode steps: 40, avg. reward: 31.43, loss_i: 0.016, loss_h: nan, loss_o: 0.022
Episode: 8, episode steps: 9, avg. reward: 28.62, loss_i: 0.015, loss_h: nan, loss_o: 0.021
Episode: 9, episode steps: 36, avg. reward: 29.44, loss_i: 0.013, loss_h: nan, loss_o: 0.019
Episode: 10, episode steps: 10, avg. reward: 27.5, loss_i: 0.013, loss_h: nan, loss_o: 0.018
Episode: 11, episode steps: 30, avg. reward: 27.73, loss_i: 0.012, loss_h: nan, loss_

Episode: 88, episode steps: 45, avg. reward: 22.52, loss_i: 0.002, loss_h: 2.072, loss_o: 0.001
Episode: 89, episode steps: 10, avg. reward: 22.38, loss_i: 0.002, loss_h: 0.439, loss_o: 0.001
Episode: 90, episode steps: 37, avg. reward: 22.54, loss_i: 0.002, loss_h: 1.563, loss_o: 0.001
Episode: 91, episode steps: 13, avg. reward: 22.44, loss_i: 0.002, loss_h: 0.582, loss_o: 0.001
Episode: 92, episode steps: 20, avg. reward: 22.41, loss_i: 0.002, loss_h: 0.833, loss_o: 0.001
Episode: 93, episode steps: 22, avg. reward: 22.41, loss_i: 0.002, loss_h: 0.882, loss_o: 0.001
Episode: 94, episode steps: 27, avg. reward: 22.46, loss_i: 0.002, loss_h: 0.004, loss_o: 0.001
Episode: 95, episode steps: 61, avg. reward: 22.86, loss_i: 0.002, loss_h: 2.438, loss_o: 0.001
Episode: 96, episode steps: 43, avg. reward: 23.07, loss_i: 0.002, loss_h: 1.744, loss_o: 0.001
Episode: 97, episode steps: 21, avg. reward: 23.05, loss_i: 0.002, loss_h: 0.825, loss_o: 0.001
Episode: 98, episode steps: 13, avg. rew

Episode: 173, episode steps: 23, avg. reward: 22.9, loss_i: 0.001, loss_h: 0.532, loss_o: 0.0
Episode: 174, episode steps: 11, avg. reward: 22.83, loss_i: 0.001, loss_h: 0.256, loss_o: 0.001
Episode: 175, episode steps: 16, avg. reward: 22.79, loss_i: 0.001, loss_h: 0.345, loss_o: 0.0
Episode: 176, episode steps: 31, avg. reward: 22.84, loss_i: 0.001, loss_h: 0.002, loss_o: 0.001
Episode: 177, episode steps: 36, avg. reward: 22.92, loss_i: 0.001, loss_h: 0.774, loss_o: 0.0
Episode: 178, episode steps: 33, avg. reward: 22.97, loss_i: 0.001, loss_h: 0.002, loss_o: 0.0
Episode: 179, episode steps: 16, avg. reward: 22.93, loss_i: 0.001, loss_h: 0.332, loss_o: 0.001
Episode: 180, episode steps: 12, avg. reward: 22.87, loss_i: 0.001, loss_h: 0.262, loss_o: 0.0
Episode: 181, episode steps: 20, avg. reward: 22.86, loss_i: 0.001, loss_h: 0.427, loss_o: 0.0
Episode: 182, episode steps: 46, avg. reward: 22.98, loss_i: 0.001, loss_h: 0.002, loss_o: 0.0
Episode: 183, episode steps: 16, avg. reward:

Episode: 260, episode steps: 40, avg. reward: 22.53, loss_i: 0.0, loss_h: 0.717, loss_o: 0.0
Episode: 261, episode steps: 34, avg. reward: 22.58, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 262, episode steps: 38, avg. reward: 22.64, loss_i: 0.0, loss_h: 0.607, loss_o: 0.0
Episode: 263, episode steps: 13, avg. reward: 22.6, loss_i: 0.001, loss_h: 0.191, loss_o: 0.0
Episode: 264, episode steps: 13, avg. reward: 22.56, loss_i: 0.001, loss_h: 0.204, loss_o: 0.0
Episode: 265, episode steps: 12, avg. reward: 22.52, loss_i: 0.0, loss_h: 0.236, loss_o: 0.0
Episode: 266, episode steps: 19, avg. reward: 22.51, loss_i: 0.0, loss_h: 0.002, loss_o: 0.0
Episode: 267, episode steps: 9, avg. reward: 22.46, loss_i: 0.0, loss_h: 0.156, loss_o: 0.0
Episode: 268, episode steps: 11, avg. reward: 22.42, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 269, episode steps: 24, avg. reward: 22.42, loss_i: 0.0, loss_h: 0.369, loss_o: 0.0
Episode: 270, episode steps: 12, avg. reward: 22.39, loss_i: 0.0, lo

Episode: 349, episode steps: 18, avg. reward: 22.73, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 350, episode steps: 14, avg. reward: 22.7, loss_i: 0.0, loss_h: 0.194, loss_o: 0.0
Episode: 351, episode steps: 36, avg. reward: 22.74, loss_i: 0.0, loss_h: 0.493, loss_o: 0.0
Episode: 352, episode steps: 26, avg. reward: 22.75, loss_i: 0.0, loss_h: 0.382, loss_o: 0.0
Episode: 353, episode steps: 16, avg. reward: 22.73, loss_i: 0.0, loss_h: 0.199, loss_o: 0.0
Episode: 354, episode steps: 12, avg. reward: 22.7, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 355, episode steps: 15, avg. reward: 22.68, loss_i: 0.0, loss_h: 0.179, loss_o: 0.0
Episode: 356, episode steps: 18, avg. reward: 22.67, loss_i: 0.0, loss_h: 0.272, loss_o: 0.0
Episode: 357, episode steps: 41, avg. reward: 22.72, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 358, episode steps: 23, avg. reward: 22.72, loss_i: 0.0, loss_h: 0.001, loss_o: 0.0
Episode: 359, episode steps: 17, avg. reward: 22.7, loss_i: 0.0, loss_h:

Episode: 434, episode steps: 34, avg. reward: 22.36, loss_i: 0.078, loss_h: 6.345, loss_o: 0.025
Episode: 435, episode steps: 9, avg. reward: 22.33, loss_i: 0.065, loss_h: 1.241, loss_o: 0.023
Episode: 436, episode steps: 16, avg. reward: 22.31, loss_i: 0.07, loss_h: 2.354, loss_o: 0.024
Episode: 437, episode steps: 34, avg. reward: 22.34, loss_i: 0.073, loss_h: 5.316, loss_o: 0.028
Episode: 438, episode steps: 30, avg. reward: 22.36, loss_i: 0.071, loss_h: 0.016, loss_o: 0.025
Episode: 439, episode steps: 27, avg. reward: 22.37, loss_i: 0.068, loss_h: 0.016, loss_o: 0.026
Episode: 440, episode steps: 21, avg. reward: 22.37, loss_i: 0.065, loss_h: 3.109, loss_o: 0.026
Episode: 441, episode steps: 12, avg. reward: 22.34, loss_i: 0.069, loss_h: 1.729, loss_o: 0.026
Episode: 442, episode steps: 13, avg. reward: 22.32, loss_i: 0.077, loss_h: 1.999, loss_o: 0.028
Episode: 443, episode steps: 27, avg. reward: 22.33, loss_i: 0.069, loss_h: 0.013, loss_o: 0.027
Episode: 444, episode steps: 16,

Episode: 519, episode steps: 26, avg. reward: 22.15, loss_i: 0.067, loss_h: 0.01, loss_o: 0.025
Episode: 520, episode steps: 18, avg. reward: 22.14, loss_i: 0.062, loss_h: 0.011, loss_o: 0.025
Episode: 521, episode steps: 25, avg. reward: 22.14, loss_i: 0.069, loss_h: 0.01, loss_o: 0.021
Episode: 522, episode steps: 27, avg. reward: 22.15, loss_i: 0.065, loss_h: 0.011, loss_o: 0.02
Episode: 523, episode steps: 12, avg. reward: 22.13, loss_i: 0.069, loss_h: 0.012, loss_o: 0.021
Episode: 524, episode steps: 42, avg. reward: 22.17, loss_i: 0.066, loss_h: 4.71, loss_o: 0.021
Episode: 525, episode steps: 19, avg. reward: 22.17, loss_i: 0.068, loss_h: 2.357, loss_o: 0.02
Episode: 526, episode steps: 25, avg. reward: 22.17, loss_i: 0.063, loss_h: 0.01, loss_o: 0.02
Episode: 527, episode steps: 21, avg. reward: 22.17, loss_i: 0.071, loss_h: 2.384, loss_o: 0.023
Episode: 528, episode steps: 26, avg. reward: 22.18, loss_i: 0.068, loss_h: 3.098, loss_o: 0.021
Episode: 529, episode steps: 17, avg.

Episode: 604, episode steps: 21, avg. reward: 21.78, loss_i: 0.061, loss_h: 2.49, loss_o: 0.021
Episode: 605, episode steps: 11, avg. reward: 21.76, loss_i: 0.062, loss_h: 1.216, loss_o: 0.034
Episode: 606, episode steps: 31, avg. reward: 21.78, loss_i: 0.067, loss_h: 0.011, loss_o: 0.024
Episode: 607, episode steps: 30, avg. reward: 21.79, loss_i: 0.06, loss_h: 3.779, loss_o: 0.024
Episode: 608, episode steps: 25, avg. reward: 21.8, loss_i: 0.065, loss_h: 0.01, loss_o: 0.03
Episode: 609, episode steps: 8, avg. reward: 21.77, loss_i: 0.065, loss_h: 1.076, loss_o: 0.021
Episode: 610, episode steps: 15, avg. reward: 21.76, loss_i: 0.068, loss_h: 0.01, loss_o: 0.021
Episode: 611, episode steps: 17, avg. reward: 21.75, loss_i: 0.062, loss_h: 1.822, loss_o: 0.024
Episode: 612, episode steps: 11, avg. reward: 21.74, loss_i: 0.079, loss_h: 0.011, loss_o: 0.022
Episode: 613, episode steps: 21, avg. reward: 21.74, loss_i: 0.07, loss_h: 0.01, loss_o: 0.021
Episode: 614, episode steps: 15, avg. r

Episode: 689, episode steps: 12, avg. reward: 22.11, loss_i: 0.061, loss_h: 1.383, loss_o: 0.023
Episode: 690, episode steps: 32, avg. reward: 22.13, loss_i: 0.065, loss_h: 0.009, loss_o: 0.021
Episode: 691, episode steps: 16, avg. reward: 22.12, loss_i: 0.065, loss_h: 0.01, loss_o: 0.016
Episode: 692, episode steps: 20, avg. reward: 22.12, loss_i: 0.064, loss_h: 0.008, loss_o: 0.02
Episode: 693, episode steps: 25, avg. reward: 22.12, loss_i: 0.066, loss_h: 2.444, loss_o: 0.019
Episode: 694, episode steps: 12, avg. reward: 22.11, loss_i: 0.059, loss_h: 1.348, loss_o: 0.024
Episode: 695, episode steps: 22, avg. reward: 22.11, loss_i: 0.071, loss_h: 2.565, loss_o: 0.021
Episode: 696, episode steps: 20, avg. reward: 22.1, loss_i: 0.062, loss_h: 2.031, loss_o: 0.024
Episode: 697, episode steps: 76, avg. reward: 22.18, loss_i: 0.064, loss_h: 7.039, loss_o: 0.021
Episode: 698, episode steps: 10, avg. reward: 22.16, loss_i: 0.062, loss_h: 1.105, loss_o: 0.026
Episode: 699, episode steps: 11, 

Episode: 774, episode steps: 17, avg. reward: 22.11, loss_i: 0.067, loss_h: 1.672, loss_o: 0.025
Episode: 775, episode steps: 18, avg. reward: 22.11, loss_i: 0.061, loss_h: 0.01, loss_o: 0.023
Episode: 776, episode steps: 22, avg. reward: 22.11, loss_i: 0.059, loss_h: 2.664, loss_o: 0.024
Episode: 777, episode steps: 26, avg. reward: 22.11, loss_i: 0.067, loss_h: 2.526, loss_o: 0.02
Episode: 778, episode steps: 44, avg. reward: 22.14, loss_i: 0.061, loss_h: 4.207, loss_o: 0.024
Episode: 779, episode steps: 15, avg. reward: 22.13, loss_i: 0.071, loss_h: 0.009, loss_o: 0.022
Episode: 780, episode steps: 17, avg. reward: 22.12, loss_i: 0.059, loss_h: 0.009, loss_o: 0.023
Episode: 781, episode steps: 42, avg. reward: 22.15, loss_i: 0.062, loss_h: 4.591, loss_o: 0.017
Episode: 782, episode steps: 16, avg. reward: 22.14, loss_i: 0.071, loss_h: 1.443, loss_o: 0.022
Episode: 783, episode steps: 19, avg. reward: 22.14, loss_i: 0.066, loss_h: 0.009, loss_o: 0.022
Episode: 784, episode steps: 45,

Episode: 857, episode steps: 26, avg. reward: 22.23, loss_i: 0.41, loss_h: 10.854, loss_o: 0.064
Episode: 858, episode steps: 12, avg. reward: 22.22, loss_i: 0.366, loss_h: 0.028, loss_o: 0.051
Episode: 859, episode steps: 24, avg. reward: 22.22, loss_i: 0.371, loss_h: 9.405, loss_o: 0.08
Episode: 860, episode steps: 38, avg. reward: 22.24, loss_i: 0.356, loss_h: 12.642, loss_o: 0.066
Episode: 861, episode steps: 53, avg. reward: 22.28, loss_i: 0.37, loss_h: 18.474, loss_o: 0.058
Episode: 862, episode steps: 15, avg. reward: 22.27, loss_i: 0.386, loss_h: 0.028, loss_o: 0.081
Episode: 863, episode steps: 19, avg. reward: 22.27, loss_i: 0.386, loss_h: 0.033, loss_o: 0.077
Episode: 864, episode steps: 21, avg. reward: 22.26, loss_i: 0.398, loss_h: 0.03, loss_o: 0.066
Episode: 865, episode steps: 19, avg. reward: 22.26, loss_i: 0.347, loss_h: 6.711, loss_o: 0.071
Episode: 866, episode steps: 14, avg. reward: 22.25, loss_i: 0.41, loss_h: 0.029, loss_o: 0.078
Episode: 867, episode steps: 19,

Episode: 942, episode steps: 9, avg. reward: 22.04, loss_i: 0.374, loss_h: 0.046, loss_o: 0.058
Episode: 943, episode steps: 18, avg. reward: 22.04, loss_i: 0.432, loss_h: 8.659, loss_o: 0.079
Episode: 944, episode steps: 33, avg. reward: 22.05, loss_i: 0.42, loss_h: 14.651, loss_o: 0.072
Episode: 945, episode steps: 20, avg. reward: 22.05, loss_i: 0.379, loss_h: 8.382, loss_o: 0.074
Episode: 946, episode steps: 16, avg. reward: 22.04, loss_i: 0.435, loss_h: 0.043, loss_o: 0.086
Episode: 947, episode steps: 26, avg. reward: 22.05, loss_i: 0.389, loss_h: 10.716, loss_o: 0.069
Episode: 948, episode steps: 16, avg. reward: 22.04, loss_i: 0.429, loss_h: 6.944, loss_o: 0.07
Episode: 949, episode steps: 18, avg. reward: 22.04, loss_i: 0.433, loss_h: 7.599, loss_o: 0.085
Episode: 950, episode steps: 19, avg. reward: 22.03, loss_i: 0.424, loss_h: 0.041, loss_o: 0.085
Episode: 951, episode steps: 16, avg. reward: 22.03, loss_i: 0.397, loss_h: 0.037, loss_o: 0.079
Episode: 952, episode steps: 84

  0%|          | 2/1000 [00:00<01:31, 10.91it/s]

Episode: 1000, episode steps: 14, avg. reward: 22.03, loss_i: 0.43, loss_h: 5.671, loss_o: 0.096
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 0.4300456516045545, 'critic_2_loss': 0.4302270076171096, 'policy_loss': -4.848784165722983, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 5.672813537530601, 'critic_2_loss': 5.669991448987275, 'policy_loss': -126.87473249435425, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 0.09561827419591802, 'critic_2_loss': 0.09578562022319861, 'policy_loss': -1.3969896606036596, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.42, 0.42, 0.42, 0.41, 0.42, 0.4, 0.42, 0.41]
o: [1.0]
UPDATES
i: [4281, 4281, 4281, 4281]
h: [1591, 1282, 1689, 1487, 892, 1632, 1704, 1721]
o: [4281]
MEMORY
i: [4346, 4346, 4346, 4346]
h: [516, 265, 875

100%|██████████| 1000/1000 [01:53<00:00,  8.83it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.64, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 1001, episode steps: 32, avg. reward: 32.0, loss_i: 0.412, loss_h: 15.602, loss_o: 0.08
Episode: 1002, episode steps: 25, avg. reward: 28.5, loss_i: 0.418, loss_h: 9.514, loss_o: 0.087
Episode: 1003, episode steps: 18, avg. reward: 25.0, loss_i: 0.398, loss_h: 7.833, loss_o: 0.078
Episode: 1004, episode steps: 34, avg. reward: 27.25, loss_i: 0.408, loss_h: 14.667, loss_o: 0.077
Episode: 1005, episode steps: 20, avg. reward: 25.8, loss_i: 0.41, loss_h: 8.264, loss_o: 0.085
Episode: 1006, episode steps: 37, avg. reward: 27.67, loss_i: 0.409, loss_h: 16.702, loss_o: 0.079
Episode: 1007, episode steps: 28, avg. reward: 27.71, loss_i: 0.418, loss_h: 0.035, loss_o: 0.079
Episode: 1008, episode steps: 19, avg. reward: 26.62, loss_i: 0.435, loss_h: 8.187, loss_o: 0.073

Episode: 1083, episode steps: 17, avg. reward: 23.72, loss_i: 0.433, loss_h: 7.472, loss_o: 0.08
Episode: 1084, episode steps: 20, avg. reward: 23.68, loss_i: 0.397, loss_h: 8.96, loss_o: 0.071
Episode: 1085, episode steps: 14, avg. reward: 23.56, loss_i: 0.449, loss_h: 0.038, loss_o: 0.084
Episode: 1086, episode steps: 10, avg. reward: 23.41, loss_i: 0.441, loss_h: 3.878, loss_o: 0.076
Episode: 1087, episode steps: 17, avg. reward: 23.33, loss_i: 0.369, loss_h: 9.036, loss_o: 0.072
Episode: 1088, episode steps: 15, avg. reward: 23.24, loss_i: 0.438, loss_h: 0.043, loss_o: 0.091
Episode: 1089, episode steps: 18, avg. reward: 23.18, loss_i: 0.438, loss_h: 8.198, loss_o: 0.093
Episode: 1090, episode steps: 23, avg. reward: 23.18, loss_i: 0.438, loss_h: 0.041, loss_o: 0.069
Episode: 1091, episode steps: 20, avg. reward: 23.14, loss_i: 0.344, loss_h: 0.037, loss_o: 0.072
Episode: 1092, episode steps: 30, avg. reward: 23.22, loss_i: 0.395, loss_h: 0.044, loss_o: 0.072
Episode: 1093, episode

Episode: 1167, episode steps: 9, avg. reward: 22.51, loss_i: 0.384, loss_h: 0.047, loss_o: 0.079
Episode: 1168, episode steps: 23, avg. reward: 22.52, loss_i: 0.388, loss_h: 11.453, loss_o: 0.077
Episode: 1169, episode steps: 15, avg. reward: 22.47, loss_i: 0.446, loss_h: 0.043, loss_o: 0.072
Episode: 1170, episode steps: 19, avg. reward: 22.45, loss_i: 0.416, loss_h: 10.017, loss_o: 0.08
Episode: 1171, episode steps: 25, avg. reward: 22.47, loss_i: 0.364, loss_h: 13.415, loss_o: 0.072
Episode: 1172, episode steps: 13, avg. reward: 22.41, loss_i: 0.409, loss_h: 5.768, loss_o: 0.081
Episode: 1173, episode steps: 40, avg. reward: 22.51, loss_i: 0.378, loss_h: 0.045, loss_o: 0.081
Episode: 1174, episode steps: 36, avg. reward: 22.59, loss_i: 0.387, loss_h: 0.042, loss_o: 0.082
Episode: 1175, episode steps: 14, avg. reward: 22.54, loss_i: 0.376, loss_h: 0.041, loss_o: 0.075
Episode: 1176, episode steps: 23, avg. reward: 22.55, loss_i: 0.356, loss_h: 0.042, loss_o: 0.078
Episode: 1177, epis

Episode: 1249, episode steps: 55, avg. reward: 22.09, loss_i: 1.711, loss_h: 0.099, loss_o: 0.162
Episode: 1250, episode steps: 15, avg. reward: 22.06, loss_i: 1.824, loss_h: 0.091, loss_o: 0.126
Episode: 1251, episode steps: 14, avg. reward: 22.03, loss_i: 1.826, loss_h: 0.09, loss_o: 0.205
Episode: 1252, episode steps: 15, avg. reward: 22.0, loss_i: 1.607, loss_h: 0.087, loss_o: 0.166
Episode: 1253, episode steps: 37, avg. reward: 22.06, loss_i: 1.712, loss_h: 0.089, loss_o: 0.155
Episode: 1254, episode steps: 28, avg. reward: 22.09, loss_i: 1.615, loss_h: 27.438, loss_o: 0.18
Episode: 1255, episode steps: 13, avg. reward: 22.05, loss_i: 1.88, loss_h: 10.873, loss_o: 0.198
Episode: 1256, episode steps: 34, avg. reward: 22.1, loss_i: 1.723, loss_h: 0.096, loss_o: 0.173
Episode: 1257, episode steps: 19, avg. reward: 22.09, loss_i: 1.585, loss_h: 0.09, loss_o: 0.156
Episode: 1258, episode steps: 50, avg. reward: 22.19, loss_i: 1.604, loss_h: 0.08, loss_o: 0.166
Episode: 1259, episode st

Episode: 1333, episode steps: 34, avg. reward: 21.81, loss_i: 1.805, loss_h: 37.249, loss_o: 0.18
Episode: 1334, episode steps: 29, avg. reward: 21.83, loss_i: 1.71, loss_h: 30.803, loss_o: 0.181
Episode: 1335, episode steps: 9, avg. reward: 21.79, loss_i: 1.555, loss_h: 0.087, loss_o: 0.195
Episode: 1336, episode steps: 28, avg. reward: 21.81, loss_i: 1.56, loss_h: 0.102, loss_o: 0.186
Episode: 1337, episode steps: 26, avg. reward: 21.82, loss_i: 1.656, loss_h: 0.092, loss_o: 0.131
Episode: 1338, episode steps: 16, avg. reward: 21.81, loss_i: 1.812, loss_h: 16.154, loss_o: 0.168
Episode: 1339, episode steps: 15, avg. reward: 21.79, loss_i: 1.811, loss_h: 0.095, loss_o: 0.113
Episode: 1340, episode steps: 13, avg. reward: 21.76, loss_i: 1.812, loss_h: 13.585, loss_o: 0.185
Episode: 1341, episode steps: 12, avg. reward: 21.73, loss_i: 1.555, loss_h: 13.727, loss_o: 0.151
Episode: 1342, episode steps: 24, avg. reward: 21.74, loss_i: 1.696, loss_h: 0.088, loss_o: 0.171
Episode: 1343, epis

Episode: 1417, episode steps: 35, avg. reward: 21.91, loss_i: 1.549, loss_h: 0.083, loss_o: 0.208
Episode: 1418, episode steps: 20, avg. reward: 21.9, loss_i: 1.605, loss_h: 19.93, loss_o: 0.185
Episode: 1419, episode steps: 22, avg. reward: 21.9, loss_i: 1.575, loss_h: 0.092, loss_o: 0.179
Episode: 1420, episode steps: 22, avg. reward: 21.9, loss_i: 1.651, loss_h: 23.437, loss_o: 0.146
Episode: 1421, episode steps: 14, avg. reward: 21.88, loss_i: 1.831, loss_h: 16.767, loss_o: 0.147
Episode: 1422, episode steps: 24, avg. reward: 21.89, loss_i: 1.592, loss_h: 0.094, loss_o: 0.156
Episode: 1423, episode steps: 12, avg. reward: 21.87, loss_i: 1.482, loss_h: 12.795, loss_o: 0.223
Episode: 1424, episode steps: 34, avg. reward: 21.89, loss_i: 1.524, loss_h: 35.633, loss_o: 0.179
Episode: 1425, episode steps: 24, avg. reward: 21.9, loss_i: 1.65, loss_h: 0.1, loss_o: 0.154
Episode: 1426, episode steps: 38, avg. reward: 21.94, loss_i: 1.612, loss_h: 0.095, loss_o: 0.181
Episode: 1427, episode 

Episode: 1501, episode steps: 20, avg. reward: 21.66, loss_i: 1.468, loss_h: 0.097, loss_o: 0.179
Episode: 1502, episode steps: 12, avg. reward: 21.64, loss_i: 1.83, loss_h: 14.016, loss_o: 0.163
Episode: 1503, episode steps: 14, avg. reward: 21.62, loss_i: 1.553, loss_h: 17.814, loss_o: 0.152
Episode: 1504, episode steps: 16, avg. reward: 21.61, loss_i: 1.638, loss_h: 18.978, loss_o: 0.148
Episode: 1505, episode steps: 13, avg. reward: 21.6, loss_i: 1.679, loss_h: 0.085, loss_o: 0.194
Episode: 1506, episode steps: 36, avg. reward: 21.62, loss_i: 1.625, loss_h: 0.099, loss_o: 0.196
Episode: 1507, episode steps: 21, avg. reward: 21.62, loss_i: 1.628, loss_h: 0.098, loss_o: 0.12
Episode: 1508, episode steps: 23, avg. reward: 21.63, loss_i: 1.54, loss_h: 23.184, loss_o: 0.142
Episode: 1509, episode steps: 40, avg. reward: 21.66, loss_i: 1.598, loss_h: 41.203, loss_o: 0.174
Episode: 1510, episode steps: 12, avg. reward: 21.64, loss_i: 1.527, loss_h: 0.094, loss_o: 0.179
Episode: 1511, epis

Episode: 1585, episode steps: 49, avg. reward: 21.7, loss_i: 1.542, loss_h: 53.194, loss_o: 0.17
Episode: 1586, episode steps: 28, avg. reward: 21.71, loss_i: 1.536, loss_h: 33.343, loss_o: 0.166
Episode: 1587, episode steps: 10, avg. reward: 21.69, loss_i: 1.587, loss_h: 9.98, loss_o: 0.176
Episode: 1588, episode steps: 17, avg. reward: 21.68, loss_i: 1.444, loss_h: 19.451, loss_o: 0.202
Episode: 1589, episode steps: 17, avg. reward: 21.68, loss_i: 1.44, loss_h: 19.538, loss_o: 0.156
Episode: 1590, episode steps: 14, avg. reward: 21.66, loss_i: 1.574, loss_h: 15.63, loss_o: 0.168
Episode: 1591, episode steps: 14, avg. reward: 21.65, loss_i: 1.653, loss_h: 16.33, loss_o: 0.182
Episode: 1592, episode steps: 41, avg. reward: 21.68, loss_i: 1.65, loss_h: 0.108, loss_o: 0.173
Episode: 1593, episode steps: 41, avg. reward: 21.72, loss_i: 1.589, loss_h: 50.959, loss_o: 0.178
Episode: 1594, episode steps: 21, avg. reward: 21.71, loss_i: 1.562, loss_h: 24.728, loss_o: 0.161
Episode: 1595, epis

Episode: 1667, episode steps: 21, avg. reward: 21.89, loss_i: 5.501, loss_h: 34.035, loss_o: 0.261
Episode: 1668, episode steps: 14, avg. reward: 21.88, loss_i: 4.777, loss_h: 23.538, loss_o: 0.214
Episode: 1669, episode steps: 13, avg. reward: 21.87, loss_i: 4.805, loss_h: 20.283, loss_o: 0.286
Episode: 1670, episode steps: 23, avg. reward: 21.87, loss_i: 4.907, loss_h: 38.989, loss_o: 0.246
Episode: 1671, episode steps: 17, avg. reward: 21.86, loss_i: 4.725, loss_h: 0.172, loss_o: 0.255
Episode: 1672, episode steps: 12, avg. reward: 21.85, loss_i: 5.32, loss_h: 17.626, loss_o: 0.254
Episode: 1673, episode steps: 28, avg. reward: 21.85, loss_i: 4.892, loss_h: 50.104, loss_o: 0.283
Episode: 1674, episode steps: 24, avg. reward: 21.86, loss_i: 4.571, loss_h: 0.148, loss_o: 0.27
Episode: 1675, episode steps: 14, avg. reward: 21.85, loss_i: 5.623, loss_h: 0.147, loss_o: 0.261
Episode: 1676, episode steps: 13, avg. reward: 21.83, loss_i: 5.152, loss_h: 0.125, loss_o: 0.28
Episode: 1677, ep

Episode: 1751, episode steps: 11, avg. reward: 21.75, loss_i: 5.708, loss_h: 19.531, loss_o: 0.258
Episode: 1752, episode steps: 13, avg. reward: 21.74, loss_i: 5.269, loss_h: 19.124, loss_o: 0.314
Episode: 1753, episode steps: 23, avg. reward: 21.74, loss_i: 5.106, loss_h: 0.152, loss_o: 0.285
Episode: 1754, episode steps: 25, avg. reward: 21.75, loss_i: 5.136, loss_h: 0.143, loss_o: 0.256
Episode: 1755, episode steps: 28, avg. reward: 21.75, loss_i: 5.263, loss_h: 47.531, loss_o: 0.292
Episode: 1756, episode steps: 22, avg. reward: 21.76, loss_i: 5.295, loss_h: 38.847, loss_o: 0.242
Episode: 1757, episode steps: 13, avg. reward: 21.74, loss_i: 5.111, loss_h: 20.374, loss_o: 0.253
Episode: 1758, episode steps: 21, avg. reward: 21.74, loss_i: 5.261, loss_h: 32.953, loss_o: 0.272
Episode: 1759, episode steps: 11, avg. reward: 21.73, loss_i: 5.84, loss_h: 0.152, loss_o: 0.25
Episode: 1760, episode steps: 14, avg. reward: 21.72, loss_i: 4.99, loss_h: 0.147, loss_o: 0.287
Episode: 1761, ep

Episode: 1835, episode steps: 25, avg. reward: 21.9, loss_i: 5.871, loss_h: 41.089, loss_o: 0.253
Episode: 1836, episode steps: 12, avg. reward: 21.89, loss_i: 5.426, loss_h: 0.161, loss_o: 0.263
Episode: 1837, episode steps: 23, avg. reward: 21.89, loss_i: 5.117, loss_h: 38.545, loss_o: 0.266
Episode: 1838, episode steps: 13, avg. reward: 21.88, loss_i: 4.928, loss_h: 0.136, loss_o: 0.307
Episode: 1839, episode steps: 22, avg. reward: 21.88, loss_i: 5.13, loss_h: 0.165, loss_o: 0.303
Episode: 1840, episode steps: 31, avg. reward: 21.89, loss_i: 5.306, loss_h: 53.161, loss_o: 0.313
Episode: 1841, episode steps: 27, avg. reward: 21.9, loss_i: 5.367, loss_h: 0.171, loss_o: 0.287
Episode: 1842, episode steps: 11, avg. reward: 21.88, loss_i: 4.923, loss_h: 0.155, loss_o: 0.325
Episode: 1843, episode steps: 10, avg. reward: 21.87, loss_i: 5.873, loss_h: 18.623, loss_o: 0.35
Episode: 1844, episode steps: 11, avg. reward: 21.86, loss_i: 5.136, loss_h: 0.16, loss_o: 0.29
Episode: 1845, episode

Episode: 1919, episode steps: 48, avg. reward: 22.01, loss_i: 5.085, loss_h: 0.155, loss_o: 0.279
Episode: 1920, episode steps: 68, avg. reward: 22.06, loss_i: 5.158, loss_h: 118.93, loss_o: 0.289
Episode: 1921, episode steps: 10, avg. reward: 22.05, loss_i: 5.562, loss_h: 16.678, loss_o: 0.248
Episode: 1922, episode steps: 27, avg. reward: 22.05, loss_i: 4.039, loss_h: 47.593, loss_o: 0.281
Episode: 1923, episode steps: 30, avg. reward: 22.06, loss_i: 5.199, loss_h: 51.592, loss_o: 0.28
Episode: 1924, episode steps: 11, avg. reward: 22.05, loss_i: 4.881, loss_h: 21.317, loss_o: 0.286
Episode: 1925, episode steps: 11, avg. reward: 22.04, loss_i: 5.017, loss_h: 0.129, loss_o: 0.351
Episode: 1926, episode steps: 27, avg. reward: 22.04, loss_i: 5.509, loss_h: 0.167, loss_o: 0.297
Episode: 1927, episode steps: 9, avg. reward: 22.03, loss_i: 4.716, loss_h: 19.805, loss_o: 0.291
Episode: 1928, episode steps: 58, avg. reward: 22.07, loss_i: 5.486, loss_h: 0.155, loss_o: 0.292
Episode: 1929, e

  0%|          | 0/1000 [00:00<?, ?it/s]

--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 5.471406618556515, 'critic_2_loss': 5.459994586692615, 'policy_loss': -13.887047762220556, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 75.94988647196442, 'critic_2_loss': 75.80940438108519, 'policy_loss': -641.5085830688477, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 0.2999023157383569, 'critic_2_loss': 0.3001408746720038, 'policy_loss': -2.220668202096766, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.43, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41]
o: [1.0]
UPDATES
i: [0, 0, 0, 

100%|██████████| 1000/1000 [01:53<00:00,  8.78it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.44, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 2001, episode steps: 17, avg. reward: 17.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2002, episode steps: 15, avg. reward: 16.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2003, episode steps: 15, avg. reward: 15.67, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2004, episode steps: 27, avg. reward: 18.5, loss_i: 1566.831, loss_h: nan, loss_o: 7.539
Episode: 2005, episode steps: 26, avg. reward: 20.0, loss_i: 16.017, loss_h: nan, loss_o: 0.424
Episode: 2006, episode steps: 21, avg. reward: 20.17, loss_i: 17.705, loss_h: nan, loss_o: 0.379
Episode: 2007, episode steps: 12, avg. reward: 19.0, loss_i: 21.564, loss_h: nan, loss_o: 0.361
Episode: 2008, episode steps: 18, avg. reward: 18.88, loss_i: 17.516, loss_h: nan, loss_o: 0.44
Episode: 2009, episode 

Episode: 2082, episode steps: 23, avg. reward: 21.62, loss_i: 14.546, loss_h: 53.969, loss_o: 0.392
Episode: 2083, episode steps: 22, avg. reward: 21.63, loss_i: 14.59, loss_h: 0.236, loss_o: 0.397
Episode: 2084, episode steps: 11, avg. reward: 21.5, loss_i: 13.23, loss_h: 23.531, loss_o: 0.505
Episode: 2085, episode steps: 16, avg. reward: 21.44, loss_i: 15.438, loss_h: 0.242, loss_o: 0.448
Episode: 2086, episode steps: 40, avg. reward: 21.65, loss_i: 13.679, loss_h: 113.158, loss_o: 0.415
Episode: 2087, episode steps: 16, avg. reward: 21.59, loss_i: 13.926, loss_h: 0.247, loss_o: 0.449
Episode: 2088, episode steps: 28, avg. reward: 21.66, loss_i: 12.598, loss_h: 74.778, loss_o: 0.447
Episode: 2089, episode steps: 32, avg. reward: 21.78, loss_i: 13.799, loss_h: 0.241, loss_o: 0.344
Episode: 2090, episode steps: 51, avg. reward: 22.1, loss_i: 15.058, loss_h: 133.356, loss_o: 0.398
Episode: 2091, episode steps: 21, avg. reward: 22.09, loss_i: 12.206, loss_h: 49.917, loss_o: 0.467
Episod

Episode: 2165, episode steps: 20, avg. reward: 21.94, loss_i: 13.554, loss_h: 0.222, loss_o: 0.387
Episode: 2166, episode steps: 14, avg. reward: 21.89, loss_i: 14.051, loss_h: 35.827, loss_o: 0.438
Episode: 2167, episode steps: 10, avg. reward: 21.82, loss_i: 13.636, loss_h: 0.219, loss_o: 0.394
Episode: 2168, episode steps: 12, avg. reward: 21.76, loss_i: 15.855, loss_h: 25.797, loss_o: 0.539
Episode: 2169, episode steps: 15, avg. reward: 21.72, loss_i: 16.076, loss_h: 0.225, loss_o: 0.401
Episode: 2170, episode steps: 16, avg. reward: 21.69, loss_i: 13.493, loss_h: 0.248, loss_o: 0.39
Episode: 2171, episode steps: 26, avg. reward: 21.71, loss_i: 15.763, loss_h: 0.206, loss_o: 0.354
Episode: 2172, episode steps: 28, avg. reward: 21.75, loss_i: 14.294, loss_h: 76.651, loss_o: 0.377
Episode: 2173, episode steps: 11, avg. reward: 21.69, loss_i: 13.391, loss_h: 26.009, loss_o: 0.502
Episode: 2174, episode steps: 30, avg. reward: 21.74, loss_i: 14.248, loss_h: 0.213, loss_o: 0.365
Episode

Episode: 2248, episode steps: 16, avg. reward: 21.48, loss_i: 15.274, loss_h: 45.806, loss_o: 0.447
Episode: 2249, episode steps: 11, avg. reward: 21.44, loss_i: 14.648, loss_h: 29.487, loss_o: 0.345
Episode: 2250, episode steps: 18, avg. reward: 21.42, loss_i: 14.394, loss_h: 0.22, loss_o: 0.421
Episode: 2251, episode steps: 12, avg. reward: 21.39, loss_i: 16.183, loss_h: 33.054, loss_o: 0.481
Episode: 2252, episode steps: 43, avg. reward: 21.47, loss_i: 15.028, loss_h: 112.996, loss_o: 0.427
Episode: 2253, episode steps: 20, avg. reward: 21.47, loss_i: 14.073, loss_h: 55.59, loss_o: 0.397
Episode: 2254, episode steps: 21, avg. reward: 21.46, loss_i: 13.32, loss_h: 46.023, loss_o: 0.36
Episode: 2255, episode steps: 45, avg. reward: 21.56, loss_i: 15.998, loss_h: 111.073, loss_o: 0.44
Episode: 2256, episode steps: 15, avg. reward: 21.53, loss_i: 14.239, loss_h: 0.208, loss_o: 0.442
Episode: 2257, episode steps: 25, avg. reward: 21.54, loss_i: 13.312, loss_h: 69.208, loss_o: 0.467
Episo

Episode: 2331, episode steps: 14, avg. reward: 21.66, loss_i: 16.559, loss_h: 0.248, loss_o: 0.379
Episode: 2332, episode steps: 21, avg. reward: 21.65, loss_i: 13.433, loss_h: 61.054, loss_o: 0.402
Episode: 2333, episode steps: 20, avg. reward: 21.65, loss_i: 13.554, loss_h: 53.945, loss_o: 0.499
Episode: 2334, episode steps: 18, avg. reward: 21.64, loss_i: 13.422, loss_h: 48.495, loss_o: 0.397
Episode: 2335, episode steps: 27, avg. reward: 21.65, loss_i: 13.89, loss_h: 73.72, loss_o: 0.359
Episode: 2336, episode steps: 46, avg. reward: 21.73, loss_i: 13.219, loss_h: 0.232, loss_o: 0.4
Episode: 2337, episode steps: 11, avg. reward: 21.69, loss_i: 12.842, loss_h: 0.222, loss_o: 0.444
Episode: 2338, episode steps: 53, avg. reward: 21.79, loss_i: 13.395, loss_h: 131.162, loss_o: 0.423
Episode: 2339, episode steps: 37, avg. reward: 21.83, loss_i: 14.119, loss_h: 101.722, loss_o: 0.374
Episode: 2340, episode steps: 68, avg. reward: 21.97, loss_i: 14.947, loss_h: 0.25, loss_o: 0.419
Episode

Episode: 2412, episode steps: 53, avg. reward: 22.08, loss_i: 14.184, loss_h: 12.882, loss_o: 0.64
Episode: 2413, episode steps: 26, avg. reward: 22.09, loss_i: 13.293, loss_h: 24.31, loss_o: 0.479
Episode: 2414, episode steps: 10, avg. reward: 22.06, loss_i: 12.988, loss_h: 13.717, loss_o: 0.542
Episode: 2415, episode steps: 29, avg. reward: 22.07, loss_i: 13.83, loss_h: 38.707, loss_o: 0.567
Episode: 2416, episode steps: 19, avg. reward: 22.07, loss_i: 14.275, loss_h: 27.002, loss_o: 0.53
Episode: 2417, episode steps: 12, avg. reward: 22.04, loss_i: 15.354, loss_h: 15.831, loss_o: 0.473
Episode: 2418, episode steps: 32, avg. reward: 22.07, loss_i: 13.116, loss_h: 62.331, loss_o: 0.599
Episode: 2419, episode steps: 14, avg. reward: 22.05, loss_i: 13.281, loss_h: 33.61, loss_o: 0.605
Episode: 2420, episode steps: 17, avg. reward: 22.04, loss_i: 14.961, loss_h: 43.634, loss_o: 0.618
Episode: 2421, episode steps: 20, avg. reward: 22.03, loss_i: 14.189, loss_h: 53.256, loss_o: 0.607
Episo

Episode: 2495, episode steps: 12, avg. reward: 22.26, loss_i: 15.818, loss_h: 0.314, loss_o: 0.515
Episode: 2496, episode steps: 12, avg. reward: 22.24, loss_i: 14.764, loss_h: 38.806, loss_o: 0.457
Episode: 2497, episode steps: 19, avg. reward: 22.23, loss_i: 15.942, loss_h: 69.5, loss_o: 0.54
Episode: 2498, episode steps: 14, avg. reward: 22.21, loss_i: 15.077, loss_h: 55.626, loss_o: 0.618
Episode: 2499, episode steps: 17, avg. reward: 22.2, loss_i: 15.552, loss_h: 0.286, loss_o: 0.513
Episode: 2500, episode steps: 20, avg. reward: 22.2, loss_i: 13.855, loss_h: 0.298, loss_o: 0.513
Episode: 2501, episode steps: 11, avg. reward: 22.18, loss_i: 13.442, loss_h: 34.951, loss_o: 0.669
Episode: 2502, episode steps: 28, avg. reward: 22.19, loss_i: 14.883, loss_h: 0.317, loss_o: 0.524
Episode: 2503, episode steps: 17, avg. reward: 22.18, loss_i: 14.392, loss_h: 67.336, loss_o: 0.595
Episode: 2504, episode steps: 21, avg. reward: 22.18, loss_i: 15.621, loss_h: 73.32, loss_o: 0.459
Episode: 2

Episode: 2578, episode steps: 15, avg. reward: 21.87, loss_i: 16.056, loss_h: 37.511, loss_o: 0.507
Episode: 2579, episode steps: 14, avg. reward: 21.85, loss_i: 14.131, loss_h: 45.019, loss_o: 0.598
Episode: 2580, episode steps: 10, avg. reward: 21.83, loss_i: 15.526, loss_h: 36.243, loss_o: 0.433
Episode: 2581, episode steps: 19, avg. reward: 21.83, loss_i: 15.516, loss_h: 62.6, loss_o: 0.411
Episode: 2582, episode steps: 18, avg. reward: 21.82, loss_i: 14.219, loss_h: 0.273, loss_o: 0.473
Episode: 2583, episode steps: 22, avg. reward: 21.82, loss_i: 16.427, loss_h: 0.285, loss_o: 0.546
Episode: 2584, episode steps: 45, avg. reward: 21.86, loss_i: 14.684, loss_h: 0.269, loss_o: 0.523
Episode: 2585, episode steps: 18, avg. reward: 21.85, loss_i: 14.201, loss_h: 57.38, loss_o: 0.643
Episode: 2586, episode steps: 26, avg. reward: 21.86, loss_i: 15.523, loss_h: 0.26, loss_o: 0.547
Episode: 2587, episode steps: 17, avg. reward: 21.85, loss_i: 14.526, loss_h: 0.31, loss_o: 0.626
Episode: 2

Episode: 2661, episode steps: 10, avg. reward: 21.64, loss_i: 15.535, loss_h: 38.113, loss_o: 0.625
Episode: 2662, episode steps: 17, avg. reward: 21.64, loss_i: 16.475, loss_h: 0.29, loss_o: 0.541
Episode: 2663, episode steps: 22, avg. reward: 21.64, loss_i: 16.336, loss_h: 71.964, loss_o: 0.581
Episode: 2664, episode steps: 27, avg. reward: 21.65, loss_i: 15.475, loss_h: 0.32, loss_o: 0.54
Episode: 2665, episode steps: 11, avg. reward: 21.63, loss_i: 14.42, loss_h: 42.017, loss_o: 0.451
Episode: 2666, episode steps: 20, avg. reward: 21.63, loss_i: 15.579, loss_h: 68.264, loss_o: 0.682
Episode: 2667, episode steps: 31, avg. reward: 21.64, loss_i: 14.458, loss_h: 120.259, loss_o: 0.581
Episode: 2668, episode steps: 27, avg. reward: 21.65, loss_i: 14.942, loss_h: 88.844, loss_o: 0.661
Episode: 2669, episode steps: 10, avg. reward: 21.63, loss_i: 16.602, loss_h: 0.34, loss_o: 0.542
Episode: 2670, episode steps: 39, avg. reward: 21.66, loss_i: 15.435, loss_h: 136.886, loss_o: 0.583
Episod

Episode: 2744, episode steps: 18, avg. reward: 21.81, loss_i: 13.6, loss_h: 61.57, loss_o: 0.541
Episode: 2745, episode steps: 14, avg. reward: 21.8, loss_i: 14.243, loss_h: 44.214, loss_o: 0.625
Episode: 2746, episode steps: 14, avg. reward: 21.79, loss_i: 16.967, loss_h: 0.281, loss_o: 0.619
Episode: 2747, episode steps: 14, avg. reward: 21.78, loss_i: 15.159, loss_h: 0.279, loss_o: 0.69
Episode: 2748, episode steps: 11, avg. reward: 21.77, loss_i: 14.208, loss_h: 34.221, loss_o: 0.57
Episode: 2749, episode steps: 14, avg. reward: 21.76, loss_i: 16.148, loss_h: 51.592, loss_o: 0.515
Episode: 2750, episode steps: 15, avg. reward: 21.75, loss_i: 15.393, loss_h: 0.309, loss_o: 0.779
Episode: 2751, episode steps: 16, avg. reward: 21.74, loss_i: 15.901, loss_h: 66.849, loss_o: 0.617
Episode: 2752, episode steps: 29, avg. reward: 21.75, loss_i: 15.179, loss_h: 105.875, loss_o: 0.621
Episode: 2753, episode steps: 20, avg. reward: 21.75, loss_i: 15.116, loss_h: 69.879, loss_o: 0.685
Episode:

Episode: 2825, episode steps: 19, avg. reward: 21.74, loss_i: 12.718, loss_h: 54.411, loss_o: 0.559
Episode: 2826, episode steps: 14, avg. reward: 21.73, loss_i: 14.461, loss_h: 49.623, loss_o: 0.686
Episode: 2827, episode steps: 55, avg. reward: 21.77, loss_i: 13.595, loss_h: 154.621, loss_o: 0.699
Episode: 2828, episode steps: 14, avg. reward: 21.76, loss_i: 13.335, loss_h: 39.006, loss_o: 0.623
Episode: 2829, episode steps: 20, avg. reward: 21.76, loss_i: 12.552, loss_h: 56.885, loss_o: 0.583
Episode: 2830, episode steps: 22, avg. reward: 21.76, loss_i: 13.07, loss_h: 58.317, loss_o: 0.664
Episode: 2831, episode steps: 9, avg. reward: 21.75, loss_i: 12.498, loss_h: 25.524, loss_o: 0.598
Episode: 2832, episode steps: 24, avg. reward: 21.75, loss_i: 13.301, loss_h: 79.928, loss_o: 0.534
Episode: 2833, episode steps: 32, avg. reward: 21.76, loss_i: 12.937, loss_h: 96.521, loss_o: 0.638
Episode: 2834, episode steps: 16, avg. reward: 21.75, loss_i: 15.79, loss_h: 48.835, loss_o: 0.528
Ep

Episode: 2908, episode steps: 12, avg. reward: 21.93, loss_i: 11.293, loss_h: 45.296, loss_o: 0.666
Episode: 2909, episode steps: 24, avg. reward: 21.93, loss_i: 12.47, loss_h: 95.902, loss_o: 0.577
Episode: 2910, episode steps: 27, avg. reward: 21.94, loss_i: 13.136, loss_h: 100.251, loss_o: 0.672
Episode: 2911, episode steps: 23, avg. reward: 21.94, loss_i: 13.683, loss_h: 102.056, loss_o: 0.828
Episode: 2912, episode steps: 29, avg. reward: 21.95, loss_i: 13.203, loss_h: 108.448, loss_o: 0.716
Episode: 2913, episode steps: 10, avg. reward: 21.94, loss_i: 11.793, loss_h: 39.363, loss_o: 0.644
Episode: 2914, episode steps: 20, avg. reward: 21.93, loss_i: 12.514, loss_h: 87.247, loss_o: 0.784
Episode: 2915, episode steps: 11, avg. reward: 21.92, loss_i: 12.122, loss_h: 48.491, loss_o: 0.718
Episode: 2916, episode steps: 18, avg. reward: 21.92, loss_i: 13.416, loss_h: 83.214, loss_o: 0.555
Episode: 2917, episode steps: 23, avg. reward: 21.92, loss_i: 11.234, loss_h: 95.093, loss_o: 0.60

Episode: 2991, episode steps: 11, avg. reward: 21.86, loss_i: 13.831, loss_h: 43.51, loss_o: 0.628
Episode: 2992, episode steps: 13, avg. reward: 21.86, loss_i: 13.857, loss_h: 52.03, loss_o: 0.818
Episode: 2993, episode steps: 30, avg. reward: 21.86, loss_i: 14.061, loss_h: 130.287, loss_o: 0.726
Episode: 2994, episode steps: 12, avg. reward: 21.85, loss_i: 13.77, loss_h: 49.077, loss_o: 0.499
Episode: 2995, episode steps: 18, avg. reward: 21.85, loss_i: 13.855, loss_h: 78.876, loss_o: 0.679
Episode: 2996, episode steps: 13, avg. reward: 21.84, loss_i: 13.77, loss_h: 57.928, loss_o: 0.785
Episode: 2997, episode steps: 14, avg. reward: 21.83, loss_i: 14.026, loss_h: 55.555, loss_o: 0.73
Episode: 2998, episode steps: 19, avg. reward: 21.83, loss_i: 14.112, loss_h: 83.439, loss_o: 0.57
Episode: 2999, episode steps: 12, avg. reward: 21.82, loss_i: 15.364, loss_h: 52.353, loss_o: 0.707


  0%|          | 1/1000 [00:00<01:52,  8.86it/s]

Episode: 3000, episode steps: 14, avg. reward: 21.81, loss_i: 13.751, loss_h: 62.327, loss_o: 0.713
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 13.740653677976557, 'critic_2_loss': 13.76066854223609, 'policy_loss': -13.801810277359825, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 62.38886168971658, 'critic_2_loss': 62.266051050275564, 'policy_loss': -290.9722948074341, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 0.713055065699986, 'critic_2_loss': 0.7137662547507456, 'policy_loss': -3.2964548553739275, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.42, 0.41, 0.42, 0.41, 0.41, 0.41, 0.42, 0.41]
o: [1.0]
UPDATES
i: [4418, 4418, 4418, 4418]
h: [0, 1658, 1776, 825, 1705, 1797, 1713, 1695]
o: [4418]
MEMORY
i: [4483, 4483, 4483, 4483]
h: [25, 497, 1151, 

100%|██████████| 1000/1000 [01:50<00:00,  9.03it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.37, Max. Reward: 12.0
--------------------------------------------------------------------------------
Episode: 3001, episode steps: 21, avg. reward: 21.0, loss_i: 14.074, loss_h: 88.589, loss_o: 0.708
Episode: 3002, episode steps: 31, avg. reward: 26.0, loss_i: 14.163, loss_h: 139.467, loss_o: 0.825
Episode: 3003, episode steps: 16, avg. reward: 22.67, loss_i: 16.265, loss_h: 58.546, loss_o: 0.615
Episode: 3004, episode steps: 24, avg. reward: 23.0, loss_i: 14.891, loss_h: 95.192, loss_o: 0.708
Episode: 3005, episode steps: 23, avg. reward: 23.0, loss_i: 13.614, loss_h: 96.599, loss_o: 0.765
Episode: 3006, episode steps: 15, avg. reward: 21.67, loss_i: 15.431, loss_h: 73.731, loss_o: 0.651
Episode: 3007, episode steps: 26, avg. reward: 22.29, loss_i: 15.898, loss_h: 119.371, loss_o: 0.782
Episode: 3008, episode steps: 13, avg. reward: 21.12, loss_i: 14.01, loss_h: 59.96

Episode: 3081, episode steps: 13, avg. reward: 22.27, loss_i: 13.884, loss_h: 51.378, loss_o: 0.767
Episode: 3082, episode steps: 21, avg. reward: 22.26, loss_i: 12.689, loss_h: 79.605, loss_o: 0.757
Episode: 3083, episode steps: 33, avg. reward: 22.39, loss_i: 14.64, loss_h: 125.48, loss_o: 0.71
Episode: 3084, episode steps: 15, avg. reward: 22.3, loss_i: 13.765, loss_h: 59.011, loss_o: 0.491
Episode: 3085, episode steps: 9, avg. reward: 22.14, loss_i: 16.995, loss_h: 25.203, loss_o: 0.684
Episode: 3086, episode steps: 15, avg. reward: 22.06, loss_i: 13.171, loss_h: 62.359, loss_o: 0.821
Episode: 3087, episode steps: 17, avg. reward: 22.0, loss_i: 15.806, loss_h: 70.192, loss_o: 0.64
Episode: 3088, episode steps: 21, avg. reward: 21.99, loss_i: 14.586, loss_h: 77.926, loss_o: 0.658
Episode: 3089, episode steps: 15, avg. reward: 21.91, loss_i: 15.512, loss_h: 53.638, loss_o: 0.731
Episode: 3090, episode steps: 16, avg. reward: 21.84, loss_i: 15.934, loss_h: 59.136, loss_o: 0.828
Episod

Episode: 3164, episode steps: 13, avg. reward: 21.49, loss_i: 14.058, loss_h: 59.508, loss_o: 0.855
Episode: 3165, episode steps: 47, avg. reward: 21.65, loss_i: 16.136, loss_h: 186.17, loss_o: 0.752
Episode: 3166, episode steps: 11, avg. reward: 21.58, loss_i: 15.412, loss_h: 44.387, loss_o: 0.907
Episode: 3167, episode steps: 24, avg. reward: 21.6, loss_i: 15.937, loss_h: 94.473, loss_o: 0.726
Episode: 3168, episode steps: 32, avg. reward: 21.66, loss_i: 12.669, loss_h: 135.89, loss_o: 0.7
Episode: 3169, episode steps: 35, avg. reward: 21.74, loss_i: 15.61, loss_h: 136.958, loss_o: 0.729
Episode: 3170, episode steps: 22, avg. reward: 21.74, loss_i: 16.121, loss_h: 89.321, loss_o: 0.642
Episode: 3171, episode steps: 41, avg. reward: 21.85, loss_i: 15.884, loss_h: 178.635, loss_o: 0.791
Episode: 3172, episode steps: 16, avg. reward: 21.82, loss_i: 16.51, loss_h: 58.915, loss_o: 0.706
Episode: 3173, episode steps: 13, avg. reward: 21.77, loss_i: 17.167, loss_h: 54.696, loss_o: 0.543
Epi

Episode: 3245, episode steps: 28, avg. reward: 22.3, loss_i: 13.117, loss_h: 110.809, loss_o: 0.748
Episode: 3246, episode steps: 17, avg. reward: 22.28, loss_i: 14.73, loss_h: 75.628, loss_o: 1.076
Episode: 3247, episode steps: 15, avg. reward: 22.25, loss_i: 12.757, loss_h: 58.45, loss_o: 0.691
Episode: 3248, episode steps: 15, avg. reward: 22.22, loss_i: 13.864, loss_h: 53.471, loss_o: 0.744
Episode: 3249, episode steps: 18, avg. reward: 22.2, loss_i: 12.101, loss_h: 76.124, loss_o: 0.843
Episode: 3250, episode steps: 11, avg. reward: 22.16, loss_i: 12.291, loss_h: 36.687, loss_o: 1.025
Episode: 3251, episode steps: 16, avg. reward: 22.13, loss_i: 16.519, loss_h: 65.017, loss_o: 0.756
Episode: 3252, episode steps: 10, avg. reward: 22.08, loss_i: 14.154, loss_h: 34.606, loss_o: 0.64
Episode: 3253, episode steps: 26, avg. reward: 22.1, loss_i: 12.935, loss_h: 109.412, loss_o: 0.95
Episode: 3254, episode steps: 16, avg. reward: 22.07, loss_i: 13.261, loss_h: 71.594, loss_o: 0.697
Episo

Episode: 3328, episode steps: 11, avg. reward: 21.74, loss_i: 15.146, loss_h: 59.625, loss_o: 0.685
Episode: 3329, episode steps: 14, avg. reward: 21.72, loss_i: 12.204, loss_h: 73.976, loss_o: 0.835
Episode: 3330, episode steps: 52, avg. reward: 21.81, loss_i: 16.668, loss_h: 280.732, loss_o: 0.893
Episode: 3331, episode steps: 10, avg. reward: 21.77, loss_i: 12.538, loss_h: 55.189, loss_o: 0.732
Episode: 3332, episode steps: 10, avg. reward: 21.74, loss_i: 17.032, loss_h: 55.794, loss_o: 1.037
Episode: 3333, episode steps: 18, avg. reward: 21.73, loss_i: 16.008, loss_h: 100.588, loss_o: 0.816
Episode: 3334, episode steps: 17, avg. reward: 21.71, loss_i: 15.627, loss_h: 101.317, loss_o: 0.942
Episode: 3335, episode steps: 15, avg. reward: 21.69, loss_i: 17.217, loss_h: 81.435, loss_o: 1.109
Episode: 3336, episode steps: 18, avg. reward: 21.68, loss_i: 16.332, loss_h: 94.562, loss_o: 1.04
Episode: 3337, episode steps: 51, avg. reward: 21.77, loss_i: 15.693, loss_h: 284.632, loss_o: 0.7

Episode: 3410, episode steps: 14, avg. reward: 22.06, loss_i: 13.612, loss_h: 66.192, loss_o: 0.904
Episode: 3411, episode steps: 48, avg. reward: 22.12, loss_i: 13.246, loss_h: 242.899, loss_o: 0.984
Episode: 3412, episode steps: 24, avg. reward: 22.12, loss_i: 13.312, loss_h: 110.827, loss_o: 0.842
Episode: 3413, episode steps: 11, avg. reward: 22.1, loss_i: 13.81, loss_h: 61.562, loss_o: 0.7
Episode: 3414, episode steps: 32, avg. reward: 22.12, loss_i: 12.65, loss_h: 171.075, loss_o: 1.002
Episode: 3415, episode steps: 22, avg. reward: 22.12, loss_i: 14.474, loss_h: 138.998, loss_o: 0.891
Episode: 3416, episode steps: 14, avg. reward: 22.1, loss_i: 12.915, loss_h: 65.245, loss_o: 1.012
Episode: 3417, episode steps: 12, avg. reward: 22.08, loss_i: 14.503, loss_h: 50.296, loss_o: 0.936
Episode: 3418, episode steps: 13, avg. reward: 22.06, loss_i: 13.741, loss_h: 58.021, loss_o: 0.964
Episode: 3419, episode steps: 17, avg. reward: 22.04, loss_i: 15.409, loss_h: 82.331, loss_o: 0.875
Ep

Episode: 3492, episode steps: 36, avg. reward: 21.74, loss_i: 15.153, loss_h: 172.288, loss_o: 0.991
Episode: 3493, episode steps: 12, avg. reward: 21.72, loss_i: 16.399, loss_h: 48.136, loss_o: 0.923
Episode: 3494, episode steps: 31, avg. reward: 21.74, loss_i: 13.677, loss_h: 156.294, loss_o: 0.889
Episode: 3495, episode steps: 10, avg. reward: 21.72, loss_i: 13.331, loss_h: 51.051, loss_o: 0.855
Episode: 3496, episode steps: 24, avg. reward: 21.72, loss_i: 17.161, loss_h: 103.378, loss_o: 0.882
Episode: 3497, episode steps: 9, avg. reward: 21.7, loss_i: 13.855, loss_h: 52.994, loss_o: 1.113
Episode: 3498, episode steps: 12, avg. reward: 21.68, loss_i: 17.92, loss_h: 69.996, loss_o: 1.005
Episode: 3499, episode steps: 20, avg. reward: 21.67, loss_i: 15.626, loss_h: 99.942, loss_o: 0.843
Episode: 3500, episode steps: 9, avg. reward: 21.65, loss_i: 11.698, loss_h: 38.533, loss_o: 0.991
Episode: 3501, episode steps: 27, avg. reward: 21.66, loss_i: 14.908, loss_h: 148.271, loss_o: 0.705


Episode: 3574, episode steps: 14, avg. reward: 21.93, loss_i: 13.115, loss_h: 71.367, loss_o: 0.824
Episode: 3575, episode steps: 23, avg. reward: 21.93, loss_i: 14.4, loss_h: 101.6, loss_o: 0.909
Episode: 3576, episode steps: 17, avg. reward: 21.92, loss_i: 15.704, loss_h: 83.217, loss_o: 0.985
Episode: 3577, episode steps: 14, avg. reward: 21.91, loss_i: 15.844, loss_h: 53.013, loss_o: 0.827
Episode: 3578, episode steps: 16, avg. reward: 21.9, loss_i: 16.323, loss_h: 84.514, loss_o: 1.07
Episode: 3579, episode steps: 10, avg. reward: 21.88, loss_i: 13.698, loss_h: 43.291, loss_o: 0.833
Episode: 3580, episode steps: 72, avg. reward: 21.97, loss_i: 14.885, loss_h: 392.684, loss_o: 0.937
Episode: 3581, episode steps: 17, avg. reward: 21.96, loss_i: 15.701, loss_h: 78.357, loss_o: 1.033
Episode: 3582, episode steps: 11, avg. reward: 21.94, loss_i: 15.713, loss_h: 56.495, loss_o: 1.331
Episode: 3583, episode steps: 27, avg. reward: 21.95, loss_i: 14.975, loss_h: 158.314, loss_o: 0.961
Epi

Episode: 3655, episode steps: 15, avg. reward: 21.98, loss_i: 14.384, loss_h: 90.708, loss_o: 1.084
Episode: 3656, episode steps: 18, avg. reward: 21.97, loss_i: 15.921, loss_h: 86.262, loss_o: 1.147
Episode: 3657, episode steps: 19, avg. reward: 21.96, loss_i: 14.934, loss_h: 85.951, loss_o: 0.924
Episode: 3658, episode steps: 55, avg. reward: 22.02, loss_i: 14.278, loss_h: 271.992, loss_o: 1.114
Episode: 3659, episode steps: 14, avg. reward: 22.0, loss_i: 15.838, loss_h: 57.439, loss_o: 1.382
Episode: 3660, episode steps: 11, avg. reward: 21.99, loss_i: 13.509, loss_h: 39.085, loss_o: 1.083
Episode: 3661, episode steps: 63, avg. reward: 22.05, loss_i: 14.428, loss_h: 315.621, loss_o: 1.199
Episode: 3662, episode steps: 51, avg. reward: 22.09, loss_i: 15.167, loss_h: 226.222, loss_o: 1.1
Episode: 3663, episode steps: 19, avg. reward: 22.09, loss_i: 14.092, loss_h: 78.234, loss_o: 0.983
Episode: 3664, episode steps: 27, avg. reward: 22.09, loss_i: 14.556, loss_h: 114.572, loss_o: 1.048

Episode: 3738, episode steps: 14, avg. reward: 21.76, loss_i: 16.165, loss_h: 76.313, loss_o: 1.103
Episode: 3739, episode steps: 27, avg. reward: 21.77, loss_i: 16.495, loss_h: 143.992, loss_o: 1.227
Episode: 3740, episode steps: 29, avg. reward: 21.78, loss_i: 15.791, loss_h: 163.793, loss_o: 1.07
Episode: 3741, episode steps: 27, avg. reward: 21.79, loss_i: 17.986, loss_h: 179.155, loss_o: 1.166
Episode: 3742, episode steps: 14, avg. reward: 21.78, loss_i: 17.269, loss_h: 83.603, loss_o: 1.056
Episode: 3743, episode steps: 16, avg. reward: 21.77, loss_i: 14.676, loss_h: 108.794, loss_o: 1.352
Episode: 3744, episode steps: 43, avg. reward: 21.8, loss_i: 15.632, loss_h: 256.863, loss_o: 1.241
Episode: 3745, episode steps: 18, avg. reward: 21.79, loss_i: 17.681, loss_h: 93.171, loss_o: 1.428
Episode: 3746, episode steps: 70, avg. reward: 21.86, loss_i: 16.104, loss_h: 391.955, loss_o: 1.195
Episode: 3747, episode steps: 12, avg. reward: 21.84, loss_i: 14.03, loss_h: 63.191, loss_o: 1.0

Episode: 3820, episode steps: 57, avg. reward: 21.93, loss_i: 15.509, loss_h: 309.106, loss_o: 1.277
Episode: 3821, episode steps: 11, avg. reward: 21.92, loss_i: 19.326, loss_h: 69.387, loss_o: 1.076
Episode: 3822, episode steps: 22, avg. reward: 21.92, loss_i: 16.791, loss_h: 125.343, loss_o: 1.009
Episode: 3823, episode steps: 12, avg. reward: 21.91, loss_i: 17.691, loss_h: 67.655, loss_o: 1.119
Episode: 3824, episode steps: 21, avg. reward: 21.91, loss_i: 16.887, loss_h: 125.297, loss_o: 1.037
Episode: 3825, episode steps: 28, avg. reward: 21.92, loss_i: 16.271, loss_h: 165.87, loss_o: 1.22
Episode: 3826, episode steps: 33, avg. reward: 21.93, loss_i: 15.739, loss_h: 176.138, loss_o: 1.148
Episode: 3827, episode steps: 28, avg. reward: 21.94, loss_i: 17.16, loss_h: 142.05, loss_o: 1.103
Episode: 3828, episode steps: 39, avg. reward: 21.96, loss_i: 16.707, loss_h: 219.019, loss_o: 1.251
Episode: 3829, episode steps: 22, avg. reward: 21.96, loss_i: 16.396, loss_h: 142.082, loss_o: 1.

Episode: 3902, episode steps: 12, avg. reward: 21.98, loss_i: 14.202, loss_h: 82.656, loss_o: 1.452
Episode: 3903, episode steps: 32, avg. reward: 21.99, loss_i: 15.788, loss_h: 218.808, loss_o: 0.96
Episode: 3904, episode steps: 37, avg. reward: 22.01, loss_i: 16.442, loss_h: 224.317, loss_o: 1.239
Episode: 3905, episode steps: 33, avg. reward: 22.02, loss_i: 16.826, loss_h: 207.314, loss_o: 1.296
Episode: 3906, episode steps: 25, avg. reward: 22.03, loss_i: 15.661, loss_h: 158.909, loss_o: 1.275
Episode: 3907, episode steps: 16, avg. reward: 22.02, loss_i: 16.88, loss_h: 86.955, loss_o: 1.138
Episode: 3908, episode steps: 20, avg. reward: 22.02, loss_i: 14.461, loss_h: 134.633, loss_o: 1.357
Episode: 3909, episode steps: 17, avg. reward: 22.01, loss_i: 15.258, loss_h: 99.168, loss_o: 1.074
Episode: 3910, episode steps: 20, avg. reward: 22.01, loss_i: 17.249, loss_h: 134.763, loss_o: 0.979
Episode: 3911, episode steps: 10, avg. reward: 22.0, loss_i: 17.2, loss_h: 66.982, loss_o: 1.361

Episode: 3984, episode steps: 29, avg. reward: 21.98, loss_i: 15.447, loss_h: 190.756, loss_o: 1.128
Episode: 3985, episode steps: 16, avg. reward: 21.98, loss_i: 13.618, loss_h: 120.696, loss_o: 1.069
Episode: 3986, episode steps: 24, avg. reward: 21.98, loss_i: 16.007, loss_h: 158.78, loss_o: 0.978
Episode: 3987, episode steps: 17, avg. reward: 21.97, loss_i: 14.968, loss_h: 141.316, loss_o: 1.099
Episode: 3988, episode steps: 13, avg. reward: 21.96, loss_i: 16.207, loss_h: 85.077, loss_o: 1.153
Episode: 3989, episode steps: 13, avg. reward: 21.95, loss_i: 18.021, loss_h: 86.451, loss_o: 1.012
Episode: 3990, episode steps: 73, avg. reward: 22.01, loss_i: 15.203, loss_h: 469.802, loss_o: 1.175
Episode: 3991, episode steps: 9, avg. reward: 21.99, loss_i: 14.557, loss_h: 53.042, loss_o: 1.045
Episode: 3992, episode steps: 18, avg. reward: 21.99, loss_i: 16.549, loss_h: 122.717, loss_o: 1.24
Episode: 3993, episode steps: 14, avg. reward: 21.98, loss_i: 16.609, loss_h: 93.346, loss_o: 1.2

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 4000, episode steps: 21, avg. reward: 22.01, loss_i: 15.045, loss_h: 120.25, loss_o: 0.935
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 15.039278759842826, 'critic_2_loss': 15.050365776887961, 'policy_loss': -15.468101711500259, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 120.31867215037346, 'critic_2_loss': 120.18202608823776, 'policy_loss': -443.6728181838989, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 0.935231149728809, 'critic_2_loss': 0.9356497329260621, 'policy_loss': -4.010543539410546, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: 

100%|██████████| 1000/1000 [01:53<00:00,  8.82it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.37, Max. Reward: 12.0
--------------------------------------------------------------------------------
Episode: 4001, episode steps: 57, avg. reward: 57.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4002, episode steps: 17, avg. reward: 37.0, loss_i: 784.233, loss_h: nan, loss_o: 9.906
Episode: 4003, episode steps: 21, avg. reward: 31.67, loss_i: 8.759, loss_h: nan, loss_o: 0.83
Episode: 4004, episode steps: 19, avg. reward: 28.5, loss_i: 11.235, loss_h: nan, loss_o: 0.924
Episode: 4005, episode steps: 36, avg. reward: 30.0, loss_i: 11.91, loss_h: nan, loss_o: 0.859
Episode: 4006, episode steps: 44, avg. reward: 32.33, loss_i: 11.364, loss_h: 21.992, loss_o: 0.813
Episode: 4007, episode steps: 16, avg. reward: 30.0, loss_i: 13.616, loss_h: 16.065, loss_o: 0.8
Episode: 4008, episode steps: 19, avg. reward: 28.62, loss_i: 12.813, loss_h: 21.686, loss_o: 0.923
Episode: 

Episode: 4081, episode steps: 24, avg. reward: 21.93, loss_i: 16.718, loss_h: 193.008, loss_o: 1.322
Episode: 4082, episode steps: 17, avg. reward: 21.87, loss_i: 16.24, loss_h: 138.797, loss_o: 1.193
Episode: 4083, episode steps: 30, avg. reward: 21.96, loss_i: 16.655, loss_h: 238.399, loss_o: 0.939
Episode: 4084, episode steps: 25, avg. reward: 22.0, loss_i: 15.769, loss_h: 211.691, loss_o: 1.481
Episode: 4085, episode steps: 16, avg. reward: 21.93, loss_i: 15.318, loss_h: 154.376, loss_o: 1.289
Episode: 4086, episode steps: 28, avg. reward: 22.0, loss_i: 17.174, loss_h: 211.463, loss_o: 1.465
Episode: 4087, episode steps: 23, avg. reward: 22.01, loss_i: 15.957, loss_h: 176.266, loss_o: 1.305
Episode: 4088, episode steps: 40, avg. reward: 22.22, loss_i: 16.931, loss_h: 316.875, loss_o: 1.231
Episode: 4089, episode steps: 18, avg. reward: 22.17, loss_i: 16.369, loss_h: 127.493, loss_o: 1.101
Episode: 4090, episode steps: 14, avg. reward: 22.08, loss_i: 14.337, loss_h: 127.173, loss_o:

Episode: 4163, episode steps: 11, avg. reward: 22.48, loss_i: 19.511, loss_h: 79.997, loss_o: 1.568
Episode: 4164, episode steps: 18, avg. reward: 22.45, loss_i: 18.013, loss_h: 134.286, loss_o: 1.019
Episode: 4165, episode steps: 11, avg. reward: 22.38, loss_i: 16.611, loss_h: 99.842, loss_o: 1.526
Episode: 4166, episode steps: 48, avg. reward: 22.54, loss_i: 15.917, loss_h: 376.38, loss_o: 1.317
Episode: 4167, episode steps: 20, avg. reward: 22.52, loss_i: 17.993, loss_h: 173.235, loss_o: 1.744
Episode: 4168, episode steps: 19, avg. reward: 22.5, loss_i: 18.354, loss_h: 170.622, loss_o: 1.32
Episode: 4169, episode steps: 17, avg. reward: 22.47, loss_i: 14.881, loss_h: 124.852, loss_o: 1.228
Episode: 4170, episode steps: 22, avg. reward: 22.46, loss_i: 15.699, loss_h: 168.028, loss_o: 1.353
Episode: 4171, episode steps: 34, avg. reward: 22.53, loss_i: 15.838, loss_h: 246.132, loss_o: 1.467
Episode: 4172, episode steps: 23, avg. reward: 22.53, loss_i: 18.23, loss_h: 193.441, loss_o: 1.

Episode: 4245, episode steps: 12, avg. reward: 21.93, loss_i: 12.887, loss_h: 95.945, loss_o: 1.207
Episode: 4246, episode steps: 11, avg. reward: 21.89, loss_i: 16.833, loss_h: 108.297, loss_o: 1.083
Episode: 4247, episode steps: 17, avg. reward: 21.87, loss_i: 16.427, loss_h: 157.677, loss_o: 1.373
Episode: 4248, episode steps: 19, avg. reward: 21.86, loss_i: 16.512, loss_h: 129.827, loss_o: 1.18
Episode: 4249, episode steps: 47, avg. reward: 21.96, loss_i: 16.765, loss_h: 372.426, loss_o: 1.376
Episode: 4250, episode steps: 14, avg. reward: 21.93, loss_i: 17.245, loss_h: 95.326, loss_o: 1.394
Episode: 4251, episode steps: 26, avg. reward: 21.94, loss_i: 16.132, loss_h: 219.307, loss_o: 1.204
Episode: 4252, episode steps: 11, avg. reward: 21.9, loss_i: 20.874, loss_h: 85.219, loss_o: 1.545
Episode: 4253, episode steps: 28, avg. reward: 21.92, loss_i: 16.557, loss_h: 217.331, loss_o: 1.42
Episode: 4254, episode steps: 28, avg. reward: 21.95, loss_i: 15.807, loss_h: 248.788, loss_o: 1.

Episode: 4327, episode steps: 86, avg. reward: 22.42, loss_i: 16.355, loss_h: 720.33, loss_o: 1.415
Episode: 4328, episode steps: 14, avg. reward: 22.39, loss_i: 18.214, loss_h: 114.483, loss_o: 1.097
Episode: 4329, episode steps: 15, avg. reward: 22.37, loss_i: 17.093, loss_h: 114.6, loss_o: 0.908
Episode: 4330, episode steps: 14, avg. reward: 22.34, loss_i: 17.374, loss_h: 123.171, loss_o: 1.294
Episode: 4331, episode steps: 18, avg. reward: 22.33, loss_i: 15.084, loss_h: 138.842, loss_o: 1.22
Episode: 4332, episode steps: 37, avg. reward: 22.37, loss_i: 17.811, loss_h: 305.281, loss_o: 1.147
Episode: 4333, episode steps: 35, avg. reward: 22.41, loss_i: 19.515, loss_h: 304.845, loss_o: 1.502
Episode: 4334, episode steps: 62, avg. reward: 22.53, loss_i: 17.745, loss_h: 494.556, loss_o: 1.229
Episode: 4335, episode steps: 37, avg. reward: 22.57, loss_i: 16.348, loss_h: 273.477, loss_o: 1.342
Episode: 4336, episode steps: 19, avg. reward: 22.56, loss_i: 17.439, loss_h: 144.51, loss_o: 1

Episode: 4407, episode steps: 22, avg. reward: 22.2, loss_i: 9.963, loss_h: nan, loss_o: 1.471
Episode: 4408, episode steps: 20, avg. reward: 22.19, loss_i: 11.863, loss_h: nan, loss_o: 1.523
Episode: 4409, episode steps: 16, avg. reward: 22.18, loss_i: 10.637, loss_h: nan, loss_o: 1.503
Episode: 4410, episode steps: 12, avg. reward: 22.15, loss_i: 10.359, loss_h: 2.007, loss_o: 1.628
Episode: 4411, episode steps: 20, avg. reward: 22.15, loss_i: 11.606, loss_h: 74.921, loss_o: 1.359
Episode: 4412, episode steps: 37, avg. reward: 22.18, loss_i: 12.127, loss_h: 171.914, loss_o: 1.398
Episode: 4413, episode steps: 19, avg. reward: 22.18, loss_i: 11.721, loss_h: 75.046, loss_o: 1.497
Episode: 4414, episode steps: 14, avg. reward: 22.16, loss_i: 11.66, loss_h: 44.94, loss_o: 1.594
Episode: 4415, episode steps: 18, avg. reward: 22.15, loss_i: 12.819, loss_h: 94.363, loss_o: 1.607
Episode: 4416, episode steps: 12, avg. reward: 22.12, loss_i: 12.278, loss_h: 61.216, loss_o: 1.81
Episode: 4417,

Episode: 4489, episode steps: 17, avg. reward: 22.22, loss_i: 13.728, loss_h: 150.64, loss_o: 1.444
Episode: 4490, episode steps: 30, avg. reward: 22.23, loss_i: 15.772, loss_h: 272.506, loss_o: 1.335
Episode: 4491, episode steps: 14, avg. reward: 22.22, loss_i: 15.656, loss_h: 0.874, loss_o: 1.233
Episode: 4492, episode steps: 17, avg. reward: 22.21, loss_i: 15.794, loss_h: 180.499, loss_o: 1.667
Episode: 4493, episode steps: 20, avg. reward: 22.2, loss_i: 14.662, loss_h: 200.376, loss_o: 1.405
Episode: 4494, episode steps: 21, avg. reward: 22.2, loss_i: 13.947, loss_h: 235.785, loss_o: 1.95
Episode: 4495, episode steps: 44, avg. reward: 22.24, loss_i: 13.905, loss_h: 410.686, loss_o: 1.556
Episode: 4496, episode steps: 15, avg. reward: 22.23, loss_i: 14.535, loss_h: 154.638, loss_o: 1.503
Episode: 4497, episode steps: 20, avg. reward: 22.22, loss_i: 12.412, loss_h: 210.451, loss_o: 1.43
Episode: 4498, episode steps: 16, avg. reward: 22.21, loss_i: 13.553, loss_h: 0.882, loss_o: 1.76


Episode: 4571, episode steps: 9, avg. reward: 21.46, loss_i: 14.062, loss_h: 130.269, loss_o: 1.995
Episode: 4572, episode steps: 9, avg. reward: 21.43, loss_i: 13.698, loss_h: 101.175, loss_o: 1.844
Episode: 4573, episode steps: 10, avg. reward: 21.41, loss_i: 15.26, loss_h: 131.792, loss_o: 1.48
Episode: 4574, episode steps: 9, avg. reward: 21.39, loss_i: 12.997, loss_h: 105.977, loss_o: 1.706
Episode: 4575, episode steps: 9, avg. reward: 21.37, loss_i: 10.933, loss_h: 127.635, loss_o: 1.184
Episode: 4576, episode steps: 9, avg. reward: 21.35, loss_i: 16.84, loss_h: 117.899, loss_o: 1.851
Episode: 4577, episode steps: 9, avg. reward: 21.33, loss_i: 14.444, loss_h: 139.022, loss_o: 1.963
Episode: 4578, episode steps: 9, avg. reward: 21.31, loss_i: 13.473, loss_h: 1.293, loss_o: 1.824
Episode: 4579, episode steps: 9, avg. reward: 21.28, loss_i: 16.652, loss_h: 117.41, loss_o: 1.561
Episode: 4580, episode steps: 10, avg. reward: 21.27, loss_i: 17.323, loss_h: 147.874, loss_o: 2.069
Epis

Episode: 4654, episode steps: 10, avg. reward: 19.93, loss_i: 16.741, loss_h: 135.311, loss_o: 2.714
Episode: 4655, episode steps: 9, avg. reward: 19.91, loss_i: 16.387, loss_h: 147.342, loss_o: 2.341
Episode: 4656, episode steps: 10, avg. reward: 19.89, loss_i: 14.129, loss_h: 135.141, loss_o: 2.11
Episode: 4657, episode steps: 9, avg. reward: 19.88, loss_i: 13.485, loss_h: 136.564, loss_o: 2.22
Episode: 4658, episode steps: 11, avg. reward: 19.86, loss_i: 13.941, loss_h: 194.628, loss_o: 2.067
Episode: 4659, episode steps: 9, avg. reward: 19.85, loss_i: 15.626, loss_h: 143.079, loss_o: 2.667
Episode: 4660, episode steps: 9, avg. reward: 19.83, loss_i: 14.944, loss_h: 137.872, loss_o: 2.104
Episode: 4661, episode steps: 8, avg. reward: 19.81, loss_i: 13.645, loss_h: 1.565, loss_o: 2.704
Episode: 4662, episode steps: 10, avg. reward: 19.8, loss_i: 11.799, loss_h: 166.926, loss_o: 2.005
Episode: 4663, episode steps: 10, avg. reward: 19.78, loss_i: 14.221, loss_h: 183.768, loss_o: 1.873


Episode: 4737, episode steps: 9, avg. reward: 18.73, loss_i: 14.49, loss_h: 161.356, loss_o: 2.262
Episode: 4738, episode steps: 9, avg. reward: 18.71, loss_i: 15.041, loss_h: 1.411, loss_o: 2.007
Episode: 4739, episode steps: 9, avg. reward: 18.7, loss_i: 12.916, loss_h: 172.334, loss_o: 2.068
Episode: 4740, episode steps: 10, avg. reward: 18.69, loss_i: 14.137, loss_h: 188.765, loss_o: 2.546
Episode: 4741, episode steps: 10, avg. reward: 18.68, loss_i: 14.3, loss_h: 166.037, loss_o: 2.596
Episode: 4742, episode steps: 10, avg. reward: 18.66, loss_i: 15.919, loss_h: 194.358, loss_o: 2.281
Episode: 4743, episode steps: 10, avg. reward: 18.65, loss_i: 14.731, loss_h: 1.491, loss_o: 1.873
Episode: 4744, episode steps: 10, avg. reward: 18.64, loss_i: 13.764, loss_h: 189.16, loss_o: 2.013
Episode: 4745, episode steps: 10, avg. reward: 18.63, loss_i: 12.409, loss_h: 169.86, loss_o: 2.163
Episode: 4746, episode steps: 11, avg. reward: 18.62, loss_i: 13.975, loss_h: 222.46, loss_o: 2.619
Epis

Episode: 4818, episode steps: 9, avg. reward: 17.81, loss_i: 6.073, loss_h: 101.057, loss_o: 3.211
Episode: 4819, episode steps: 11, avg. reward: 17.8, loss_i: 7.907, loss_h: 109.181, loss_o: 3.554
Episode: 4820, episode steps: 9, avg. reward: 17.79, loss_i: 6.081, loss_h: 117.655, loss_o: 3.038
Episode: 4821, episode steps: 9, avg. reward: 17.78, loss_i: 9.217, loss_h: 95.012, loss_o: 2.795
Episode: 4822, episode steps: 10, avg. reward: 17.77, loss_i: 7.568, loss_h: 108.0, loss_o: 3.001
Episode: 4823, episode steps: 9, avg. reward: 17.76, loss_i: 7.737, loss_h: 96.679, loss_o: 3.195
Episode: 4824, episode steps: 10, avg. reward: 17.75, loss_i: 9.097, loss_h: 117.295, loss_o: 3.199
Episode: 4825, episode steps: 10, avg. reward: 17.74, loss_i: 6.85, loss_h: 114.223, loss_o: 2.716
Episode: 4826, episode steps: 9, avg. reward: 17.73, loss_i: 7.869, loss_h: 103.735, loss_o: 3.525
Episode: 4827, episode steps: 9, avg. reward: 17.72, loss_i: 7.751, loss_h: 103.997, loss_o: 3.338
Episode: 482

Episode: 4901, episode steps: 9, avg. reward: 17.02, loss_i: 8.433, loss_h: 137.536, loss_o: 3.263
Episode: 4902, episode steps: 10, avg. reward: 17.02, loss_i: 10.854, loss_h: 218.465, loss_o: 2.873
Episode: 4903, episode steps: 10, avg. reward: 17.01, loss_i: 8.338, loss_h: 179.138, loss_o: 2.931
Episode: 4904, episode steps: 10, avg. reward: 17.0, loss_i: 10.805, loss_h: 196.512, loss_o: 3.22
Episode: 4905, episode steps: 9, avg. reward: 16.99, loss_i: 8.659, loss_h: 179.722, loss_o: 3.52
Episode: 4906, episode steps: 10, avg. reward: 16.98, loss_i: 10.83, loss_h: 204.882, loss_o: 2.693
Episode: 4907, episode steps: 8, avg. reward: 16.97, loss_i: 8.447, loss_h: 1.698, loss_o: 3.467
Episode: 4908, episode steps: 8, avg. reward: 16.96, loss_i: 7.675, loss_h: 167.741, loss_o: 3.274
Episode: 4909, episode steps: 9, avg. reward: 16.95, loss_i: 9.408, loss_h: 190.934, loss_o: 3.132
Episode: 4910, episode steps: 9, avg. reward: 16.95, loss_i: 10.231, loss_h: 1.353, loss_o: 2.726
Episode: 4

Episode: 4984, episode steps: 9, avg. reward: 16.38, loss_i: 10.543, loss_h: 178.334, loss_o: 2.422
Episode: 4985, episode steps: 9, avg. reward: 16.38, loss_i: 8.648, loss_h: 161.743, loss_o: 3.547
Episode: 4986, episode steps: 8, avg. reward: 16.37, loss_i: 8.194, loss_h: 148.713, loss_o: 2.974
Episode: 4987, episode steps: 9, avg. reward: 16.36, loss_i: 7.816, loss_h: 142.248, loss_o: 3.166
Episode: 4988, episode steps: 10, avg. reward: 16.35, loss_i: 8.539, loss_h: 189.659, loss_o: 3.434
Episode: 4989, episode steps: 9, avg. reward: 16.35, loss_i: 9.504, loss_h: 157.499, loss_o: 3.63
Episode: 4990, episode steps: 10, avg. reward: 16.34, loss_i: 7.301, loss_h: 186.679, loss_o: 2.847
Episode: 4991, episode steps: 9, avg. reward: 16.33, loss_i: 7.676, loss_h: 129.773, loss_o: 3.319
Episode: 4992, episode steps: 9, avg. reward: 16.32, loss_i: 8.874, loss_h: 144.128, loss_o: 3.85
Episode: 4993, episode steps: 9, avg. reward: 16.32, loss_i: 9.078, loss_h: 153.904, loss_o: 3.147
Episode: 

  0%|          | 1/1000 [00:00<02:06,  7.91it/s]

Episode: 5000, episode steps: 8, avg. reward: 16.27, loss_i: 7.439, loss_h: 116.174, loss_o: 2.801
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 7.590478806756437, 'critic_2_loss': 7.287249353714287, 'policy_loss': -13.370691984891891, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 116.52866264991462, 'critic_2_loss': 115.81966791674495, 'policy_loss': -267.6993899345398, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 2.796822041273117, 'critic_2_loss': 2.8049087077379227, 'policy_loss': -4.270122051239014, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.38, 0.45, 0.37, 0.38, 0.48, 0.39, 0.55, 0.42]
o: [1.0]
UPDATES
i: [1810, 1810, 1810, 1810]
h: [351, 1063, 181, 411, 1219, 406, 1798, 414]
o: [1810]
MEMORY
i: [1875, 1875, 1875, 1875]
h: [198, 719, 107, 202

100%|██████████| 1000/1000 [01:57<00:00,  8.51it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.37, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 5001, episode steps: 10, avg. reward: 10.0, loss_i: 7.305, loss_h: 175.388, loss_o: 3.215
Episode: 5002, episode steps: 9, avg. reward: 9.5, loss_i: 9.209, loss_h: 184.467, loss_o: 3.237
Episode: 5003, episode steps: 8, avg. reward: 9.0, loss_i: 8.82, loss_h: 126.378, loss_o: 3.369
Episode: 5004, episode steps: 9, avg. reward: 9.0, loss_i: 7.094, loss_h: 1.489, loss_o: 3.118
Episode: 5005, episode steps: 9, avg. reward: 9.0, loss_i: 9.06, loss_h: 163.783, loss_o: 3.22
Episode: 5006, episode steps: 9, avg. reward: 9.0, loss_i: 10.124, loss_h: 1.432, loss_o: 3.276
Episode: 5007, episode steps: 9, avg. reward: 9.0, loss_i: 7.557, loss_h: 163.055, loss_o: 3.074
Episode: 5008, episode steps: 10, avg. reward: 9.12, loss_i: 8.129, loss_h: 164.168, loss_o: 3.084
Episod

Episode: 5084, episode steps: 10, avg. reward: 9.33, loss_i: 7.449, loss_h: 1.416, loss_o: 3.111
Episode: 5085, episode steps: 10, avg. reward: 9.34, loss_i: 7.256, loss_h: 1.042, loss_o: 2.903
Episode: 5086, episode steps: 9, avg. reward: 9.34, loss_i: 8.082, loss_h: 1.475, loss_o: 3.022
Episode: 5087, episode steps: 10, avg. reward: 9.34, loss_i: 7.226, loss_h: 0.964, loss_o: 3.46
Episode: 5088, episode steps: 10, avg. reward: 9.35, loss_i: 9.18, loss_h: 1.134, loss_o: 3.279
Episode: 5089, episode steps: 10, avg. reward: 9.36, loss_i: 7.37, loss_h: 1.314, loss_o: 3.124
Episode: 5090, episode steps: 9, avg. reward: 9.36, loss_i: 7.71, loss_h: 1.355, loss_o: 2.576
Episode: 5091, episode steps: 10, avg. reward: 9.36, loss_i: 7.699, loss_h: 1.137, loss_o: 2.844
Episode: 5092, episode steps: 9, avg. reward: 9.36, loss_i: 8.5, loss_h: 1.211, loss_o: 3.771
Episode: 5093, episode steps: 10, avg. reward: 9.37, loss_i: 7.503, loss_h: 1.247, loss_o: 2.423
Episode: 5094, episode steps: 9, avg. r

Episode: 5169, episode steps: 10, avg. reward: 9.33, loss_i: 10.66, loss_h: 148.53, loss_o: 3.168
Episode: 5170, episode steps: 10, avg. reward: 9.34, loss_i: 8.754, loss_h: 1.675, loss_o: 3.364
Episode: 5171, episode steps: 9, avg. reward: 9.33, loss_i: 8.762, loss_h: 151.567, loss_o: 3.45
Episode: 5172, episode steps: 10, avg. reward: 9.34, loss_i: 9.224, loss_h: 168.227, loss_o: 3.423
Episode: 5173, episode steps: 9, avg. reward: 9.34, loss_i: 8.655, loss_h: 188.169, loss_o: 3.648
Episode: 5174, episode steps: 9, avg. reward: 9.33, loss_i: 10.395, loss_h: 165.371, loss_o: 3.36
Episode: 5175, episode steps: 9, avg. reward: 9.33, loss_i: 10.414, loss_h: 177.596, loss_o: 3.248
Episode: 5176, episode steps: 10, avg. reward: 9.34, loss_i: 9.432, loss_h: 162.892, loss_o: 3.364
Episode: 5177, episode steps: 11, avg. reward: 9.34, loss_i: 9.418, loss_h: 184.645, loss_o: 3.137
Episode: 5178, episode steps: 8, avg. reward: 9.34, loss_i: 8.732, loss_h: 120.645, loss_o: 2.644
Episode: 5179, epi

Episode: 5252, episode steps: 9, avg. reward: 9.33, loss_i: 12.232, loss_h: 86.572, loss_o: 3.056
Episode: 5253, episode steps: 8, avg. reward: 9.32, loss_i: 11.332, loss_h: 65.922, loss_o: 3.233
Episode: 5254, episode steps: 10, avg. reward: 9.32, loss_i: 19.955, loss_h: 0.819, loss_o: 3.135
Episode: 5255, episode steps: 9, avg. reward: 9.32, loss_i: 14.674, loss_h: 77.929, loss_o: 3.824
Episode: 5256, episode steps: 10, avg. reward: 9.32, loss_i: 14.764, loss_h: 89.373, loss_o: 3.366
Episode: 5257, episode steps: 9, avg. reward: 9.32, loss_i: 11.812, loss_h: 104.23, loss_o: 3.609
Episode: 5258, episode steps: 9, avg. reward: 9.32, loss_i: 11.748, loss_h: 89.746, loss_o: 2.884
Episode: 5259, episode steps: 9, avg. reward: 9.32, loss_i: 9.972, loss_h: 80.713, loss_o: 3.022
Episode: 5260, episode steps: 9, avg. reward: 9.32, loss_i: 12.669, loss_h: 90.877, loss_o: 3.39
Episode: 5261, episode steps: 9, avg. reward: 9.32, loss_i: 15.239, loss_h: 96.127, loss_o: 3.772
Episode: 5262, episod

Episode: 5336, episode steps: 9, avg. reward: 9.34, loss_i: 11.122, loss_h: 168.316, loss_o: 3.559
Episode: 5337, episode steps: 9, avg. reward: 9.34, loss_i: 10.743, loss_h: 180.385, loss_o: 3.022
Episode: 5338, episode steps: 9, avg. reward: 9.34, loss_i: 9.656, loss_h: 192.886, loss_o: 3.273
Episode: 5339, episode steps: 10, avg. reward: 9.34, loss_i: 13.305, loss_h: 178.448, loss_o: 3.387
Episode: 5340, episode steps: 11, avg. reward: 9.34, loss_i: 12.617, loss_h: 1.823, loss_o: 2.714
Episode: 5341, episode steps: 10, avg. reward: 9.35, loss_i: 13.276, loss_h: 197.267, loss_o: 3.232
Episode: 5342, episode steps: 9, avg. reward: 9.35, loss_i: 10.773, loss_h: 208.056, loss_o: 3.586
Episode: 5343, episode steps: 9, avg. reward: 9.34, loss_i: 11.241, loss_h: 235.987, loss_o: 3.17
Episode: 5344, episode steps: 10, avg. reward: 9.35, loss_i: 11.29, loss_h: 220.063, loss_o: 3.571
Episode: 5345, episode steps: 9, avg. reward: 9.34, loss_i: 10.875, loss_h: 186.929, loss_o: 2.53
Episode: 534

Episode: 5420, episode steps: 10, avg. reward: 9.35, loss_i: 13.853, loss_h: 224.589, loss_o: 2.977
Episode: 5421, episode steps: 10, avg. reward: 9.35, loss_i: 12.021, loss_h: 200.627, loss_o: 3.74
Episode: 5422, episode steps: 10, avg. reward: 9.36, loss_i: 10.764, loss_h: 228.637, loss_o: 3.173
Episode: 5423, episode steps: 9, avg. reward: 9.35, loss_i: 11.982, loss_h: 206.165, loss_o: 2.941
Episode: 5424, episode steps: 8, avg. reward: 9.35, loss_i: 11.885, loss_h: 134.524, loss_o: 3.109
Episode: 5425, episode steps: 10, avg. reward: 9.35, loss_i: 11.077, loss_h: 1.974, loss_o: 3.028
Episode: 5426, episode steps: 10, avg. reward: 9.35, loss_i: 13.077, loss_h: 187.81, loss_o: 3.563
Episode: 5427, episode steps: 10, avg. reward: 9.36, loss_i: 11.688, loss_h: 222.511, loss_o: 3.633
Episode: 5428, episode steps: 10, avg. reward: 9.36, loss_i: 12.854, loss_h: 211.012, loss_o: 3.348
Episode: 5429, episode steps: 9, avg. reward: 9.36, loss_i: 12.557, loss_h: 189.674, loss_o: 3.097
Episode

Episode: 5503, episode steps: 10, avg. reward: 9.37, loss_i: 11.049, loss_h: 164.817, loss_o: 2.995
Episode: 5504, episode steps: 8, avg. reward: 9.37, loss_i: 11.226, loss_h: 124.485, loss_o: 3.115
Episode: 5505, episode steps: 10, avg. reward: 9.37, loss_i: 11.962, loss_h: 1.464, loss_o: 3.258
Episode: 5506, episode steps: 8, avg. reward: 9.37, loss_i: 10.78, loss_h: 122.66, loss_o: 3.751
Episode: 5507, episode steps: 11, avg. reward: 9.37, loss_i: 11.975, loss_h: 178.973, loss_o: 3.144
Episode: 5508, episode steps: 10, avg. reward: 9.37, loss_i: 13.308, loss_h: 1.23, loss_o: 3.409
Episode: 5509, episode steps: 9, avg. reward: 9.37, loss_i: 13.426, loss_h: 155.181, loss_o: 2.721
Episode: 5510, episode steps: 10, avg. reward: 9.37, loss_i: 13.903, loss_h: 166.72, loss_o: 2.583
Episode: 5511, episode steps: 10, avg. reward: 9.37, loss_i: 10.534, loss_h: 168.498, loss_o: 2.989
Episode: 5512, episode steps: 10, avg. reward: 9.38, loss_i: 13.17, loss_h: 1.786, loss_o: 2.97
Episode: 5513, 

Episode: 5587, episode steps: 10, avg. reward: 9.38, loss_i: 8.731, loss_h: 126.349, loss_o: 2.345
Episode: 5588, episode steps: 9, avg. reward: 9.38, loss_i: 7.99, loss_h: 160.495, loss_o: 2.714
Episode: 5589, episode steps: 10, avg. reward: 9.38, loss_i: 9.257, loss_h: 159.583, loss_o: 2.8
Episode: 5590, episode steps: 10, avg. reward: 9.38, loss_i: 9.418, loss_h: 1.586, loss_o: 3.063
Episode: 5591, episode steps: 9, avg. reward: 9.38, loss_i: 7.222, loss_h: 124.57, loss_o: 2.527
Episode: 5592, episode steps: 10, avg. reward: 9.38, loss_i: 9.451, loss_h: 139.622, loss_o: 2.811
Episode: 5593, episode steps: 10, avg. reward: 9.38, loss_i: 8.497, loss_h: 168.529, loss_o: 2.845
Episode: 5594, episode steps: 9, avg. reward: 9.38, loss_i: 9.593, loss_h: 122.822, loss_o: 2.751
Episode: 5595, episode steps: 10, avg. reward: 9.38, loss_i: 9.06, loss_h: 176.082, loss_o: 2.769
Episode: 5596, episode steps: 8, avg. reward: 9.38, loss_i: 7.913, loss_h: 139.984, loss_o: 2.419
Episode: 5597, episod

Episode: 5669, episode steps: 9, avg. reward: 9.37, loss_i: 17.371, loss_h: 77.485, loss_o: 3.109
Episode: 5670, episode steps: 10, avg. reward: 9.37, loss_i: 10.764, loss_h: 123.525, loss_o: 3.31
Episode: 5671, episode steps: 10, avg. reward: 9.37, loss_i: 12.666, loss_h: 0.862, loss_o: 3.242
Episode: 5672, episode steps: 11, avg. reward: 9.38, loss_i: 11.299, loss_h: 131.444, loss_o: 3.368
Episode: 5673, episode steps: 10, avg. reward: 9.38, loss_i: 12.949, loss_h: 0.84, loss_o: 3.187
Episode: 5674, episode steps: 9, avg. reward: 9.38, loss_i: 11.266, loss_h: 111.557, loss_o: 3.456
Episode: 5675, episode steps: 9, avg. reward: 9.38, loss_i: 14.41, loss_h: 91.516, loss_o: 3.155
Episode: 5676, episode steps: 10, avg. reward: 9.38, loss_i: 14.016, loss_h: 128.311, loss_o: 3.521
Episode: 5677, episode steps: 10, avg. reward: 9.38, loss_i: 11.698, loss_h: 147.129, loss_o: 2.942
Episode: 5678, episode steps: 9, avg. reward: 9.38, loss_i: 10.161, loss_h: 90.381, loss_o: 2.806
Episode: 5679,

Episode: 5752, episode steps: 8, avg. reward: 9.36, loss_i: 11.29, loss_h: 150.922, loss_o: 4.07
Episode: 5753, episode steps: 9, avg. reward: 9.36, loss_i: 12.62, loss_h: 174.293, loss_o: 3.132
Episode: 5754, episode steps: 9, avg. reward: 9.36, loss_i: 12.395, loss_h: 1.38, loss_o: 4.258
Episode: 5755, episode steps: 10, avg. reward: 9.36, loss_i: 14.084, loss_h: 183.204, loss_o: 3.861
Episode: 5756, episode steps: 9, avg. reward: 9.36, loss_i: 13.784, loss_h: 162.934, loss_o: 3.616
Episode: 5757, episode steps: 10, avg. reward: 9.36, loss_i: 11.188, loss_h: 169.881, loss_o: 4.283
Episode: 5758, episode steps: 10, avg. reward: 9.36, loss_i: 14.469, loss_h: 1.308, loss_o: 3.566
Episode: 5759, episode steps: 9, avg. reward: 9.36, loss_i: 15.205, loss_h: 185.382, loss_o: 3.349
Episode: 5760, episode steps: 8, avg. reward: 9.36, loss_i: 12.51, loss_h: 158.016, loss_o: 4.207
Episode: 5761, episode steps: 10, avg. reward: 9.36, loss_i: 12.557, loss_h: 200.706, loss_o: 3.695
Episode: 5762, 

Episode: 5836, episode steps: 9, avg. reward: 9.37, loss_i: 12.736, loss_h: 1.435, loss_o: 3.686
Episode: 5837, episode steps: 9, avg. reward: 9.37, loss_i: 12.511, loss_h: 1.539, loss_o: 4.137
Episode: 5838, episode steps: 10, avg. reward: 9.37, loss_i: 14.194, loss_h: 1.259, loss_o: 4.196
Episode: 5839, episode steps: 10, avg. reward: 9.37, loss_i: 12.531, loss_h: 1.504, loss_o: 4.765
Episode: 5840, episode steps: 10, avg. reward: 9.37, loss_i: 14.211, loss_h: 1.217, loss_o: 4.111
Episode: 5841, episode steps: 9, avg. reward: 9.37, loss_i: 12.855, loss_h: 1.738, loss_o: 3.726
Episode: 5842, episode steps: 10, avg. reward: 9.37, loss_i: 14.645, loss_h: 1.453, loss_o: 3.339
Episode: 5843, episode steps: 9, avg. reward: 9.37, loss_i: 11.041, loss_h: 1.355, loss_o: 4.258
Episode: 5844, episode steps: 9, avg. reward: 9.37, loss_i: 11.518, loss_h: 1.363, loss_o: 4.429
Episode: 5845, episode steps: 10, avg. reward: 9.37, loss_i: 11.888, loss_h: 1.245, loss_o: 4.36
Episode: 5846, episode ste

Episode: 5921, episode steps: 10, avg. reward: 9.36, loss_i: 17.451, loss_h: 1.834, loss_o: 3.852
Episode: 5922, episode steps: 10, avg. reward: 9.36, loss_i: 19.014, loss_h: 1.697, loss_o: 3.839
Episode: 5923, episode steps: 9, avg. reward: 9.36, loss_i: 16.6, loss_h: 1.24, loss_o: 4.342
Episode: 5924, episode steps: 9, avg. reward: 9.36, loss_i: 16.905, loss_h: 1.574, loss_o: 3.692
Episode: 5925, episode steps: 9, avg. reward: 9.36, loss_i: 18.374, loss_h: 1.326, loss_o: 4.044
Episode: 5926, episode steps: 10, avg. reward: 9.36, loss_i: 13.251, loss_h: 1.4, loss_o: 4.183
Episode: 5927, episode steps: 9, avg. reward: 9.36, loss_i: 15.696, loss_h: 1.876, loss_o: 4.981
Episode: 5928, episode steps: 8, avg. reward: 9.36, loss_i: 22.018, loss_h: 1.368, loss_o: 4.319
Episode: 5929, episode steps: 9, avg. reward: 9.36, loss_i: 15.543, loss_h: 1.367, loss_o: 3.302
Episode: 5930, episode steps: 9, avg. reward: 9.36, loss_i: 14.403, loss_h: 1.691, loss_o: 4.306
Episode: 5931, episode steps: 9,

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 6000, episode steps: 10, avg. reward: 9.36, loss_i: 14.934, loss_h: 1.562, loss_o: 4.097
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 14.900454871729016, 'critic_2_loss': 14.968419645354151, 'policy_loss': -16.709208536148072, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 1.556965901402005, 'critic_2_loss': 1.5671871423459889, 'policy_loss': -2.4973844268865752, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 4.09964530467987, 'critic_2_loss': 4.0949455261230465, 'policy_loss': -4.685883665084839, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.

100%|██████████| 1000/1000 [01:55<00:00,  8.67it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.39, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 6001, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6002, episode steps: 9, avg. reward: 9.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6003, episode steps: 8, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6004, episode steps: 10, avg. reward: 9.25, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6005, episode steps: 9, avg. reward: 9.2, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6006, episode steps: 10, avg. reward: 9.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6007, episode steps: 9, avg. reward: 9.29, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6008, episode steps: 10, avg. reward: 9.38, loss_i: 76.807, loss_h: 55.015, loss_o: 4.111
Episode: 6009, episode steps: 9, avg. reward: 9.33,

Episode: 6084, episode steps: 9, avg. reward: 9.19, loss_i: 55.835, loss_h: 1.59, loss_o: 4.158
Episode: 6085, episode steps: 10, avg. reward: 9.2, loss_i: 62.687, loss_h: 1.642, loss_o: 3.927
Episode: 6086, episode steps: 8, avg. reward: 9.19, loss_i: 61.728, loss_h: 1.548, loss_o: 4.549
Episode: 6087, episode steps: 10, avg. reward: 9.2, loss_i: 54.619, loss_h: 1.746, loss_o: 4.285
Episode: 6088, episode steps: 10, avg. reward: 9.2, loss_i: 57.145, loss_h: 1.817, loss_o: 4.055
Episode: 6089, episode steps: 8, avg. reward: 9.19, loss_i: 56.389, loss_h: 1.438, loss_o: 4.475
Episode: 6090, episode steps: 11, avg. reward: 9.21, loss_i: 54.787, loss_h: 1.674, loss_o: 3.814
Episode: 6091, episode steps: 9, avg. reward: 9.21, loss_i: 50.787, loss_h: 1.615, loss_o: 4.1
Episode: 6092, episode steps: 8, avg. reward: 9.2, loss_i: 53.651, loss_h: 1.415, loss_o: 4.679
Episode: 6093, episode steps: 9, avg. reward: 9.19, loss_i: 47.172, loss_h: 1.529, loss_o: 3.773
Episode: 6094, episode steps: 8, 

Episode: 6169, episode steps: 10, avg. reward: 9.33, loss_i: 31.23, loss_h: 1.793, loss_o: 3.794
Episode: 6170, episode steps: 9, avg. reward: 9.33, loss_i: 34.423, loss_h: 1.602, loss_o: 3.732
Episode: 6171, episode steps: 9, avg. reward: 9.33, loss_i: 31.852, loss_h: 1.65, loss_o: 3.472
Episode: 6172, episode steps: 10, avg. reward: 9.33, loss_i: 29.722, loss_h: 1.625, loss_o: 5.392
Episode: 6173, episode steps: 10, avg. reward: 9.34, loss_i: 27.325, loss_h: 1.844, loss_o: 5.044
Episode: 6174, episode steps: 9, avg. reward: 9.33, loss_i: 32.086, loss_h: 1.803, loss_o: 4.075
Episode: 6175, episode steps: 8, avg. reward: 9.33, loss_i: 35.142, loss_h: 1.695, loss_o: 5.01
Episode: 6176, episode steps: 8, avg. reward: 9.32, loss_i: 43.333, loss_h: 1.535, loss_o: 4.175
Episode: 6177, episode steps: 8, avg. reward: 9.31, loss_i: 39.027, loss_h: 1.784, loss_o: 4.46
Episode: 6178, episode steps: 10, avg. reward: 9.31, loss_i: 45.623, loss_h: 1.813, loss_o: 4.456
Episode: 6179, episode steps: 

Episode: 6254, episode steps: 9, avg. reward: 9.34, loss_i: 33.977, loss_h: 1.886, loss_o: 5.223
Episode: 6255, episode steps: 10, avg. reward: 9.34, loss_i: 40.521, loss_h: 1.825, loss_o: 4.7
Episode: 6256, episode steps: 10, avg. reward: 9.34, loss_i: 43.941, loss_h: 1.789, loss_o: 4.605
Episode: 6257, episode steps: 9, avg. reward: 9.34, loss_i: 37.825, loss_h: 1.69, loss_o: 4.779
Episode: 6258, episode steps: 8, avg. reward: 9.34, loss_i: 35.76, loss_h: 1.933, loss_o: 5.416
Episode: 6259, episode steps: 10, avg. reward: 9.34, loss_i: 28.141, loss_h: 1.8, loss_o: 4.442
Episode: 6260, episode steps: 9, avg. reward: 9.34, loss_i: 38.193, loss_h: 1.819, loss_o: 4.879
Episode: 6261, episode steps: 8, avg. reward: 9.33, loss_i: 30.339, loss_h: 1.915, loss_o: 4.369
Episode: 6262, episode steps: 10, avg. reward: 9.34, loss_i: 38.864, loss_h: 1.736, loss_o: 4.955
Episode: 6263, episode steps: 9, avg. reward: 9.33, loss_i: 33.843, loss_h: 1.725, loss_o: 4.458
Episode: 6264, episode steps: 10

Episode: 6339, episode steps: 8, avg. reward: 9.37, loss_i: 42.015, loss_h: 2.076, loss_o: 4.598
Episode: 6340, episode steps: 10, avg. reward: 9.38, loss_i: 38.144, loss_h: 2.079, loss_o: 4.558
Episode: 6341, episode steps: 9, avg. reward: 9.38, loss_i: 41.323, loss_h: 1.982, loss_o: 3.938
Episode: 6342, episode steps: 10, avg. reward: 9.38, loss_i: 40.125, loss_h: 2.102, loss_o: 4.011
Episode: 6343, episode steps: 10, avg. reward: 9.38, loss_i: 42.906, loss_h: 1.984, loss_o: 3.972
Episode: 6344, episode steps: 10, avg. reward: 9.38, loss_i: 42.753, loss_h: 1.778, loss_o: 4.766
Episode: 6345, episode steps: 10, avg. reward: 9.38, loss_i: 35.949, loss_h: 2.158, loss_o: 6.179
Episode: 6346, episode steps: 10, avg. reward: 9.38, loss_i: 42.504, loss_h: 2.306, loss_o: 3.987
Episode: 6347, episode steps: 9, avg. reward: 9.38, loss_i: 39.193, loss_h: 1.774, loss_o: 4.243
Episode: 6348, episode steps: 9, avg. reward: 9.38, loss_i: 41.769, loss_h: 2.073, loss_o: 4.64
Episode: 6349, episode st

Episode: 6422, episode steps: 11, avg. reward: 9.39, loss_i: 22.041, loss_h: 0.125, loss_o: 3.26
Episode: 6423, episode steps: 9, avg. reward: 9.39, loss_i: 21.31, loss_h: 0.147, loss_o: 3.031
Episode: 6424, episode steps: 10, avg. reward: 9.39, loss_i: 21.356, loss_h: 0.147, loss_o: 3.481
Episode: 6425, episode steps: 10, avg. reward: 9.4, loss_i: 25.588, loss_h: 0.138, loss_o: 3.479
Episode: 6426, episode steps: 9, avg. reward: 9.39, loss_i: 18.964, loss_h: 0.126, loss_o: 3.297
Episode: 6427, episode steps: 8, avg. reward: 9.39, loss_i: 16.934, loss_h: 0.143, loss_o: 3.435
Episode: 6428, episode steps: 9, avg. reward: 9.39, loss_i: 21.698, loss_h: 0.132, loss_o: 3.817
Episode: 6429, episode steps: 10, avg. reward: 9.39, loss_i: 25.4, loss_h: 0.119, loss_o: 3.869
Episode: 6430, episode steps: 10, avg. reward: 9.39, loss_i: 27.449, loss_h: 0.15, loss_o: 3.467
Episode: 6431, episode steps: 9, avg. reward: 9.39, loss_i: 29.543, loss_h: 0.528, loss_o: 3.269
Episode: 6432, episode steps: 9

Episode: 6507, episode steps: 8, avg. reward: 9.4, loss_i: 23.159, loss_h: 0.842, loss_o: 2.451
Episode: 6508, episode steps: 9, avg. reward: 9.4, loss_i: 15.798, loss_h: 0.799, loss_o: 3.236
Episode: 6509, episode steps: 9, avg. reward: 9.4, loss_i: 16.954, loss_h: 1.041, loss_o: 3.199
Episode: 6510, episode steps: 10, avg. reward: 9.4, loss_i: 19.11, loss_h: 1.058, loss_o: 2.553
Episode: 6511, episode steps: 9, avg. reward: 9.4, loss_i: 18.389, loss_h: 0.785, loss_o: 3.583
Episode: 6512, episode steps: 9, avg. reward: 9.4, loss_i: 18.162, loss_h: 0.884, loss_o: 3.148
Episode: 6513, episode steps: 9, avg. reward: 9.4, loss_i: 15.984, loss_h: 0.909, loss_o: 3.437
Episode: 6514, episode steps: 8, avg. reward: 9.39, loss_i: 16.19, loss_h: 0.842, loss_o: 3.589
Episode: 6515, episode steps: 10, avg. reward: 9.4, loss_i: 16.599, loss_h: 0.742, loss_o: 2.426
Episode: 6516, episode steps: 10, avg. reward: 9.4, loss_i: 17.746, loss_h: 0.642, loss_o: 3.107
Episode: 6517, episode steps: 9, avg. 

Episode: 6592, episode steps: 8, avg. reward: 9.4, loss_i: 13.84, loss_h: 0.728, loss_o: 2.99
Episode: 6593, episode steps: 10, avg. reward: 9.4, loss_i: 10.879, loss_h: 0.737, loss_o: 3.496
Episode: 6594, episode steps: 9, avg. reward: 9.4, loss_i: 11.268, loss_h: 0.613, loss_o: 3.249
Episode: 6595, episode steps: 9, avg. reward: 9.4, loss_i: 17.205, loss_h: 0.879, loss_o: 3.405
Episode: 6596, episode steps: 10, avg. reward: 9.4, loss_i: 13.644, loss_h: 0.629, loss_o: 3.717
Episode: 6597, episode steps: 9, avg. reward: 9.4, loss_i: 13.397, loss_h: 0.698, loss_o: 3.406
Episode: 6598, episode steps: 9, avg. reward: 9.4, loss_i: 13.129, loss_h: 0.634, loss_o: 2.666
Episode: 6599, episode steps: 10, avg. reward: 9.4, loss_i: 13.19, loss_h: 0.778, loss_o: 3.098
Episode: 6600, episode steps: 10, avg. reward: 9.4, loss_i: 12.335, loss_h: 0.707, loss_o: 3.508
Episode: 6601, episode steps: 8, avg. reward: 9.4, loss_i: 13.666, loss_h: 0.797, loss_o: 2.986
Episode: 6602, episode steps: 10, avg. 

Episode: 6678, episode steps: 10, avg. reward: 9.4, loss_i: 10.342, loss_h: 0.818, loss_o: 3.295
Episode: 6679, episode steps: 9, avg. reward: 9.4, loss_i: 8.879, loss_h: 0.747, loss_o: 3.316
Episode: 6680, episode steps: 10, avg. reward: 9.4, loss_i: 15.54, loss_h: 0.858, loss_o: 3.271
Episode: 6681, episode steps: 10, avg. reward: 9.4, loss_i: 14.791, loss_h: 0.899, loss_o: 3.73
Episode: 6682, episode steps: 10, avg. reward: 9.4, loss_i: 13.046, loss_h: 0.912, loss_o: 2.699
Episode: 6683, episode steps: 8, avg. reward: 9.4, loss_i: 10.462, loss_h: 1.074, loss_o: 2.701
Episode: 6684, episode steps: 9, avg. reward: 9.4, loss_i: 14.051, loss_h: 0.794, loss_o: 2.835
Episode: 6685, episode steps: 10, avg. reward: 9.4, loss_i: 12.274, loss_h: 0.934, loss_o: 3.27
Episode: 6686, episode steps: 10, avg. reward: 9.4, loss_i: 11.122, loss_h: 0.649, loss_o: 3.237
Episode: 6687, episode steps: 8, avg. reward: 9.4, loss_i: 10.727, loss_h: 0.717, loss_o: 3.805
Episode: 6688, episode steps: 9, avg. 

Episode: 6764, episode steps: 10, avg. reward: 9.4, loss_i: 9.81, loss_h: 0.755, loss_o: 3.487
Episode: 6765, episode steps: 9, avg. reward: 9.4, loss_i: 14.299, loss_h: 0.77, loss_o: 2.941
Episode: 6766, episode steps: 8, avg. reward: 9.4, loss_i: 13.06, loss_h: 0.864, loss_o: 3.334
Episode: 6767, episode steps: 9, avg. reward: 9.4, loss_i: 11.681, loss_h: 0.881, loss_o: 3.225
Episode: 6768, episode steps: 8, avg. reward: 9.4, loss_i: 9.446, loss_h: 0.69, loss_o: 3.556
Episode: 6769, episode steps: 10, avg. reward: 9.4, loss_i: 12.765, loss_h: 0.786, loss_o: 2.684
Episode: 6770, episode steps: 10, avg. reward: 9.4, loss_i: 10.181, loss_h: 0.614, loss_o: 2.568
Episode: 6771, episode steps: 10, avg. reward: 9.4, loss_i: 10.734, loss_h: 0.69, loss_o: 3.571
Episode: 6772, episode steps: 9, avg. reward: 9.4, loss_i: 9.661, loss_h: 0.801, loss_o: 3.237
Episode: 6773, episode steps: 9, avg. reward: 9.4, loss_i: 11.108, loss_h: 0.924, loss_o: 3.932
Episode: 6774, episode steps: 9, avg. reward

Episode: 6848, episode steps: 8, avg. reward: 9.4, loss_i: 9.22, loss_h: 0.581, loss_o: 3.563
Episode: 6849, episode steps: 10, avg. reward: 9.4, loss_i: 8.719, loss_h: 0.641, loss_o: 4.165
Episode: 6850, episode steps: 8, avg. reward: 9.4, loss_i: 10.262, loss_h: 0.288, loss_o: 3.433
Episode: 6851, episode steps: 8, avg. reward: 9.39, loss_i: 9.945, loss_h: 0.536, loss_o: 3.673
Episode: 6852, episode steps: 8, avg. reward: 9.39, loss_i: 8.781, loss_h: 0.503, loss_o: 3.427
Episode: 6853, episode steps: 9, avg. reward: 9.39, loss_i: 9.474, loss_h: 0.395, loss_o: 3.308
Episode: 6854, episode steps: 10, avg. reward: 9.39, loss_i: 10.961, loss_h: 0.514, loss_o: 3.825
Episode: 6855, episode steps: 8, avg. reward: 9.39, loss_i: 8.703, loss_h: 0.39, loss_o: 3.454
Episode: 6856, episode steps: 9, avg. reward: 9.39, loss_i: 10.914, loss_h: 0.527, loss_o: 3.438
Episode: 6857, episode steps: 10, avg. reward: 9.39, loss_i: 11.061, loss_h: 0.403, loss_o: 3.601
Episode: 6858, episode steps: 10, avg.

Episode: 6933, episode steps: 9, avg. reward: 9.38, loss_i: 12.558, loss_h: 0.614, loss_o: 4.071
Episode: 6934, episode steps: 9, avg. reward: 9.38, loss_i: 10.744, loss_h: 0.69, loss_o: 4.073
Episode: 6935, episode steps: 10, avg. reward: 9.38, loss_i: 9.857, loss_h: 0.944, loss_o: 3.771
Episode: 6936, episode steps: 10, avg. reward: 9.38, loss_i: 10.487, loss_h: 0.979, loss_o: 3.803
Episode: 6937, episode steps: 10, avg. reward: 9.38, loss_i: 10.948, loss_h: 0.655, loss_o: 3.752
Episode: 6938, episode steps: 10, avg. reward: 9.38, loss_i: 8.717, loss_h: 0.894, loss_o: 3.236
Episode: 6939, episode steps: 10, avg. reward: 9.38, loss_i: 8.886, loss_h: 0.861, loss_o: 4.024
Episode: 6940, episode steps: 9, avg. reward: 9.38, loss_i: 11.08, loss_h: 0.716, loss_o: 3.778
Episode: 6941, episode steps: 9, avg. reward: 9.38, loss_i: 11.951, loss_h: 0.767, loss_o: 3.691
Episode: 6942, episode steps: 10, avg. reward: 9.38, loss_i: 10.39, loss_h: 0.7, loss_o: 4.02
Episode: 6943, episode steps: 9, 

  0%|          | 1/1000 [00:00<01:52,  8.85it/s]

Episode: 7000, episode steps: 9, avg. reward: 9.39, loss_i: 8.144, loss_h: 0.812, loss_o: 3.147
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 8.076577436592844, 'critic_2_loss': 8.211233261558744, 'policy_loss': -13.644483341111076, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 0.813276097870322, 'critic_2_loss': 0.8097897584571041, 'policy_loss': -2.304301562239823, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 3.1478119823667736, 'critic_2_loss': 3.1454512543148465, 'policy_loss': -4.467504978179932, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [1.0, 0.23, 0.43, 0.18, 0.02, 0.35, 0.95, 0.39]
o: [1.0]
UPDATES
i: [1800, 1800, 1800, 1800]
h: [1800, 175, 807, 0, 0, 551, 1686, 509]
o: [1800]
MEMORY
i: [1865, 1865, 1865, 1865]
h: [1865, 118, 445, 32, 0, 349,

100%|██████████| 1000/1000 [01:54<00:00,  8.72it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.34, Max. Reward: 12.0
--------------------------------------------------------------------------------
Episode: 7001, episode steps: 9, avg. reward: 9.0, loss_i: 8.132, loss_h: 0.81, loss_o: 3.469
Episode: 7002, episode steps: 10, avg. reward: 9.5, loss_i: 8.011, loss_h: 0.493, loss_o: 3.927
Episode: 7003, episode steps: 10, avg. reward: 9.67, loss_i: 11.245, loss_h: 0.569, loss_o: 2.947
Episode: 7004, episode steps: 10, avg. reward: 9.75, loss_i: 7.367, loss_h: 0.739, loss_o: 3.729
Episode: 7005, episode steps: 9, avg. reward: 9.6, loss_i: 10.392, loss_h: 0.787, loss_o: 3.195
Episode: 7006, episode steps: 8, avg. reward: 9.33, loss_i: 8.82, loss_h: 0.669, loss_o: 3.258
Episode: 7007, episode steps: 9, avg. reward: 9.29, loss_i: 10.134, loss_h: 0.803, loss_o: 3.729
Episode: 7008, episode steps: 8, avg. reward: 9.12, loss_i: 8.644, loss_h: 0.796, loss_o: 3.422
Episode: 70

Episode: 7084, episode steps: 8, avg. reward: 9.39, loss_i: 9.778, loss_h: 0.644, loss_o: 3.305
Episode: 7085, episode steps: 10, avg. reward: 9.4, loss_i: 7.887, loss_h: 0.742, loss_o: 3.303
Episode: 7086, episode steps: 8, avg. reward: 9.38, loss_i: 8.194, loss_h: 0.648, loss_o: 3.222
Episode: 7087, episode steps: 10, avg. reward: 9.39, loss_i: 16.024, loss_h: 0.649, loss_o: 3.137
Episode: 7088, episode steps: 10, avg. reward: 9.4, loss_i: 8.541, loss_h: 0.622, loss_o: 3.855
Episode: 7089, episode steps: 9, avg. reward: 9.39, loss_i: 8.677, loss_h: 0.805, loss_o: 3.762
Episode: 7090, episode steps: 9, avg. reward: 9.39, loss_i: 7.744, loss_h: 0.734, loss_o: 3.429
Episode: 7091, episode steps: 8, avg. reward: 9.37, loss_i: 6.656, loss_h: 0.662, loss_o: 3.45
Episode: 7092, episode steps: 9, avg. reward: 9.37, loss_i: 7.135, loss_h: 0.914, loss_o: 2.908
Episode: 7093, episode steps: 9, avg. reward: 9.37, loss_i: 12.017, loss_h: 0.804, loss_o: 3.119
Episode: 7094, episode steps: 9, avg. 

Episode: 7170, episode steps: 9, avg. reward: 9.38, loss_i: 10.911, loss_h: 0.511, loss_o: 2.721
Episode: 7171, episode steps: 8, avg. reward: 9.37, loss_i: 8.567, loss_h: 0.602, loss_o: 3.205
Episode: 7172, episode steps: 10, avg. reward: 9.37, loss_i: 6.697, loss_h: 0.618, loss_o: 3.148
Episode: 7173, episode steps: 9, avg. reward: 9.37, loss_i: 8.174, loss_h: 0.669, loss_o: 3.134
Episode: 7174, episode steps: 10, avg. reward: 9.37, loss_i: 8.425, loss_h: 0.8, loss_o: 3.727
Episode: 7175, episode steps: 10, avg. reward: 9.38, loss_i: 8.037, loss_h: 0.6, loss_o: 3.817
Episode: 7176, episode steps: 9, avg. reward: 9.38, loss_i: 7.28, loss_h: 0.682, loss_o: 3.112
Episode: 7177, episode steps: 10, avg. reward: 9.38, loss_i: 8.885, loss_h: 0.733, loss_o: 3.521
Episode: 7178, episode steps: 9, avg. reward: 9.38, loss_i: 10.545, loss_h: 0.625, loss_o: 3.303
Episode: 7179, episode steps: 10, avg. reward: 9.38, loss_i: 12.479, loss_h: 0.696, loss_o: 3.446
Episode: 7180, episode steps: 9, avg.

Episode: 7254, episode steps: 11, avg. reward: 9.38, loss_i: 7.187, loss_h: 0.515, loss_o: 3.982
Episode: 7255, episode steps: 11, avg. reward: 9.38, loss_i: 11.345, loss_h: 0.57, loss_o: 3.443
Episode: 7256, episode steps: 10, avg. reward: 9.39, loss_i: 8.693, loss_h: 0.508, loss_o: 3.529
Episode: 7257, episode steps: 10, avg. reward: 9.39, loss_i: 6.165, loss_h: 0.355, loss_o: 3.309
Episode: 7258, episode steps: 8, avg. reward: 9.38, loss_i: 7.58, loss_h: 0.508, loss_o: 3.85
Episode: 7259, episode steps: 10, avg. reward: 9.39, loss_i: 7.295, loss_h: 0.446, loss_o: 3.951
Episode: 7260, episode steps: 10, avg. reward: 9.39, loss_i: 9.489, loss_h: 0.521, loss_o: 3.416
Episode: 7261, episode steps: 10, avg. reward: 9.39, loss_i: 7.181, loss_h: 0.557, loss_o: 3.359
Episode: 7262, episode steps: 9, avg. reward: 9.39, loss_i: 6.812, loss_h: 0.491, loss_o: 3.558
Episode: 7263, episode steps: 10, avg. reward: 9.39, loss_i: 7.562, loss_h: 0.48, loss_o: 3.638
Episode: 7264, episode steps: 10, a

Episode: 7340, episode steps: 9, avg. reward: 9.42, loss_i: 7.324, loss_h: 0.772, loss_o: 3.763
Episode: 7341, episode steps: 11, avg. reward: 9.42, loss_i: 5.67, loss_h: 0.417, loss_o: 4.003
Episode: 7342, episode steps: 8, avg. reward: 9.42, loss_i: 6.928, loss_h: 0.539, loss_o: 4.024
Episode: 7343, episode steps: 9, avg. reward: 9.42, loss_i: 7.567, loss_h: 0.564, loss_o: 4.067
Episode: 7344, episode steps: 10, avg. reward: 9.42, loss_i: 7.902, loss_h: 0.662, loss_o: 3.683
Episode: 7345, episode steps: 10, avg. reward: 9.42, loss_i: 6.9, loss_h: 0.539, loss_o: 3.906
Episode: 7346, episode steps: 10, avg. reward: 9.42, loss_i: 5.947, loss_h: 0.69, loss_o: 3.298
Episode: 7347, episode steps: 8, avg. reward: 9.42, loss_i: 7.159, loss_h: 0.548, loss_o: 3.742
Episode: 7348, episode steps: 10, avg. reward: 9.42, loss_i: 7.663, loss_h: 0.707, loss_o: 4.137
Episode: 7349, episode steps: 9, avg. reward: 9.42, loss_i: 7.935, loss_h: 0.763, loss_o: 3.993
Episode: 7350, episode steps: 10, avg. 

Episode: 7426, episode steps: 11, avg. reward: 9.43, loss_i: 8.843, loss_h: 0.631, loss_o: 3.631
Episode: 7427, episode steps: 9, avg. reward: 9.43, loss_i: 8.363, loss_h: 0.782, loss_o: 3.865
Episode: 7428, episode steps: 9, avg. reward: 9.43, loss_i: 6.894, loss_h: 0.633, loss_o: 3.825
Episode: 7429, episode steps: 10, avg. reward: 9.43, loss_i: 6.395, loss_h: 0.719, loss_o: 3.843
Episode: 7430, episode steps: 9, avg. reward: 9.43, loss_i: 9.116, loss_h: 0.645, loss_o: 4.456
Episode: 7431, episode steps: 9, avg. reward: 9.43, loss_i: 5.376, loss_h: 0.849, loss_o: 4.516
Episode: 7432, episode steps: 10, avg. reward: 9.43, loss_i: 7.232, loss_h: 0.692, loss_o: 4.189
Episode: 7433, episode steps: 9, avg. reward: 9.43, loss_i: 7.303, loss_h: 0.695, loss_o: 3.794
Episode: 7434, episode steps: 10, avg. reward: 9.43, loss_i: 6.381, loss_h: 0.757, loss_o: 3.67
Episode: 7435, episode steps: 9, avg. reward: 9.43, loss_i: 7.1, loss_h: 0.626, loss_o: 4.033
Episode: 7436, episode steps: 10, avg. 

Episode: 7512, episode steps: 8, avg. reward: 9.43, loss_i: 5.818, loss_h: 0.597, loss_o: 3.691
Episode: 7513, episode steps: 9, avg. reward: 9.43, loss_i: 5.085, loss_h: 0.73, loss_o: 3.725
Episode: 7514, episode steps: 10, avg. reward: 9.43, loss_i: 5.171, loss_h: 0.617, loss_o: 3.572
Episode: 7515, episode steps: 9, avg. reward: 9.43, loss_i: 8.138, loss_h: 0.592, loss_o: 3.677
Episode: 7516, episode steps: 9, avg. reward: 9.43, loss_i: 6.295, loss_h: 0.567, loss_o: 4.272
Episode: 7517, episode steps: 9, avg. reward: 9.43, loss_i: 6.068, loss_h: 0.703, loss_o: 4.411
Episode: 7518, episode steps: 10, avg. reward: 9.43, loss_i: 7.456, loss_h: 0.637, loss_o: 3.724
Episode: 7519, episode steps: 9, avg. reward: 9.43, loss_i: 7.11, loss_h: 0.855, loss_o: 3.831
Episode: 7520, episode steps: 10, avg. reward: 9.43, loss_i: 8.811, loss_h: 0.74, loss_o: 3.226
Episode: 7521, episode steps: 10, avg. reward: 9.43, loss_i: 9.149, loss_h: 0.843, loss_o: 3.658
Episode: 7522, episode steps: 10, avg. 

Episode: 7598, episode steps: 11, avg. reward: 9.42, loss_i: 7.987, loss_h: 0.876, loss_o: 4.004
Episode: 7599, episode steps: 10, avg. reward: 9.42, loss_i: 8.09, loss_h: 0.851, loss_o: 3.902
Episode: 7600, episode steps: 9, avg. reward: 9.42, loss_i: 7.347, loss_h: 0.668, loss_o: 4.003
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
Episode: 7601, episode steps: 9, avg. reward: 9.42, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 7602, episode steps: 10, avg. reward: 9.42, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 7603, episode steps: 9, avg. reward: 9.42, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 7604, episode steps: 9, avg. reward: 9.42, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 7605, episode steps: 9, avg. reward: 9.42, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 7606, episode steps: 1

Episode: 7681, episode steps: 9, avg. reward: 9.41, loss_i: 13.833, loss_h: 0.823, loss_o: 4.193
Episode: 7682, episode steps: 9, avg. reward: 9.41, loss_i: 12.255, loss_h: 0.603, loss_o: 4.417
Episode: 7683, episode steps: 10, avg. reward: 9.41, loss_i: 12.648, loss_h: 0.816, loss_o: 4.334
Episode: 7684, episode steps: 9, avg. reward: 9.41, loss_i: 11.413, loss_h: 0.473, loss_o: 3.959
Episode: 7685, episode steps: 9, avg. reward: 9.41, loss_i: 11.992, loss_h: 0.679, loss_o: 3.843
Episode: 7686, episode steps: 10, avg. reward: 9.41, loss_i: 11.56, loss_h: 0.785, loss_o: 4.13
Episode: 7687, episode steps: 10, avg. reward: 9.41, loss_i: 10.295, loss_h: 0.64, loss_o: 4.2
Episode: 7688, episode steps: 10, avg. reward: 9.41, loss_i: 11.865, loss_h: 0.428, loss_o: 4.039
Episode: 7689, episode steps: 10, avg. reward: 9.41, loss_i: 11.958, loss_h: 0.909, loss_o: 3.563
Episode: 7690, episode steps: 10, avg. reward: 9.41, loss_i: 12.289, loss_h: 0.915, loss_o: 4.022
Episode: 7691, episode steps:

Episode: 7766, episode steps: 9, avg. reward: 9.42, loss_i: 10.11, loss_h: 1.17, loss_o: 4.589
Episode: 7767, episode steps: 9, avg. reward: 9.42, loss_i: 11.634, loss_h: 0.727, loss_o: 3.974
Episode: 7768, episode steps: 9, avg. reward: 9.42, loss_i: 8.3, loss_h: 0.756, loss_o: 4.629
Episode: 7769, episode steps: 8, avg. reward: 9.41, loss_i: 11.516, loss_h: 0.814, loss_o: 4.528
Episode: 7770, episode steps: 8, avg. reward: 9.41, loss_i: 11.54, loss_h: 0.745, loss_o: 3.686
Episode: 7771, episode steps: 9, avg. reward: 9.41, loss_i: 10.178, loss_h: 0.869, loss_o: 3.546
Episode: 7772, episode steps: 8, avg. reward: 9.41, loss_i: 10.649, loss_h: 0.782, loss_o: 3.691
Episode: 7773, episode steps: 10, avg. reward: 9.41, loss_i: 11.106, loss_h: 0.744, loss_o: 3.944
Episode: 7774, episode steps: 10, avg. reward: 9.41, loss_i: 11.485, loss_h: 0.883, loss_o: 3.423
Episode: 7775, episode steps: 10, avg. reward: 9.41, loss_i: 8.637, loss_h: 0.885, loss_o: 3.675
Episode: 7776, episode steps: 10, 

Episode: 7851, episode steps: 9, avg. reward: 9.42, loss_i: 9.563, loss_h: 0.845, loss_o: 5.044
Episode: 7852, episode steps: 10, avg. reward: 9.42, loss_i: 10.659, loss_h: 0.867, loss_o: 4.178
Episode: 7853, episode steps: 10, avg. reward: 9.42, loss_i: 13.846, loss_h: 0.739, loss_o: 3.614
Episode: 7854, episode steps: 10, avg. reward: 9.42, loss_i: 10.555, loss_h: 0.678, loss_o: 4.935
Episode: 7855, episode steps: 10, avg. reward: 9.42, loss_i: 11.76, loss_h: 1.151, loss_o: 3.76
Episode: 7856, episode steps: 9, avg. reward: 9.42, loss_i: 9.786, loss_h: 0.854, loss_o: 4.192
Episode: 7857, episode steps: 10, avg. reward: 9.42, loss_i: 10.507, loss_h: 0.858, loss_o: 3.996
Episode: 7858, episode steps: 10, avg. reward: 9.42, loss_i: 10.351, loss_h: 0.963, loss_o: 3.279
Episode: 7859, episode steps: 10, avg. reward: 9.42, loss_i: 9.095, loss_h: 0.856, loss_o: 4.268
Episode: 7860, episode steps: 10, avg. reward: 9.42, loss_i: 10.899, loss_h: 1.088, loss_o: 4.776
Episode: 7861, episode step

Episode: 7936, episode steps: 10, avg. reward: 9.42, loss_i: 10.737, loss_h: 0.825, loss_o: 4.342
Episode: 7937, episode steps: 10, avg. reward: 9.42, loss_i: 10.636, loss_h: 1.143, loss_o: 4.454
Episode: 7938, episode steps: 10, avg. reward: 9.42, loss_i: 8.718, loss_h: 0.958, loss_o: 3.96
Episode: 7939, episode steps: 10, avg. reward: 9.42, loss_i: 10.038, loss_h: 0.975, loss_o: 4.857
Episode: 7940, episode steps: 8, avg. reward: 9.42, loss_i: 8.603, loss_h: 0.873, loss_o: 4.383
Episode: 7941, episode steps: 10, avg. reward: 9.42, loss_i: 9.924, loss_h: 1.247, loss_o: 3.946
Episode: 7942, episode steps: 11, avg. reward: 9.42, loss_i: 10.891, loss_h: 0.832, loss_o: 4.754
Episode: 7943, episode steps: 9, avg. reward: 9.42, loss_i: 11.138, loss_h: 0.976, loss_o: 4.346
Episode: 7944, episode steps: 8, avg. reward: 9.42, loss_i: 12.065, loss_h: 0.954, loss_o: 4.346
Episode: 7945, episode steps: 8, avg. reward: 9.41, loss_i: 10.923, loss_h: 0.856, loss_o: 4.601
Episode: 7946, episode steps

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 8000, episode steps: 11, avg. reward: 9.41, loss_i: 9.729, loss_h: 0.701, loss_o: 4.032
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 9.7862088565122, 'critic_2_loss': 9.671559057113798, 'policy_loss': -13.479743036356838, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 0.7025978454659062, 'critic_2_loss': 0.698777002072142, 'policy_loss': -2.493882263860395, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 4.0309059836647725, 'critic_2_loss': 4.033323125405745, 'policy_loss': -4.879408923062411, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.

100%|██████████| 1000/1000 [01:55<00:00,  8.63it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.34, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 8001, episode steps: 11, avg. reward: 11.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8002, episode steps: 9, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8003, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8004, episode steps: 9, avg. reward: 9.75, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8005, episode steps: 9, avg. reward: 9.6, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8006, episode steps: 9, avg. reward: 9.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 8007, episode steps: 10, avg. reward: 9.57, loss_i: 135.807, loss_h: 0.447, loss_o: 20.5
Episode: 8008, episode steps: 9, avg. reward: 9.5, loss_i: 7.366, loss_h: 0.287, loss_o: 5.134
Episode: 8009, episode steps: 9, avg. reward: 9

Episode: 8084, episode steps: 8, avg. reward: 9.56, loss_i: 9.916, loss_h: 1.777, loss_o: 3.566
Episode: 8085, episode steps: 9, avg. reward: 9.55, loss_i: 10.896, loss_h: 1.581, loss_o: 4.801
Episode: 8086, episode steps: 10, avg. reward: 9.56, loss_i: 9.764, loss_h: 1.894, loss_o: 4.893
Episode: 8087, episode steps: 9, avg. reward: 9.55, loss_i: 9.79, loss_h: 1.851, loss_o: 4.045
Episode: 8088, episode steps: 10, avg. reward: 9.56, loss_i: 10.147, loss_h: 1.899, loss_o: 4.324
Episode: 8089, episode steps: 10, avg. reward: 9.56, loss_i: 11.576, loss_h: 1.625, loss_o: 4.874
Episode: 8090, episode steps: 10, avg. reward: 9.57, loss_i: 9.814, loss_h: 1.761, loss_o: 4.821
Episode: 8091, episode steps: 9, avg. reward: 9.56, loss_i: 9.776, loss_h: 1.588, loss_o: 3.808
Episode: 8092, episode steps: 9, avg. reward: 9.55, loss_i: 10.77, loss_h: 2.01, loss_o: 5.203
Episode: 8093, episode steps: 9, avg. reward: 9.55, loss_i: 8.982, loss_h: 1.919, loss_o: 4.966
Episode: 8094, episode steps: 9, av

Episode: 8169, episode steps: 10, avg. reward: 9.46, loss_i: 10.812, loss_h: 1.765, loss_o: 5.114
Episode: 8170, episode steps: 9, avg. reward: 9.46, loss_i: 10.821, loss_h: 2.529, loss_o: 4.856
Episode: 8171, episode steps: 10, avg. reward: 9.46, loss_i: 13.361, loss_h: 1.973, loss_o: 4.802
Episode: 8172, episode steps: 9, avg. reward: 9.46, loss_i: 11.565, loss_h: 2.66, loss_o: 4.784
Episode: 8173, episode steps: 10, avg. reward: 9.46, loss_i: 9.293, loss_h: 2.241, loss_o: 5.203
Episode: 8174, episode steps: 9, avg. reward: 9.46, loss_i: 12.035, loss_h: 2.102, loss_o: 4.319
Episode: 8175, episode steps: 9, avg. reward: 9.46, loss_i: 12.803, loss_h: 1.985, loss_o: 4.434
Episode: 8176, episode steps: 9, avg. reward: 9.45, loss_i: 11.149, loss_h: 2.492, loss_o: 5.022
Episode: 8177, episode steps: 10, avg. reward: 9.46, loss_i: 11.781, loss_h: 2.195, loss_o: 4.822
Episode: 8178, episode steps: 10, avg. reward: 9.46, loss_i: 10.809, loss_h: 2.302, loss_o: 4.992
Episode: 8179, episode step

Episode: 8254, episode steps: 10, avg. reward: 9.38, loss_i: 13.323, loss_h: 3.158, loss_o: 5.218
Episode: 8255, episode steps: 9, avg. reward: 9.38, loss_i: 14.042, loss_h: 2.342, loss_o: 5.202
Episode: 8256, episode steps: 9, avg. reward: 9.38, loss_i: 11.383, loss_h: 2.357, loss_o: 4.723
Episode: 8257, episode steps: 9, avg. reward: 9.37, loss_i: 9.823, loss_h: 2.834, loss_o: 5.118
Episode: 8258, episode steps: 10, avg. reward: 9.38, loss_i: 10.687, loss_h: 2.09, loss_o: 4.678
Episode: 8259, episode steps: 8, avg. reward: 9.37, loss_i: 12.133, loss_h: 2.243, loss_o: 4.942
Episode: 8260, episode steps: 9, avg. reward: 9.37, loss_i: 13.237, loss_h: 3.214, loss_o: 5.644
Episode: 8261, episode steps: 9, avg. reward: 9.37, loss_i: 12.198, loss_h: 3.43, loss_o: 4.779
Episode: 8262, episode steps: 8, avg. reward: 9.36, loss_i: 13.744, loss_h: 2.86, loss_o: 4.117
Episode: 8263, episode steps: 9, avg. reward: 9.36, loss_i: 12.902, loss_h: 2.335, loss_o: 4.873
Episode: 8264, episode steps: 10

Episode: 8339, episode steps: 9, avg. reward: 9.39, loss_i: 10.588, loss_h: 2.74, loss_o: 4.784
Episode: 8340, episode steps: 10, avg. reward: 9.39, loss_i: 11.207, loss_h: 2.219, loss_o: 4.115
Episode: 8341, episode steps: 9, avg. reward: 9.39, loss_i: 11.339, loss_h: 2.626, loss_o: 4.217
Episode: 8342, episode steps: 10, avg. reward: 9.39, loss_i: 12.334, loss_h: 2.124, loss_o: 4.407
Episode: 8343, episode steps: 10, avg. reward: 9.39, loss_i: 13.586, loss_h: 2.515, loss_o: 6.082
Episode: 8344, episode steps: 10, avg. reward: 9.39, loss_i: 12.905, loss_h: 2.711, loss_o: 5.04
Episode: 8345, episode steps: 9, avg. reward: 9.39, loss_i: 11.443, loss_h: 2.379, loss_o: 4.468
Episode: 8346, episode steps: 8, avg. reward: 9.39, loss_i: 11.611, loss_h: 2.258, loss_o: 4.927
Episode: 8347, episode steps: 8, avg. reward: 9.38, loss_i: 13.819, loss_h: 3.265, loss_o: 4.577
Episode: 8348, episode steps: 10, avg. reward: 9.39, loss_i: 11.619, loss_h: 2.881, loss_o: 5.077
Episode: 8349, episode step

Episode: 8422, episode steps: 8, avg. reward: 9.38, loss_i: 10.914, loss_h: 6.315, loss_o: 3.008
Episode: 8423, episode steps: 10, avg. reward: 9.38, loss_i: 12.151, loss_h: 6.319, loss_o: 2.845
Episode: 8424, episode steps: 8, avg. reward: 9.38, loss_i: 9.561, loss_h: 6.028, loss_o: 3.112
Episode: 8425, episode steps: 10, avg. reward: 9.38, loss_i: 12.672, loss_h: 5.727, loss_o: 3.103
Episode: 8426, episode steps: 9, avg. reward: 9.38, loss_i: 13.154, loss_h: 6.395, loss_o: 3.012
Episode: 8427, episode steps: 10, avg. reward: 9.38, loss_i: 12.309, loss_h: 6.095, loss_o: 2.862
Episode: 8428, episode steps: 10, avg. reward: 9.38, loss_i: 11.921, loss_h: 6.7, loss_o: 2.813
Episode: 8429, episode steps: 9, avg. reward: 9.38, loss_i: 12.626, loss_h: 5.551, loss_o: 2.709
Episode: 8430, episode steps: 10, avg. reward: 9.38, loss_i: 10.638, loss_h: 6.359, loss_o: 2.897
Episode: 8431, episode steps: 10, avg. reward: 9.38, loss_i: 11.767, loss_h: 5.956, loss_o: 2.704
Episode: 8432, episode step

Episode: 8507, episode steps: 10, avg. reward: 9.38, loss_i: 10.972, loss_h: 7.918, loss_o: 2.721
Episode: 8508, episode steps: 10, avg. reward: 9.38, loss_i: 11.392, loss_h: 7.437, loss_o: 2.605
Episode: 8509, episode steps: 10, avg. reward: 9.38, loss_i: 9.805, loss_h: 8.267, loss_o: 2.926
Episode: 8510, episode steps: 10, avg. reward: 9.38, loss_i: 12.894, loss_h: 7.727, loss_o: 2.631
Episode: 8511, episode steps: 10, avg. reward: 9.39, loss_i: 10.547, loss_h: 9.023, loss_o: 2.725
Episode: 8512, episode steps: 10, avg. reward: 9.39, loss_i: 12.968, loss_h: 8.373, loss_o: 2.701
Episode: 8513, episode steps: 10, avg. reward: 9.39, loss_i: 12.155, loss_h: 9.223, loss_o: 2.553
Episode: 8514, episode steps: 10, avg. reward: 9.39, loss_i: 11.132, loss_h: 8.332, loss_o: 2.768
Episode: 8515, episode steps: 10, avg. reward: 9.39, loss_i: 10.561, loss_h: 9.963, loss_o: 2.506
Episode: 8516, episode steps: 10, avg. reward: 9.39, loss_i: 10.545, loss_h: 9.282, loss_o: 2.769
Episode: 8517, episod

Episode: 8592, episode steps: 9, avg. reward: 9.37, loss_i: 10.747, loss_h: 12.117, loss_o: 2.839
Episode: 8593, episode steps: 10, avg. reward: 9.37, loss_i: 11.583, loss_h: 11.604, loss_o: 3.092
Episode: 8594, episode steps: 9, avg. reward: 9.37, loss_i: 13.03, loss_h: 10.373, loss_o: 3.083
Episode: 8595, episode steps: 9, avg. reward: 9.37, loss_i: 14.543, loss_h: 12.138, loss_o: 2.98
Episode: 8596, episode steps: 10, avg. reward: 9.37, loss_i: 13.61, loss_h: 10.987, loss_o: 3.015
Episode: 8597, episode steps: 10, avg. reward: 9.37, loss_i: 11.451, loss_h: 11.78, loss_o: 2.909
Episode: 8598, episode steps: 9, avg. reward: 9.37, loss_i: 10.437, loss_h: 10.713, loss_o: 2.931
Episode: 8599, episode steps: 9, avg. reward: 9.37, loss_i: 12.065, loss_h: 11.444, loss_o: 2.807
Episode: 8600, episode steps: 10, avg. reward: 9.37, loss_i: 11.203, loss_h: 12.117, loss_o: 2.951
Episode: 8601, episode steps: 9, avg. reward: 9.37, loss_i: 14.349, loss_h: 10.966, loss_o: 2.932
Episode: 8602, episo

Episode: 8676, episode steps: 10, avg. reward: 9.37, loss_i: 11.999, loss_h: 11.348, loss_o: 2.932
Episode: 8677, episode steps: 10, avg. reward: 9.37, loss_i: 12.501, loss_h: 10.567, loss_o: 3.253
Episode: 8678, episode steps: 10, avg. reward: 9.37, loss_i: 13.463, loss_h: 12.38, loss_o: 3.007
Episode: 8679, episode steps: 9, avg. reward: 9.37, loss_i: 13.462, loss_h: 9.485, loss_o: 3.136
Episode: 8680, episode steps: 9, avg. reward: 9.37, loss_i: 10.904, loss_h: 9.789, loss_o: 3.11
Episode: 8681, episode steps: 10, avg. reward: 9.37, loss_i: 10.048, loss_h: 9.323, loss_o: 3.122
Episode: 8682, episode steps: 9, avg. reward: 9.37, loss_i: 12.576, loss_h: 11.229, loss_o: 3.163
Episode: 8683, episode steps: 9, avg. reward: 9.37, loss_i: 14.923, loss_h: 12.47, loss_o: 3.139
Episode: 8684, episode steps: 9, avg. reward: 9.37, loss_i: 14.186, loss_h: 10.014, loss_o: 3.036
Episode: 8685, episode steps: 10, avg. reward: 9.37, loss_i: 11.034, loss_h: 9.517, loss_o: 3.197
Episode: 8686, episode

Episode: 8760, episode steps: 10, avg. reward: 9.39, loss_i: 11.721, loss_h: 11.248, loss_o: 3.135
Episode: 8761, episode steps: 10, avg. reward: 9.39, loss_i: 12.91, loss_h: 9.291, loss_o: 3.248
Episode: 8762, episode steps: 10, avg. reward: 9.39, loss_i: 13.063, loss_h: 11.973, loss_o: 3.148
Episode: 8763, episode steps: 10, avg. reward: 9.39, loss_i: 12.174, loss_h: 10.924, loss_o: 3.183
Episode: 8764, episode steps: 9, avg. reward: 9.39, loss_i: 11.391, loss_h: 10.579, loss_o: 3.169
Episode: 8765, episode steps: 11, avg. reward: 9.39, loss_i: 12.205, loss_h: 11.903, loss_o: 3.176
Episode: 8766, episode steps: 9, avg. reward: 9.39, loss_i: 12.401, loss_h: 12.825, loss_o: 3.234
Episode: 8767, episode steps: 10, avg. reward: 9.39, loss_i: 9.834, loss_h: 10.382, loss_o: 3.136
Episode: 8768, episode steps: 9, avg. reward: 9.39, loss_i: 13.725, loss_h: 12.771, loss_o: 3.007
Episode: 8769, episode steps: 10, avg. reward: 9.39, loss_i: 11.785, loss_h: 8.029, loss_o: 3.18
Episode: 8770, epi

Episode: 8843, episode steps: 8, avg. reward: 9.38, loss_i: 11.725, loss_h: 8.572, loss_o: 1.214
Episode: 8844, episode steps: 10, avg. reward: 9.38, loss_i: 10.482, loss_h: 8.384, loss_o: 1.048
Episode: 8845, episode steps: 8, avg. reward: 9.38, loss_i: 9.679, loss_h: 7.03, loss_o: 1.308
Episode: 8846, episode steps: 9, avg. reward: 9.38, loss_i: 11.959, loss_h: 7.783, loss_o: 1.11
Episode: 8847, episode steps: 10, avg. reward: 9.38, loss_i: 10.959, loss_h: 8.154, loss_o: 1.245
Episode: 8848, episode steps: 9, avg. reward: 9.38, loss_i: 9.742, loss_h: 7.822, loss_o: 1.064
Episode: 8849, episode steps: 9, avg. reward: 9.38, loss_i: 12.474, loss_h: 7.379, loss_o: 1.136
Episode: 8850, episode steps: 10, avg. reward: 9.38, loss_i: 11.107, loss_h: 8.069, loss_o: 1.145
Episode: 8851, episode steps: 9, avg. reward: 9.38, loss_i: 10.657, loss_h: 7.61, loss_o: 1.198
Episode: 8852, episode steps: 10, avg. reward: 9.38, loss_i: 10.076, loss_h: 8.303, loss_o: 1.233
Episode: 8853, episode steps: 9

Episode: 8928, episode steps: 10, avg. reward: 9.38, loss_i: 9.858, loss_h: 6.965, loss_o: 1.204
Episode: 8929, episode steps: 9, avg. reward: 9.38, loss_i: 11.418, loss_h: 7.235, loss_o: 1.187
Episode: 8930, episode steps: 9, avg. reward: 9.38, loss_i: 10.364, loss_h: 7.616, loss_o: 1.304
Episode: 8931, episode steps: 10, avg. reward: 9.38, loss_i: 13.5, loss_h: 6.359, loss_o: 1.192
Episode: 8932, episode steps: 9, avg. reward: 9.38, loss_i: 11.472, loss_h: 6.381, loss_o: 1.091
Episode: 8933, episode steps: 9, avg. reward: 9.38, loss_i: 10.035, loss_h: 7.577, loss_o: 1.177
Episode: 8934, episode steps: 10, avg. reward: 9.38, loss_i: 9.848, loss_h: 6.383, loss_o: 1.259
Episode: 8935, episode steps: 10, avg. reward: 9.38, loss_i: 9.518, loss_h: 7.712, loss_o: 1.199
Episode: 8936, episode steps: 10, avg. reward: 9.38, loss_i: 11.511, loss_h: 6.197, loss_o: 1.305
Episode: 8937, episode steps: 8, avg. reward: 9.38, loss_i: 10.547, loss_h: 6.042, loss_o: 1.262
Episode: 8938, episode steps: 

  0%|          | 1/1000 [00:00<01:53,  8.81it/s]

Episode: 9000, episode steps: 9, avg. reward: 9.38, loss_i: 11.203, loss_h: 7.403, loss_o: 1.021
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 11.320019353595045, 'critic_2_loss': 11.085979537417492, 'policy_loss': -13.153552558686998, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 7.416205788281457, 'critic_2_loss': 7.389267635425865, 'policy_loss': -3.9976354849104787, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 1.0208346115218268, 'critic_2_loss': 1.0201789140701294, 'policy_loss': -2.5741979016198053, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [1.0, 0.13, 0.59, 0.24, 0.0, 0.24, 1.0, 0.37]
o: [1.0]
UPDATES
i: [1799, 1799, 1799, 1799]
h: [1799, 0, 1381, 188, 0, 0, 1799, 390]
o: [1799]
MEMORY
i: [1864, 1864, 1864, 1864]
h: [1864, 18, 1207, 113, 0, 26

100%|██████████| 1000/1000 [01:53<00:00,  8.80it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.37, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 9001, episode steps: 8, avg. reward: 8.0, loss_i: 11.129, loss_h: 6.276, loss_o: 1.292
Episode: 9002, episode steps: 8, avg. reward: 8.0, loss_i: 9.687, loss_h: 7.051, loss_o: 1.164
Episode: 9003, episode steps: 9, avg. reward: 8.33, loss_i: 12.513, loss_h: 6.58, loss_o: 1.194
Episode: 9004, episode steps: 10, avg. reward: 8.75, loss_i: 12.141, loss_h: 6.546, loss_o: 1.225
Episode: 9005, episode steps: 9, avg. reward: 8.8, loss_i: 9.902, loss_h: 7.213, loss_o: 1.156
Episode: 9006, episode steps: 8, avg. reward: 8.67, loss_i: 10.746, loss_h: 5.058, loss_o: 1.184
Episode: 9007, episode steps: 10, avg. reward: 8.86, loss_i: 11.025, loss_h: 6.184, loss_o: 1.243
Episode: 9008, episode steps: 9, avg. reward: 8.88, loss_i: 11.05, loss_h: 5.68, loss_o: 1.224
Episode: 9

Episode: 9084, episode steps: 10, avg. reward: 9.25, loss_i: 9.732, loss_h: 6.573, loss_o: 1.137
Episode: 9085, episode steps: 8, avg. reward: 9.24, loss_i: 10.709, loss_h: 5.618, loss_o: 1.072
Episode: 9086, episode steps: 9, avg. reward: 9.23, loss_i: 9.432, loss_h: 5.443, loss_o: 1.153
Episode: 9087, episode steps: 8, avg. reward: 9.22, loss_i: 11.288, loss_h: 6.142, loss_o: 1.185
Episode: 9088, episode steps: 10, avg. reward: 9.23, loss_i: 10.402, loss_h: 5.96, loss_o: 1.256
Episode: 9089, episode steps: 10, avg. reward: 9.24, loss_i: 11.085, loss_h: 6.33, loss_o: 1.288
Episode: 9090, episode steps: 10, avg. reward: 9.24, loss_i: 11.369, loss_h: 7.414, loss_o: 1.199
Episode: 9091, episode steps: 9, avg. reward: 9.24, loss_i: 9.196, loss_h: 6.594, loss_o: 1.187
Episode: 9092, episode steps: 9, avg. reward: 9.24, loss_i: 9.409, loss_h: 6.126, loss_o: 1.219
Episode: 9093, episode steps: 10, avg. reward: 9.25, loss_i: 10.602, loss_h: 6.999, loss_o: 1.254
Episode: 9094, episode steps: 1

Episode: 9169, episode steps: 10, avg. reward: 9.31, loss_i: 10.978, loss_h: 7.413, loss_o: 1.073
Episode: 9170, episode steps: 8, avg. reward: 9.31, loss_i: 9.366, loss_h: 6.765, loss_o: 1.151
Episode: 9171, episode steps: 10, avg. reward: 9.31, loss_i: 9.25, loss_h: 5.772, loss_o: 1.285
Episode: 9172, episode steps: 10, avg. reward: 9.31, loss_i: 11.502, loss_h: 6.637, loss_o: 1.073
Episode: 9173, episode steps: 10, avg. reward: 9.32, loss_i: 11.365, loss_h: 6.62, loss_o: 1.113
Episode: 9174, episode steps: 9, avg. reward: 9.32, loss_i: 10.516, loss_h: 6.421, loss_o: 1.238
Episode: 9175, episode steps: 10, avg. reward: 9.32, loss_i: 10.877, loss_h: 7.211, loss_o: 1.107
Episode: 9176, episode steps: 10, avg. reward: 9.32, loss_i: 9.923, loss_h: 6.775, loss_o: 1.269
Episode: 9177, episode steps: 10, avg. reward: 9.33, loss_i: 10.396, loss_h: 6.847, loss_o: 1.166
Episode: 9178, episode steps: 10, avg. reward: 9.33, loss_i: 11.153, loss_h: 7.734, loss_o: 1.201
Episode: 9179, episode step

Episode: 9252, episode steps: 9, avg. reward: 9.32, loss_i: 8.677, loss_h: 5.378, loss_o: 1.264
Episode: 9253, episode steps: 8, avg. reward: 9.31, loss_i: 10.052, loss_h: 4.492, loss_o: 1.407
Episode: 9254, episode steps: 10, avg. reward: 9.31, loss_i: 8.72, loss_h: 4.953, loss_o: 1.358
Episode: 9255, episode steps: 10, avg. reward: 9.32, loss_i: 9.306, loss_h: 5.207, loss_o: 1.362
Episode: 9256, episode steps: 9, avg. reward: 9.32, loss_i: 8.276, loss_h: 5.289, loss_o: 1.197
Episode: 9257, episode steps: 9, avg. reward: 9.32, loss_i: 8.325, loss_h: 5.09, loss_o: 1.241
Episode: 9258, episode steps: 10, avg. reward: 9.32, loss_i: 9.853, loss_h: 5.042, loss_o: 1.119
Episode: 9259, episode steps: 10, avg. reward: 9.32, loss_i: 9.547, loss_h: 5.825, loss_o: 1.175
Episode: 9260, episode steps: 8, avg. reward: 9.32, loss_i: 9.519, loss_h: 5.273, loss_o: 1.403
Episode: 9261, episode steps: 9, avg. reward: 9.31, loss_i: 9.164, loss_h: 5.936, loss_o: 1.191
Episode: 9262, episode steps: 9, avg.

Episode: 9338, episode steps: 9, avg. reward: 9.3, loss_i: 11.413, loss_h: 4.372, loss_o: 1.175
Episode: 9339, episode steps: 10, avg. reward: 9.3, loss_i: 10.826, loss_h: 4.932, loss_o: 1.376
Episode: 9340, episode steps: 9, avg. reward: 9.3, loss_i: 10.48, loss_h: 4.721, loss_o: 1.257
Episode: 9341, episode steps: 10, avg. reward: 9.3, loss_i: 12.616, loss_h: 5.429, loss_o: 1.362
Episode: 9342, episode steps: 9, avg. reward: 9.3, loss_i: 10.086, loss_h: 6.277, loss_o: 1.221
Episode: 9343, episode steps: 10, avg. reward: 9.3, loss_i: 9.91, loss_h: 4.923, loss_o: 1.217
Episode: 9344, episode steps: 10, avg. reward: 9.3, loss_i: 11.817, loss_h: 5.79, loss_o: 1.276
Episode: 9345, episode steps: 8, avg. reward: 9.3, loss_i: 10.36, loss_h: 4.235, loss_o: 1.329
Episode: 9346, episode steps: 10, avg. reward: 9.3, loss_i: 10.074, loss_h: 5.241, loss_o: 1.38
Episode: 9347, episode steps: 9, avg. reward: 9.3, loss_i: 9.894, loss_h: 4.455, loss_o: 1.388
Episode: 9348, episode steps: 9, avg. rewa

Episode: 9424, episode steps: 9, avg. reward: 9.3, loss_i: 11.263, loss_h: 5.037, loss_o: 1.425
Episode: 9425, episode steps: 8, avg. reward: 9.3, loss_i: 10.452, loss_h: 5.934, loss_o: 1.203
Episode: 9426, episode steps: 10, avg. reward: 9.3, loss_i: 9.59, loss_h: 4.583, loss_o: 1.283
Episode: 9427, episode steps: 11, avg. reward: 9.31, loss_i: 12.222, loss_h: 5.295, loss_o: 1.432
Episode: 9428, episode steps: 10, avg. reward: 9.31, loss_i: 10.529, loss_h: 5.782, loss_o: 1.461
Episode: 9429, episode steps: 9, avg. reward: 9.31, loss_i: 9.786, loss_h: 5.014, loss_o: 1.423
Episode: 9430, episode steps: 11, avg. reward: 9.31, loss_i: 10.542, loss_h: 5.851, loss_o: 1.384
Episode: 9431, episode steps: 9, avg. reward: 9.31, loss_i: 10.393, loss_h: 5.22, loss_o: 1.336
Episode: 9432, episode steps: 9, avg. reward: 9.31, loss_i: 11.367, loss_h: 5.459, loss_o: 1.38
Episode: 9433, episode steps: 10, avg. reward: 9.31, loss_i: 10.697, loss_h: 4.356, loss_o: 1.026
Episode: 9434, episode steps: 9, 

Episode: 9510, episode steps: 8, avg. reward: 9.3, loss_i: 11.369, loss_h: 5.356, loss_o: 1.223
Episode: 9511, episode steps: 9, avg. reward: 9.3, loss_i: 13.86, loss_h: 6.018, loss_o: 1.282
Episode: 9512, episode steps: 10, avg. reward: 9.3, loss_i: 13.513, loss_h: 5.029, loss_o: 1.196
Episode: 9513, episode steps: 9, avg. reward: 9.3, loss_i: 12.849, loss_h: 5.203, loss_o: 1.378
Episode: 9514, episode steps: 10, avg. reward: 9.31, loss_i: 10.553, loss_h: 4.845, loss_o: 1.412
Episode: 9515, episode steps: 9, avg. reward: 9.3, loss_i: 12.818, loss_h: 4.386, loss_o: 1.255
Episode: 9516, episode steps: 10, avg. reward: 9.31, loss_i: 11.169, loss_h: 4.828, loss_o: 1.343
Episode: 9517, episode steps: 10, avg. reward: 9.31, loss_i: 11.041, loss_h: 4.802, loss_o: 1.319
Episode: 9518, episode steps: 10, avg. reward: 9.31, loss_i: 11.545, loss_h: 5.398, loss_o: 1.309
Episode: 9519, episode steps: 9, avg. reward: 9.31, loss_i: 10.994, loss_h: 4.871, loss_o: 0.936
Episode: 9520, episode steps: 8

Episode: 9595, episode steps: 9, avg. reward: 9.31, loss_i: 9.894, loss_h: 4.761, loss_o: 1.378
Episode: 9596, episode steps: 8, avg. reward: 9.31, loss_i: 12.548, loss_h: 5.567, loss_o: 1.077
Episode: 9597, episode steps: 9, avg. reward: 9.31, loss_i: 11.846, loss_h: 5.45, loss_o: 1.26
Episode: 9598, episode steps: 10, avg. reward: 9.31, loss_i: 9.8, loss_h: 4.384, loss_o: 1.417
Episode: 9599, episode steps: 10, avg. reward: 9.31, loss_i: 10.781, loss_h: 4.912, loss_o: 1.265
Episode: 9600, episode steps: 8, avg. reward: 9.31, loss_i: 10.662, loss_h: 4.92, loss_o: 1.427
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
Episode: 9601, episode steps: 9, avg. reward: 9.31, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 9602, episode steps: 8, avg. reward: 9.3, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 9603, 

Episode: 9679, episode steps: 10, avg. reward: 9.33, loss_i: 10.01, loss_h: 4.323, loss_o: 1.703
Episode: 9680, episode steps: 10, avg. reward: 9.33, loss_i: 10.935, loss_h: 4.275, loss_o: 1.429
Episode: 9681, episode steps: 9, avg. reward: 9.33, loss_i: 10.502, loss_h: 4.427, loss_o: 1.387
Episode: 9682, episode steps: 9, avg. reward: 9.33, loss_i: 9.963, loss_h: 4.488, loss_o: 1.41
Episode: 9683, episode steps: 8, avg. reward: 9.33, loss_i: 9.657, loss_h: 4.358, loss_o: 1.418
Episode: 9684, episode steps: 9, avg. reward: 9.33, loss_i: 10.441, loss_h: 4.623, loss_o: 1.321
Episode: 9685, episode steps: 10, avg. reward: 9.33, loss_i: 11.071, loss_h: 5.271, loss_o: 1.422
Episode: 9686, episode steps: 10, avg. reward: 9.33, loss_i: 9.62, loss_h: 4.076, loss_o: 1.442
Episode: 9687, episode steps: 10, avg. reward: 9.33, loss_i: 10.589, loss_h: 5.253, loss_o: 1.78
Episode: 9688, episode steps: 9, avg. reward: 9.33, loss_i: 11.963, loss_h: 4.561, loss_o: 1.603
Episode: 9689, episode steps: 10

Episode: 9764, episode steps: 9, avg. reward: 9.33, loss_i: 10.813, loss_h: 5.331, loss_o: 1.779
Episode: 9765, episode steps: 9, avg. reward: 9.33, loss_i: 9.729, loss_h: 4.757, loss_o: 1.497
Episode: 9766, episode steps: 10, avg. reward: 9.33, loss_i: 9.103, loss_h: 4.359, loss_o: 1.698
Episode: 9767, episode steps: 8, avg. reward: 9.33, loss_i: 10.865, loss_h: 4.588, loss_o: 1.349
Episode: 9768, episode steps: 10, avg. reward: 9.33, loss_i: 11.335, loss_h: 4.477, loss_o: 1.662
Episode: 9769, episode steps: 10, avg. reward: 9.33, loss_i: 9.706, loss_h: 5.362, loss_o: 1.662
Episode: 9770, episode steps: 9, avg. reward: 9.33, loss_i: 9.943, loss_h: 4.362, loss_o: 1.38
Episode: 9771, episode steps: 10, avg. reward: 9.33, loss_i: 9.282, loss_h: 4.842, loss_o: 1.79
Episode: 9772, episode steps: 9, avg. reward: 9.33, loss_i: 10.672, loss_h: 4.193, loss_o: 1.466
Episode: 9773, episode steps: 10, avg. reward: 9.33, loss_i: 11.035, loss_h: 4.528, loss_o: 1.492
Episode: 9774, episode steps: 9,

Episode: 9849, episode steps: 9, avg. reward: 9.33, loss_i: 11.007, loss_h: 5.009, loss_o: 1.419
Episode: 9850, episode steps: 9, avg. reward: 9.33, loss_i: 10.72, loss_h: 5.282, loss_o: 1.47
Episode: 9851, episode steps: 8, avg. reward: 9.33, loss_i: 9.932, loss_h: 5.259, loss_o: 1.417
Episode: 9852, episode steps: 9, avg. reward: 9.33, loss_i: 8.435, loss_h: 3.625, loss_o: 1.459
Episode: 9853, episode steps: 10, avg. reward: 9.33, loss_i: 8.897, loss_h: 4.474, loss_o: 1.17
Episode: 9854, episode steps: 9, avg. reward: 9.33, loss_i: 11.422, loss_h: 4.923, loss_o: 1.502
Episode: 9855, episode steps: 9, avg. reward: 9.33, loss_i: 9.965, loss_h: 4.841, loss_o: 1.61
Episode: 9856, episode steps: 10, avg. reward: 9.33, loss_i: 11.364, loss_h: 4.978, loss_o: 1.428
Episode: 9857, episode steps: 9, avg. reward: 9.33, loss_i: 9.585, loss_h: 4.352, loss_o: 1.993
Episode: 9858, episode steps: 8, avg. reward: 9.33, loss_i: 9.555, loss_h: 5.457, loss_o: 1.542
Episode: 9859, episode steps: 10, avg.

Episode: 9934, episode steps: 9, avg. reward: 9.32, loss_i: 10.859, loss_h: 4.986, loss_o: 1.617
Episode: 9935, episode steps: 9, avg. reward: 9.32, loss_i: 10.916, loss_h: 4.828, loss_o: 1.254
Episode: 9936, episode steps: 10, avg. reward: 9.32, loss_i: 9.958, loss_h: 5.394, loss_o: 1.457
Episode: 9937, episode steps: 9, avg. reward: 9.32, loss_i: 9.826, loss_h: 5.678, loss_o: 1.656
Episode: 9938, episode steps: 9, avg. reward: 9.32, loss_i: 11.61, loss_h: 5.711, loss_o: 1.425
Episode: 9939, episode steps: 10, avg. reward: 9.32, loss_i: 10.6, loss_h: 4.648, loss_o: 1.733
Episode: 9940, episode steps: 8, avg. reward: 9.32, loss_i: 11.617, loss_h: 5.497, loss_o: 1.642
Episode: 9941, episode steps: 9, avg. reward: 9.32, loss_i: 10.739, loss_h: 4.2, loss_o: 1.474
Episode: 9942, episode steps: 10, avg. reward: 9.32, loss_i: 9.415, loss_h: 4.454, loss_o: 1.617
Episode: 9943, episode steps: 11, avg. reward: 9.33, loss_i: 10.053, loss_h: 5.521, loss_o: 1.42
Episode: 9944, episode steps: 10, a

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 10000, episode steps: 9, avg. reward: 9.33, loss_i: 12.652, loss_h: 5.417, loss_o: 1.642
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 12.483811847865582, 'critic_2_loss': 12.820472341444757, 'policy_loss': -13.182654658953348, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 5.430257027863331, 'critic_2_loss': 5.402789892031337, 'policy_loss': -3.8895908121792777, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 1.6426187952359517, 'critic_2_loss': 1.640588899453481, 'policy_loss': -3.0049577289157443, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1

100%|██████████| 1000/1000 [01:55<00:00,  8.64it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.34, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 10001, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10002, episode steps: 9, avg. reward: 9.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10003, episode steps: 9, avg. reward: 9.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10004, episode steps: 9, avg. reward: 9.25, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10005, episode steps: 8, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10006, episode steps: 10, avg. reward: 9.17, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10007, episode steps: 10, avg. reward: 9.29, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 10008, episode steps: 10, avg. reward: 9.38, loss_i: 7.013, loss_h: 2.574, loss_o: 1.582
Episode: 10009, episode steps: 10, avg. rewa

Episode: 10084, episode steps: 10, avg. reward: 9.33, loss_i: 8.879, loss_h: 4.404, loss_o: 1.578
Episode: 10085, episode steps: 9, avg. reward: 9.33, loss_i: 9.069, loss_h: 3.695, loss_o: 1.631
Episode: 10086, episode steps: 8, avg. reward: 9.31, loss_i: 9.872, loss_h: 4.06, loss_o: 1.447
Episode: 10087, episode steps: 10, avg. reward: 9.32, loss_i: 8.783, loss_h: 3.908, loss_o: 1.785
Episode: 10088, episode steps: 10, avg. reward: 9.33, loss_i: 9.102, loss_h: 4.074, loss_o: 1.755
Episode: 10089, episode steps: 10, avg. reward: 9.34, loss_i: 9.304, loss_h: 4.023, loss_o: 1.898
Episode: 10090, episode steps: 9, avg. reward: 9.33, loss_i: 10.06, loss_h: 3.694, loss_o: 1.798
Episode: 10091, episode steps: 9, avg. reward: 9.33, loss_i: 11.737, loss_h: 4.179, loss_o: 1.823
Episode: 10092, episode steps: 9, avg. reward: 9.33, loss_i: 9.426, loss_h: 4.163, loss_o: 1.393
Episode: 10093, episode steps: 10, avg. reward: 9.33, loss_i: 10.67, loss_h: 3.623, loss_o: 1.657
Episode: 10094, episode s

Episode: 10168, episode steps: 9, avg. reward: 9.32, loss_i: 13.038, loss_h: 5.116, loss_o: 1.217
Episode: 10169, episode steps: 10, avg. reward: 9.33, loss_i: 10.265, loss_h: 4.079, loss_o: 1.791
Episode: 10170, episode steps: 9, avg. reward: 9.32, loss_i: 9.595, loss_h: 4.225, loss_o: 1.6
Episode: 10171, episode steps: 10, avg. reward: 9.33, loss_i: 10.857, loss_h: 4.438, loss_o: 1.727
Episode: 10172, episode steps: 9, avg. reward: 9.33, loss_i: 11.159, loss_h: 4.455, loss_o: 1.559
Episode: 10173, episode steps: 9, avg. reward: 9.32, loss_i: 8.437, loss_h: 4.982, loss_o: 1.579
Episode: 10174, episode steps: 8, avg. reward: 9.32, loss_i: 10.037, loss_h: 4.03, loss_o: 1.934
Episode: 10175, episode steps: 10, avg. reward: 9.32, loss_i: 9.913, loss_h: 4.597, loss_o: 1.694
Episode: 10176, episode steps: 9, avg. reward: 9.32, loss_i: 11.774, loss_h: 3.802, loss_o: 1.733
Episode: 10177, episode steps: 8, avg. reward: 9.31, loss_i: 11.046, loss_h: 4.456, loss_o: 1.471
Episode: 10178, episode

Episode: 10252, episode steps: 10, avg. reward: 9.36, loss_i: 9.911, loss_h: 4.312, loss_o: 1.567
Episode: 10253, episode steps: 10, avg. reward: 9.36, loss_i: 10.941, loss_h: 4.738, loss_o: 1.783
Episode: 10254, episode steps: 9, avg. reward: 9.36, loss_i: 10.915, loss_h: 4.569, loss_o: 1.933
Episode: 10255, episode steps: 8, avg. reward: 9.35, loss_i: 10.348, loss_h: 5.324, loss_o: 1.718
Episode: 10256, episode steps: 10, avg. reward: 9.36, loss_i: 10.322, loss_h: 5.109, loss_o: 1.757
Episode: 10257, episode steps: 10, avg. reward: 9.36, loss_i: 10.029, loss_h: 4.764, loss_o: 1.746
Episode: 10258, episode steps: 9, avg. reward: 9.36, loss_i: 10.477, loss_h: 4.486, loss_o: 1.46
Episode: 10259, episode steps: 10, avg. reward: 9.36, loss_i: 10.265, loss_h: 4.563, loss_o: 1.483
Episode: 10260, episode steps: 10, avg. reward: 9.36, loss_i: 11.305, loss_h: 5.154, loss_o: 1.574
Episode: 10261, episode steps: 10, avg. reward: 9.36, loss_i: 9.943, loss_h: 5.123, loss_o: 1.379
Episode: 10262, 

Episode: 10336, episode steps: 10, avg. reward: 9.36, loss_i: 10.123, loss_h: 4.529, loss_o: 1.62
Episode: 10337, episode steps: 10, avg. reward: 9.36, loss_i: 9.708, loss_h: 5.137, loss_o: 1.708
Episode: 10338, episode steps: 10, avg. reward: 9.37, loss_i: 10.364, loss_h: 4.731, loss_o: 1.435
Episode: 10339, episode steps: 9, avg. reward: 9.37, loss_i: 9.932, loss_h: 4.11, loss_o: 1.654
Episode: 10340, episode steps: 11, avg. reward: 9.37, loss_i: 9.497, loss_h: 4.552, loss_o: 1.718
Episode: 10341, episode steps: 9, avg. reward: 9.37, loss_i: 8.959, loss_h: 4.899, loss_o: 1.643
Episode: 10342, episode steps: 10, avg. reward: 9.37, loss_i: 9.368, loss_h: 4.759, loss_o: 1.655
Episode: 10343, episode steps: 10, avg. reward: 9.37, loss_i: 7.558, loss_h: 4.681, loss_o: 1.48
Episode: 10344, episode steps: 9, avg. reward: 9.37, loss_i: 10.274, loss_h: 4.256, loss_o: 1.489
Episode: 10345, episode steps: 8, avg. reward: 9.37, loss_i: 11.026, loss_h: 4.681, loss_o: 1.753
Episode: 10346, episode

Episode: 10418, episode steps: 8, avg. reward: 9.37, loss_i: 6.633, loss_h: 3.519, loss_o: 1.847
Episode: 10419, episode steps: 10, avg. reward: 9.37, loss_i: 8.103, loss_h: 4.139, loss_o: 1.757
Episode: 10420, episode steps: 9, avg. reward: 9.37, loss_i: 7.209, loss_h: 4.508, loss_o: 1.794
Episode: 10421, episode steps: 8, avg. reward: 9.37, loss_i: 8.41, loss_h: 4.005, loss_o: 1.926
Episode: 10422, episode steps: 9, avg. reward: 9.37, loss_i: 7.376, loss_h: 3.52, loss_o: 1.678
Episode: 10423, episode steps: 9, avg. reward: 9.37, loss_i: 7.343, loss_h: 4.137, loss_o: 1.729
Episode: 10424, episode steps: 8, avg. reward: 9.36, loss_i: 7.05, loss_h: 3.925, loss_o: 1.892
Episode: 10425, episode steps: 9, avg. reward: 9.36, loss_i: 8.807, loss_h: 4.651, loss_o: 1.667
Episode: 10426, episode steps: 9, avg. reward: 9.36, loss_i: 9.371, loss_h: 5.484, loss_o: 1.763
Episode: 10427, episode steps: 10, avg. reward: 9.36, loss_i: 8.949, loss_h: 4.624, loss_o: 1.936
Episode: 10428, episode steps: 

Episode: 10503, episode steps: 9, avg. reward: 9.37, loss_i: 8.444, loss_h: 4.253, loss_o: 1.784
Episode: 10504, episode steps: 9, avg. reward: 9.37, loss_i: 9.534, loss_h: 4.092, loss_o: 1.705
Episode: 10505, episode steps: 9, avg. reward: 9.37, loss_i: 9.033, loss_h: 3.942, loss_o: 1.552
Episode: 10506, episode steps: 10, avg. reward: 9.37, loss_i: 9.468, loss_h: 4.418, loss_o: 1.614
Episode: 10507, episode steps: 9, avg. reward: 9.37, loss_i: 7.07, loss_h: 4.304, loss_o: 1.205
Episode: 10508, episode steps: 9, avg. reward: 9.37, loss_i: 8.85, loss_h: 4.103, loss_o: 1.471
Episode: 10509, episode steps: 8, avg. reward: 9.37, loss_i: 9.933, loss_h: 4.754, loss_o: 1.945
Episode: 10510, episode steps: 9, avg. reward: 9.37, loss_i: 8.928, loss_h: 4.599, loss_o: 1.746
Episode: 10511, episode steps: 9, avg. reward: 9.37, loss_i: 8.788, loss_h: 4.927, loss_o: 1.759
Episode: 10512, episode steps: 9, avg. reward: 9.37, loss_i: 8.126, loss_h: 4.81, loss_o: 1.647
Episode: 10513, episode steps: 1

Episode: 10588, episode steps: 10, avg. reward: 9.37, loss_i: 9.399, loss_h: 4.54, loss_o: 1.963
Episode: 10589, episode steps: 9, avg. reward: 9.37, loss_i: 8.665, loss_h: 4.952, loss_o: 1.516
Episode: 10590, episode steps: 9, avg. reward: 9.37, loss_i: 9.747, loss_h: 3.731, loss_o: 1.642
Episode: 10591, episode steps: 10, avg. reward: 9.37, loss_i: 10.205, loss_h: 4.404, loss_o: 2.019
Episode: 10592, episode steps: 10, avg. reward: 9.37, loss_i: 9.881, loss_h: 5.078, loss_o: 1.893
Episode: 10593, episode steps: 10, avg. reward: 9.37, loss_i: 9.738, loss_h: 4.751, loss_o: 1.874
Episode: 10594, episode steps: 10, avg. reward: 9.37, loss_i: 8.697, loss_h: 3.959, loss_o: 1.755
Episode: 10595, episode steps: 9, avg. reward: 9.37, loss_i: 7.928, loss_h: 4.885, loss_o: 1.757
Episode: 10596, episode steps: 10, avg. reward: 9.37, loss_i: 8.184, loss_h: 4.936, loss_o: 2.112
Episode: 10597, episode steps: 10, avg. reward: 9.37, loss_i: 9.096, loss_h: 5.498, loss_o: 1.898
Episode: 10598, episode

Episode: 10673, episode steps: 10, avg. reward: 9.37, loss_i: 8.869, loss_h: 4.729, loss_o: 1.985
Episode: 10674, episode steps: 9, avg. reward: 9.37, loss_i: 8.142, loss_h: 5.321, loss_o: 1.657
Episode: 10675, episode steps: 9, avg. reward: 9.37, loss_i: 7.939, loss_h: 4.979, loss_o: 2.228
Episode: 10676, episode steps: 10, avg. reward: 9.37, loss_i: 9.908, loss_h: 4.736, loss_o: 1.685
Episode: 10677, episode steps: 9, avg. reward: 9.37, loss_i: 10.251, loss_h: 5.38, loss_o: 1.916
Episode: 10678, episode steps: 9, avg. reward: 9.37, loss_i: 7.297, loss_h: 4.698, loss_o: 1.603
Episode: 10679, episode steps: 8, avg. reward: 9.37, loss_i: 9.303, loss_h: 4.936, loss_o: 2.11
Episode: 10680, episode steps: 11, avg. reward: 9.37, loss_i: 10.05, loss_h: 5.815, loss_o: 2.268
Episode: 10681, episode steps: 9, avg. reward: 9.37, loss_i: 10.502, loss_h: 4.222, loss_o: 1.582
Episode: 10682, episode steps: 9, avg. reward: 9.37, loss_i: 9.704, loss_h: 5.207, loss_o: 1.806
Episode: 10683, episode ste

Episode: 10758, episode steps: 10, avg. reward: 9.37, loss_i: 9.561, loss_h: 5.291, loss_o: 1.845
Episode: 10759, episode steps: 8, avg. reward: 9.37, loss_i: 10.379, loss_h: 5.013, loss_o: 1.771
Episode: 10760, episode steps: 9, avg. reward: 9.37, loss_i: 9.011, loss_h: 5.51, loss_o: 1.945
Episode: 10761, episode steps: 9, avg. reward: 9.37, loss_i: 9.996, loss_h: 6.043, loss_o: 1.8
Episode: 10762, episode steps: 9, avg. reward: 9.36, loss_i: 10.38, loss_h: 4.499, loss_o: 1.799
Episode: 10763, episode steps: 9, avg. reward: 9.36, loss_i: 9.904, loss_h: 4.896, loss_o: 1.778
Episode: 10764, episode steps: 9, avg. reward: 9.36, loss_i: 8.362, loss_h: 5.418, loss_o: 1.5
Episode: 10765, episode steps: 9, avg. reward: 9.36, loss_i: 9.547, loss_h: 5.716, loss_o: 1.983
Episode: 10766, episode steps: 10, avg. reward: 9.36, loss_i: 8.557, loss_h: 5.195, loss_o: 1.839
Episode: 10767, episode steps: 9, avg. reward: 9.36, loss_i: 10.537, loss_h: 4.927, loss_o: 2.106
Episode: 10768, episode steps: 

Episode: 10841, episode steps: 8, avg. reward: 9.37, loss_i: 9.523, loss_h: 3.491, loss_o: 1.673
Episode: 10842, episode steps: 10, avg. reward: 9.37, loss_i: 9.5, loss_h: 3.411, loss_o: 1.561
Episode: 10843, episode steps: 9, avg. reward: 9.37, loss_i: 9.241, loss_h: 3.796, loss_o: 1.532
Episode: 10844, episode steps: 9, avg. reward: 9.36, loss_i: 10.402, loss_h: 3.796, loss_o: 1.858
Episode: 10845, episode steps: 8, avg. reward: 9.36, loss_i: 10.767, loss_h: 3.091, loss_o: 1.505
Episode: 10846, episode steps: 9, avg. reward: 9.36, loss_i: 10.218, loss_h: 4.026, loss_o: 1.725
Episode: 10847, episode steps: 9, avg. reward: 9.36, loss_i: 8.887, loss_h: 3.778, loss_o: 1.603
Episode: 10848, episode steps: 10, avg. reward: 9.36, loss_i: 11.257, loss_h: 3.896, loss_o: 1.581
Episode: 10849, episode steps: 10, avg. reward: 9.36, loss_i: 10.985, loss_h: 4.487, loss_o: 1.401
Episode: 10850, episode steps: 10, avg. reward: 9.36, loss_i: 9.485, loss_h: 4.244, loss_o: 1.618
Episode: 10851, episode

Episode: 10926, episode steps: 10, avg. reward: 9.37, loss_i: 8.565, loss_h: 3.763, loss_o: 1.255
Episode: 10927, episode steps: 10, avg. reward: 9.37, loss_i: 8.322, loss_h: 4.771, loss_o: 1.289
Episode: 10928, episode steps: 9, avg. reward: 9.37, loss_i: 9.374, loss_h: 4.099, loss_o: 1.37
Episode: 10929, episode steps: 10, avg. reward: 9.37, loss_i: 6.927, loss_h: 4.223, loss_o: 1.499
Episode: 10930, episode steps: 9, avg. reward: 9.37, loss_i: 8.643, loss_h: 4.239, loss_o: 1.217
Episode: 10931, episode steps: 9, avg. reward: 9.37, loss_i: 7.118, loss_h: 4.382, loss_o: 1.311
Episode: 10932, episode steps: 9, avg. reward: 9.37, loss_i: 9.192, loss_h: 3.869, loss_o: 1.449
Episode: 10933, episode steps: 8, avg. reward: 9.37, loss_i: 7.571, loss_h: 4.32, loss_o: 1.384
Episode: 10934, episode steps: 11, avg. reward: 9.37, loss_i: 8.609, loss_h: 5.194, loss_o: 1.477
Episode: 10935, episode steps: 10, avg. reward: 9.37, loss_i: 7.837, loss_h: 4.236, loss_o: 1.661
Episode: 10936, episode ste

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 11000, episode steps: 9, avg. reward: 9.36, loss_i: 8.668, loss_h: 3.351, loss_o: 1.408
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 8.661324022337794, 'critic_2_loss': 8.674982425239351, 'policy_loss': -13.878466592894661, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 3.347844182442014, 'critic_2_loss': 3.353486209964523, 'policy_loss': -3.3297577924453297, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 1.4067391355832417, 'critic_2_loss': 1.4086901744206746, 'policy_loss': -3.344737821155124, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [1.0, 0.22, 0.81, 0.0, 0.18, 0.08, 1.0, 0.22]
o: [1.0]
UPDATES
i: [1796, 1796, 1796, 1796]
h: [1792, 412, 1499, 0, 0, 0, 1795, 314]
o: [1796]
MEMORY
i: [1861, 1861, 1861, 1861]
h: [1847, 187, 1299, 0, 4, 23, 18

100%|██████████| 1000/1000 [01:54<00:00,  8.72it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.39, Max. Reward: 12.0
--------------------------------------------------------------------------------
Episode: 11001, episode steps: 9, avg. reward: 9.0, loss_i: 8.597, loss_h: 4.396, loss_o: 1.375
Episode: 11002, episode steps: 9, avg. reward: 9.0, loss_i: 8.929, loss_h: 4.486, loss_o: 1.589
Episode: 11003, episode steps: 9, avg. reward: 9.0, loss_i: 8.617, loss_h: 4.218, loss_o: 1.406
Episode: 11004, episode steps: 9, avg. reward: 9.0, loss_i: 10.656, loss_h: 3.608, loss_o: 1.687
Episode: 11005, episode steps: 9, avg. reward: 9.0, loss_i: 10.359, loss_h: 4.31, loss_o: 1.31
Episode: 11006, episode steps: 8, avg. reward: 8.83, loss_i: 8.897, loss_h: 4.235, loss_o: 1.377
Episode: 11007, episode steps: 9, avg. reward: 8.86, loss_i: 9.1, loss_h: 4.426, loss_o: 1.246
Episode: 11008, episode steps: 8, avg. reward: 8.75, loss_i: 9.22, loss_h: 3.849, loss_o: 1.585
Episode: 110

Episode: 11084, episode steps: 9, avg. reward: 9.21, loss_i: 8.936, loss_h: 3.609, loss_o: 1.744
Episode: 11085, episode steps: 10, avg. reward: 9.22, loss_i: 8.448, loss_h: 3.941, loss_o: 1.689
Episode: 11086, episode steps: 9, avg. reward: 9.22, loss_i: 9.863, loss_h: 3.503, loss_o: 1.455
Episode: 11087, episode steps: 10, avg. reward: 9.23, loss_i: 9.588, loss_h: 3.688, loss_o: 1.294
Episode: 11088, episode steps: 10, avg. reward: 9.24, loss_i: 8.663, loss_h: 4.166, loss_o: 1.455
Episode: 11089, episode steps: 10, avg. reward: 9.25, loss_i: 8.242, loss_h: 3.217, loss_o: 1.472
Episode: 11090, episode steps: 9, avg. reward: 9.24, loss_i: 10.04, loss_h: 3.834, loss_o: 1.393
Episode: 11091, episode steps: 10, avg. reward: 9.25, loss_i: 9.304, loss_h: 3.94, loss_o: 1.494
Episode: 11092, episode steps: 10, avg. reward: 9.26, loss_i: 8.623, loss_h: 4.029, loss_o: 1.658
Episode: 11093, episode steps: 9, avg. reward: 9.26, loss_i: 8.752, loss_h: 3.903, loss_o: 1.199
Episode: 11094, episode s

Episode: 11169, episode steps: 10, avg. reward: 9.28, loss_i: 8.211, loss_h: 4.116, loss_o: 1.549
Episode: 11170, episode steps: 9, avg. reward: 9.28, loss_i: 9.717, loss_h: 4.269, loss_o: 1.521
Episode: 11171, episode steps: 10, avg. reward: 9.29, loss_i: 8.811, loss_h: 3.902, loss_o: 1.602
Episode: 11172, episode steps: 10, avg. reward: 9.29, loss_i: 9.252, loss_h: 3.788, loss_o: 1.79
Episode: 11173, episode steps: 8, avg. reward: 9.28, loss_i: 9.723, loss_h: 3.547, loss_o: 1.672
Episode: 11174, episode steps: 10, avg. reward: 9.29, loss_i: 9.582, loss_h: 4.072, loss_o: 1.548
Episode: 11175, episode steps: 8, avg. reward: 9.28, loss_i: 8.816, loss_h: 4.421, loss_o: 1.544
Episode: 11176, episode steps: 9, avg. reward: 9.28, loss_i: 9.945, loss_h: 4.495, loss_o: 1.447
Episode: 11177, episode steps: 9, avg. reward: 9.28, loss_i: 9.343, loss_h: 4.059, loss_o: 1.956
Episode: 11178, episode steps: 9, avg. reward: 9.28, loss_i: 10.461, loss_h: 3.835, loss_o: 1.782
Episode: 11179, episode st

Episode: 11252, episode steps: 9, avg. reward: 9.31, loss_i: 7.553, loss_h: 3.061, loss_o: 2.164
Episode: 11253, episode steps: 11, avg. reward: 9.32, loss_i: 7.028, loss_h: 3.505, loss_o: 1.8
Episode: 11254, episode steps: 10, avg. reward: 9.32, loss_i: 6.598, loss_h: 4.027, loss_o: 1.918
Episode: 11255, episode steps: 10, avg. reward: 9.32, loss_i: 8.296, loss_h: 3.671, loss_o: 2.154
Episode: 11256, episode steps: 10, avg. reward: 9.32, loss_i: 7.611, loss_h: 3.978, loss_o: 1.947
Episode: 11257, episode steps: 10, avg. reward: 9.33, loss_i: 7.777, loss_h: 3.54, loss_o: 2.308
Episode: 11258, episode steps: 10, avg. reward: 9.33, loss_i: 6.878, loss_h: 3.407, loss_o: 2.074
Episode: 11259, episode steps: 8, avg. reward: 9.32, loss_i: 6.719, loss_h: 3.693, loss_o: 1.837
Episode: 11260, episode steps: 10, avg. reward: 9.33, loss_i: 7.081, loss_h: 3.778, loss_o: 2.472
Episode: 11261, episode steps: 10, avg. reward: 9.33, loss_i: 7.708, loss_h: 3.722, loss_o: 1.988
Episode: 11262, episode s

Episode: 11337, episode steps: 9, avg. reward: 9.35, loss_i: 7.145, loss_h: 3.488, loss_o: 2.097
Episode: 11338, episode steps: 9, avg. reward: 9.35, loss_i: 6.193, loss_h: 3.113, loss_o: 2.139
Episode: 11339, episode steps: 10, avg. reward: 9.35, loss_i: 8.107, loss_h: 3.476, loss_o: 1.887
Episode: 11340, episode steps: 11, avg. reward: 9.35, loss_i: 7.992, loss_h: 3.268, loss_o: 2.241
Episode: 11341, episode steps: 10, avg. reward: 9.35, loss_i: 7.106, loss_h: 3.812, loss_o: 2.78
Episode: 11342, episode steps: 11, avg. reward: 9.36, loss_i: 8.069, loss_h: 3.497, loss_o: 1.882
Episode: 11343, episode steps: 10, avg. reward: 9.36, loss_i: 9.021, loss_h: 3.248, loss_o: 2.585
Episode: 11344, episode steps: 9, avg. reward: 9.36, loss_i: 7.427, loss_h: 2.847, loss_o: 2.133
Episode: 11345, episode steps: 8, avg. reward: 9.36, loss_i: 6.712, loss_h: 4.411, loss_o: 1.689
Episode: 11346, episode steps: 8, avg. reward: 9.35, loss_i: 7.739, loss_h: 3.444, loss_o: 2.386
Episode: 11347, episode st

Episode: 11422, episode steps: 10, avg. reward: 9.35, loss_i: 6.853, loss_h: 3.863, loss_o: 2.171
Episode: 11423, episode steps: 8, avg. reward: 9.35, loss_i: 9.165, loss_h: 3.357, loss_o: 2.161
Episode: 11424, episode steps: 8, avg. reward: 9.35, loss_i: 7.784, loss_h: 3.817, loss_o: 1.992
Episode: 11425, episode steps: 9, avg. reward: 9.35, loss_i: 6.865, loss_h: 3.387, loss_o: 2.168
Episode: 11426, episode steps: 10, avg. reward: 9.35, loss_i: 7.519, loss_h: 4.31, loss_o: 2.404
Episode: 11427, episode steps: 9, avg. reward: 9.35, loss_i: 8.024, loss_h: 3.303, loss_o: 2.265
Episode: 11428, episode steps: 8, avg. reward: 9.34, loss_i: 6.89, loss_h: 3.243, loss_o: 2.511
Episode: 11429, episode steps: 9, avg. reward: 9.34, loss_i: 8.483, loss_h: 3.401, loss_o: 2.389
Episode: 11430, episode steps: 11, avg. reward: 9.35, loss_i: 7.313, loss_h: 3.729, loss_o: 2.564
Episode: 11431, episode steps: 9, avg. reward: 9.35, loss_i: 7.669, loss_h: 3.815, loss_o: 2.123
Episode: 11432, episode steps

Episode: 11507, episode steps: 8, avg. reward: 9.34, loss_i: 10.135, loss_h: 3.214, loss_o: 2.422
Episode: 11508, episode steps: 9, avg. reward: 9.33, loss_i: 10.401, loss_h: 3.434, loss_o: 2.346
Episode: 11509, episode steps: 10, avg. reward: 9.34, loss_i: 8.484, loss_h: 3.302, loss_o: 2.472
Episode: 11510, episode steps: 8, avg. reward: 9.33, loss_i: 6.931, loss_h: 3.613, loss_o: 2.156
Episode: 11511, episode steps: 10, avg. reward: 9.33, loss_i: 8.437, loss_h: 3.838, loss_o: 2.073
Episode: 11512, episode steps: 8, avg. reward: 9.33, loss_i: 7.501, loss_h: 4.524, loss_o: 2.246
Episode: 11513, episode steps: 10, avg. reward: 9.33, loss_i: 8.475, loss_h: 3.972, loss_o: 2.01
Episode: 11514, episode steps: 10, avg. reward: 9.33, loss_i: 8.583, loss_h: 3.438, loss_o: 2.145
Episode: 11515, episode steps: 10, avg. reward: 9.34, loss_i: 7.609, loss_h: 3.295, loss_o: 2.232
Episode: 11516, episode steps: 10, avg. reward: 9.34, loss_i: 7.071, loss_h: 3.541, loss_o: 2.138
Episode: 11517, episode

Episode: 11592, episode steps: 8, avg. reward: 9.33, loss_i: 7.478, loss_h: 3.901, loss_o: 2.184
Episode: 11593, episode steps: 9, avg. reward: 9.33, loss_i: 8.415, loss_h: 3.642, loss_o: 2.242
Episode: 11594, episode steps: 10, avg. reward: 9.33, loss_i: 8.992, loss_h: 3.566, loss_o: 1.967
Episode: 11595, episode steps: 9, avg. reward: 9.33, loss_i: 7.028, loss_h: 3.569, loss_o: 2.38
Episode: 11596, episode steps: 9, avg. reward: 9.33, loss_i: 8.122, loss_h: 3.383, loss_o: 1.954
Episode: 11597, episode steps: 9, avg. reward: 9.33, loss_i: 8.285, loss_h: 3.557, loss_o: 2.166
Episode: 11598, episode steps: 9, avg. reward: 9.33, loss_i: 7.386, loss_h: 3.326, loss_o: 2.748
Episode: 11599, episode steps: 9, avg. reward: 9.33, loss_i: 8.801, loss_h: 3.487, loss_o: 2.055
Episode: 11600, episode steps: 11, avg. reward: 9.33, loss_i: 7.37, loss_h: 2.895, loss_o: 1.893
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  Tr

Episode: 11675, episode steps: 8, avg. reward: 9.33, loss_i: 9.035, loss_h: 2.961, loss_o: 2.591
Episode: 11676, episode steps: 9, avg. reward: 9.33, loss_i: 10.139, loss_h: 2.959, loss_o: 2.102
Episode: 11677, episode steps: 9, avg. reward: 9.33, loss_i: 8.322, loss_h: 2.653, loss_o: 2.544
Episode: 11678, episode steps: 10, avg. reward: 9.33, loss_i: 9.306, loss_h: 3.122, loss_o: 2.559
Episode: 11679, episode steps: 10, avg. reward: 9.33, loss_i: 8.184, loss_h: 2.952, loss_o: 2.691
Episode: 11680, episode steps: 9, avg. reward: 9.33, loss_i: 8.007, loss_h: 2.749, loss_o: 2.348
Episode: 11681, episode steps: 8, avg. reward: 9.33, loss_i: 9.439, loss_h: 3.0, loss_o: 2.515
Episode: 11682, episode steps: 8, avg. reward: 9.33, loss_i: 9.799, loss_h: 2.456, loss_o: 2.339
Episode: 11683, episode steps: 11, avg. reward: 9.33, loss_i: 8.67, loss_h: 3.651, loss_o: 2.397
Episode: 11684, episode steps: 9, avg. reward: 9.33, loss_i: 8.957, loss_h: 2.576, loss_o: 2.695
Episode: 11685, episode steps

Episode: 11760, episode steps: 9, avg. reward: 9.32, loss_i: 9.626, loss_h: 3.105, loss_o: 2.416
Episode: 11761, episode steps: 9, avg. reward: 9.32, loss_i: 8.834, loss_h: 2.622, loss_o: 2.652
Episode: 11762, episode steps: 10, avg. reward: 9.32, loss_i: 8.256, loss_h: 3.172, loss_o: 2.318
Episode: 11763, episode steps: 9, avg. reward: 9.32, loss_i: 11.11, loss_h: 2.61, loss_o: 2.356
Episode: 11764, episode steps: 10, avg. reward: 9.32, loss_i: 9.094, loss_h: 2.87, loss_o: 2.295
Episode: 11765, episode steps: 9, avg. reward: 9.32, loss_i: 9.922, loss_h: 2.725, loss_o: 2.248
Episode: 11766, episode steps: 10, avg. reward: 9.32, loss_i: 8.594, loss_h: 3.188, loss_o: 2.333
Episode: 11767, episode steps: 9, avg. reward: 9.32, loss_i: 8.048, loss_h: 3.384, loss_o: 2.484
Episode: 11768, episode steps: 9, avg. reward: 9.32, loss_i: 8.976, loss_h: 2.946, loss_o: 3.064
Episode: 11769, episode steps: 9, avg. reward: 9.32, loss_i: 8.601, loss_h: 2.702, loss_o: 2.404
Episode: 11770, episode steps

Episode: 11845, episode steps: 9, avg. reward: 9.32, loss_i: 8.527, loss_h: 2.675, loss_o: 2.247
Episode: 11846, episode steps: 10, avg. reward: 9.33, loss_i: 7.351, loss_h: 2.993, loss_o: 2.883
Episode: 11847, episode steps: 11, avg. reward: 9.33, loss_i: 8.885, loss_h: 2.688, loss_o: 2.4
Episode: 11848, episode steps: 9, avg. reward: 9.33, loss_i: 8.221, loss_h: 3.131, loss_o: 2.578
Episode: 11849, episode steps: 10, avg. reward: 9.33, loss_i: 8.348, loss_h: 2.965, loss_o: 2.698
Episode: 11850, episode steps: 9, avg. reward: 9.33, loss_i: 9.404, loss_h: 3.269, loss_o: 2.405
Episode: 11851, episode steps: 9, avg. reward: 9.33, loss_i: 8.696, loss_h: 3.499, loss_o: 2.197
Episode: 11852, episode steps: 9, avg. reward: 9.33, loss_i: 8.643, loss_h: 2.896, loss_o: 2.417
Episode: 11853, episode steps: 10, avg. reward: 9.33, loss_i: 9.175, loss_h: 2.855, loss_o: 2.747
Episode: 11854, episode steps: 10, avg. reward: 9.33, loss_i: 7.377, loss_h: 2.907, loss_o: 2.487
Episode: 11855, episode ste

Episode: 11930, episode steps: 10, avg. reward: 9.33, loss_i: 8.202, loss_h: 2.618, loss_o: 2.635
Episode: 11931, episode steps: 10, avg. reward: 9.33, loss_i: 7.076, loss_h: 3.257, loss_o: 2.606
Episode: 11932, episode steps: 9, avg. reward: 9.33, loss_i: 10.004, loss_h: 2.88, loss_o: 2.52
Episode: 11933, episode steps: 10, avg. reward: 9.33, loss_i: 8.852, loss_h: 2.835, loss_o: 2.309
Episode: 11934, episode steps: 10, avg. reward: 9.34, loss_i: 8.489, loss_h: 3.228, loss_o: 2.284
Episode: 11935, episode steps: 9, avg. reward: 9.33, loss_i: 8.97, loss_h: 3.362, loss_o: 2.345
Episode: 11936, episode steps: 9, avg. reward: 9.33, loss_i: 8.724, loss_h: 3.532, loss_o: 2.229
Episode: 11937, episode steps: 10, avg. reward: 9.34, loss_i: 9.07, loss_h: 2.762, loss_o: 2.441
Episode: 11938, episode steps: 10, avg. reward: 9.34, loss_i: 9.067, loss_h: 3.008, loss_o: 2.394
Episode: 11939, episode steps: 9, avg. reward: 9.34, loss_i: 8.65, loss_h: 3.267, loss_o: 2.542
Episode: 11940, episode step

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 12000, episode steps: 9, avg. reward: 9.33, loss_i: 8.152, loss_h: 2.901, loss_o: 3.128
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 8.175843207372559, 'critic_2_loss': 8.127304861529005, 'policy_loss': -13.660631471210056, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 2.8980196165103904, 'critic_2_loss': 2.9039876730138268, 'policy_loss': -2.955681619077626, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 3.1303118069966636, 'critic_2_loss': 3.125262896219889, 'policy_loss': -3.6911857393052845, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0

100%|██████████| 1000/1000 [01:54<00:00,  8.72it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.36, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 12001, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12002, episode steps: 8, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12003, episode steps: 10, avg. reward: 9.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12004, episode steps: 10, avg. reward: 9.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12005, episode steps: 9, avg. reward: 9.4, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12006, episode steps: 8, avg. reward: 9.17, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12007, episode steps: 9, avg. reward: 9.14, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 12008, episode steps: 8, avg. reward: 9.0, loss_i: 409.84, loss_h: 171.642, loss_o: 28.817
Episode: 12009, episode steps: 9, avg. rewa

Episode: 12084, episode steps: 9, avg. reward: 9.35, loss_i: 8.198, loss_h: 3.59, loss_o: 2.66
Episode: 12085, episode steps: 8, avg. reward: 9.33, loss_i: 7.19, loss_h: 3.158, loss_o: 2.311
Episode: 12086, episode steps: 9, avg. reward: 9.33, loss_i: 7.419, loss_h: 3.414, loss_o: 2.55
Episode: 12087, episode steps: 9, avg. reward: 9.32, loss_i: 8.423, loss_h: 3.029, loss_o: 2.367
Episode: 12088, episode steps: 9, avg. reward: 9.32, loss_i: 7.496, loss_h: 3.359, loss_o: 2.57
Episode: 12089, episode steps: 10, avg. reward: 9.33, loss_i: 8.378, loss_h: 3.298, loss_o: 2.01
Episode: 12090, episode steps: 9, avg. reward: 9.32, loss_i: 7.083, loss_h: 3.333, loss_o: 2.798
Episode: 12091, episode steps: 10, avg. reward: 9.33, loss_i: 7.525, loss_h: 3.142, loss_o: 2.557
Episode: 12092, episode steps: 9, avg. reward: 9.33, loss_i: 7.647, loss_h: 3.389, loss_o: 2.356
Episode: 12093, episode steps: 10, avg. reward: 9.33, loss_i: 8.354, loss_h: 3.301, loss_o: 2.593
Episode: 12094, episode steps: 9,

Episode: 12169, episode steps: 9, avg. reward: 9.31, loss_i: 8.671, loss_h: 3.656, loss_o: 2.642
Episode: 12170, episode steps: 11, avg. reward: 9.32, loss_i: 7.727, loss_h: 3.057, loss_o: 2.18
Episode: 12171, episode steps: 9, avg. reward: 9.32, loss_i: 8.32, loss_h: 2.916, loss_o: 2.814
Episode: 12172, episode steps: 9, avg. reward: 9.31, loss_i: 8.89, loss_h: 3.163, loss_o: 2.591
Episode: 12173, episode steps: 8, avg. reward: 9.31, loss_i: 7.808, loss_h: 3.108, loss_o: 2.224
Episode: 12174, episode steps: 10, avg. reward: 9.31, loss_i: 7.701, loss_h: 3.121, loss_o: 2.641
Episode: 12175, episode steps: 10, avg. reward: 9.31, loss_i: 7.731, loss_h: 3.101, loss_o: 2.384
Episode: 12176, episode steps: 8, avg. reward: 9.31, loss_i: 8.091, loss_h: 3.494, loss_o: 2.45
Episode: 12177, episode steps: 9, avg. reward: 9.31, loss_i: 8.643, loss_h: 3.0, loss_o: 1.965
Episode: 12178, episode steps: 11, avg. reward: 9.31, loss_i: 9.019, loss_h: 2.991, loss_o: 2.304
Episode: 12179, episode steps: 1

Episode: 12254, episode steps: 8, avg. reward: 9.3, loss_i: 7.673, loss_h: 2.533, loss_o: 2.707
Episode: 12255, episode steps: 9, avg. reward: 9.3, loss_i: 7.911, loss_h: 3.425, loss_o: 2.159
Episode: 12256, episode steps: 11, avg. reward: 9.3, loss_i: 8.025, loss_h: 2.792, loss_o: 2.522
Episode: 12257, episode steps: 9, avg. reward: 9.3, loss_i: 10.207, loss_h: 3.027, loss_o: 2.638
Episode: 12258, episode steps: 10, avg. reward: 9.31, loss_i: 8.896, loss_h: 3.444, loss_o: 2.316
Episode: 12259, episode steps: 9, avg. reward: 9.31, loss_i: 7.783, loss_h: 3.567, loss_o: 1.895
Episode: 12260, episode steps: 10, avg. reward: 9.31, loss_i: 8.147, loss_h: 3.108, loss_o: 2.363
Episode: 12261, episode steps: 9, avg. reward: 9.31, loss_i: 8.904, loss_h: 2.717, loss_o: 2.461
Episode: 12262, episode steps: 10, avg. reward: 9.31, loss_i: 7.254, loss_h: 3.661, loss_o: 2.834
Episode: 12263, episode steps: 9, avg. reward: 9.31, loss_i: 8.586, loss_h: 3.649, loss_o: 2.379
Episode: 12264, episode steps

Episode: 12339, episode steps: 10, avg. reward: 9.31, loss_i: 8.665, loss_h: 2.87, loss_o: 2.56
Episode: 12340, episode steps: 9, avg. reward: 9.31, loss_i: 8.248, loss_h: 3.131, loss_o: 2.446
Episode: 12341, episode steps: 10, avg. reward: 9.31, loss_i: 7.956, loss_h: 2.277, loss_o: 2.228
Episode: 12342, episode steps: 10, avg. reward: 9.32, loss_i: 7.573, loss_h: 2.983, loss_o: 2.541
Episode: 12343, episode steps: 10, avg. reward: 9.32, loss_i: 8.47, loss_h: 3.241, loss_o: 2.578
Episode: 12344, episode steps: 11, avg. reward: 9.32, loss_i: 6.467, loss_h: 3.161, loss_o: 2.529
Episode: 12345, episode steps: 9, avg. reward: 9.32, loss_i: 8.203, loss_h: 3.364, loss_o: 2.728
Episode: 12346, episode steps: 8, avg. reward: 9.32, loss_i: 10.352, loss_h: 3.464, loss_o: 2.463
Episode: 12347, episode steps: 9, avg. reward: 9.32, loss_i: 7.281, loss_h: 2.715, loss_o: 2.806
Episode: 12348, episode steps: 9, avg. reward: 9.32, loss_i: 8.645, loss_h: 3.038, loss_o: 2.372
Episode: 12349, episode ste

Episode: 12422, episode steps: 8, avg. reward: 9.34, loss_i: 9.184, loss_h: 2.594, loss_o: 2.599
Episode: 12423, episode steps: 9, avg. reward: 9.34, loss_i: 9.501, loss_h: 2.748, loss_o: 2.295
Episode: 12424, episode steps: 10, avg. reward: 9.34, loss_i: 7.593, loss_h: 2.169, loss_o: 2.232
Episode: 12425, episode steps: 10, avg. reward: 9.35, loss_i: 8.386, loss_h: 2.464, loss_o: 2.607
Episode: 12426, episode steps: 9, avg. reward: 9.35, loss_i: 9.309, loss_h: 2.951, loss_o: 2.26
Episode: 12427, episode steps: 8, avg. reward: 9.34, loss_i: 8.895, loss_h: 2.677, loss_o: 2.257
Episode: 12428, episode steps: 10, avg. reward: 9.34, loss_i: 8.713, loss_h: 2.526, loss_o: 2.451
Episode: 12429, episode steps: 8, avg. reward: 9.34, loss_i: 7.385, loss_h: 2.601, loss_o: 2.366
Episode: 12430, episode steps: 10, avg. reward: 9.34, loss_i: 9.052, loss_h: 3.057, loss_o: 2.287
Episode: 12431, episode steps: 9, avg. reward: 9.34, loss_i: 8.293, loss_h: 2.79, loss_o: 2.319
Episode: 12432, episode step

Episode: 12507, episode steps: 8, avg. reward: 9.34, loss_i: 8.543, loss_h: 3.451, loss_o: 2.229
Episode: 12508, episode steps: 9, avg. reward: 9.34, loss_i: 7.519, loss_h: 3.738, loss_o: 2.916
Episode: 12509, episode steps: 10, avg. reward: 9.34, loss_i: 7.903, loss_h: 3.217, loss_o: 2.563
Episode: 12510, episode steps: 11, avg. reward: 9.34, loss_i: 7.74, loss_h: 3.185, loss_o: 2.64
Episode: 12511, episode steps: 9, avg. reward: 9.34, loss_i: 7.984, loss_h: 3.742, loss_o: 2.883
Episode: 12512, episode steps: 10, avg. reward: 9.34, loss_i: 7.625, loss_h: 3.574, loss_o: 2.305
Episode: 12513, episode steps: 9, avg. reward: 9.34, loss_i: 7.696, loss_h: 3.134, loss_o: 2.389
Episode: 12514, episode steps: 9, avg. reward: 9.34, loss_i: 7.6, loss_h: 3.652, loss_o: 2.822
Episode: 12515, episode steps: 8, avg. reward: 9.34, loss_i: 7.182, loss_h: 2.942, loss_o: 2.931
Episode: 12516, episode steps: 11, avg. reward: 9.34, loss_i: 6.998, loss_h: 3.344, loss_o: 2.53
Episode: 12517, episode steps: 

Episode: 12592, episode steps: 9, avg. reward: 9.34, loss_i: 7.634, loss_h: 3.75, loss_o: 2.727
Episode: 12593, episode steps: 10, avg. reward: 9.34, loss_i: 8.767, loss_h: 3.854, loss_o: 2.82
Episode: 12594, episode steps: 11, avg. reward: 9.35, loss_i: 6.477, loss_h: 3.465, loss_o: 2.573
Episode: 12595, episode steps: 8, avg. reward: 9.34, loss_i: 8.598, loss_h: 323.212, loss_o: 2.776
Episode: 12596, episode steps: 10, avg. reward: 9.35, loss_i: 6.233, loss_h: 3.863, loss_o: 2.735
Episode: 12597, episode steps: 10, avg. reward: 9.35, loss_i: 6.901, loss_h: 4.57, loss_o: 2.344
Episode: 12598, episode steps: 11, avg. reward: 9.35, loss_i: 6.943, loss_h: 4.112, loss_o: 2.345
Episode: 12599, episode steps: 10, avg. reward: 9.35, loss_i: 7.858, loss_h: 3.949, loss_o: 2.174
Episode: 12600, episode steps: 9, avg. reward: 9.35, loss_i: 7.62, loss_h: 4.128, loss_o: 2.537
Episode: 12601, episode steps: 10, avg. reward: 9.35, loss_i: 8.659, loss_h: 4.596, loss_o: 2.683
Episode: 12602, episode s

Episode: 12677, episode steps: 8, avg. reward: 9.34, loss_i: 7.274, loss_h: 4.56, loss_o: 2.588
Episode: 12678, episode steps: 11, avg. reward: 9.35, loss_i: 6.545, loss_h: 4.509, loss_o: 2.667
Episode: 12679, episode steps: 8, avg. reward: 9.34, loss_i: 7.052, loss_h: 5.041, loss_o: 2.687
Episode: 12680, episode steps: 9, avg. reward: 9.34, loss_i: 7.04, loss_h: 4.224, loss_o: 2.598
Episode: 12681, episode steps: 9, avg. reward: 9.34, loss_i: 7.108, loss_h: 4.924, loss_o: 2.559
Episode: 12682, episode steps: 10, avg. reward: 9.34, loss_i: 7.672, loss_h: 4.471, loss_o: 2.929
Episode: 12683, episode steps: 10, avg. reward: 9.35, loss_i: 6.788, loss_h: 4.322, loss_o: 2.697
Episode: 12684, episode steps: 9, avg. reward: 9.35, loss_i: 5.871, loss_h: 4.455, loss_o: 2.391
Episode: 12685, episode steps: 10, avg. reward: 9.35, loss_i: 7.976, loss_h: 5.184, loss_o: 2.976
Episode: 12686, episode steps: 9, avg. reward: 9.35, loss_i: 7.279, loss_h: 4.107, loss_o: 2.855
Episode: 12687, episode step

Episode: 12762, episode steps: 10, avg. reward: 9.35, loss_i: 6.039, loss_h: 5.1, loss_o: 2.576
Episode: 12763, episode steps: 10, avg. reward: 9.35, loss_i: 7.198, loss_h: 5.607, loss_o: 2.774
Episode: 12764, episode steps: 9, avg. reward: 9.35, loss_i: 7.556, loss_h: 5.878, loss_o: 2.389
Episode: 12765, episode steps: 8, avg. reward: 9.34, loss_i: 7.241, loss_h: 5.37, loss_o: 2.462
Episode: 12766, episode steps: 9, avg. reward: 9.34, loss_i: 6.106, loss_h: 5.368, loss_o: 2.331
Episode: 12767, episode steps: 9, avg. reward: 9.34, loss_i: 6.94, loss_h: 5.837, loss_o: 2.232
Episode: 12768, episode steps: 10, avg. reward: 9.34, loss_i: 7.342, loss_h: 5.551, loss_o: 2.308
Episode: 12769, episode steps: 9, avg. reward: 9.34, loss_i: 8.23, loss_h: 5.631, loss_o: 2.451
Episode: 12770, episode steps: 8, avg. reward: 9.34, loss_i: 7.429, loss_h: 5.634, loss_o: 2.619
Episode: 12771, episode steps: 9, avg. reward: 9.34, loss_i: 6.948, loss_h: 6.333, loss_o: 2.322
Episode: 12772, episode steps: 9

Episode: 12845, episode steps: 9, avg. reward: 9.34, loss_i: 5.831, loss_h: 2.294, loss_o: 2.954
Episode: 12846, episode steps: 10, avg. reward: 9.35, loss_i: 6.739, loss_h: 2.331, loss_o: 2.494
Episode: 12847, episode steps: 10, avg. reward: 9.35, loss_i: 5.416, loss_h: 2.629, loss_o: 2.477
Episode: 12848, episode steps: 8, avg. reward: 9.34, loss_i: 5.483, loss_h: 2.303, loss_o: 2.639
Episode: 12849, episode steps: 10, avg. reward: 9.35, loss_i: 6.213, loss_h: 2.388, loss_o: 2.843
Episode: 12850, episode steps: 10, avg. reward: 9.35, loss_i: 6.463, loss_h: 2.813, loss_o: 2.286
Episode: 12851, episode steps: 10, avg. reward: 9.35, loss_i: 7.434, loss_h: 2.254, loss_o: 2.684
Episode: 12852, episode steps: 10, avg. reward: 9.35, loss_i: 5.586, loss_h: 2.47, loss_o: 2.557
Episode: 12853, episode steps: 10, avg. reward: 9.35, loss_i: 6.833, loss_h: 2.743, loss_o: 2.307
Episode: 12854, episode steps: 9, avg. reward: 9.35, loss_i: 7.005, loss_h: 2.332, loss_o: 2.542
Episode: 12855, episode 

Episode: 12930, episode steps: 9, avg. reward: 9.34, loss_i: 7.07, loss_h: 2.389, loss_o: 2.826
Episode: 12931, episode steps: 8, avg. reward: 9.34, loss_i: 6.483, loss_h: 2.49, loss_o: 2.606
Episode: 12932, episode steps: 8, avg. reward: 9.33, loss_i: 6.511, loss_h: 2.581, loss_o: 2.319
Episode: 12933, episode steps: 9, avg. reward: 9.33, loss_i: 7.29, loss_h: 2.72, loss_o: 2.589
Episode: 12934, episode steps: 10, avg. reward: 9.34, loss_i: 7.526, loss_h: 2.383, loss_o: 2.912
Episode: 12935, episode steps: 10, avg. reward: 9.34, loss_i: 6.446, loss_h: 2.464, loss_o: 2.614
Episode: 12936, episode steps: 8, avg. reward: 9.33, loss_i: 6.666, loss_h: 2.604, loss_o: 2.531
Episode: 12937, episode steps: 8, avg. reward: 9.33, loss_i: 6.959, loss_h: 2.483, loss_o: 3.278
Episode: 12938, episode steps: 9, avg. reward: 9.33, loss_i: 7.689, loss_h: 2.734, loss_o: 2.561
Episode: 12939, episode steps: 10, avg. reward: 9.33, loss_i: 5.903, loss_h: 2.372, loss_o: 2.797
Episode: 12940, episode steps: 

  0%|          | 1/1000 [00:00<01:52,  8.88it/s]

Episode: 13000, episode steps: 9, avg. reward: 9.33, loss_i: 6.278, loss_h: 2.371, loss_o: 2.881
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 6.253299829446608, 'critic_2_loss': 6.302172913733456, 'policy_loss': -14.45803870095147, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 2.3701447522377266, 'critic_2_loss': 2.372157480640739, 'policy_loss': -2.2192799548308053, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 2.8785620265536838, 'critic_2_loss': 2.8826390637291803, 'policy_loss': -3.7287402947743735, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.93, 0.4, 0.86, 0.09, 0.19, 0.07, 1.0, 0.01]
o: [1.0]
UPDATES
i: [1794, 1794, 1794, 1794]
h: [1527, 780, 1522, 225, 0, 0, 1789, 0]
o: [1794]
MEMORY
i: [1859, 1859, 1859, 1859]
h: [1397, 491, 1347, 206, 10, 1

100%|██████████| 1000/1000 [01:55<00:00,  8.69it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.35, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 13001, episode steps: 10, avg. reward: 10.0, loss_i: 6.232, loss_h: 2.317, loss_o: 3.049
Episode: 13002, episode steps: 9, avg. reward: 9.5, loss_i: 6.764, loss_h: 2.539, loss_o: 2.881
Episode: 13003, episode steps: 8, avg. reward: 9.0, loss_i: 7.457, loss_h: 2.617, loss_o: 2.574
Episode: 13004, episode steps: 10, avg. reward: 9.25, loss_i: 7.826, loss_h: 2.685, loss_o: 2.596
Episode: 13005, episode steps: 9, avg. reward: 9.2, loss_i: 7.753, loss_h: 2.362, loss_o: 2.682
Episode: 13006, episode steps: 9, avg. reward: 9.17, loss_i: 7.882, loss_h: 2.706, loss_o: 3.017
Episode: 13007, episode steps: 10, avg. reward: 9.29, loss_i: 7.408, loss_h: 2.705, loss_o: 2.822
Episode: 13008, episode steps: 9, avg. reward: 9.25, loss_i: 7.213, loss_h: 2.323, loss_o: 2.801
Epis

Episode: 13084, episode steps: 9, avg. reward: 9.35, loss_i: 7.043, loss_h: 2.894, loss_o: 2.69
Episode: 13085, episode steps: 8, avg. reward: 9.33, loss_i: 6.613, loss_h: 2.867, loss_o: 2.654
Episode: 13086, episode steps: 10, avg. reward: 9.34, loss_i: 6.584, loss_h: 2.413, loss_o: 2.69
Episode: 13087, episode steps: 9, avg. reward: 9.33, loss_i: 5.857, loss_h: 2.861, loss_o: 2.792
Episode: 13088, episode steps: 10, avg. reward: 9.34, loss_i: 8.221, loss_h: 2.389, loss_o: 2.348
Episode: 13089, episode steps: 10, avg. reward: 9.35, loss_i: 7.406, loss_h: 2.488, loss_o: 2.881
Episode: 13090, episode steps: 9, avg. reward: 9.34, loss_i: 7.398, loss_h: 2.526, loss_o: 2.916
Episode: 13091, episode steps: 9, avg. reward: 9.34, loss_i: 6.677, loss_h: 2.503, loss_o: 2.519
Episode: 13092, episode steps: 8, avg. reward: 9.33, loss_i: 6.554, loss_h: 3.11, loss_o: 2.645
Episode: 13093, episode steps: 10, avg. reward: 9.33, loss_i: 7.636, loss_h: 2.696, loss_o: 2.72
Episode: 13094, episode steps:

Episode: 13169, episode steps: 8, avg. reward: 9.28, loss_i: 7.84, loss_h: 247.174, loss_o: 2.745
Episode: 13170, episode steps: 9, avg. reward: 9.28, loss_i: 8.154, loss_h: 2.643, loss_o: 2.808
Episode: 13171, episode steps: 10, avg. reward: 9.29, loss_i: 6.916, loss_h: 2.3, loss_o: 2.855
Episode: 13172, episode steps: 10, avg. reward: 9.29, loss_i: 7.656, loss_h: 2.512, loss_o: 2.912
Episode: 13173, episode steps: 9, avg. reward: 9.29, loss_i: 6.105, loss_h: 2.324, loss_o: 2.961
Episode: 13174, episode steps: 10, avg. reward: 9.29, loss_i: 7.55, loss_h: 2.32, loss_o: 2.729
Episode: 13175, episode steps: 8, avg. reward: 9.29, loss_i: 7.107, loss_h: 2.293, loss_o: 2.598
Episode: 13176, episode steps: 10, avg. reward: 9.29, loss_i: 6.588, loss_h: 2.477, loss_o: 2.48
Episode: 13177, episode steps: 11, avg. reward: 9.3, loss_i: 6.896, loss_h: 2.631, loss_o: 2.403
Episode: 13178, episode steps: 10, avg. reward: 9.3, loss_i: 6.847, loss_h: 2.842, loss_o: 2.506
Episode: 13179, episode steps:

Episode: 13252, episode steps: 10, avg. reward: 9.31, loss_i: 6.612, loss_h: 2.092, loss_o: 2.267
Episode: 13253, episode steps: 10, avg. reward: 9.32, loss_i: 5.492, loss_h: 2.017, loss_o: 2.416
Episode: 13254, episode steps: 9, avg. reward: 9.31, loss_i: 5.862, loss_h: 1.76, loss_o: 2.335
Episode: 13255, episode steps: 10, avg. reward: 9.32, loss_i: 6.9, loss_h: 2.114, loss_o: 2.294
Episode: 13256, episode steps: 10, avg. reward: 9.32, loss_i: 5.655, loss_h: 1.671, loss_o: 2.654
Episode: 13257, episode steps: 11, avg. reward: 9.33, loss_i: 6.102, loss_h: 1.808, loss_o: 2.325
Episode: 13258, episode steps: 9, avg. reward: 9.33, loss_i: 6.225, loss_h: 1.626, loss_o: 2.304
Episode: 13259, episode steps: 9, avg. reward: 9.32, loss_i: 6.337, loss_h: 1.905, loss_o: 2.178
Episode: 13260, episode steps: 10, avg. reward: 9.33, loss_i: 5.458, loss_h: 1.675, loss_o: 2.414
Episode: 13261, episode steps: 9, avg. reward: 9.33, loss_i: 4.559, loss_h: 2.114, loss_o: 2.552
Episode: 13262, episode ste

Episode: 13337, episode steps: 9, avg. reward: 9.34, loss_i: 6.435, loss_h: 1.892, loss_o: 2.634
Episode: 13338, episode steps: 9, avg. reward: 9.34, loss_i: 6.193, loss_h: 1.853, loss_o: 2.513
Episode: 13339, episode steps: 9, avg. reward: 9.34, loss_i: 6.954, loss_h: 1.882, loss_o: 2.454
Episode: 13340, episode steps: 8, avg. reward: 9.33, loss_i: 6.925, loss_h: 184.078, loss_o: 2.689
Episode: 13341, episode steps: 9, avg. reward: 9.33, loss_i: 6.485, loss_h: 1.91, loss_o: 2.623
Episode: 13342, episode steps: 10, avg. reward: 9.33, loss_i: 6.514, loss_h: 2.368, loss_o: 2.255
Episode: 13343, episode steps: 10, avg. reward: 9.34, loss_i: 6.467, loss_h: 1.971, loss_o: 2.257
Episode: 13344, episode steps: 9, avg. reward: 9.33, loss_i: 6.354, loss_h: 1.853, loss_o: 2.708
Episode: 13345, episode steps: 10, avg. reward: 9.34, loss_i: 5.706, loss_h: 2.313, loss_o: 2.239
Episode: 13346, episode steps: 9, avg. reward: 9.34, loss_i: 6.039, loss_h: 1.812, loss_o: 2.642
Episode: 13347, episode st

Episode: 13422, episode steps: 9, avg. reward: 9.35, loss_i: 6.297, loss_h: 1.785, loss_o: 2.141
Episode: 13423, episode steps: 9, avg. reward: 9.35, loss_i: 6.055, loss_h: 1.807, loss_o: 2.183
Episode: 13424, episode steps: 9, avg. reward: 9.35, loss_i: 6.447, loss_h: 2.169, loss_o: 2.432
Episode: 13425, episode steps: 8, avg. reward: 9.35, loss_i: 6.921, loss_h: 2.045, loss_o: 2.219
Episode: 13426, episode steps: 8, avg. reward: 9.34, loss_i: 6.632, loss_h: 1.683, loss_o: 2.207
Episode: 13427, episode steps: 8, avg. reward: 9.34, loss_i: 5.425, loss_h: 2.311, loss_o: 2.372
Episode: 13428, episode steps: 10, avg. reward: 9.34, loss_i: 6.987, loss_h: 2.032, loss_o: 2.335
Episode: 13429, episode steps: 10, avg. reward: 9.34, loss_i: 6.719, loss_h: 1.82, loss_o: 2.075
Episode: 13430, episode steps: 9, avg. reward: 9.34, loss_i: 6.758, loss_h: 2.155, loss_o: 2.255
Episode: 13431, episode steps: 9, avg. reward: 9.34, loss_i: 6.664, loss_h: 2.13, loss_o: 2.46
Episode: 13432, episode steps: 

Episode: 13507, episode steps: 9, avg. reward: 9.37, loss_i: 6.13, loss_h: 1.779, loss_o: 2.657
Episode: 13508, episode steps: 10, avg. reward: 9.37, loss_i: 5.624, loss_h: 207.316, loss_o: 2.554
Episode: 13509, episode steps: 9, avg. reward: 9.37, loss_i: 6.431, loss_h: 2.179, loss_o: 2.303
Episode: 13510, episode steps: 10, avg. reward: 9.37, loss_i: 6.528, loss_h: 2.143, loss_o: 2.442
Episode: 13511, episode steps: 9, avg. reward: 9.37, loss_i: 6.186, loss_h: 1.913, loss_o: 2.387
Episode: 13512, episode steps: 9, avg. reward: 9.37, loss_i: 6.45, loss_h: 2.098, loss_o: 2.596
Episode: 13513, episode steps: 10, avg. reward: 9.37, loss_i: 7.009, loss_h: 1.949, loss_o: 2.616
Episode: 13514, episode steps: 9, avg. reward: 9.37, loss_i: 7.43, loss_h: 1.518, loss_o: 2.317
Episode: 13515, episode steps: 8, avg. reward: 9.37, loss_i: 6.68, loss_h: 1.772, loss_o: 2.311
Episode: 13516, episode steps: 9, avg. reward: 9.36, loss_i: 7.427, loss_h: 1.953, loss_o: 2.545
Episode: 13517, episode steps

Episode: 13592, episode steps: 10, avg. reward: 9.34, loss_i: 7.342, loss_h: 182.035, loss_o: 2.241
Episode: 13593, episode steps: 8, avg. reward: 9.34, loss_i: 6.375, loss_h: 1.843, loss_o: 2.501
Episode: 13594, episode steps: 8, avg. reward: 9.34, loss_i: 5.507, loss_h: 1.892, loss_o: 2.381
Episode: 13595, episode steps: 10, avg. reward: 9.34, loss_i: 7.04, loss_h: 1.887, loss_o: 2.166
Episode: 13596, episode steps: 9, avg. reward: 9.34, loss_i: 7.625, loss_h: 166.011, loss_o: 2.265
Episode: 13597, episode steps: 10, avg. reward: 9.34, loss_i: 6.64, loss_h: 2.031, loss_o: 2.062
Episode: 13598, episode steps: 8, avg. reward: 9.34, loss_i: 6.572, loss_h: 2.096, loss_o: 2.239
Episode: 13599, episode steps: 10, avg. reward: 9.34, loss_i: 5.467, loss_h: 213.523, loss_o: 2.474
Episode: 13600, episode steps: 10, avg. reward: 9.34, loss_i: 7.175, loss_h: 2.264, loss_o: 2.185
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True 

Episode: 13674, episode steps: 10, avg. reward: 9.35, loss_i: 6.445, loss_h: 1.684, loss_o: 1.466
Episode: 13675, episode steps: 10, avg. reward: 9.35, loss_i: 7.001, loss_h: 1.88, loss_o: 1.533
Episode: 13676, episode steps: 10, avg. reward: 9.35, loss_i: 6.326, loss_h: 1.511, loss_o: 1.422
Episode: 13677, episode steps: 11, avg. reward: 9.35, loss_i: 6.532, loss_h: 1.578, loss_o: 1.556
Episode: 13678, episode steps: 9, avg. reward: 9.35, loss_i: 7.662, loss_h: 2.031, loss_o: 1.547
Episode: 13679, episode steps: 9, avg. reward: 9.35, loss_i: 6.745, loss_h: 1.581, loss_o: 1.721
Episode: 13680, episode steps: 11, avg. reward: 9.35, loss_i: 7.526, loss_h: 228.076, loss_o: 1.962
Episode: 13681, episode steps: 10, avg. reward: 9.35, loss_i: 7.019, loss_h: 182.192, loss_o: 1.652
Episode: 13682, episode steps: 10, avg. reward: 9.35, loss_i: 7.166, loss_h: 1.334, loss_o: 1.525
Episode: 13683, episode steps: 10, avg. reward: 9.36, loss_i: 6.975, loss_h: 1.422, loss_o: 1.493
Episode: 13684, epi

Episode: 13758, episode steps: 10, avg. reward: 9.36, loss_i: 6.914, loss_h: 1.425, loss_o: 1.689
Episode: 13759, episode steps: 10, avg. reward: 9.36, loss_i: 7.477, loss_h: 1.8, loss_o: 1.488
Episode: 13760, episode steps: 10, avg. reward: 9.36, loss_i: 7.329, loss_h: 1.409, loss_o: 1.727
Episode: 13761, episode steps: 9, avg. reward: 9.36, loss_i: 7.791, loss_h: 1.39, loss_o: 1.657
Episode: 13762, episode steps: 10, avg. reward: 9.36, loss_i: 7.301, loss_h: 1.497, loss_o: 1.635
Episode: 13763, episode steps: 8, avg. reward: 9.36, loss_i: 7.624, loss_h: 1.641, loss_o: 1.779
Episode: 13764, episode steps: 9, avg. reward: 9.36, loss_i: 6.638, loss_h: 180.59, loss_o: 1.334
Episode: 13765, episode steps: 9, avg. reward: 9.36, loss_i: 6.98, loss_h: 179.408, loss_o: 1.469
Episode: 13766, episode steps: 10, avg. reward: 9.36, loss_i: 6.627, loss_h: 1.692, loss_o: 1.556
Episode: 13767, episode steps: 10, avg. reward: 9.36, loss_i: 7.187, loss_h: 1.587, loss_o: 1.713
Episode: 13768, episode s

Episode: 13842, episode steps: 8, avg. reward: 9.36, loss_i: 7.715, loss_h: 159.057, loss_o: 1.416
Episode: 13843, episode steps: 9, avg. reward: 9.36, loss_i: 6.078, loss_h: 151.144, loss_o: 1.51
Episode: 13844, episode steps: 10, avg. reward: 9.36, loss_i: 7.433, loss_h: 1.632, loss_o: 1.421
Episode: 13845, episode steps: 8, avg. reward: 9.36, loss_i: 8.355, loss_h: 1.744, loss_o: 1.691
Episode: 13846, episode steps: 9, avg. reward: 9.36, loss_i: 8.384, loss_h: 174.003, loss_o: 1.443
Episode: 13847, episode steps: 9, avg. reward: 9.36, loss_i: 7.768, loss_h: 154.508, loss_o: 1.733
Episode: 13848, episode steps: 10, avg. reward: 9.36, loss_i: 7.793, loss_h: 1.519, loss_o: 1.652
Episode: 13849, episode steps: 8, avg. reward: 9.36, loss_i: 9.052, loss_h: 117.412, loss_o: 1.699
Episode: 13850, episode steps: 10, avg. reward: 9.36, loss_i: 7.195, loss_h: 1.581, loss_o: 1.526
Episode: 13851, episode steps: 11, avg. reward: 9.36, loss_i: 6.982, loss_h: 1.798, loss_o: 1.56
Episode: 13852, ep

Episode: 13926, episode steps: 9, avg. reward: 9.36, loss_i: 7.448, loss_h: 170.484, loss_o: 1.787
Episode: 13927, episode steps: 8, avg. reward: 9.35, loss_i: 7.575, loss_h: 1.713, loss_o: 1.354
Episode: 13928, episode steps: 9, avg. reward: 9.35, loss_i: 7.154, loss_h: 1.591, loss_o: 1.46
Episode: 13929, episode steps: 10, avg. reward: 9.35, loss_i: 6.5, loss_h: 1.87, loss_o: 1.437
Episode: 13930, episode steps: 9, avg. reward: 9.35, loss_i: 7.016, loss_h: 1.987, loss_o: 1.366
Episode: 13931, episode steps: 10, avg. reward: 9.35, loss_i: 7.001, loss_h: 1.665, loss_o: 1.54
Episode: 13932, episode steps: 10, avg. reward: 9.36, loss_i: 8.22, loss_h: 233.636, loss_o: 1.5
Episode: 13933, episode steps: 9, avg. reward: 9.35, loss_i: 7.574, loss_h: 195.287, loss_o: 1.397
Episode: 13934, episode steps: 9, avg. reward: 9.35, loss_i: 7.318, loss_h: 1.735, loss_o: 1.815
Episode: 13935, episode steps: 10, avg. reward: 9.36, loss_i: 7.366, loss_h: 2.093, loss_o: 1.527
Episode: 13936, episode step

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 14000, episode steps: 9, avg. reward: 9.36, loss_i: 7.091, loss_h: 177.176, loss_o: 1.721
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True  True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 7.082986945493354, 'critic_2_loss': 7.098145727482107, 'policy_loss': -15.311527106497023, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 177.4490812383592, 'critic_2_loss': 176.9030959494412, 'policy_loss': -267.26223742961884, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 1.7245443397098117, 'critic_2_loss': 1.7176083591249254, 'policy_loss': -3.2426454491085477, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1

100%|██████████| 1000/1000 [01:55<00:00,  8.67it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.31, Max. Reward: 11.0
--------------------------------------------------------------------------------
Episode: 14001, episode steps: 9, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14002, episode steps: 10, avg. reward: 9.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14003, episode steps: 9, avg. reward: 9.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14004, episode steps: 9, avg. reward: 9.25, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14005, episode steps: 8, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14006, episode steps: 11, avg. reward: 9.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 14007, episode steps: 10, avg. reward: 9.43, loss_i: 70.276, loss_h: nan, loss_o: 3.432
Episode: 14008, episode steps: 9, avg. reward: 9.38, loss_i: 7.136, loss_h: 170.861, loss_o: 1.656
Episode: 14009, episode steps: 9, avg. 

Episode: 14083, episode steps: 10, avg. reward: 9.45, loss_i: 7.978, loss_h: 201.682, loss_o: 1.705
Episode: 14084, episode steps: 9, avg. reward: 9.44, loss_i: 9.496, loss_h: 194.412, loss_o: 1.813
Episode: 14085, episode steps: 10, avg. reward: 9.45, loss_i: 8.984, loss_h: 199.655, loss_o: 1.563
Episode: 14086, episode steps: 9, avg. reward: 9.44, loss_i: 7.627, loss_h: 1.999, loss_o: 1.898
Episode: 14087, episode steps: 9, avg. reward: 9.44, loss_i: 7.003, loss_h: 1.789, loss_o: 1.754
Episode: 14088, episode steps: 10, avg. reward: 9.44, loss_i: 7.448, loss_h: 181.257, loss_o: 1.609
Episode: 14089, episode steps: 8, avg. reward: 9.43, loss_i: 8.203, loss_h: 175.584, loss_o: 1.912
Episode: 14090, episode steps: 10, avg. reward: 9.43, loss_i: 6.908, loss_h: 1.679, loss_o: 1.695
Episode: 14091, episode steps: 8, avg. reward: 9.42, loss_i: 8.273, loss_h: 145.103, loss_o: 1.666
Episode: 14092, episode steps: 9, avg. reward: 9.41, loss_i: 7.552, loss_h: 219.349, loss_o: 1.661
Episode: 140

Episode: 14166, episode steps: 9, avg. reward: 9.34, loss_i: 8.091, loss_h: 212.169, loss_o: 1.883
Episode: 14167, episode steps: 9, avg. reward: 9.34, loss_i: 8.925, loss_h: 191.672, loss_o: 1.772
Episode: 14168, episode steps: 10, avg. reward: 9.35, loss_i: 7.826, loss_h: 188.917, loss_o: 1.726
Episode: 14169, episode steps: 8, avg. reward: 9.34, loss_i: 6.822, loss_h: 174.625, loss_o: 1.749
Episode: 14170, episode steps: 10, avg. reward: 9.34, loss_i: 9.818, loss_h: 185.991, loss_o: 1.761
Episode: 14171, episode steps: 10, avg. reward: 9.35, loss_i: 7.668, loss_h: 185.561, loss_o: 1.553
Episode: 14172, episode steps: 9, avg. reward: 9.34, loss_i: 7.986, loss_h: 165.251, loss_o: 1.646
Episode: 14173, episode steps: 9, avg. reward: 9.34, loss_i: 8.742, loss_h: 194.848, loss_o: 1.572
Episode: 14174, episode steps: 9, avg. reward: 9.34, loss_i: 7.336, loss_h: 185.323, loss_o: 1.954
Episode: 14175, episode steps: 10, avg. reward: 9.34, loss_i: 7.541, loss_h: 1.694, loss_o: 1.739
Episode:

Episode: 14249, episode steps: 10, avg. reward: 9.31, loss_i: 7.486, loss_h: 234.173, loss_o: 1.626
Episode: 14250, episode steps: 9, avg. reward: 9.31, loss_i: 9.014, loss_h: 171.329, loss_o: 1.743
Episode: 14251, episode steps: 10, avg. reward: 9.31, loss_i: 8.207, loss_h: 143.733, loss_o: 1.6
Episode: 14252, episode steps: 10, avg. reward: 9.32, loss_i: 8.984, loss_h: 167.169, loss_o: 1.342
Episode: 14253, episode steps: 9, avg. reward: 9.32, loss_i: 7.288, loss_h: 178.597, loss_o: 1.68
Episode: 14254, episode steps: 9, avg. reward: 9.31, loss_i: 9.497, loss_h: 157.958, loss_o: 1.696
Episode: 14255, episode steps: 9, avg. reward: 9.31, loss_i: 7.41, loss_h: 144.462, loss_o: 1.44
Episode: 14256, episode steps: 9, avg. reward: 9.31, loss_i: 7.843, loss_h: 147.505, loss_o: 1.56
Episode: 14257, episode steps: 10, avg. reward: 9.32, loss_i: 8.113, loss_h: 1.607, loss_o: 1.643
Episode: 14258, episode steps: 10, avg. reward: 9.32, loss_i: 8.393, loss_h: 160.298, loss_o: 1.849
Episode: 1425

Episode: 14332, episode steps: 8, avg. reward: 9.33, loss_i: 7.428, loss_h: 1.187, loss_o: 1.742
Episode: 14333, episode steps: 9, avg. reward: 9.33, loss_i: 10.12, loss_h: 127.845, loss_o: 1.869
Episode: 14334, episode steps: 9, avg. reward: 9.33, loss_i: 8.284, loss_h: 139.022, loss_o: 1.681
Episode: 14335, episode steps: 10, avg. reward: 9.33, loss_i: 7.348, loss_h: 144.033, loss_o: 1.582
Episode: 14336, episode steps: 10, avg. reward: 9.33, loss_i: 7.98, loss_h: 149.641, loss_o: 1.79
Episode: 14337, episode steps: 9, avg. reward: 9.33, loss_i: 8.892, loss_h: 167.963, loss_o: 1.849
Episode: 14338, episode steps: 9, avg. reward: 9.33, loss_i: 9.06, loss_h: 1.444, loss_o: 1.565
Episode: 14339, episode steps: 11, avg. reward: 9.33, loss_i: 8.337, loss_h: 184.576, loss_o: 1.543
Episode: 14340, episode steps: 8, avg. reward: 9.33, loss_i: 8.977, loss_h: 135.471, loss_o: 1.849
Episode: 14341, episode steps: 8, avg. reward: 9.33, loss_i: 8.248, loss_h: 126.039, loss_o: 1.563
Episode: 14342

Episode: 14414, episode steps: 11, avg. reward: 9.32, loss_i: 6.579, loss_h: 167.972, loss_o: 1.923
Episode: 14415, episode steps: 10, avg. reward: 9.32, loss_i: 5.944, loss_h: 125.535, loss_o: 2.069
Episode: 14416, episode steps: 10, avg. reward: 9.32, loss_i: 6.936, loss_h: 121.748, loss_o: 1.927
Episode: 14417, episode steps: 10, avg. reward: 9.32, loss_i: 6.72, loss_h: 126.991, loss_o: 2.107
Episode: 14418, episode steps: 10, avg. reward: 9.33, loss_i: 6.82, loss_h: 109.54, loss_o: 1.897
Episode: 14419, episode steps: 9, avg. reward: 9.32, loss_i: 6.088, loss_h: 96.331, loss_o: 1.929
Episode: 14420, episode steps: 8, avg. reward: 9.32, loss_i: 6.289, loss_h: 89.992, loss_o: 1.933
Episode: 14421, episode steps: 10, avg. reward: 9.32, loss_i: 6.598, loss_h: 95.186, loss_o: 2.002
Episode: 14422, episode steps: 9, avg. reward: 9.32, loss_i: 7.064, loss_h: 139.422, loss_o: 2.083
Episode: 14423, episode steps: 8, avg. reward: 9.32, loss_i: 6.98, loss_h: 115.31, loss_o: 2.076
Episode: 144