In [1]:
from sac import SAC
from replay_memory import ReplayMemory
import gym
from gym import spaces
from addict import Dict
import numpy as np
import itertools
from scipy.special import softmax
import random
from tqdm import tqdm

In [2]:
WARM_UP = 0
MEMORY_SIZE = 10000
ACT_THRESHOLD = 0
BATCH_SIZE = 64
UPDATES_PER_STEP = 1
PRINT_FREQ = 1000
TEST_EPISODES = 1000
MIN_REQUIRED_UPDATES = 0

In [3]:
num_inputs = 4
num_hidden = 4
num_outputs = 1
bandwidth = 3

In [4]:
env = gym.make('CartPole-v0')
env.action_space.shape = (1,)
env.action_space.high = np.array([1])
env.action_space.low = np.array([0])

In [5]:
input_layer_action_space = gym.spaces.Box(low=0, high=1, shape=(num_hidden + bandwidth,))
hidden_layer_action_space = gym.spaces.Box(low=0, high=1, shape=(num_outputs + bandwidth,))
output_layer_action_space = env.action_space



In [6]:
args = Dict()
args.gamma = 0.99
args.tau = 1
args.alpha = 0.0
args.policy = 'Gaussian'
args.target_update_interval = 400
args.automatic_entropy_tuning = False
args.cuda = False
args.hidden_size = 256
args.lr = 0.003

In [7]:
input_layer = [SAC(1, input_layer_action_space, args) for _ in range(num_inputs)]
input_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_inputs)]
input_updates = [0 for _ in range(num_inputs)]
hidden_layer = [SAC(bandwidth, hidden_layer_action_space, args) for _ in range(num_hidden)]
hidden_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_hidden)]
hidden_updates = [0 for _ in range(num_hidden)]
output_layer = [SAC(bandwidth, output_layer_action_space, args) for _ in range(num_outputs)]
output_memory = [ReplayMemory(MEMORY_SIZE) for _ in range(num_outputs)]
output_updates = [0 for _ in range(num_outputs)]

In [8]:
def eval_layers(input_state):
    isa = [tuple() for _ in range(num_inputs)]
    hsa = [tuple() for _ in range(num_hidden)]
    osa = [tuple() for _ in range(num_outputs)]
    input_actions = [agent.select_action(input_state[i:i+1]) for i, agent in enumerate(input_layer)]
    for i, input_action in enumerate(input_actions):
        input_action[:num_hidden] = softmax(input_action[:num_hidden])
        if max(input_action[:num_hidden]) > ACT_THRESHOLD:
            isa[i] = (input_state[i:i+1], input_action, True)
            input_actions[i] = (np.argmax(input_action[:num_hidden]), input_action[num_hidden:])
        else:
            isa[i] = (input_state[i:i+1], input_action, False)
            input_actions[i] = tuple()
    hidden_state = [tuple() for _ in range(num_hidden)]
    for input_action in input_actions:
        try:
            hidden_i, hidden_msg = input_action
        except:
            continue
        try:
            hidden_state[hidden_i] += hidden_msg
        except:
            hidden_state[hidden_i] = hidden_msg
    hidden_actions = []
    for i, _hidden_state in enumerate(hidden_state):
        if len(_hidden_state):
            hidden_action = hidden_layer[i].select_action(softmax(hidden_state[i])) 
            if hidden_action[0] > ACT_THRESHOLD:
                hsa[i] = (_hidden_state, hidden_action, True)
                hidden_actions.append((0, hidden_action[1:]))
            else:
                hsa[i] = (_hidden_state, hidden_action, False)
                hidden_actions.append(tuple())
    output_state = [tuple() for _ in range(num_outputs)]
    for hidden_action in hidden_actions:
        try:
            output_i, output_msg = hidden_action
        except:
            continue
        try:
            output_state[output_i] += output_msg
        except:
            output_state[output_i] = output_msg
    output_actions = [agent.select_action(softmax(output_state[i])) for i, agent in enumerate(output_layer) if len(output_state[i])]
    if output_actions:
        osa = [(output_state[0], output_actions[0], True)]
    else:
        osa = [tuple()]
    inner_activations = Dict()
    inner_activations.isa = isa
    inner_activations.osa = osa
    inner_activations.hsa = hsa
    try:
        if output_actions[0] > 0.5:
            return 1, inner_activations
        else:
            return 0, inner_activations
    except:
        return random.randint(0, 1), inner_activations

In [9]:
def sample_layers(input_state):
    isa = [tuple() for _ in range(num_inputs)]
    hsa = [tuple() for _ in range(num_hidden)]
    osa = [tuple() for _ in range(num_outputs)]
    input_actions = [input_layer_action_space.sample() for _ in input_layer]
    for i, input_action in enumerate(input_actions):
        input_action[:num_hidden] = softmax(input_action[:num_hidden])
        if max(input_action[:num_hidden]) > ACT_THRESHOLD:
            isa[i] = (input_state[i:i+1], input_action, True)
            input_actions[i] = (np.argmax(input_action[:num_hidden]), input_action[num_hidden:])
        else:
            isa[i] = (input_state[i:i+1], input_action, False)
            input_actions[i] = tuple()
    hidden_state = [tuple() for _ in range(num_hidden)]
    for input_action in input_actions:
        try:
            hidden_i, hidden_msg = input_action
        except:
            continue
        try:
            hidden_state[hidden_i] += hidden_msg
        except:
            hidden_state[hidden_i] = hidden_msg
    hidden_actions = []
    for i, _hidden_state in enumerate(hidden_state):
        if len(_hidden_state):
            hidden_action = hidden_layer_action_space.sample()
            if hidden_action[0] > ACT_THRESHOLD:
                hidden_actions.append((0, hidden_action[1:]))
                hsa[i] = (_hidden_state, hidden_action, True)
            else:
                hidden_actions.append(tuple())
                hsa[i] = (_hidden_state, hidden_action, False)
    output_state = [tuple() for _ in range(num_outputs)]
    for hidden_action in hidden_actions:
        try:
            output_i, output_msg = hidden_action
        except:
            continue
        try:
            output_state[output_i] += output_msg
        except:
            output_state[output_i] = output_msg
    output_actions = [np.array([output_layer_action_space.sample()]) for i, _ in enumerate(output_layer) if len(output_state[i])]
    osa = [(output_state[0], output_actions[0], True)]
    inner_activations = Dict()
    inner_activations.isa = isa
    inner_activations.osa = osa
    inner_activations.hsa = hsa
    try:
        if output_actions[0] > 0.5:
            return 1, inner_activations
        else:
            return 0, inner_activations
    except:
        return random.randint(0, 1), inner_activations

In [10]:
def push_memory(inner_activations, reward, next_state, mask):
    isa = inner_activations.isa
    hsa = inner_activations.hsa
    osa = inner_activations.osa
    for i, ((state, action, flag), mem) in enumerate(zip(isa, input_memory)):
        _next_state = next_state[i:i+1]
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    next_action, next_inner_activations = eval_layers(next_state)
    for hsa, mem, next_hsa in zip(hsa, hidden_memory, next_inner_activations.hsa):
        try:
            state, action, flag = hsa
            _next_state, _, _ = next_hsa
        except:
            continue
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    for osa, mem, next_osa in zip(osa, output_memory, next_inner_activations.osa):
        try:
            state, action, flag = osa
            _next_state, _, _ = next_osa
        except:
            continue
        if flag:
            mem.push(state, action, reward, _next_state, mask)
        else:
            mem.push(state, action, 0, _next_state, mask)
    return next_action, next_inner_activations

In [11]:
def norm_stats(stats):
    for k in stats:
        stats[k] /= stats.cnt
    return stats
def average_stats(lst):
    avg = Dict()
    for stats in lst:
        for k in stats:
            avg[k] += stats[k]
        avg.cnt += 1
    return norm_stats(avg)
def get_avg_loss(train_stats):
    try:
        input_loss = (train_stats.input.critic_1_loss + train_stats.input.critic_2_loss)/2
    except:
        input_loss = float('nan')
    try:
        hidden_loss = (train_stats.hidden.critic_1_loss + train_stats.hidden.critic_2_loss)/2
    except:
        hidden_loss = float('nan')
    try:
        output_loss = (train_stats.output.critic_1_loss + train_stats.output.critic_2_loss)/2
    except:
        output_loss = float('nan')
    return input_loss, hidden_loss, output_loss

In [None]:
total_steps = 0
last_total_steps = 0
total_reward = 0.0
act_stats = Dict()
act_stats.input = np.array([0.0 for _ in input_layer])
act_stats.hidden = np.array([0.0 for _ in hidden_layer])
act_stats.output = np.array([0.0 for _ in output_layer])
for i_episode in itertools.count(1):
    episode_reward = 0
    episode_steps = 0
    done = False
    state = env.reset()
    inner_activations = {}
    action = None
    train_stats = Dict()
    train_stats.input = []
    train_stats.hidden = []
    train_stats.output = []
    while not done:
        if WARM_UP > total_steps:
            action, inner_activations = sample_layers(state)  # Sample random action
        else:
            if total_steps == WARM_UP:
                print('--WARM UP ENDED--'*3)
            if not inner_activations:
                action, inner_activations = eval_layers(state)  # Sample action from policy
        for i, isa in enumerate(inner_activations.isa):
            if len(isa):
                act_stats.input[i] += 1
        for i, hsa in enumerate(inner_activations.hsa):
            if len(hsa):
                act_stats.hidden[i] += 1
        for i, osa in enumerate(inner_activations.osa):
            if len(osa):
                act_stats.output[i] += 1

        # Number of updates per step in environment
        for i in range(UPDATES_PER_STEP):
            # Update parameters of all the networks
            for j, (agent, memory, updates) in enumerate(zip(input_layer, input_memory, input_updates)):
                input_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.isa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    input_stats.critic_1_loss += critic_1_loss
                    input_stats.critic_2_loss += critic_2_loss
                    input_stats.policy_loss += policy_loss
                    input_stats.ent_loss += ent_loss
                    input_stats.alpha += alpha
                    input_stats.cnt += 1
                    input_updates[j] += 1
                norm_stats(input_stats)
                train_stats.input.append(input_stats)
            for j, (agent, memory, updates) in enumerate(zip(hidden_layer, hidden_memory, hidden_updates)):
                hidden_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.hsa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    hidden_stats.critic_1_loss += critic_1_loss
                    hidden_stats.critic_2_loss += critic_2_loss
                    hidden_stats.policy_loss += policy_loss
                    hidden_stats.ent_loss += ent_loss
                    hidden_stats.alpha += alpha
                    hidden_stats.cnt += 1
                    hidden_updates[j] += 1
                norm_stats(hidden_stats)
                train_stats.hidden.append(hidden_stats)
            for j, (agent, memory, updates) in enumerate(zip(output_layer, output_memory, output_updates)):
                output_stats = Dict()
                if len(memory) > BATCH_SIZE and inner_activations.osa[j]:
                    critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = agent.update_parameters(memory, BATCH_SIZE, updates)
                    output_stats.critic_1_loss += critic_1_loss
                    output_stats.critic_2_loss += critic_2_loss
                    output_stats.policy_loss += policy_loss
                    output_stats.ent_loss += ent_loss
                    output_stats.alpha += alpha
                    output_stats.cnt += 1
                    output_updates[j] += 1
                norm_stats(output_stats)
                train_stats.output.append(output_stats)

        next_state, reward, done, _ = env.step(action) # Step
        episode_steps += 1
        total_steps += 1
        episode_reward += reward

        # Ignore the "done" signal if it comes from hitting the time horizon.
        # (https://github.com/openai/spinningup/blob/master/spinup/algos/sac/sac.py)
        mask = 1 if episode_steps == env._max_episode_steps else float(not done)
        action, inner_activations = push_memory(inner_activations, reward, next_state, mask)

        state = next_state
    total_reward += episode_reward
    train_stats.input = average_stats(train_stats.input)
    train_stats.hidden = average_stats(train_stats.hidden)
    train_stats.output = average_stats(train_stats.output)
    loss = get_avg_loss(train_stats)
    print("Episode: {}, episode steps: {}, avg. reward: {}, loss_i: {}, loss_h: {}, loss_o: {}".format(i_episode, episode_steps, round(total_reward/((i_episode % PRINT_FREQ) or PRINT_FREQ), 2), *list(map(lambda x : round(x, 3), loss))))
    if i_episode % args.target_update_interval == 0 and total_steps > 0:
        ptr = -1
        input_update_map = np.array(input_updates) >= MIN_REQUIRED_UPDATES
        hidden_update_map = np.array(hidden_updates) >= MIN_REQUIRED_UPDATES
        output_update_map = np.array(output_updates) >= MIN_REQUIRED_UPDATES
        for j, agent, memory, updates in zip(list(range(num_inputs)) + list(range(num_hidden)) + list(range(num_outputs)), input_layer + hidden_layer + output_layer, input_memory + hidden_memory + output_memory, input_updates + hidden_updates + output_updates):
            if j == 0:
                ptr += 1
            if updates >= MIN_REQUIRED_UPDATES:
                agent.update_target()
                memory.empty()
                if ptr == 0:
                    input_updates[j] = 0
                elif ptr == 1:
                    hidden_updates[j] = 0
                else:
                    output_updates[j] = 0
        print('--TARGET UPDATED--'*3)
        print('i: %s' % input_update_map)
        print('h: %s' % hidden_update_map)
        print('o: %s' % output_update_map)
        print('-'*80)
    if i_episode % PRINT_FREQ == 0:
        act_stats.input /= total_steps - last_total_steps
        act_stats.hidden /= total_steps - last_total_steps
        act_stats.output /= total_steps - last_total_steps
        last_total_steps = total_steps
        total_reward = 0
        print("-"*80)
        print('Train episodes: %d' % PRINT_FREQ)
        print('-'*80)
        print('LOSS')
        print('i: %s' % str(train_stats.input))
        print('h: %s' % str(train_stats.hidden))
        print('o: %s' % str(train_stats.output))
        print('ACTIVATIONS')
        print('i: %s' % str(list(map(lambda x : round(x, 2), act_stats.input))))
        print('h: %s' % str(list(map(lambda x : round(x, 2), act_stats.hidden))))
        print('o: %s' % str(list(map(lambda x : round(x, 2), act_stats.output))))
        print('UPDATES')
        print('i: %s' % input_updates)
        print('h: %s' % hidden_updates)
        print('o: %s' % output_updates)
        print('MEMORY')
        print('i: %s' % str(list(map(len, input_memory))))
        print('h: %s' % str(list(map(len, hidden_memory))))
        print('o: %s' % str(list(map(len, output_memory))))
        act_stats.input = np.array([0.0 for _ in input_layer])
        act_stats.hidden = np.array([0.0 for _ in hidden_layer])
        act_stats.output = np.array([0.0 for _ in output_layer])
        print('\n')
        avg_reward = 0.
        max_reward = 0.
        for _  in tqdm(range(TEST_EPISODES)):
            state = env.reset()
            episode_reward = 0
            done = False
            while not done:
                action, _ = eval_layers(state)

                next_state, reward, done, _ = env.step(action)
                episode_reward += reward


                state = next_state
            avg_reward += episode_reward
            max_reward = max(episode_reward, max_reward)
        avg_reward /= TEST_EPISODES

        print("-"*80)
        print("Test Episodes: {}, Avg. Reward: {}, Max. Reward: {}".format(TEST_EPISODES, round(avg_reward, 2), round(max_reward, 2)))
        print("-"*80)

--WARM UP ENDED----WARM UP ENDED----WARM UP ENDED--
Episode: 1, episode steps: 25, avg. reward: 25.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2, episode steps: 20, avg. reward: 22.5, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 3, episode steps: 13, avg. reward: 19.33, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4, episode steps: 9, avg. reward: 16.75, loss_i: 7.374, loss_h: nan, loss_o: 1.515
Episode: 5, episode steps: 16, avg. reward: 16.6, loss_i: 0.283, loss_h: nan, loss_o: 0.149
Episode: 6, episode steps: 23, avg. reward: 17.67, loss_i: 0.103, loss_h: nan, loss_o: 0.05
Episode: 7, episode steps: 11, avg. reward: 16.71, loss_i: 0.048, loss_h: nan, loss_o: 0.043
Episode: 8, episode steps: 13, avg. reward: 16.25, loss_i: 0.033, loss_h: 5.882, loss_o: 0.038
Episode: 9, episode steps: 15, avg. reward: 16.11, loss_i: 0.024, loss_h: 12.319, loss_o: 0.038
Episode: 10, episode steps: 17, avg. reward: 16.2, loss_i: 0.02, loss_h: 0.098, loss_o: 0.03
Episode: 11, episode steps: 23

Episode: 87, episode steps: 10, avg. reward: 15.55, loss_i: 0.002, loss_h: 0.004, loss_o: 0.005
Episode: 88, episode steps: 11, avg. reward: 15.5, loss_i: 0.002, loss_h: 0.004, loss_o: 0.005
Episode: 89, episode steps: 10, avg. reward: 15.44, loss_i: 0.002, loss_h: 0.005, loss_o: 0.004
Episode: 90, episode steps: 12, avg. reward: 15.4, loss_i: 0.002, loss_h: 0.004, loss_o: 0.004
Episode: 91, episode steps: 12, avg. reward: 15.36, loss_i: 0.002, loss_h: 0.312, loss_o: 0.005
Episode: 92, episode steps: 8, avg. reward: 15.28, loss_i: 0.002, loss_h: 0.004, loss_o: 0.005
Episode: 93, episode steps: 8, avg. reward: 15.2, loss_i: 0.002, loss_h: 0.004, loss_o: 0.004
Episode: 94, episode steps: 13, avg. reward: 15.18, loss_i: 0.002, loss_h: 0.004, loss_o: 0.005
Episode: 95, episode steps: 10, avg. reward: 15.13, loss_i: 0.002, loss_h: 0.293, loss_o: 0.005
Episode: 96, episode steps: 8, avg. reward: 15.05, loss_i: 0.002, loss_h: 0.004, loss_o: 0.005
Episode: 97, episode steps: 11, avg. reward: 1

Episode: 172, episode steps: 8, avg. reward: 13.35, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 173, episode steps: 10, avg. reward: 13.34, loss_i: 0.001, loss_h: 0.003, loss_o: 0.002
Episode: 174, episode steps: 12, avg. reward: 13.33, loss_i: 0.001, loss_h: 0.206, loss_o: 0.003
Episode: 175, episode steps: 9, avg. reward: 13.3, loss_i: 0.001, loss_h: 0.003, loss_o: 0.002
Episode: 176, episode steps: 12, avg. reward: 13.3, loss_i: 0.001, loss_h: 0.002, loss_o: 0.003
Episode: 177, episode steps: 12, avg. reward: 13.29, loss_i: 0.001, loss_h: 0.003, loss_o: 0.002
Episode: 178, episode steps: 11, avg. reward: 13.28, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 179, episode steps: 20, avg. reward: 13.31, loss_i: 0.001, loss_h: 0.002, loss_o: 0.003
Episode: 180, episode steps: 12, avg. reward: 13.31, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 181, episode steps: 10, avg. reward: 13.29, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 182, episode steps: 12, a

Episode: 257, episode steps: 12, avg. reward: 12.45, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 258, episode steps: 11, avg. reward: 12.45, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 259, episode steps: 10, avg. reward: 12.44, loss_i: 0.001, loss_h: 0.109, loss_o: 0.002
Episode: 260, episode steps: 10, avg. reward: 12.43, loss_i: 0.001, loss_h: 0.143, loss_o: 0.002
Episode: 261, episode steps: 11, avg. reward: 12.42, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 262, episode steps: 9, avg. reward: 12.41, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 263, episode steps: 10, avg. reward: 12.4, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 264, episode steps: 10, avg. reward: 12.39, loss_i: 0.001, loss_h: 0.125, loss_o: 0.002
Episode: 265, episode steps: 11, avg. reward: 12.38, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 266, episode steps: 8, avg. reward: 12.37, loss_i: 0.001, loss_h: 0.001, loss_o: 0.002
Episode: 267, episode steps: 9, a

Episode: 342, episode steps: 11, avg. reward: 11.88, loss_i: 0.001, loss_h: 0.001, loss_o: 0.002
Episode: 343, episode steps: 11, avg. reward: 11.88, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 344, episode steps: 12, avg. reward: 11.88, loss_i: 0.001, loss_h: 0.001, loss_o: 0.002
Episode: 345, episode steps: 12, avg. reward: 11.88, loss_i: 0.001, loss_h: 0.001, loss_o: 0.002
Episode: 346, episode steps: 10, avg. reward: 11.87, loss_i: 0.001, loss_h: 0.124, loss_o: 0.002
Episode: 347, episode steps: 10, avg. reward: 11.87, loss_i: 0.001, loss_h: 0.077, loss_o: 0.002
Episode: 348, episode steps: 10, avg. reward: 11.86, loss_i: 0.001, loss_h: 0.092, loss_o: 0.002
Episode: 349, episode steps: 10, avg. reward: 11.86, loss_i: 0.001, loss_h: 0.002, loss_o: 0.002
Episode: 350, episode steps: 9, avg. reward: 11.85, loss_i: 0.001, loss_h: 0.001, loss_o: 0.002
Episode: 351, episode steps: 9, avg. reward: 11.84, loss_i: 0.001, loss_h: 0.113, loss_o: 0.002
Episode: 352, episode steps: 10,

Episode: 426, episode steps: 9, avg. reward: 11.6, loss_i: 0.097, loss_h: 0.034, loss_o: 0.071
Episode: 427, episode steps: 8, avg. reward: 11.59, loss_i: 0.091, loss_h: 2.017, loss_o: 0.063
Episode: 428, episode steps: 9, avg. reward: 11.58, loss_i: 0.095, loss_h: 0.035, loss_o: 0.072
Episode: 429, episode steps: 11, avg. reward: 11.58, loss_i: 0.094, loss_h: 0.036, loss_o: 0.059
Episode: 430, episode steps: 10, avg. reward: 11.58, loss_i: 0.1, loss_h: 0.039, loss_o: 0.057
Episode: 431, episode steps: 11, avg. reward: 11.58, loss_i: 0.099, loss_h: 0.038, loss_o: 0.062
Episode: 432, episode steps: 10, avg. reward: 11.57, loss_i: 0.092, loss_h: 2.766, loss_o: 0.058
Episode: 433, episode steps: 12, avg. reward: 11.57, loss_i: 0.091, loss_h: 0.039, loss_o: 0.058
Episode: 434, episode steps: 11, avg. reward: 11.57, loss_i: 0.09, loss_h: 0.035, loss_o: 0.06
Episode: 435, episode steps: 11, avg. reward: 11.57, loss_i: 0.094, loss_h: 2.879, loss_o: 0.062
Episode: 436, episode steps: 10, avg. 

Episode: 512, episode steps: 9, avg. reward: 11.36, loss_i: 0.083, loss_h: 0.034, loss_o: 0.058
Episode: 513, episode steps: 9, avg. reward: 11.35, loss_i: 0.084, loss_h: 0.036, loss_o: 0.053
Episode: 514, episode steps: 8, avg. reward: 11.35, loss_i: 0.079, loss_h: 1.99, loss_o: 0.065
Episode: 515, episode steps: 10, avg. reward: 11.34, loss_i: 0.083, loss_h: 2.344, loss_o: 0.05
Episode: 516, episode steps: 10, avg. reward: 11.34, loss_i: 0.086, loss_h: 2.382, loss_o: 0.063
Episode: 517, episode steps: 10, avg. reward: 11.34, loss_i: 0.08, loss_h: 0.032, loss_o: 0.063
Episode: 518, episode steps: 9, avg. reward: 11.33, loss_i: 0.087, loss_h: 1.997, loss_o: 0.045
Episode: 519, episode steps: 9, avg. reward: 11.33, loss_i: 0.083, loss_h: 2.169, loss_o: 0.069
Episode: 520, episode steps: 9, avg. reward: 11.32, loss_i: 0.084, loss_h: 1.736, loss_o: 0.062
Episode: 521, episode steps: 10, avg. reward: 11.32, loss_i: 0.082, loss_h: 2.196, loss_o: 0.056
Episode: 522, episode steps: 14, avg. r

Episode: 598, episode steps: 11, avg. reward: 11.22, loss_i: 0.076, loss_h: 0.032, loss_o: 0.053
Episode: 599, episode steps: 10, avg. reward: 11.22, loss_i: 0.075, loss_h: 0.034, loss_o: 0.051
Episode: 600, episode steps: 11, avg. reward: 11.22, loss_i: 0.079, loss_h: 0.032, loss_o: 0.053
Episode: 601, episode steps: 10, avg. reward: 11.22, loss_i: 0.078, loss_h: 0.036, loss_o: 0.063
Episode: 602, episode steps: 9, avg. reward: 11.22, loss_i: 0.075, loss_h: 0.032, loss_o: 0.06
Episode: 603, episode steps: 9, avg. reward: 11.21, loss_i: 0.082, loss_h: 2.148, loss_o: 0.05
Episode: 604, episode steps: 8, avg. reward: 11.21, loss_i: 0.076, loss_h: 2.122, loss_o: 0.069
Episode: 605, episode steps: 9, avg. reward: 11.2, loss_i: 0.076, loss_h: 2.205, loss_o: 0.05
Episode: 606, episode steps: 11, avg. reward: 11.2, loss_i: 0.082, loss_h: 0.035, loss_o: 0.054
Episode: 607, episode steps: 9, avg. reward: 11.2, loss_i: 0.08, loss_h: 1.843, loss_o: 0.057
Episode: 608, episode steps: 8, avg. rewar

Episode: 684, episode steps: 9, avg. reward: 11.05, loss_i: 0.071, loss_h: 2.444, loss_o: 0.064
Episode: 685, episode steps: 11, avg. reward: 11.05, loss_i: 0.072, loss_h: 3.006, loss_o: 0.048
Episode: 686, episode steps: 9, avg. reward: 11.05, loss_i: 0.078, loss_h: 0.031, loss_o: 0.065
Episode: 687, episode steps: 8, avg. reward: 11.04, loss_i: 0.067, loss_h: 0.03, loss_o: 0.053
Episode: 688, episode steps: 10, avg. reward: 11.04, loss_i: 0.076, loss_h: 0.036, loss_o: 0.054
Episode: 689, episode steps: 10, avg. reward: 11.04, loss_i: 0.074, loss_h: 0.03, loss_o: 0.05
Episode: 690, episode steps: 10, avg. reward: 11.04, loss_i: 0.069, loss_h: 0.034, loss_o: 0.06
Episode: 691, episode steps: 11, avg. reward: 11.04, loss_i: 0.065, loss_h: 2.609, loss_o: 0.05
Episode: 692, episode steps: 10, avg. reward: 11.03, loss_i: 0.074, loss_h: 2.318, loss_o: 0.057
Episode: 693, episode steps: 10, avg. reward: 11.03, loss_i: 0.069, loss_h: 0.034, loss_o: 0.06
Episode: 694, episode steps: 10, avg. r

Episode: 770, episode steps: 9, avg. reward: 10.92, loss_i: 0.071, loss_h: 2.226, loss_o: 0.055
Episode: 771, episode steps: 9, avg. reward: 10.92, loss_i: 0.065, loss_h: 2.146, loss_o: 0.06
Episode: 772, episode steps: 9, avg. reward: 10.92, loss_i: 0.071, loss_h: 0.033, loss_o: 0.053
Episode: 773, episode steps: 11, avg. reward: 10.92, loss_i: 0.069, loss_h: 0.032, loss_o: 0.047
Episode: 774, episode steps: 9, avg. reward: 10.91, loss_i: 0.065, loss_h: 0.032, loss_o: 0.041
Episode: 775, episode steps: 8, avg. reward: 10.91, loss_i: 0.072, loss_h: 0.036, loss_o: 0.072
Episode: 776, episode steps: 8, avg. reward: 10.91, loss_i: 0.065, loss_h: 2.104, loss_o: 0.055
Episode: 777, episode steps: 9, avg. reward: 10.9, loss_i: 0.073, loss_h: 2.034, loss_o: 0.06
Episode: 778, episode steps: 9, avg. reward: 10.9, loss_i: 0.074, loss_h: 0.029, loss_o: 0.048
Episode: 779, episode steps: 9, avg. reward: 10.9, loss_i: 0.067, loss_h: 1.737, loss_o: 0.062
Episode: 780, episode steps: 12, avg. reward

Episode: 854, episode steps: 8, avg. reward: 10.81, loss_i: 0.355, loss_h: 0.123, loss_o: 0.199
Episode: 855, episode steps: 10, avg. reward: 10.81, loss_i: 0.331, loss_h: 9.392, loss_o: 0.186
Episode: 856, episode steps: 10, avg. reward: 10.81, loss_i: 0.366, loss_h: 8.799, loss_o: 0.176
Episode: 857, episode steps: 10, avg. reward: 10.81, loss_i: 0.346, loss_h: 8.58, loss_o: 0.211
Episode: 858, episode steps: 8, avg. reward: 10.81, loss_i: 0.365, loss_h: 6.538, loss_o: 0.197
Episode: 859, episode steps: 8, avg. reward: 10.8, loss_i: 0.321, loss_h: 7.321, loss_o: 0.139
Episode: 860, episode steps: 8, avg. reward: 10.8, loss_i: 0.341, loss_h: 7.905, loss_o: 0.201
Episode: 861, episode steps: 9, avg. reward: 10.8, loss_i: 0.346, loss_h: 0.151, loss_o: 0.176
Episode: 862, episode steps: 12, avg. reward: 10.8, loss_i: 0.355, loss_h: 12.772, loss_o: 0.208
Episode: 863, episode steps: 10, avg. reward: 10.8, loss_i: 0.344, loss_h: 0.117, loss_o: 0.177
Episode: 864, episode steps: 10, avg. re

Episode: 940, episode steps: 9, avg. reward: 10.73, loss_i: 0.355, loss_h: 0.144, loss_o: 0.185
Episode: 941, episode steps: 9, avg. reward: 10.73, loss_i: 0.369, loss_h: 0.129, loss_o: 0.177
Episode: 942, episode steps: 10, avg. reward: 10.73, loss_i: 0.32, loss_h: 10.776, loss_o: 0.156
Episode: 943, episode steps: 10, avg. reward: 10.73, loss_i: 0.316, loss_h: 9.248, loss_o: 0.2
Episode: 944, episode steps: 10, avg. reward: 10.72, loss_i: 0.34, loss_h: 0.13, loss_o: 0.173
Episode: 945, episode steps: 9, avg. reward: 10.72, loss_i: 0.333, loss_h: 0.13, loss_o: 0.193
Episode: 946, episode steps: 8, avg. reward: 10.72, loss_i: 0.295, loss_h: 0.123, loss_o: 0.154
Episode: 947, episode steps: 8, avg. reward: 10.72, loss_i: 0.34, loss_h: 6.315, loss_o: 0.172
Episode: 948, episode steps: 10, avg. reward: 10.72, loss_i: 0.335, loss_h: 8.509, loss_o: 0.219
Episode: 949, episode steps: 11, avg. reward: 10.72, loss_i: 0.343, loss_h: 10.336, loss_o: 0.195
Episode: 950, episode steps: 11, avg. re

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 1000, episode steps: 10, avg. reward: 10.68, loss_i: 0.332, loss_h: 0.132, loss_o: 0.185
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 0.33163730669766667, 'critic_2_loss': 0.33147776294499637, 'policy_loss': -3.4505025684833526, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 0.1319454921328503, 'critic_2_loss': 0.1319293665929117, 'policy_loss': -1.1178605591041455, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 0.18472943753004073, 'critic_2_loss': 0.18472940996289253, 'policy_loss': -1.436676549911499, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.65, 0.68, 0.77, 0.75]
o: [1.0]
UPDATES
i: [1913, 1913, 1913, 1913]
h: [1147, 1306, 1457, 1531]
o: [1913]
MEMORY
i: [1978, 1978, 1978, 1978]
h: [777, 1020, 1196, 1344]
o: [1978]




100%|██████████| 1000/1000 [01:51<00:00,  8.95it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.85, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 1001, episode steps: 9, avg. reward: 9.0, loss_i: 0.319, loss_h: 0.114, loss_o: 0.175
Episode: 1002, episode steps: 8, avg. reward: 8.5, loss_i: 0.341, loss_h: 0.113, loss_o: 0.189
Episode: 1003, episode steps: 10, avg. reward: 9.0, loss_i: 0.289, loss_h: 10.078, loss_o: 0.22
Episode: 1004, episode steps: 10, avg. reward: 9.25, loss_i: 0.294, loss_h: 9.72, loss_o: 0.172
Episode: 1005, episode steps: 9, avg. reward: 9.2, loss_i: 0.334, loss_h: 0.122, loss_o: 0.172
Episode: 1006, episode steps: 8, avg. reward: 9.0, loss_i: 0.326, loss_h: 0.119, loss_o: 0.183
Episode: 1007, episode steps: 9, avg. reward: 9.0, loss_i: 0.312, loss_h: 0.144, loss_o: 0.169
Episode: 1008, episode steps: 8, avg. reward: 8.88, loss_i: 0.304, loss_h: 0.125, loss_o: 0.196
Episode: 1009, ep

Episode: 1084, episode steps: 10, avg. reward: 9.83, loss_i: 0.307, loss_h: 0.146, loss_o: 0.148
Episode: 1085, episode steps: 9, avg. reward: 9.82, loss_i: 0.287, loss_h: 8.922, loss_o: 0.175
Episode: 1086, episode steps: 8, avg. reward: 9.8, loss_i: 0.329, loss_h: 7.356, loss_o: 0.2
Episode: 1087, episode steps: 9, avg. reward: 9.79, loss_i: 0.309, loss_h: 0.166, loss_o: 0.193
Episode: 1088, episode steps: 8, avg. reward: 9.77, loss_i: 0.308, loss_h: 7.324, loss_o: 0.211
Episode: 1089, episode steps: 9, avg. reward: 9.76, loss_i: 0.308, loss_h: 9.435, loss_o: 0.187
Episode: 1090, episode steps: 10, avg. reward: 9.77, loss_i: 0.293, loss_h: 0.136, loss_o: 0.171
Episode: 1091, episode steps: 9, avg. reward: 9.76, loss_i: 0.292, loss_h: 0.139, loss_o: 0.21
Episode: 1092, episode steps: 9, avg. reward: 9.75, loss_i: 0.321, loss_h: 0.152, loss_o: 0.196
Episode: 1093, episode steps: 9, avg. reward: 9.74, loss_i: 0.3, loss_h: 0.129, loss_o: 0.201
Episode: 1094, episode steps: 9, avg. reward

Episode: 1170, episode steps: 10, avg. reward: 9.85, loss_i: 0.291, loss_h: 0.137, loss_o: 0.181
Episode: 1171, episode steps: 10, avg. reward: 9.85, loss_i: 0.308, loss_h: 8.414, loss_o: 0.179
Episode: 1172, episode steps: 12, avg. reward: 9.87, loss_i: 0.323, loss_h: 0.136, loss_o: 0.167
Episode: 1173, episode steps: 9, avg. reward: 9.86, loss_i: 0.308, loss_h: 0.147, loss_o: 0.168
Episode: 1174, episode steps: 9, avg. reward: 9.86, loss_i: 0.289, loss_h: 0.134, loss_o: 0.182
Episode: 1175, episode steps: 12, avg. reward: 9.87, loss_i: 0.264, loss_h: 10.585, loss_o: 0.171
Episode: 1176, episode steps: 9, avg. reward: 9.86, loss_i: 0.29, loss_h: 9.628, loss_o: 0.212
Episode: 1177, episode steps: 11, avg. reward: 9.87, loss_i: 0.263, loss_h: 0.123, loss_o: 0.217
Episode: 1178, episode steps: 8, avg. reward: 9.86, loss_i: 0.3, loss_h: 7.566, loss_o: 0.188
Episode: 1179, episode steps: 10, avg. reward: 9.86, loss_i: 0.271, loss_h: 8.896, loss_o: 0.16
Episode: 1180, episode steps: 10, avg

Episode: 1254, episode steps: 9, avg. reward: 9.84, loss_i: 1.011, loss_h: 14.97, loss_o: 0.364
Episode: 1255, episode steps: 10, avg. reward: 9.84, loss_i: 1.037, loss_h: 0.282, loss_o: 0.375
Episode: 1256, episode steps: 9, avg. reward: 9.84, loss_i: 1.161, loss_h: 0.289, loss_o: 0.329
Episode: 1257, episode steps: 11, avg. reward: 9.84, loss_i: 0.958, loss_h: 0.274, loss_o: 0.405
Episode: 1258, episode steps: 9, avg. reward: 9.84, loss_i: 1.005, loss_h: 0.251, loss_o: 0.421
Episode: 1259, episode steps: 8, avg. reward: 9.83, loss_i: 1.008, loss_h: 0.25, loss_o: 0.335
Episode: 1260, episode steps: 13, avg. reward: 9.84, loss_i: 1.203, loss_h: 0.266, loss_o: 0.448
Episode: 1261, episode steps: 9, avg. reward: 9.84, loss_i: 1.228, loss_h: 0.246, loss_o: 0.322
Episode: 1262, episode steps: 11, avg. reward: 9.84, loss_i: 1.122, loss_h: 18.73, loss_o: 0.406
Episode: 1263, episode steps: 9, avg. reward: 9.84, loss_i: 1.132, loss_h: 0.259, loss_o: 0.336
Episode: 1264, episode steps: 11, avg

Episode: 1340, episode steps: 13, avg. reward: 9.88, loss_i: 0.874, loss_h: 0.235, loss_o: 0.364
Episode: 1341, episode steps: 10, avg. reward: 9.88, loss_i: 0.993, loss_h: 0.242, loss_o: 0.372
Episode: 1342, episode steps: 9, avg. reward: 9.88, loss_i: 1.023, loss_h: 0.256, loss_o: 0.411
Episode: 1343, episode steps: 9, avg. reward: 9.87, loss_i: 1.004, loss_h: 0.243, loss_o: 0.421
Episode: 1344, episode steps: 9, avg. reward: 9.87, loss_i: 1.126, loss_h: 0.281, loss_o: 0.33
Episode: 1345, episode steps: 10, avg. reward: 9.87, loss_i: 0.851, loss_h: 17.617, loss_o: 0.35
Episode: 1346, episode steps: 10, avg. reward: 9.87, loss_i: 0.97, loss_h: 18.439, loss_o: 0.353
Episode: 1347, episode steps: 10, avg. reward: 9.87, loss_i: 1.242, loss_h: 0.249, loss_o: 0.36
Episode: 1348, episode steps: 9, avg. reward: 9.87, loss_i: 0.923, loss_h: 16.406, loss_o: 0.373
Episode: 1349, episode steps: 10, avg. reward: 9.87, loss_i: 1.049, loss_h: 18.329, loss_o: 0.332
Episode: 1350, episode steps: 9, a

Episode: 1425, episode steps: 9, avg. reward: 9.84, loss_i: 0.899, loss_h: 0.264, loss_o: 0.345
Episode: 1426, episode steps: 10, avg. reward: 9.84, loss_i: 1.048, loss_h: 17.242, loss_o: 0.39
Episode: 1427, episode steps: 8, avg. reward: 9.83, loss_i: 0.777, loss_h: 0.275, loss_o: 0.353
Episode: 1428, episode steps: 9, avg. reward: 9.83, loss_i: 1.072, loss_h: 19.665, loss_o: 0.319
Episode: 1429, episode steps: 10, avg. reward: 9.83, loss_i: 0.902, loss_h: 0.257, loss_o: 0.311
Episode: 1430, episode steps: 9, avg. reward: 9.83, loss_i: 1.133, loss_h: 0.304, loss_o: 0.394
Episode: 1431, episode steps: 10, avg. reward: 9.83, loss_i: 0.897, loss_h: 0.277, loss_o: 0.334
Episode: 1432, episode steps: 9, avg. reward: 9.83, loss_i: 0.928, loss_h: 15.71, loss_o: 0.367
Episode: 1433, episode steps: 9, avg. reward: 9.82, loss_i: 0.995, loss_h: 0.273, loss_o: 0.3
Episode: 1434, episode steps: 11, avg. reward: 9.83, loss_i: 1.106, loss_h: 0.235, loss_o: 0.333
Episode: 1435, episode steps: 9, avg.

Episode: 1510, episode steps: 10, avg. reward: 9.83, loss_i: 0.806, loss_h: 0.255, loss_o: 0.417
Episode: 1511, episode steps: 9, avg. reward: 9.83, loss_i: 0.692, loss_h: 0.253, loss_o: 0.35
Episode: 1512, episode steps: 10, avg. reward: 9.83, loss_i: 0.965, loss_h: 0.248, loss_o: 0.352
Episode: 1513, episode steps: 12, avg. reward: 9.84, loss_i: 0.788, loss_h: 0.261, loss_o: 0.321
Episode: 1514, episode steps: 9, avg. reward: 9.83, loss_i: 0.841, loss_h: 0.266, loss_o: 0.432
Episode: 1515, episode steps: 10, avg. reward: 9.83, loss_i: 0.91, loss_h: 0.294, loss_o: 0.322
Episode: 1516, episode steps: 11, avg. reward: 9.84, loss_i: 0.767, loss_h: 0.262, loss_o: 0.278
Episode: 1517, episode steps: 9, avg. reward: 9.84, loss_i: 0.832, loss_h: 0.302, loss_o: 0.396
Episode: 1518, episode steps: 10, avg. reward: 9.84, loss_i: 0.808, loss_h: 0.253, loss_o: 0.34
Episode: 1519, episode steps: 10, avg. reward: 9.84, loss_i: 0.922, loss_h: 18.886, loss_o: 0.367
Episode: 1520, episode steps: 9, av

Episode: 1595, episode steps: 9, avg. reward: 9.83, loss_i: 0.76, loss_h: 15.513, loss_o: 0.338
Episode: 1596, episode steps: 10, avg. reward: 9.83, loss_i: 0.898, loss_h: 17.554, loss_o: 0.383
Episode: 1597, episode steps: 10, avg. reward: 9.83, loss_i: 0.826, loss_h: 18.244, loss_o: 0.377
Episode: 1598, episode steps: 11, avg. reward: 9.83, loss_i: 0.77, loss_h: 0.269, loss_o: 0.438
Episode: 1599, episode steps: 9, avg. reward: 9.83, loss_i: 0.917, loss_h: 19.669, loss_o: 0.378
Episode: 1600, episode steps: 11, avg. reward: 9.84, loss_i: 0.771, loss_h: 0.283, loss_o: 0.31
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
Episode: 1601, episode steps: 9, avg. reward: 9.83, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 1602, episode steps: 10, avg. reward: 9.83, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 1603, episode steps: 9, 

Episode: 1679, episode steps: 10, avg. reward: 9.81, loss_i: 4.194, loss_h: 27.919, loss_o: 0.536
Episode: 1680, episode steps: 12, avg. reward: 9.82, loss_i: 3.803, loss_h: 29.792, loss_o: 0.579
Episode: 1681, episode steps: 8, avg. reward: 9.81, loss_i: 3.018, loss_h: 0.392, loss_o: 0.574
Episode: 1682, episode steps: 10, avg. reward: 9.82, loss_i: 3.329, loss_h: 0.346, loss_o: 0.481
Episode: 1683, episode steps: 11, avg. reward: 9.82, loss_i: 3.67, loss_h: 0.382, loss_o: 0.54
Episode: 1684, episode steps: 10, avg. reward: 9.82, loss_i: 3.606, loss_h: 22.531, loss_o: 0.653
Episode: 1685, episode steps: 10, avg. reward: 9.82, loss_i: 3.4, loss_h: 0.308, loss_o: 0.63
Episode: 1686, episode steps: 10, avg. reward: 9.82, loss_i: 3.491, loss_h: 0.372, loss_o: 0.642
Episode: 1687, episode steps: 11, avg. reward: 9.82, loss_i: 3.039, loss_h: 0.38, loss_o: 0.668
Episode: 1688, episode steps: 10, avg. reward: 9.82, loss_i: 3.432, loss_h: 24.18, loss_o: 0.582
Episode: 1689, episode steps: 12, 

Episode: 1765, episode steps: 12, avg. reward: 9.81, loss_i: 3.754, loss_h: 0.346, loss_o: 0.611
Episode: 1766, episode steps: 9, avg. reward: 9.81, loss_i: 3.316, loss_h: 0.348, loss_o: 0.64
Episode: 1767, episode steps: 8, avg. reward: 9.8, loss_i: 3.061, loss_h: 0.321, loss_o: 0.488
Episode: 1768, episode steps: 13, avg. reward: 9.81, loss_i: 2.906, loss_h: 28.445, loss_o: 0.571
Episode: 1769, episode steps: 10, avg. reward: 9.81, loss_i: 3.999, loss_h: 0.352, loss_o: 0.53
Episode: 1770, episode steps: 9, avg. reward: 9.81, loss_i: 2.752, loss_h: 0.339, loss_o: 0.603
Episode: 1771, episode steps: 11, avg. reward: 9.81, loss_i: 2.714, loss_h: 28.324, loss_o: 0.649
Episode: 1772, episode steps: 9, avg. reward: 9.81, loss_i: 2.63, loss_h: 0.386, loss_o: 0.686
Episode: 1773, episode steps: 9, avg. reward: 9.81, loss_i: 3.179, loss_h: 0.394, loss_o: 0.617
Episode: 1774, episode steps: 10, avg. reward: 9.81, loss_i: 2.578, loss_h: 0.351, loss_o: 0.627
Episode: 1775, episode steps: 10, avg

Episode: 1851, episode steps: 11, avg. reward: 9.8, loss_i: 2.422, loss_h: 0.33, loss_o: 0.548
Episode: 1852, episode steps: 9, avg. reward: 9.8, loss_i: 2.926, loss_h: 0.37, loss_o: 0.585
Episode: 1853, episode steps: 9, avg. reward: 9.8, loss_i: 2.806, loss_h: 22.275, loss_o: 0.547
Episode: 1854, episode steps: 9, avg. reward: 9.8, loss_i: 2.699, loss_h: 0.348, loss_o: 0.698
Episode: 1855, episode steps: 10, avg. reward: 9.8, loss_i: 2.403, loss_h: 0.387, loss_o: 0.636
Episode: 1856, episode steps: 10, avg. reward: 9.8, loss_i: 3.067, loss_h: 24.12, loss_o: 0.538
Episode: 1857, episode steps: 9, avg. reward: 9.79, loss_i: 2.525, loss_h: 0.355, loss_o: 0.581
Episode: 1858, episode steps: 10, avg. reward: 9.79, loss_i: 2.856, loss_h: 23.495, loss_o: 0.609
Episode: 1859, episode steps: 9, avg. reward: 9.79, loss_i: 2.148, loss_h: 0.335, loss_o: 0.643
Episode: 1860, episode steps: 8, avg. reward: 9.79, loss_i: 3.244, loss_h: 0.311, loss_o: 0.545
Episode: 1861, episode steps: 10, avg. rew

Episode: 1937, episode steps: 9, avg. reward: 9.78, loss_i: 2.65, loss_h: 0.355, loss_o: 0.618
Episode: 1938, episode steps: 9, avg. reward: 9.78, loss_i: 2.324, loss_h: 0.31, loss_o: 0.641
Episode: 1939, episode steps: 9, avg. reward: 9.78, loss_i: 2.429, loss_h: 0.343, loss_o: 0.555
Episode: 1940, episode steps: 9, avg. reward: 9.78, loss_i: 2.752, loss_h: 19.99, loss_o: 0.616
Episode: 1941, episode steps: 11, avg. reward: 9.78, loss_i: 2.098, loss_h: 0.335, loss_o: 0.583
Episode: 1942, episode steps: 10, avg. reward: 9.78, loss_i: 2.353, loss_h: 0.344, loss_o: 0.636
Episode: 1943, episode steps: 11, avg. reward: 9.78, loss_i: 2.133, loss_h: 0.37, loss_o: 0.546
Episode: 1944, episode steps: 9, avg. reward: 9.78, loss_i: 2.497, loss_h: 0.364, loss_o: 0.633
Episode: 1945, episode steps: 11, avg. reward: 9.79, loss_i: 3.233, loss_h: 0.363, loss_o: 0.611
Episode: 1946, episode steps: 10, avg. reward: 9.79, loss_i: 2.733, loss_h: 26.896, loss_o: 0.517
Episode: 1947, episode steps: 10, avg

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 2000, episode steps: 9, avg. reward: 9.78, loss_i: 2.231, loss_h: 0.339, loss_o: 0.659
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 2.2354602130750814, 'critic_2_loss': 2.2268603135728173, 'policy_loss': -9.082015103764004, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 0.33888778805969255, 'critic_2_loss': 0.3386822114624674, 'policy_loss': -1.6534734983292838, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 0.6588071253564622, 'critic_2_loss': 0.6589416133032905, 'policy_loss': -2.0976523293389215, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [

100%|██████████| 1000/1000 [01:47<00:00,  9.29it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.64, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 2001, episode steps: 9, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2002, episode steps: 9, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2003, episode steps: 9, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2004, episode steps: 9, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2005, episode steps: 10, avg. reward: 9.2, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2006, episode steps: 8, avg. reward: 9.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2007, episode steps: 10, avg. reward: 9.14, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 2008, episode steps: 12, avg. reward: 9.5, loss_i: 814.837, loss_h: 7.38, loss_o: 17.636
Episode: 2009, episode steps: 11, avg. reward: 9.67, los

Episode: 2085, episode steps: 12, avg. reward: 9.76, loss_i: 6.398, loss_h: 0.469, loss_o: 0.837
Episode: 2086, episode steps: 8, avg. reward: 9.74, loss_i: 8.14, loss_h: 0.456, loss_o: 0.647
Episode: 2087, episode steps: 10, avg. reward: 9.75, loss_i: 7.586, loss_h: 0.425, loss_o: 0.825
Episode: 2088, episode steps: 10, avg. reward: 9.75, loss_i: 7.618, loss_h: 31.031, loss_o: 0.883
Episode: 2089, episode steps: 9, avg. reward: 9.74, loss_i: 6.801, loss_h: 24.704, loss_o: 0.75
Episode: 2090, episode steps: 10, avg. reward: 9.74, loss_i: 6.797, loss_h: 0.563, loss_o: 0.829
Episode: 2091, episode steps: 10, avg. reward: 9.75, loss_i: 7.682, loss_h: 0.445, loss_o: 0.838
Episode: 2092, episode steps: 9, avg. reward: 9.74, loss_i: 8.115, loss_h: 0.518, loss_o: 0.721
Episode: 2093, episode steps: 9, avg. reward: 9.73, loss_i: 7.086, loss_h: 0.486, loss_o: 0.753
Episode: 2094, episode steps: 11, avg. reward: 9.74, loss_i: 7.036, loss_h: 0.43, loss_o: 0.783
Episode: 2095, episode steps: 9, av

Episode: 2170, episode steps: 10, avg. reward: 9.81, loss_i: 5.82, loss_h: 32.508, loss_o: 0.961
Episode: 2171, episode steps: 10, avg. reward: 9.81, loss_i: 7.103, loss_h: 32.861, loss_o: 0.794
Episode: 2172, episode steps: 10, avg. reward: 9.81, loss_i: 6.63, loss_h: 0.456, loss_o: 0.84
Episode: 2173, episode steps: 10, avg. reward: 9.82, loss_i: 7.208, loss_h: 0.478, loss_o: 0.716
Episode: 2174, episode steps: 9, avg. reward: 9.81, loss_i: 6.914, loss_h: 0.454, loss_o: 0.815
Episode: 2175, episode steps: 9, avg. reward: 9.81, loss_i: 7.056, loss_h: 0.452, loss_o: 0.693
Episode: 2176, episode steps: 10, avg. reward: 9.81, loss_i: 6.967, loss_h: 0.506, loss_o: 0.751
Episode: 2177, episode steps: 10, avg. reward: 9.81, loss_i: 6.781, loss_h: 0.469, loss_o: 0.793
Episode: 2178, episode steps: 9, avg. reward: 9.8, loss_i: 6.057, loss_h: 0.501, loss_o: 0.908
Episode: 2179, episode steps: 10, avg. reward: 9.8, loss_i: 6.981, loss_h: 26.526, loss_o: 0.778
Episode: 2180, episode steps: 10, a

Episode: 2256, episode steps: 10, avg. reward: 9.78, loss_i: 6.519, loss_h: 0.493, loss_o: 0.644
Episode: 2257, episode steps: 8, avg. reward: 9.77, loss_i: 5.855, loss_h: 22.451, loss_o: 0.857
Episode: 2258, episode steps: 10, avg. reward: 9.77, loss_i: 6.515, loss_h: 43.017, loss_o: 0.773
Episode: 2259, episode steps: 9, avg. reward: 9.77, loss_i: 5.582, loss_h: 0.451, loss_o: 0.68
Episode: 2260, episode steps: 10, avg. reward: 9.77, loss_i: 5.946, loss_h: 0.472, loss_o: 0.779
Episode: 2261, episode steps: 9, avg. reward: 9.77, loss_i: 6.631, loss_h: 31.62, loss_o: 0.842
Episode: 2262, episode steps: 9, avg. reward: 9.76, loss_i: 5.236, loss_h: 29.014, loss_o: 0.862
Episode: 2263, episode steps: 9, avg. reward: 9.76, loss_i: 5.227, loss_h: 0.481, loss_o: 0.814
Episode: 2264, episode steps: 9, avg. reward: 9.76, loss_i: 7.415, loss_h: 0.538, loss_o: 0.891
Episode: 2265, episode steps: 9, avg. reward: 9.75, loss_i: 5.836, loss_h: 0.4, loss_o: 0.808
Episode: 2266, episode steps: 8, avg.

Episode: 2342, episode steps: 10, avg. reward: 9.69, loss_i: 6.282, loss_h: 0.474, loss_o: 0.835
Episode: 2343, episode steps: 9, avg. reward: 9.69, loss_i: 6.586, loss_h: 0.503, loss_o: 0.853
Episode: 2344, episode steps: 9, avg. reward: 9.69, loss_i: 5.021, loss_h: 0.541, loss_o: 0.63
Episode: 2345, episode steps: 11, avg. reward: 9.69, loss_i: 5.296, loss_h: 0.543, loss_o: 0.837
Episode: 2346, episode steps: 10, avg. reward: 9.69, loss_i: 5.933, loss_h: 0.46, loss_o: 0.751
Episode: 2347, episode steps: 9, avg. reward: 9.69, loss_i: 5.879, loss_h: 32.481, loss_o: 0.697
Episode: 2348, episode steps: 9, avg. reward: 9.69, loss_i: 5.671, loss_h: 0.517, loss_o: 0.877
Episode: 2349, episode steps: 11, avg. reward: 9.69, loss_i: 6.203, loss_h: 30.67, loss_o: 0.598
Episode: 2350, episode steps: 10, avg. reward: 9.69, loss_i: 5.967, loss_h: 0.511, loss_o: 0.746
Episode: 2351, episode steps: 11, avg. reward: 9.7, loss_i: 5.143, loss_h: 0.48, loss_o: 0.724
Episode: 2352, episode steps: 11, avg

Episode: 2426, episode steps: 10, avg. reward: 9.68, loss_i: 8.474, loss_h: 0.517, loss_o: 0.821
Episode: 2427, episode steps: 10, avg. reward: 9.68, loss_i: 6.974, loss_h: 28.58, loss_o: 0.717
Episode: 2428, episode steps: 10, avg. reward: 9.68, loss_i: 9.547, loss_h: 0.53, loss_o: 0.885
Episode: 2429, episode steps: 9, avg. reward: 9.68, loss_i: 8.253, loss_h: 25.42, loss_o: 0.784
Episode: 2430, episode steps: 9, avg. reward: 9.68, loss_i: 8.27, loss_h: 0.5, loss_o: 0.812
Episode: 2431, episode steps: 9, avg. reward: 9.68, loss_i: 6.867, loss_h: 0.502, loss_o: 0.901
Episode: 2432, episode steps: 9, avg. reward: 9.67, loss_i: 9.445, loss_h: 0.518, loss_o: 0.837
Episode: 2433, episode steps: 14, avg. reward: 9.68, loss_i: 8.681, loss_h: 39.754, loss_o: 0.828
Episode: 2434, episode steps: 10, avg. reward: 9.68, loss_i: 9.69, loss_h: 0.459, loss_o: 0.741
Episode: 2435, episode steps: 9, avg. reward: 9.68, loss_i: 9.257, loss_h: 0.515, loss_o: 0.654
Episode: 2436, episode steps: 10, avg. 

Episode: 2511, episode steps: 10, avg. reward: 9.7, loss_i: 8.27, loss_h: 0.644, loss_o: 0.885
Episode: 2512, episode steps: 9, avg. reward: 9.7, loss_i: 8.457, loss_h: 0.67, loss_o: 1.013
Episode: 2513, episode steps: 8, avg. reward: 9.69, loss_i: 7.73, loss_h: 0.694, loss_o: 0.731
Episode: 2514, episode steps: 9, avg. reward: 9.69, loss_i: 6.442, loss_h: 0.572, loss_o: 0.835
Episode: 2515, episode steps: 9, avg. reward: 9.69, loss_i: 8.396, loss_h: 0.598, loss_o: 0.742
Episode: 2516, episode steps: 9, avg. reward: 9.69, loss_i: 8.482, loss_h: 0.563, loss_o: 1.015
Episode: 2517, episode steps: 8, avg. reward: 9.69, loss_i: 6.542, loss_h: 40.23, loss_o: 0.877
Episode: 2518, episode steps: 10, avg. reward: 9.69, loss_i: 7.146, loss_h: 0.664, loss_o: 0.932
Episode: 2519, episode steps: 10, avg. reward: 9.69, loss_i: 8.295, loss_h: 0.555, loss_o: 0.827
Episode: 2520, episode steps: 9, avg. reward: 9.69, loss_i: 7.721, loss_h: 31.185, loss_o: 0.806
Episode: 2521, episode steps: 8, avg. rew

Episode: 2596, episode steps: 9, avg. reward: 9.67, loss_i: 7.217, loss_h: 0.649, loss_o: 0.878
Episode: 2597, episode steps: 11, avg. reward: 9.68, loss_i: 8.174, loss_h: 0.63, loss_o: 1.079
Episode: 2598, episode steps: 10, avg. reward: 9.68, loss_i: 7.629, loss_h: 0.581, loss_o: 0.907
Episode: 2599, episode steps: 11, avg. reward: 9.68, loss_i: 7.486, loss_h: 0.62, loss_o: 0.899
Episode: 2600, episode steps: 9, avg. reward: 9.68, loss_i: 7.098, loss_h: 0.605, loss_o: 0.83
Episode: 2601, episode steps: 10, avg. reward: 9.68, loss_i: 7.076, loss_h: 0.671, loss_o: 0.918
Episode: 2602, episode steps: 11, avg. reward: 9.68, loss_i: 7.742, loss_h: 0.501, loss_o: 0.693
Episode: 2603, episode steps: 13, avg. reward: 9.68, loss_i: 7.77, loss_h: 58.377, loss_o: 0.922
Episode: 2604, episode steps: 10, avg. reward: 9.69, loss_i: 7.584, loss_h: 41.151, loss_o: 0.926
Episode: 2605, episode steps: 11, avg. reward: 9.69, loss_i: 6.803, loss_h: 0.615, loss_o: 0.769
Episode: 2606, episode steps: 12, 

Episode: 2681, episode steps: 11, avg. reward: 9.68, loss_i: 7.807, loss_h: 0.635, loss_o: 0.787
Episode: 2682, episode steps: 8, avg. reward: 9.68, loss_i: 7.835, loss_h: 36.949, loss_o: 0.714
Episode: 2683, episode steps: 9, avg. reward: 9.68, loss_i: 7.388, loss_h: 0.68, loss_o: 0.965
Episode: 2684, episode steps: 10, avg. reward: 9.68, loss_i: 8.917, loss_h: 0.696, loss_o: 0.887
Episode: 2685, episode steps: 10, avg. reward: 9.68, loss_i: 8.188, loss_h: 0.66, loss_o: 1.031
Episode: 2686, episode steps: 10, avg. reward: 9.68, loss_i: 9.023, loss_h: 45.182, loss_o: 0.993
Episode: 2687, episode steps: 9, avg. reward: 9.68, loss_i: 7.17, loss_h: 0.727, loss_o: 1.045
Episode: 2688, episode steps: 9, avg. reward: 9.68, loss_i: 8.224, loss_h: 0.683, loss_o: 0.941
Episode: 2689, episode steps: 8, avg. reward: 9.68, loss_i: 8.354, loss_h: 0.836, loss_o: 1.061
Episode: 2690, episode steps: 11, avg. reward: 9.68, loss_i: 7.947, loss_h: 0.586, loss_o: 0.884
Episode: 2691, episode steps: 9, avg

Episode: 2766, episode steps: 9, avg. reward: 9.66, loss_i: 8.34, loss_h: 0.681, loss_o: 1.016
Episode: 2767, episode steps: 11, avg. reward: 9.66, loss_i: 6.943, loss_h: 50.629, loss_o: 0.936
Episode: 2768, episode steps: 10, avg. reward: 9.67, loss_i: 6.558, loss_h: 0.693, loss_o: 0.792
Episode: 2769, episode steps: 11, avg. reward: 9.67, loss_i: 7.831, loss_h: 0.729, loss_o: 0.803
Episode: 2770, episode steps: 9, avg. reward: 9.67, loss_i: 9.127, loss_h: 0.724, loss_o: 1.007
Episode: 2771, episode steps: 11, avg. reward: 9.67, loss_i: 7.551, loss_h: 0.657, loss_o: 0.972
Episode: 2772, episode steps: 9, avg. reward: 9.67, loss_i: 7.57, loss_h: 0.586, loss_o: 0.871
Episode: 2773, episode steps: 11, avg. reward: 9.67, loss_i: 7.549, loss_h: 0.715, loss_o: 0.966
Episode: 2774, episode steps: 10, avg. reward: 9.67, loss_i: 7.096, loss_h: 0.625, loss_o: 0.782
Episode: 2775, episode steps: 8, avg. reward: 9.67, loss_i: 9.496, loss_h: 0.657, loss_o: 0.887
Episode: 2776, episode steps: 10, a

Episode: 2849, episode steps: 12, avg. reward: 9.67, loss_i: 13.874, loss_h: 0.773, loss_o: 1.175
Episode: 2850, episode steps: 9, avg. reward: 9.67, loss_i: 12.67, loss_h: 0.823, loss_o: 1.089
Episode: 2851, episode steps: 10, avg. reward: 9.67, loss_i: 9.999, loss_h: 48.461, loss_o: 1.081
Episode: 2852, episode steps: 9, avg. reward: 9.67, loss_i: 12.74, loss_h: 0.88, loss_o: 0.974
Episode: 2853, episode steps: 11, avg. reward: 9.67, loss_i: 10.734, loss_h: 0.99, loss_o: 1.029
Episode: 2854, episode steps: 10, avg. reward: 9.67, loss_i: 13.006, loss_h: 0.764, loss_o: 0.996
Episode: 2855, episode steps: 8, avg. reward: 9.67, loss_i: 10.71, loss_h: 38.166, loss_o: 1.124
Episode: 2856, episode steps: 10, avg. reward: 9.67, loss_i: 12.585, loss_h: 0.769, loss_o: 1.153
Episode: 2857, episode steps: 9, avg. reward: 9.67, loss_i: 11.739, loss_h: 0.845, loss_o: 1.104
Episode: 2858, episode steps: 9, avg. reward: 9.67, loss_i: 12.875, loss_h: 0.868, loss_o: 1.021
Episode: 2859, episode steps:

Episode: 2934, episode steps: 9, avg. reward: 9.65, loss_i: 26.215, loss_h: 0.836, loss_o: 1.247
Episode: 2935, episode steps: 10, avg. reward: 9.65, loss_i: 9.565, loss_h: 54.278, loss_o: 1.32
Episode: 2936, episode steps: 9, avg. reward: 9.65, loss_i: 10.868, loss_h: 45.998, loss_o: 1.315
Episode: 2937, episode steps: 10, avg. reward: 9.65, loss_i: 14.204, loss_h: 57.023, loss_o: 1.229
Episode: 2938, episode steps: 10, avg. reward: 9.65, loss_i: 11.446, loss_h: 0.738, loss_o: 0.951
Episode: 2939, episode steps: 9, avg. reward: 9.65, loss_i: 9.335, loss_h: 48.341, loss_o: 1.041
Episode: 2940, episode steps: 11, avg. reward: 9.65, loss_i: 12.268, loss_h: 0.872, loss_o: 1.072
Episode: 2941, episode steps: 10, avg. reward: 9.65, loss_i: 12.221, loss_h: 0.743, loss_o: 1.11
Episode: 2942, episode steps: 9, avg. reward: 9.65, loss_i: 12.003, loss_h: 46.987, loss_o: 1.148
Episode: 2943, episode steps: 10, avg. reward: 9.65, loss_i: 9.689, loss_h: 55.24, loss_o: 1.237
Episode: 2944, episode s

  0%|          | 1/1000 [00:00<01:49,  9.16it/s]

Episode: 3000, episode steps: 11, avg. reward: 9.65, loss_i: 11.748, loss_h: 65.588, loss_o: 1.022
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 11.801835932514884, 'critic_2_loss': 11.69447265159, 'policy_loss': -14.411047328602184, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 65.57185761258006, 'critic_2_loss': 65.60342338308692, 'policy_loss': -182.77500820159912, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 1.0223097096789966, 'critic_2_loss': 1.022652187130668, 'policy_loss': -2.7224982435053047, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.9, 0.65, 0.74, 0.47]
o: [1.0]
UPDATES
i: [1865, 1865, 1865, 1865]
h: [1689, 1252, 1244, 709]
o: [1865]
MEMORY
i: [1930, 1930, 1930, 1930]
h: [1653, 927, 936, 367]
o: [1930]




100%|██████████| 1000/1000 [01:47<00:00,  9.33it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.66, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 3001, episode steps: 9, avg. reward: 9.0, loss_i: 12.033, loss_h: 0.689, loss_o: 1.256
Episode: 3002, episode steps: 9, avg. reward: 9.0, loss_i: 10.283, loss_h: 0.791, loss_o: 1.217
Episode: 3003, episode steps: 10, avg. reward: 9.33, loss_i: 13.355, loss_h: 0.807, loss_o: 1.006
Episode: 3004, episode steps: 10, avg. reward: 9.5, loss_i: 12.959, loss_h: 0.691, loss_o: 1.071
Episode: 3005, episode steps: 9, avg. reward: 9.4, loss_i: 11.446, loss_h: 0.812, loss_o: 0.942
Episode: 3006, episode steps: 9, avg. reward: 9.33, loss_i: 9.341, loss_h: 43.151, loss_o: 1.02
Episode: 3007, episode steps: 9, avg. reward: 9.29, loss_i: 9.243, loss_h: 53.432, loss_o: 1.154
Episode: 3008, episode steps: 9, avg. reward: 9.25, loss_i: 11.438, loss_h: 0.805, loss_o: 1.069
Episode

Episode: 3084, episode steps: 10, avg. reward: 9.61, loss_i: 11.15, loss_h: 0.75, loss_o: 1.034
Episode: 3085, episode steps: 9, avg. reward: 9.6, loss_i: 10.748, loss_h: 0.786, loss_o: 0.915
Episode: 3086, episode steps: 10, avg. reward: 9.6, loss_i: 8.217, loss_h: 0.733, loss_o: 1.237
Episode: 3087, episode steps: 12, avg. reward: 9.63, loss_i: 9.443, loss_h: 0.761, loss_o: 0.889
Episode: 3088, episode steps: 10, avg. reward: 9.64, loss_i: 9.911, loss_h: 49.338, loss_o: 1.034
Episode: 3089, episode steps: 14, avg. reward: 9.69, loss_i: 11.499, loss_h: 0.802, loss_o: 0.964
Episode: 3090, episode steps: 8, avg. reward: 9.67, loss_i: 10.167, loss_h: 0.78, loss_o: 1.161
Episode: 3091, episode steps: 10, avg. reward: 9.67, loss_i: 11.177, loss_h: 0.918, loss_o: 1.028
Episode: 3092, episode steps: 10, avg. reward: 9.67, loss_i: 12.842, loss_h: 0.877, loss_o: 1.206
Episode: 3093, episode steps: 12, avg. reward: 9.7, loss_i: 12.918, loss_h: 0.945, loss_o: 1.321
Episode: 3094, episode steps: 

Episode: 3169, episode steps: 10, avg. reward: 9.6, loss_i: 11.085, loss_h: 0.85, loss_o: 1.38
Episode: 3170, episode steps: 8, avg. reward: 9.59, loss_i: 10.489, loss_h: 46.842, loss_o: 0.983
Episode: 3171, episode steps: 10, avg. reward: 9.59, loss_i: 12.703, loss_h: 0.901, loss_o: 1.201
Episode: 3172, episode steps: 13, avg. reward: 9.61, loss_i: 9.426, loss_h: 0.879, loss_o: 1.205
Episode: 3173, episode steps: 10, avg. reward: 9.61, loss_i: 14.187, loss_h: 0.711, loss_o: 1.382
Episode: 3174, episode steps: 9, avg. reward: 9.61, loss_i: 10.575, loss_h: 0.94, loss_o: 1.072
Episode: 3175, episode steps: 9, avg. reward: 9.61, loss_i: 11.04, loss_h: 0.783, loss_o: 1.071
Episode: 3176, episode steps: 9, avg. reward: 9.6, loss_i: 9.648, loss_h: 0.738, loss_o: 1.106
Episode: 3177, episode steps: 9, avg. reward: 9.6, loss_i: 10.281, loss_h: 0.776, loss_o: 1.228
Episode: 3178, episode steps: 10, avg. reward: 9.6, loss_i: 9.809, loss_h: 0.779, loss_o: 1.084
Episode: 3179, episode steps: 8, av

Episode: 3252, episode steps: 11, avg. reward: 9.61, loss_i: 13.723, loss_h: 1.048, loss_o: 1.22
Episode: 3253, episode steps: 9, avg. reward: 9.6, loss_i: 15.809, loss_h: 0.886, loss_o: 1.217
Episode: 3254, episode steps: 10, avg. reward: 9.61, loss_i: 13.829, loss_h: 64.967, loss_o: 1.441
Episode: 3255, episode steps: 8, avg. reward: 9.6, loss_i: 13.243, loss_h: 0.997, loss_o: 1.319
Episode: 3256, episode steps: 10, avg. reward: 9.6, loss_i: 13.87, loss_h: 1.027, loss_o: 1.037
Episode: 3257, episode steps: 11, avg. reward: 9.61, loss_i: 13.493, loss_h: 1.025, loss_o: 1.171
Episode: 3258, episode steps: 10, avg. reward: 9.61, loss_i: 13.804, loss_h: 1.221, loss_o: 1.276
Episode: 3259, episode steps: 10, avg. reward: 9.61, loss_i: 17.602, loss_h: 0.945, loss_o: 1.285
Episode: 3260, episode steps: 8, avg. reward: 9.6, loss_i: 14.032, loss_h: 1.072, loss_o: 1.458
Episode: 3261, episode steps: 9, avg. reward: 9.6, loss_i: 13.188, loss_h: 62.825, loss_o: 1.315
Episode: 3262, episode steps:

Episode: 3337, episode steps: 9, avg. reward: 9.61, loss_i: 15.079, loss_h: 0.898, loss_o: 1.328
Episode: 3338, episode steps: 9, avg. reward: 9.61, loss_i: 12.856, loss_h: 55.631, loss_o: 1.362
Episode: 3339, episode steps: 10, avg. reward: 9.61, loss_i: 15.255, loss_h: 75.248, loss_o: 1.357
Episode: 3340, episode steps: 9, avg. reward: 9.61, loss_i: 14.745, loss_h: 0.962, loss_o: 1.21
Episode: 3341, episode steps: 9, avg. reward: 9.61, loss_i: 12.273, loss_h: 58.594, loss_o: 1.37
Episode: 3342, episode steps: 9, avg. reward: 9.61, loss_i: 12.463, loss_h: 0.94, loss_o: 1.549
Episode: 3343, episode steps: 10, avg. reward: 9.61, loss_i: 12.476, loss_h: 0.878, loss_o: 1.35
Episode: 3344, episode steps: 8, avg. reward: 9.6, loss_i: 14.027, loss_h: 0.945, loss_o: 1.327
Episode: 3345, episode steps: 10, avg. reward: 9.6, loss_i: 11.587, loss_h: 0.899, loss_o: 1.532
Episode: 3346, episode steps: 10, avg. reward: 9.6, loss_i: 15.55, loss_h: 0.967, loss_o: 1.422
Episode: 3347, episode steps: 9

Episode: 3422, episode steps: 10, avg. reward: 9.59, loss_i: 11.187, loss_h: 0.988, loss_o: 1.165
Episode: 3423, episode steps: 10, avg. reward: 9.6, loss_i: 14.72, loss_h: 0.83, loss_o: 1.43
Episode: 3424, episode steps: 8, avg. reward: 9.59, loss_i: 12.742, loss_h: 0.667, loss_o: 1.398
Episode: 3425, episode steps: 10, avg. reward: 9.59, loss_i: 10.233, loss_h: 1.031, loss_o: 1.376
Episode: 3426, episode steps: 11, avg. reward: 9.6, loss_i: 12.961, loss_h: 0.939, loss_o: 1.381
Episode: 3427, episode steps: 10, avg. reward: 9.6, loss_i: 10.371, loss_h: 59.774, loss_o: 1.395
Episode: 3428, episode steps: 9, avg. reward: 9.6, loss_i: 11.693, loss_h: 50.654, loss_o: 1.313
Episode: 3429, episode steps: 10, avg. reward: 9.6, loss_i: 9.535, loss_h: 0.868, loss_o: 1.17
Episode: 3430, episode steps: 9, avg. reward: 9.6, loss_i: 11.342, loss_h: 0.893, loss_o: 1.183
Episode: 3431, episode steps: 9, avg. reward: 9.59, loss_i: 17.307, loss_h: 0.832, loss_o: 1.245
Episode: 3432, episode steps: 9, 

Episode: 3507, episode steps: 10, avg. reward: 9.61, loss_i: 12.346, loss_h: 1.016, loss_o: 1.365
Episode: 3508, episode steps: 11, avg. reward: 9.61, loss_i: 10.495, loss_h: 79.353, loss_o: 1.373
Episode: 3509, episode steps: 10, avg. reward: 9.61, loss_i: 11.309, loss_h: 0.87, loss_o: 1.561
Episode: 3510, episode steps: 10, avg. reward: 9.61, loss_i: 11.684, loss_h: 0.876, loss_o: 1.31
Episode: 3511, episode steps: 9, avg. reward: 9.61, loss_i: 11.207, loss_h: 0.893, loss_o: 1.317
Episode: 3512, episode steps: 9, avg. reward: 9.61, loss_i: 11.375, loss_h: 44.898, loss_o: 1.114
Episode: 3513, episode steps: 10, avg. reward: 9.61, loss_i: 11.047, loss_h: 0.937, loss_o: 1.448
Episode: 3514, episode steps: 10, avg. reward: 9.61, loss_i: 11.148, loss_h: 60.833, loss_o: 1.163
Episode: 3515, episode steps: 9, avg. reward: 9.61, loss_i: 10.73, loss_h: 0.983, loss_o: 1.303
Episode: 3516, episode steps: 9, avg. reward: 9.61, loss_i: 10.117, loss_h: 1.006, loss_o: 1.415
Episode: 3517, episode s

Episode: 3592, episode steps: 10, avg. reward: 9.61, loss_i: 11.229, loss_h: 1.055, loss_o: 1.105
Episode: 3593, episode steps: 10, avg. reward: 9.61, loss_i: 11.762, loss_h: 1.072, loss_o: 1.448
Episode: 3594, episode steps: 9, avg. reward: 9.61, loss_i: 11.423, loss_h: 0.944, loss_o: 1.443
Episode: 3595, episode steps: 10, avg. reward: 9.61, loss_i: 11.145, loss_h: 1.09, loss_o: 1.478
Episode: 3596, episode steps: 10, avg. reward: 9.61, loss_i: 10.024, loss_h: 0.981, loss_o: 1.589
Episode: 3597, episode steps: 10, avg. reward: 9.61, loss_i: 11.731, loss_h: 1.04, loss_o: 1.236
Episode: 3598, episode steps: 12, avg. reward: 9.62, loss_i: 13.057, loss_h: 0.968, loss_o: 1.329
Episode: 3599, episode steps: 10, avg. reward: 9.62, loss_i: 10.784, loss_h: 1.016, loss_o: 1.264
Episode: 3600, episode steps: 8, avg. reward: 9.62, loss_i: 11.5, loss_h: 0.863, loss_o: 1.506
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True]
o: [ True]


Episode: 3675, episode steps: 9, avg. reward: 9.61, loss_i: 12.155, loss_h: 1.162, loss_o: 1.531
Episode: 3676, episode steps: 9, avg. reward: 9.61, loss_i: 12.707, loss_h: 1.125, loss_o: 1.486
Episode: 3677, episode steps: 12, avg. reward: 9.61, loss_i: 11.467, loss_h: 90.503, loss_o: 1.49
Episode: 3678, episode steps: 10, avg. reward: 9.62, loss_i: 13.354, loss_h: 0.925, loss_o: 1.552
Episode: 3679, episode steps: 10, avg. reward: 9.62, loss_i: 13.476, loss_h: 1.076, loss_o: 1.57
Episode: 3680, episode steps: 11, avg. reward: 9.62, loss_i: 17.802, loss_h: 1.007, loss_o: 1.255
Episode: 3681, episode steps: 10, avg. reward: 9.62, loss_i: 16.633, loss_h: 1.076, loss_o: 1.079
Episode: 3682, episode steps: 11, avg. reward: 9.62, loss_i: 15.191, loss_h: 71.005, loss_o: 1.628
Episode: 3683, episode steps: 8, avg. reward: 9.62, loss_i: 14.995, loss_h: 1.051, loss_o: 1.157
Episode: 3684, episode steps: 10, avg. reward: 9.62, loss_i: 16.467, loss_h: 70.685, loss_o: 1.37
Episode: 3685, episode 

Episode: 3760, episode steps: 10, avg. reward: 9.62, loss_i: 13.943, loss_h: 0.957, loss_o: 1.341
Episode: 3761, episode steps: 10, avg. reward: 9.62, loss_i: 17.211, loss_h: 72.051, loss_o: 1.406
Episode: 3762, episode steps: 10, avg. reward: 9.62, loss_i: 13.24, loss_h: 86.313, loss_o: 1.23
Episode: 3763, episode steps: 9, avg. reward: 9.62, loss_i: 11.973, loss_h: 66.814, loss_o: 1.437
Episode: 3764, episode steps: 8, avg. reward: 9.62, loss_i: 14.546, loss_h: 1.203, loss_o: 1.243
Episode: 3765, episode steps: 9, avg. reward: 9.62, loss_i: 16.154, loss_h: 66.735, loss_o: 1.546
Episode: 3766, episode steps: 10, avg. reward: 9.62, loss_i: 15.657, loss_h: 1.038, loss_o: 1.343
Episode: 3767, episode steps: 11, avg. reward: 9.62, loss_i: 11.99, loss_h: 1.017, loss_o: 1.515
Episode: 3768, episode steps: 10, avg. reward: 9.62, loss_i: 18.512, loss_h: 1.318, loss_o: 1.328
Episode: 3769, episode steps: 10, avg. reward: 9.62, loss_i: 14.685, loss_h: 71.682, loss_o: 1.412
Episode: 3770, episod

Episode: 3844, episode steps: 10, avg. reward: 9.63, loss_i: 16.238, loss_h: 1.035, loss_o: 1.408
Episode: 3845, episode steps: 10, avg. reward: 9.63, loss_i: 12.781, loss_h: 1.175, loss_o: 1.521
Episode: 3846, episode steps: 9, avg. reward: 9.63, loss_i: 17.671, loss_h: 56.958, loss_o: 1.547
Episode: 3847, episode steps: 9, avg. reward: 9.62, loss_i: 11.603, loss_h: 1.065, loss_o: 1.4
Episode: 3848, episode steps: 9, avg. reward: 9.62, loss_i: 17.557, loss_h: 0.926, loss_o: 1.651
Episode: 3849, episode steps: 10, avg. reward: 9.62, loss_i: 15.738, loss_h: 0.972, loss_o: 1.419
Episode: 3850, episode steps: 9, avg. reward: 9.62, loss_i: 10.896, loss_h: 62.099, loss_o: 1.648
Episode: 3851, episode steps: 10, avg. reward: 9.62, loss_i: 16.463, loss_h: 1.206, loss_o: 1.436
Episode: 3852, episode steps: 9, avg. reward: 9.62, loss_i: 13.33, loss_h: 0.921, loss_o: 1.514
Episode: 3853, episode steps: 10, avg. reward: 9.62, loss_i: 14.452, loss_h: 0.93, loss_o: 1.186
Episode: 3854, episode step

Episode: 3929, episode steps: 9, avg. reward: 9.63, loss_i: 16.946, loss_h: 1.152, loss_o: 1.397
Episode: 3930, episode steps: 9, avg. reward: 9.63, loss_i: 20.581, loss_h: 1.087, loss_o: 1.231
Episode: 3931, episode steps: 9, avg. reward: 9.63, loss_i: 12.713, loss_h: 1.002, loss_o: 1.374
Episode: 3932, episode steps: 11, avg. reward: 9.63, loss_i: 13.963, loss_h: 86.202, loss_o: 1.376
Episode: 3933, episode steps: 8, avg. reward: 9.63, loss_i: 10.96, loss_h: 1.019, loss_o: 1.281
Episode: 3934, episode steps: 9, avg. reward: 9.63, loss_i: 14.794, loss_h: 1.009, loss_o: 1.472
Episode: 3935, episode steps: 9, avg. reward: 9.63, loss_i: 16.82, loss_h: 1.117, loss_o: 1.244
Episode: 3936, episode steps: 13, avg. reward: 9.63, loss_i: 16.643, loss_h: 1.016, loss_o: 1.527
Episode: 3937, episode steps: 10, avg. reward: 9.63, loss_i: 11.201, loss_h: 1.29, loss_o: 1.676
Episode: 3938, episode steps: 8, avg. reward: 9.63, loss_i: 13.831, loss_h: 1.139, loss_o: 1.43
Episode: 3939, episode steps: 

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 4000, episode steps: 10, avg. reward: 9.62, loss_i: 11.165, loss_h: 0.947, loss_o: 1.165
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 11.335343827307224, 'critic_2_loss': 10.995128853619098, 'policy_loss': -16.289196360111237, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 0.9473054296104237, 'critic_2_loss': 0.9470893307588995, 'policy_loss': -2.234866939485073, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 1.1652310967445374, 'critic_2_loss': 1.1648091912269591, 'policy_loss': -3.034594750404358, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [

100%|██████████| 1000/1000 [01:46<00:00,  9.37it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.63, Max. Reward: 15.0
--------------------------------------------------------------------------------
Episode: 4001, episode steps: 11, avg. reward: 11.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4002, episode steps: 9, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4003, episode steps: 9, avg. reward: 9.67, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4004, episode steps: 11, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4005, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4006, episode steps: 9, avg. reward: 9.83, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 4007, episode steps: 10, avg. reward: 9.86, loss_i: 478.972, loss_h: nan, loss_o: 11.879
Episode: 4008, episode steps: 10, avg. reward: 9.88, loss_i: 14.353, loss_h: 9.944, loss_o: 1.533
Episode: 4009, episode steps: 9, avg. rew

Episode: 4084, episode steps: 9, avg. reward: 9.67, loss_i: 14.794, loss_h: 68.964, loss_o: 1.631
Episode: 4085, episode steps: 9, avg. reward: 9.66, loss_i: 12.779, loss_h: 1.107, loss_o: 1.901
Episode: 4086, episode steps: 10, avg. reward: 9.66, loss_i: 13.368, loss_h: 1.189, loss_o: 1.663
Episode: 4087, episode steps: 8, avg. reward: 9.64, loss_i: 19.072, loss_h: 0.948, loss_o: 1.582
Episode: 4088, episode steps: 9, avg. reward: 9.64, loss_i: 12.692, loss_h: 1.054, loss_o: 1.699
Episode: 4089, episode steps: 9, avg. reward: 9.63, loss_i: 11.605, loss_h: 0.938, loss_o: 1.478
Episode: 4090, episode steps: 11, avg. reward: 9.64, loss_i: 11.66, loss_h: 77.626, loss_o: 1.415
Episode: 4091, episode steps: 9, avg. reward: 9.64, loss_i: 13.067, loss_h: 1.155, loss_o: 1.654
Episode: 4092, episode steps: 8, avg. reward: 9.62, loss_i: 12.792, loss_h: 48.683, loss_o: 1.572
Episode: 4093, episode steps: 10, avg. reward: 9.62, loss_i: 11.099, loss_h: 72.505, loss_o: 1.681
Episode: 4094, episode s

Episode: 4169, episode steps: 9, avg. reward: 9.66, loss_i: 11.811, loss_h: 1.098, loss_o: 1.636
Episode: 4170, episode steps: 10, avg. reward: 9.66, loss_i: 12.319, loss_h: 94.241, loss_o: 1.719
Episode: 4171, episode steps: 8, avg. reward: 9.65, loss_i: 14.174, loss_h: 1.052, loss_o: 1.801
Episode: 4172, episode steps: 9, avg. reward: 9.65, loss_i: 13.281, loss_h: 64.331, loss_o: 1.721
Episode: 4173, episode steps: 9, avg. reward: 9.64, loss_i: 15.008, loss_h: 1.192, loss_o: 1.743
Episode: 4174, episode steps: 10, avg. reward: 9.64, loss_i: 12.54, loss_h: 83.942, loss_o: 1.676
Episode: 4175, episode steps: 10, avg. reward: 9.65, loss_i: 13.349, loss_h: 1.081, loss_o: 1.724
Episode: 4176, episode steps: 11, avg. reward: 9.65, loss_i: 12.937, loss_h: 85.354, loss_o: 1.669
Episode: 4177, episode steps: 10, avg. reward: 9.66, loss_i: 10.543, loss_h: 1.121, loss_o: 1.666
Episode: 4178, episode steps: 9, avg. reward: 9.65, loss_i: 14.126, loss_h: 68.707, loss_o: 1.587
Episode: 4179, episod

Episode: 4254, episode steps: 10, avg. reward: 9.59, loss_i: 12.562, loss_h: 0.913, loss_o: 1.764
Episode: 4255, episode steps: 8, avg. reward: 9.58, loss_i: 12.034, loss_h: 1.149, loss_o: 1.556
Episode: 4256, episode steps: 10, avg. reward: 9.58, loss_i: 10.788, loss_h: 1.023, loss_o: 1.582
Episode: 4257, episode steps: 9, avg. reward: 9.58, loss_i: 12.839, loss_h: 75.893, loss_o: 1.6
Episode: 4258, episode steps: 8, avg. reward: 9.57, loss_i: 11.662, loss_h: 1.302, loss_o: 1.622
Episode: 4259, episode steps: 8, avg. reward: 9.57, loss_i: 13.577, loss_h: 1.277, loss_o: 1.549
Episode: 4260, episode steps: 11, avg. reward: 9.57, loss_i: 11.576, loss_h: 77.352, loss_o: 1.637
Episode: 4261, episode steps: 10, avg. reward: 9.57, loss_i: 12.348, loss_h: 85.23, loss_o: 1.574
Episode: 4262, episode steps: 10, avg. reward: 9.58, loss_i: 16.161, loss_h: 1.116, loss_o: 1.395
Episode: 4263, episode steps: 9, avg. reward: 9.57, loss_i: 13.517, loss_h: 1.179, loss_o: 2.027
Episode: 4264, episode st

Episode: 4339, episode steps: 9, avg. reward: 9.53, loss_i: 14.001, loss_h: 1.322, loss_o: 1.828
Episode: 4340, episode steps: 10, avg. reward: 9.53, loss_i: 11.65, loss_h: 1.116, loss_o: 1.769
Episode: 4341, episode steps: 9, avg. reward: 9.52, loss_i: 13.794, loss_h: 1.052, loss_o: 1.613
Episode: 4342, episode steps: 8, avg. reward: 9.52, loss_i: 12.143, loss_h: 1.128, loss_o: 1.507
Episode: 4343, episode steps: 10, avg. reward: 9.52, loss_i: 16.225, loss_h: 1.263, loss_o: 1.797
Episode: 4344, episode steps: 9, avg. reward: 9.52, loss_i: 14.711, loss_h: 73.989, loss_o: 1.958
Episode: 4345, episode steps: 12, avg. reward: 9.53, loss_i: 14.823, loss_h: 1.236, loss_o: 1.553
Episode: 4346, episode steps: 10, avg. reward: 9.53, loss_i: 13.657, loss_h: 1.114, loss_o: 1.883
Episode: 4347, episode steps: 10, avg. reward: 9.53, loss_i: 12.457, loss_h: 1.266, loss_o: 1.776
Episode: 4348, episode steps: 10, avg. reward: 9.53, loss_i: 13.192, loss_h: 86.657, loss_o: 2.056
Episode: 4349, episode 

Episode: 4422, episode steps: 9, avg. reward: 9.56, loss_i: 10.738, loss_h: 1.198, loss_o: 1.545
Episode: 4423, episode steps: 11, avg. reward: 9.56, loss_i: 12.756, loss_h: 81.305, loss_o: 1.762
Episode: 4424, episode steps: 10, avg. reward: 9.56, loss_i: 19.267, loss_h: 1.252, loss_o: 1.619
Episode: 4425, episode steps: 10, avg. reward: 9.56, loss_i: 15.708, loss_h: 1.224, loss_o: 1.662
Episode: 4426, episode steps: 9, avg. reward: 9.56, loss_i: 14.207, loss_h: 1.032, loss_o: 1.579
Episode: 4427, episode steps: 9, avg. reward: 9.56, loss_i: 12.346, loss_h: 61.826, loss_o: 1.808
Episode: 4428, episode steps: 10, avg. reward: 9.56, loss_i: 14.001, loss_h: 1.425, loss_o: 1.805
Episode: 4429, episode steps: 10, avg. reward: 9.56, loss_i: 17.518, loss_h: 1.065, loss_o: 1.358
Episode: 4430, episode steps: 8, avg. reward: 9.56, loss_i: 12.507, loss_h: 1.161, loss_o: 1.806
Episode: 4431, episode steps: 10, avg. reward: 9.56, loss_i: 11.409, loss_h: 1.191, loss_o: 1.852
Episode: 4432, episode

Episode: 4506, episode steps: 10, avg. reward: 9.56, loss_i: 11.242, loss_h: 1.4, loss_o: 1.728
Episode: 4507, episode steps: 10, avg. reward: 9.56, loss_i: 13.736, loss_h: 1.436, loss_o: 1.955
Episode: 4508, episode steps: 10, avg. reward: 9.56, loss_i: 22.554, loss_h: 1.278, loss_o: 1.821
Episode: 4509, episode steps: 9, avg. reward: 9.56, loss_i: 12.999, loss_h: 1.318, loss_o: 1.904
Episode: 4510, episode steps: 9, avg. reward: 9.56, loss_i: 11.6, loss_h: 1.34, loss_o: 1.769
Episode: 4511, episode steps: 9, avg. reward: 9.56, loss_i: 16.268, loss_h: 1.23, loss_o: 1.701
Episode: 4512, episode steps: 9, avg. reward: 9.56, loss_i: 12.488, loss_h: 81.268, loss_o: 2.142
Episode: 4513, episode steps: 9, avg. reward: 9.56, loss_i: 14.103, loss_h: 1.369, loss_o: 1.77
Episode: 4514, episode steps: 11, avg. reward: 9.56, loss_i: 12.65, loss_h: 1.396, loss_o: 1.383
Episode: 4515, episode steps: 10, avg. reward: 9.56, loss_i: 10.176, loss_h: 1.361, loss_o: 1.836
Episode: 4516, episode steps: 10

Episode: 4591, episode steps: 11, avg. reward: 9.56, loss_i: 15.46, loss_h: 1.314, loss_o: 2.063
Episode: 4592, episode steps: 8, avg. reward: 9.56, loss_i: 10.993, loss_h: 1.223, loss_o: 1.667
Episode: 4593, episode steps: 10, avg. reward: 9.56, loss_i: 13.814, loss_h: 1.232, loss_o: 1.914
Episode: 4594, episode steps: 9, avg. reward: 9.56, loss_i: 17.227, loss_h: 1.152, loss_o: 1.892
Episode: 4595, episode steps: 9, avg. reward: 9.56, loss_i: 15.465, loss_h: 1.195, loss_o: 1.854
Episode: 4596, episode steps: 10, avg. reward: 9.56, loss_i: 17.178, loss_h: 1.243, loss_o: 2.061
Episode: 4597, episode steps: 8, avg. reward: 9.56, loss_i: 20.204, loss_h: 70.646, loss_o: 1.987
Episode: 4598, episode steps: 12, avg. reward: 9.56, loss_i: 14.421, loss_h: 108.361, loss_o: 1.731
Episode: 4599, episode steps: 8, avg. reward: 9.56, loss_i: 13.338, loss_h: 1.436, loss_o: 1.599
Episode: 4600, episode steps: 10, avg. reward: 9.56, loss_i: 14.286, loss_h: 1.409, loss_o: 1.686
Episode: 4601, episode 

Episode: 4676, episode steps: 9, avg. reward: 9.57, loss_i: 12.594, loss_h: 1.444, loss_o: 1.758
Episode: 4677, episode steps: 9, avg. reward: 9.57, loss_i: 12.093, loss_h: 77.378, loss_o: 1.65
Episode: 4678, episode steps: 10, avg. reward: 9.57, loss_i: 15.31, loss_h: 1.159, loss_o: 1.711
Episode: 4679, episode steps: 10, avg. reward: 9.57, loss_i: 17.145, loss_h: 88.841, loss_o: 1.659
Episode: 4680, episode steps: 9, avg. reward: 9.57, loss_i: 13.691, loss_h: 1.177, loss_o: 1.902
Episode: 4681, episode steps: 11, avg. reward: 9.57, loss_i: 12.032, loss_h: 1.28, loss_o: 1.58
Episode: 4682, episode steps: 11, avg. reward: 9.57, loss_i: 11.613, loss_h: 1.402, loss_o: 1.833
Episode: 4683, episode steps: 10, avg. reward: 9.57, loss_i: 19.424, loss_h: 1.271, loss_o: 1.851
Episode: 4684, episode steps: 9, avg. reward: 9.57, loss_i: 18.709, loss_h: 1.335, loss_o: 1.581
Episode: 4685, episode steps: 9, avg. reward: 9.57, loss_i: 12.982, loss_h: 1.343, loss_o: 1.784
Episode: 4686, episode step

Episode: 4761, episode steps: 10, avg. reward: 9.57, loss_i: 15.778, loss_h: 85.313, loss_o: 1.894
Episode: 4762, episode steps: 11, avg. reward: 9.57, loss_i: 14.615, loss_h: 1.38, loss_o: 1.826
Episode: 4763, episode steps: 13, avg. reward: 9.58, loss_i: 12.864, loss_h: 112.065, loss_o: 2.087
Episode: 4764, episode steps: 9, avg. reward: 9.58, loss_i: 13.709, loss_h: 1.311, loss_o: 1.604
Episode: 4765, episode steps: 10, avg. reward: 9.58, loss_i: 13.24, loss_h: 1.474, loss_o: 1.85
Episode: 4766, episode steps: 10, avg. reward: 9.58, loss_i: 14.124, loss_h: 1.251, loss_o: 1.619
Episode: 4767, episode steps: 8, avg. reward: 9.57, loss_i: 13.896, loss_h: 1.056, loss_o: 1.609
Episode: 4768, episode steps: 10, avg. reward: 9.58, loss_i: 16.226, loss_h: 82.167, loss_o: 1.571
Episode: 4769, episode steps: 8, avg. reward: 9.57, loss_i: 12.959, loss_h: 1.238, loss_o: 1.809
Episode: 4770, episode steps: 10, avg. reward: 9.57, loss_i: 11.195, loss_h: 1.223, loss_o: 1.824
Episode: 4771, episode

Episode: 4844, episode steps: 10, avg. reward: 9.56, loss_i: 11.333, loss_h: 95.519, loss_o: 1.755
Episode: 4845, episode steps: 10, avg. reward: 9.56, loss_i: 8.846, loss_h: 1.448, loss_o: 1.824
Episode: 4846, episode steps: 9, avg. reward: 9.56, loss_i: 13.278, loss_h: 1.37, loss_o: 2.185
Episode: 4847, episode steps: 10, avg. reward: 9.56, loss_i: 10.614, loss_h: 1.242, loss_o: 1.747
Episode: 4848, episode steps: 10, avg. reward: 9.56, loss_i: 13.178, loss_h: 1.442, loss_o: 2.328
Episode: 4849, episode steps: 9, avg. reward: 9.56, loss_i: 14.461, loss_h: 1.099, loss_o: 1.792
Episode: 4850, episode steps: 10, avg. reward: 9.56, loss_i: 13.889, loss_h: 1.212, loss_o: 1.913
Episode: 4851, episode steps: 10, avg. reward: 9.57, loss_i: 10.181, loss_h: 1.464, loss_o: 1.627
Episode: 4852, episode steps: 10, avg. reward: 9.57, loss_i: 13.625, loss_h: 1.624, loss_o: 2.034
Episode: 4853, episode steps: 12, avg. reward: 9.57, loss_i: 12.432, loss_h: 1.522, loss_o: 1.753
Episode: 4854, episode 

Episode: 4929, episode steps: 9, avg. reward: 9.57, loss_i: 15.754, loss_h: 1.405, loss_o: 2.048
Episode: 4930, episode steps: 11, avg. reward: 9.57, loss_i: 12.305, loss_h: 1.483, loss_o: 2.056
Episode: 4931, episode steps: 10, avg. reward: 9.57, loss_i: 10.615, loss_h: 1.404, loss_o: 1.602
Episode: 4932, episode steps: 10, avg. reward: 9.57, loss_i: 11.367, loss_h: 1.506, loss_o: 1.87
Episode: 4933, episode steps: 9, avg. reward: 9.57, loss_i: 10.138, loss_h: 1.437, loss_o: 2.155
Episode: 4934, episode steps: 9, avg. reward: 9.57, loss_i: 12.294, loss_h: 1.616, loss_o: 2.113
Episode: 4935, episode steps: 9, avg. reward: 9.57, loss_i: 11.411, loss_h: 1.414, loss_o: 1.829
Episode: 4936, episode steps: 10, avg. reward: 9.57, loss_i: 12.073, loss_h: 1.298, loss_o: 2.202
Episode: 4937, episode steps: 9, avg. reward: 9.57, loss_i: 12.245, loss_h: 1.324, loss_o: 2.269
Episode: 4938, episode steps: 9, avg. reward: 9.57, loss_i: 16.747, loss_h: 1.402, loss_o: 2.03
Episode: 4939, episode steps

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 5000, episode steps: 10, avg. reward: 9.56, loss_i: 11.38, loss_h: 1.577, loss_o: 1.836
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 11.392743860930205, 'critic_2_loss': 11.36757833212614, 'policy_loss': -17.42464760541916, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'critic_1_loss': 1.5766106984194588, 'critic_2_loss': 1.5771943067803103, 'policy_loss': -2.9755409955978394, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
o: {'critic_1_loss': 1.8363738298416137, 'critic_2_loss': 1.8364405155181884, 'policy_loss': -3.4342472076416017, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0.93, 0.7, 0.68, 0.42]
o: [1.0]
UPDATES
i: [1842, 1842, 1842, 1842]
h: [1785, 1226, 1122, 530]
o: [1842]
MEMORY
i: [1907, 1907, 1907, 1907]
h: [1817, 909, 769, 251]
o: [1907]




100%|██████████| 1000/1000 [01:45<00:00,  9.48it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.62, Max. Reward: 14.0
--------------------------------------------------------------------------------
Episode: 5001, episode steps: 10, avg. reward: 10.0, loss_i: 10.411, loss_h: 1.449, loss_o: 2.059
Episode: 5002, episode steps: 9, avg. reward: 9.5, loss_i: 14.325, loss_h: 1.283, loss_o: 1.7
Episode: 5003, episode steps: 9, avg. reward: 9.33, loss_i: 12.494, loss_h: 1.561, loss_o: 1.827
Episode: 5004, episode steps: 10, avg. reward: 9.5, loss_i: 11.538, loss_h: 1.265, loss_o: 2.109
Episode: 5005, episode steps: 9, avg. reward: 9.4, loss_i: 12.122, loss_h: 1.355, loss_o: 2.125
Episode: 5006, episode steps: 11, avg. reward: 9.67, loss_i: 10.635, loss_h: 1.312, loss_o: 1.683
Episode: 5007, episode steps: 9, avg. reward: 9.57, loss_i: 11.922, loss_h: 1.672, loss_o: 1.941
Episode: 5008, episode steps: 9, avg. reward: 9.5, loss_i: 11.708, loss_h: 1.203, loss_o: 1.612
Episode

Episode: 5084, episode steps: 10, avg. reward: 9.39, loss_i: 13.301, loss_h: 1.483, loss_o: 1.988
Episode: 5085, episode steps: 9, avg. reward: 9.39, loss_i: 13.986, loss_h: 1.466, loss_o: 2.041
Episode: 5086, episode steps: 14, avg. reward: 9.44, loss_i: 12.083, loss_h: 1.229, loss_o: 2.095
Episode: 5087, episode steps: 9, avg. reward: 9.44, loss_i: 11.338, loss_h: 1.573, loss_o: 2.061
Episode: 5088, episode steps: 11, avg. reward: 9.45, loss_i: 9.862, loss_h: 1.319, loss_o: 1.615
Episode: 5089, episode steps: 10, avg. reward: 9.46, loss_i: 13.273, loss_h: 1.283, loss_o: 1.675
Episode: 5090, episode steps: 9, avg. reward: 9.46, loss_i: 15.276, loss_h: 1.495, loss_o: 1.928
Episode: 5091, episode steps: 10, avg. reward: 9.46, loss_i: 11.016, loss_h: 1.397, loss_o: 1.765
Episode: 5092, episode steps: 9, avg. reward: 9.46, loss_i: 13.028, loss_h: 1.472, loss_o: 2.317
Episode: 5093, episode steps: 8, avg. reward: 9.44, loss_i: 13.8, loss_h: 1.432, loss_o: 1.757
Episode: 5094, episode steps

Episode: 5169, episode steps: 8, avg. reward: 9.44, loss_i: 9.501, loss_h: 1.389, loss_o: 1.455
Episode: 5170, episode steps: 9, avg. reward: 9.44, loss_i: 11.843, loss_h: 1.289, loss_o: 1.957
Episode: 5171, episode steps: 10, avg. reward: 9.44, loss_i: 11.074, loss_h: 1.31, loss_o: 2.171
Episode: 5172, episode steps: 10, avg. reward: 9.44, loss_i: 12.939, loss_h: 1.505, loss_o: 1.93
Episode: 5173, episode steps: 10, avg. reward: 9.45, loss_i: 13.743, loss_h: 1.321, loss_o: 2.173
Episode: 5174, episode steps: 10, avg. reward: 9.45, loss_i: 11.275, loss_h: 1.572, loss_o: 1.846
Episode: 5175, episode steps: 10, avg. reward: 9.45, loss_i: 13.041, loss_h: 1.707, loss_o: 2.254
Episode: 5176, episode steps: 9, avg. reward: 9.45, loss_i: 14.016, loss_h: 1.483, loss_o: 2.109
Episode: 5177, episode steps: 11, avg. reward: 9.46, loss_i: 13.499, loss_h: 1.471, loss_o: 2.01
Episode: 5178, episode steps: 13, avg. reward: 9.48, loss_i: 11.557, loss_h: 1.26, loss_o: 1.683
Episode: 5179, episode steps

Episode: 5252, episode steps: 9, avg. reward: 9.53, loss_i: 10.597, loss_h: 1.73, loss_o: 2.347
Episode: 5253, episode steps: 9, avg. reward: 9.53, loss_i: 12.986, loss_h: 1.386, loss_o: 2.187
Episode: 5254, episode steps: 10, avg. reward: 9.53, loss_i: 10.234, loss_h: 1.503, loss_o: 1.998
Episode: 5255, episode steps: 10, avg. reward: 9.53, loss_i: 11.451, loss_h: 1.48, loss_o: 2.022
Episode: 5256, episode steps: 11, avg. reward: 9.54, loss_i: 20.802, loss_h: 1.689, loss_o: 2.125
Episode: 5257, episode steps: 9, avg. reward: 9.54, loss_i: 16.236, loss_h: 1.756, loss_o: 1.984
Episode: 5258, episode steps: 9, avg. reward: 9.53, loss_i: 13.14, loss_h: 1.487, loss_o: 2.41
Episode: 5259, episode steps: 11, avg. reward: 9.54, loss_i: 15.662, loss_h: 1.628, loss_o: 2.073
Episode: 5260, episode steps: 8, avg. reward: 9.53, loss_i: 10.099, loss_h: 1.683, loss_o: 2.064
Episode: 5261, episode steps: 10, avg. reward: 9.54, loss_i: 13.761, loss_h: 1.416, loss_o: 1.913
Episode: 5262, episode steps:

Episode: 5337, episode steps: 10, avg. reward: 9.51, loss_i: 18.177, loss_h: 1.528, loss_o: 2.077
Episode: 5338, episode steps: 8, avg. reward: 9.5, loss_i: 12.526, loss_h: 1.417, loss_o: 1.618
Episode: 5339, episode steps: 9, avg. reward: 9.5, loss_i: 13.717, loss_h: 85.742, loss_o: 2.107
Episode: 5340, episode steps: 10, avg. reward: 9.5, loss_i: 11.527, loss_h: 92.031, loss_o: 1.809
Episode: 5341, episode steps: 11, avg. reward: 9.51, loss_i: 13.438, loss_h: 1.361, loss_o: 2.31
Episode: 5342, episode steps: 10, avg. reward: 9.51, loss_i: 11.56, loss_h: 1.367, loss_o: 2.157
Episode: 5343, episode steps: 9, avg. reward: 9.51, loss_i: 10.533, loss_h: 1.633, loss_o: 1.617
Episode: 5344, episode steps: 9, avg. reward: 9.51, loss_i: 12.84, loss_h: 88.692, loss_o: 1.977
Episode: 5345, episode steps: 9, avg. reward: 9.5, loss_i: 13.664, loss_h: 94.248, loss_o: 2.031
Episode: 5346, episode steps: 10, avg. reward: 9.51, loss_i: 15.295, loss_h: 1.382, loss_o: 1.995
Episode: 5347, episode steps

Episode: 5422, episode steps: 9, avg. reward: 9.51, loss_i: 15.984, loss_h: 1.347, loss_o: 2.184
Episode: 5423, episode steps: 10, avg. reward: 9.51, loss_i: 13.83, loss_h: 1.555, loss_o: 2.095
Episode: 5424, episode steps: 10, avg. reward: 9.51, loss_i: 13.176, loss_h: 1.397, loss_o: 2.213
Episode: 5425, episode steps: 9, avg. reward: 9.51, loss_i: 13.805, loss_h: 1.453, loss_o: 1.862
Episode: 5426, episode steps: 10, avg. reward: 9.51, loss_i: 12.162, loss_h: 1.61, loss_o: 2.209
Episode: 5427, episode steps: 12, avg. reward: 9.52, loss_i: 10.931, loss_h: 107.533, loss_o: 2.214
Episode: 5428, episode steps: 10, avg. reward: 9.52, loss_i: 15.721, loss_h: 1.572, loss_o: 2.06
Episode: 5429, episode steps: 10, avg. reward: 9.52, loss_i: 14.181, loss_h: 1.198, loss_o: 2.049
Episode: 5430, episode steps: 10, avg. reward: 9.52, loss_i: 16.513, loss_h: 113.709, loss_o: 2.278
Episode: 5431, episode steps: 10, avg. reward: 9.52, loss_i: 9.536, loss_h: 1.351, loss_o: 2.206
Episode: 5432, episode

Episode: 5506, episode steps: 9, avg. reward: 9.53, loss_i: 12.642, loss_h: 1.309, loss_o: 2.228
Episode: 5507, episode steps: 10, avg. reward: 9.53, loss_i: 14.867, loss_h: 98.469, loss_o: 1.982
Episode: 5508, episode steps: 10, avg. reward: 9.53, loss_i: 11.935, loss_h: 1.356, loss_o: 2.376
Episode: 5509, episode steps: 11, avg. reward: 9.54, loss_i: 14.64, loss_h: 1.505, loss_o: 1.935
Episode: 5510, episode steps: 9, avg. reward: 9.54, loss_i: 12.72, loss_h: 1.186, loss_o: 1.875
Episode: 5511, episode steps: 8, avg. reward: 9.53, loss_i: 12.727, loss_h: 82.174, loss_o: 1.861
Episode: 5512, episode steps: 9, avg. reward: 9.53, loss_i: 12.663, loss_h: 81.258, loss_o: 2.348
Episode: 5513, episode steps: 9, avg. reward: 9.53, loss_i: 12.976, loss_h: 75.09, loss_o: 2.362
Episode: 5514, episode steps: 9, avg. reward: 9.53, loss_i: 13.251, loss_h: 1.349, loss_o: 1.84
Episode: 5515, episode steps: 9, avg. reward: 9.53, loss_i: 20.001, loss_h: 1.564, loss_o: 2.149
Episode: 5516, episode step

Episode: 5590, episode steps: 11, avg. reward: 9.53, loss_i: 12.799, loss_h: 1.619, loss_o: 1.668
Episode: 5591, episode steps: 10, avg. reward: 9.53, loss_i: 13.062, loss_h: 1.296, loss_o: 2.167
Episode: 5592, episode steps: 11, avg. reward: 9.53, loss_i: 11.927, loss_h: 93.116, loss_o: 2.031
Episode: 5593, episode steps: 10, avg. reward: 9.53, loss_i: 16.721, loss_h: 1.234, loss_o: 2.077
Episode: 5594, episode steps: 9, avg. reward: 9.53, loss_i: 10.151, loss_h: 1.039, loss_o: 2.251
Episode: 5595, episode steps: 9, avg. reward: 9.53, loss_i: 14.438, loss_h: 89.394, loss_o: 2.178
Episode: 5596, episode steps: 10, avg. reward: 9.53, loss_i: 12.428, loss_h: 103.537, loss_o: 2.064
Episode: 5597, episode steps: 10, avg. reward: 9.53, loss_i: 11.982, loss_h: 1.442, loss_o: 2.006
Episode: 5598, episode steps: 9, avg. reward: 9.53, loss_i: 18.156, loss_h: 1.316, loss_o: 1.782
Episode: 5599, episode steps: 8, avg. reward: 9.53, loss_i: 14.603, loss_h: 67.606, loss_o: 2.11
Episode: 5600, episo

Episode: 5673, episode steps: 10, avg. reward: 9.54, loss_i: 12.722, loss_h: 1.193, loss_o: 2.224
Episode: 5674, episode steps: 10, avg. reward: 9.54, loss_i: 12.828, loss_h: 1.293, loss_o: 2.651
Episode: 5675, episode steps: 8, avg. reward: 9.54, loss_i: 17.662, loss_h: 77.943, loss_o: 2.07
Episode: 5676, episode steps: 9, avg. reward: 9.54, loss_i: 13.944, loss_h: 75.115, loss_o: 2.323
Episode: 5677, episode steps: 9, avg. reward: 9.54, loss_i: 16.157, loss_h: 1.246, loss_o: 2.09
Episode: 5678, episode steps: 9, avg. reward: 9.54, loss_i: 13.264, loss_h: 1.26, loss_o: 2.217
Episode: 5679, episode steps: 10, avg. reward: 9.54, loss_i: 12.779, loss_h: 1.321, loss_o: 1.963
Episode: 5680, episode steps: 10, avg. reward: 9.54, loss_i: 11.826, loss_h: 93.936, loss_o: 2.01
Episode: 5681, episode steps: 10, avg. reward: 9.54, loss_i: 14.592, loss_h: 1.278, loss_o: 2.195
Episode: 5682, episode steps: 13, avg. reward: 9.54, loss_i: 13.933, loss_h: 1.257, loss_o: 2.143
Episode: 5683, episode st

Episode: 5757, episode steps: 9, avg. reward: 9.55, loss_i: 17.552, loss_h: 71.85, loss_o: 2.419
Episode: 5758, episode steps: 9, avg. reward: 9.54, loss_i: 11.851, loss_h: 82.773, loss_o: 1.939
Episode: 5759, episode steps: 10, avg. reward: 9.55, loss_i: 14.822, loss_h: 1.459, loss_o: 2.411
Episode: 5760, episode steps: 10, avg. reward: 9.55, loss_i: 14.488, loss_h: 100.492, loss_o: 2.391
Episode: 5761, episode steps: 10, avg. reward: 9.55, loss_i: 12.552, loss_h: 111.923, loss_o: 2.21
Episode: 5762, episode steps: 9, avg. reward: 9.55, loss_i: 19.456, loss_h: 1.592, loss_o: 2.02
Episode: 5763, episode steps: 11, avg. reward: 9.55, loss_i: 14.281, loss_h: 1.371, loss_o: 2.169
Episode: 5764, episode steps: 9, avg. reward: 9.55, loss_i: 15.762, loss_h: 1.47, loss_o: 2.304
Episode: 5765, episode steps: 8, avg. reward: 9.55, loss_i: 13.401, loss_h: 1.524, loss_o: 2.101
Episode: 5766, episode steps: 10, avg. reward: 9.55, loss_i: 14.716, loss_h: 95.633, loss_o: 2.186
Episode: 5767, episode

Episode: 5841, episode steps: 10, avg. reward: 9.55, loss_i: 14.954, loss_h: 1.49, loss_o: 2.255
Episode: 5842, episode steps: 10, avg. reward: 9.55, loss_i: 15.572, loss_h: 110.14, loss_o: 2.272
Episode: 5843, episode steps: 9, avg. reward: 9.55, loss_i: 16.448, loss_h: 91.729, loss_o: 2.281
Episode: 5844, episode steps: 12, avg. reward: 9.55, loss_i: 15.718, loss_h: 1.247, loss_o: 2.513
Episode: 5845, episode steps: 10, avg. reward: 9.56, loss_i: 16.751, loss_h: 83.655, loss_o: 2.651
Episode: 5846, episode steps: 10, avg. reward: 9.56, loss_i: 14.52, loss_h: 1.584, loss_o: 2.411
Episode: 5847, episode steps: 10, avg. reward: 9.56, loss_i: 15.112, loss_h: 1.444, loss_o: 2.224
Episode: 5848, episode steps: 8, avg. reward: 9.55, loss_i: 13.059, loss_h: 1.453, loss_o: 2.328
Episode: 5849, episode steps: 13, avg. reward: 9.56, loss_i: 14.537, loss_h: 1.311, loss_o: 2.149
Episode: 5850, episode steps: 9, avg. reward: 9.56, loss_i: 12.618, loss_h: 1.308, loss_o: 2.131
Episode: 5851, episode

Episode: 5925, episode steps: 9, avg. reward: 9.56, loss_i: 17.184, loss_h: 1.575, loss_o: 2.712
Episode: 5926, episode steps: 9, avg. reward: 9.56, loss_i: 11.537, loss_h: 1.587, loss_o: 1.804
Episode: 5927, episode steps: 10, avg. reward: 9.56, loss_i: 15.239, loss_h: 1.638, loss_o: 2.37
Episode: 5928, episode steps: 12, avg. reward: 9.56, loss_i: 14.378, loss_h: 95.121, loss_o: 2.239
Episode: 5929, episode steps: 9, avg. reward: 9.56, loss_i: 14.577, loss_h: 89.273, loss_o: 2.399
Episode: 5930, episode steps: 9, avg. reward: 9.56, loss_i: 13.658, loss_h: 80.082, loss_o: 2.23
Episode: 5931, episode steps: 12, avg. reward: 9.57, loss_i: 14.779, loss_h: 1.594, loss_o: 1.952
Episode: 5932, episode steps: 10, avg. reward: 9.57, loss_i: 15.914, loss_h: 89.441, loss_o: 1.98
Episode: 5933, episode steps: 9, avg. reward: 9.57, loss_i: 12.598, loss_h: 1.757, loss_o: 2.807
Episode: 5934, episode steps: 9, avg. reward: 9.57, loss_i: 12.893, loss_h: 1.382, loss_o: 2.046
Episode: 5935, episode st

  0%|          | 0/1000 [00:00<?, ?it/s]

Episode: 6000, episode steps: 11, avg. reward: 9.56, loss_i: 13.524, loss_h: 95.654, loss_o: 2.605
--TARGET UPDATED----TARGET UPDATED----TARGET UPDATED--
i: [ True  True  True  True]
h: [ True  True  True  True]
o: [ True]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Train episodes: 1000
--------------------------------------------------------------------------------
LOSS
i: {'critic_1_loss': 13.473601788282394, 'critic_2_loss': 13.573567521843044, 'policy_loss': -19.598610271107066, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
h: {'cnt': 1.0, 'critic_1_loss': 95.65574448555708, 'critic_2_loss': 95.6521777883172, 'policy_loss': -217.4676079750061, 'ent_loss': 0.0, 'alpha': 0.0}
o: {'critic_1_loss': 2.6050987135280264, 'critic_2_loss': 2.6050599163228814, 'policy_loss': -3.6105288808996026, 'ent_loss': 0.0, 'alpha': 0.0, 'cnt': 1.0}
ACTIVATIONS
i: [1.0, 1.0, 1.0, 1.0]
h: [0

100%|██████████| 1000/1000 [01:46<00:00,  9.40it/s]


--------------------------------------------------------------------------------
Test Episodes: 1000, Avg. Reward: 9.53, Max. Reward: 13.0
--------------------------------------------------------------------------------
Episode: 6001, episode steps: 10, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6002, episode steps: 12, avg. reward: 11.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6003, episode steps: 10, avg. reward: 10.67, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6004, episode steps: 8, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6005, episode steps: 9, avg. reward: 9.8, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6006, episode steps: 11, avg. reward: 10.0, loss_i: nan, loss_h: nan, loss_o: nan
Episode: 6007, episode steps: 10, avg. reward: 10.0, loss_i: 496.175, loss_h: nan, loss_o: 21.427
Episode: 6008, episode steps: 9, avg. reward: 9.88, loss_i: 16.203, loss_h: nan, loss_o: 2.203
Episode: 6009, episode steps: 9, avg. rewar

Episode: 6084, episode steps: 9, avg. reward: 9.52, loss_i: 12.887, loss_h: 76.813, loss_o: 1.929
Episode: 6085, episode steps: 9, avg. reward: 9.52, loss_i: 21.632, loss_h: 1.515, loss_o: 2.721
Episode: 6086, episode steps: 9, avg. reward: 9.51, loss_i: 16.239, loss_h: 1.808, loss_o: 2.085
Episode: 6087, episode steps: 9, avg. reward: 9.51, loss_i: 16.724, loss_h: 111.694, loss_o: 2.594
Episode: 6088, episode steps: 12, avg. reward: 9.53, loss_i: 16.658, loss_h: 1.742, loss_o: 2.1
Episode: 6089, episode steps: 9, avg. reward: 9.53, loss_i: 16.676, loss_h: 83.665, loss_o: 2.36
Episode: 6090, episode steps: 9, avg. reward: 9.52, loss_i: 13.785, loss_h: 1.181, loss_o: 2.336
Episode: 6091, episode steps: 10, avg. reward: 9.53, loss_i: 16.858, loss_h: 120.159, loss_o: 2.005
Episode: 6092, episode steps: 11, avg. reward: 9.54, loss_i: 12.733, loss_h: 124.351, loss_o: 2.095
Episode: 6093, episode steps: 9, avg. reward: 9.54, loss_i: 15.283, loss_h: 1.72, loss_o: 2.371
Episode: 6094, episode 

Episode: 6169, episode steps: 9, avg. reward: 9.43, loss_i: 12.399, loss_h: 1.645, loss_o: 2.327
Episode: 6170, episode steps: 10, avg. reward: 9.44, loss_i: 15.042, loss_h: 1.743, loss_o: 2.294
Episode: 6171, episode steps: 11, avg. reward: 9.44, loss_i: 16.337, loss_h: 1.351, loss_o: 2.967
Episode: 6172, episode steps: 10, avg. reward: 9.45, loss_i: 12.28, loss_h: 1.625, loss_o: 2.596
Episode: 6173, episode steps: 10, avg. reward: 9.45, loss_i: 20.612, loss_h: 1.752, loss_o: 2.766
Episode: 6174, episode steps: 10, avg. reward: 9.45, loss_i: 16.436, loss_h: 1.313, loss_o: 2.183
Episode: 6175, episode steps: 10, avg. reward: 9.46, loss_i: 15.327, loss_h: 1.55, loss_o: 2.203
Episode: 6176, episode steps: 10, avg. reward: 9.46, loss_i: 13.303, loss_h: 1.847, loss_o: 2.508
Episode: 6177, episode steps: 10, avg. reward: 9.46, loss_i: 13.978, loss_h: 1.516, loss_o: 2.928
Episode: 6178, episode steps: 11, avg. reward: 9.47, loss_i: 17.046, loss_h: 96.897, loss_o: 2.096
Episode: 6179, episode

Episode: 6253, episode steps: 10, avg. reward: 9.53, loss_i: 12.099, loss_h: 1.572, loss_o: 2.228
Episode: 6254, episode steps: 9, avg. reward: 9.52, loss_i: 13.332, loss_h: 71.031, loss_o: 1.989
Episode: 6255, episode steps: 10, avg. reward: 9.53, loss_i: 11.643, loss_h: 104.106, loss_o: 2.552
Episode: 6256, episode steps: 9, avg. reward: 9.52, loss_i: 12.024, loss_h: 87.16, loss_o: 2.245
Episode: 6257, episode steps: 10, avg. reward: 9.53, loss_i: 16.395, loss_h: 89.514, loss_o: 2.124
Episode: 6258, episode steps: 9, avg. reward: 9.52, loss_i: 11.347, loss_h: 77.562, loss_o: 1.957
Episode: 6259, episode steps: 10, avg. reward: 9.53, loss_i: 19.559, loss_h: 1.819, loss_o: 2.406
Episode: 6260, episode steps: 10, avg. reward: 9.53, loss_i: 14.319, loss_h: 1.457, loss_o: 2.239
Episode: 6261, episode steps: 9, avg. reward: 9.52, loss_i: 13.698, loss_h: 1.656, loss_o: 2.125
Episode: 6262, episode steps: 11, avg. reward: 9.53, loss_i: 13.84, loss_h: 125.298, loss_o: 2.289
Episode: 6263, epi

Episode: 6337, episode steps: 10, avg. reward: 9.56, loss_i: 13.893, loss_h: 1.834, loss_o: 2.456
Episode: 6338, episode steps: 10, avg. reward: 9.57, loss_i: 10.629, loss_h: 1.39, loss_o: 2.129
Episode: 6339, episode steps: 9, avg. reward: 9.56, loss_i: 10.81, loss_h: 88.331, loss_o: 2.768
Episode: 6340, episode steps: 9, avg. reward: 9.56, loss_i: 12.501, loss_h: 78.548, loss_o: 2.498
Episode: 6341, episode steps: 9, avg. reward: 9.56, loss_i: 14.944, loss_h: 1.44, loss_o: 2.394
Episode: 6342, episode steps: 9, avg. reward: 9.56, loss_i: 12.935, loss_h: 1.587, loss_o: 2.477
Episode: 6343, episode steps: 9, avg. reward: 9.56, loss_i: 16.559, loss_h: 71.197, loss_o: 2.158
Episode: 6344, episode steps: 9, avg. reward: 9.56, loss_i: 17.873, loss_h: 97.106, loss_o: 2.002
Episode: 6345, episode steps: 9, avg. reward: 9.55, loss_i: 16.204, loss_h: 1.318, loss_o: 2.547
Episode: 6346, episode steps: 10, avg. reward: 9.55, loss_i: 15.024, loss_h: 1.519, loss_o: 1.955
Episode: 6347, episode ste

Episode: 6420, episode steps: 9, avg. reward: 9.55, loss_i: 10.622, loss_h: 1.153, loss_o: 2.134
Episode: 6421, episode steps: 9, avg. reward: 9.55, loss_i: 13.035, loss_h: 1.353, loss_o: 1.993
Episode: 6422, episode steps: 9, avg. reward: 9.55, loss_i: 12.911, loss_h: 1.282, loss_o: 2.175
Episode: 6423, episode steps: 9, avg. reward: 9.55, loss_i: 14.356, loss_h: 1.369, loss_o: 2.364
Episode: 6424, episode steps: 10, avg. reward: 9.55, loss_i: 13.101, loss_h: 1.554, loss_o: 2.153
Episode: 6425, episode steps: 10, avg. reward: 9.55, loss_i: 13.979, loss_h: 88.246, loss_o: 2.455
Episode: 6426, episode steps: 9, avg. reward: 9.55, loss_i: 12.483, loss_h: 1.439, loss_o: 2.182
Episode: 6427, episode steps: 10, avg. reward: 9.55, loss_i: 13.868, loss_h: 1.161, loss_o: 2.096
Episode: 6428, episode steps: 8, avg. reward: 9.55, loss_i: 10.496, loss_h: 1.196, loss_o: 2.161
Episode: 6429, episode steps: 8, avg. reward: 9.55, loss_i: 13.031, loss_h: 1.084, loss_o: 2.285
Episode: 6430, episode ste

Episode: 6505, episode steps: 10, avg. reward: 9.53, loss_i: 13.641, loss_h: 1.137, loss_o: 2.234
Episode: 6506, episode steps: 9, avg. reward: 9.53, loss_i: 11.458, loss_h: 1.212, loss_o: 2.219
Episode: 6507, episode steps: 10, avg. reward: 9.53, loss_i: 16.616, loss_h: 102.793, loss_o: 2.032
Episode: 6508, episode steps: 8, avg. reward: 9.52, loss_i: 13.551, loss_h: 1.159, loss_o: 2.851
Episode: 6509, episode steps: 9, avg. reward: 9.52, loss_i: 19.383, loss_h: 85.655, loss_o: 2.225
Episode: 6510, episode steps: 9, avg. reward: 9.52, loss_i: 15.043, loss_h: 1.25, loss_o: 2.129
Episode: 6511, episode steps: 8, avg. reward: 9.52, loss_i: 14.243, loss_h: 72.534, loss_o: 2.362
Episode: 6512, episode steps: 9, avg. reward: 9.52, loss_i: 12.643, loss_h: 85.509, loss_o: 2.43
Episode: 6513, episode steps: 12, avg. reward: 9.52, loss_i: 13.364, loss_h: 89.783, loss_o: 2.177
Episode: 6514, episode steps: 8, avg. reward: 9.52, loss_i: 14.475, loss_h: 1.201, loss_o: 2.605
Episode: 6515, episode 

Episode: 6589, episode steps: 9, avg. reward: 9.53, loss_i: 14.517, loss_h: 1.2, loss_o: 2.58
Episode: 6590, episode steps: 8, avg. reward: 9.53, loss_i: 15.172, loss_h: 53.908, loss_o: 2.613
Episode: 6591, episode steps: 10, avg. reward: 9.53, loss_i: 16.549, loss_h: 1.098, loss_o: 2.179
Episode: 6592, episode steps: 8, avg. reward: 9.53, loss_i: 17.183, loss_h: 1.464, loss_o: 2.606
Episode: 6593, episode steps: 9, avg. reward: 9.52, loss_i: 13.467, loss_h: 1.378, loss_o: 2.694
Episode: 6594, episode steps: 10, avg. reward: 9.53, loss_i: 15.367, loss_h: 1.261, loss_o: 2.288
Episode: 6595, episode steps: 9, avg. reward: 9.52, loss_i: 16.511, loss_h: 86.858, loss_o: 2.567
Episode: 6596, episode steps: 9, avg. reward: 9.52, loss_i: 13.192, loss_h: 1.178, loss_o: 2.256
Episode: 6597, episode steps: 10, avg. reward: 9.52, loss_i: 16.263, loss_h: 1.135, loss_o: 2.461
Episode: 6598, episode steps: 9, avg. reward: 9.52, loss_i: 18.587, loss_h: 1.22, loss_o: 2.274
Episode: 6599, episode steps: