# Init Bionic VTOL

In [1]:
include("../Flyonic.jl");
using .Flyonic;

using Rotations; # used for initial position

using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Random;
using IntervalSets;
using LinearAlgebra;
using Distributions;

using Plots;
using Statistics;

using BSON: @save, @load # save mode

In [2]:
create_visualization();

┌ Info: MeshCat server started. You can open the visualizer by visiting the following URL in your browser:
│ http://127.0.0.1:8700
└ @ MeshCat /Users/leonardoigler/.julia/packages/MeshCat/Ax8pH/src/visualizer.jl:73


In [3]:
# indicates how many threads Julia was started with. This is important for the multi-threaded environment
Threads.nthreads()

1

In [4]:
eth_vtol_param["gravity"] = 0;#9.81;

In [5]:
DESIRED_x = [-4.0, 0.0, 4.0] # desired distance    
angle = -pi/8.0 # 
DESIRED_R = Matrix(UnitQuaternion(RotY(angle)*RotX(pi/2.0)*RotZ(pi/2.0)))

create_VTOL("fixgoal", actuators = false, color_vec=[1.0; 0.0; 0.0; 1.0]);
set_transform("fixgoal", DESIRED_x ,QuatRotation(DESIRED_R)); 

# Create Reinforcement Learning Environment

In [6]:
mutable struct VtolEnv{A,T,ACT,R<:AbstractRNG} <: AbstractEnv # Parametric Constructor for a subtype of AbstractEnv
    action_space::A
    observation_space::Space{Vector{ClosedInterval{T}}}
    state::Vector{T}
    action::ACT
    done::Bool
    t::T
    rng::R

    name::String #for multible environoments
    visualization::Bool
    realtime::Bool
    
    # Everything you need aditionaly can also go in here.
    x_W::Vector{T}
    v_B::Vector{T}
    R_W::Matrix{T}
    ω_B::Vector{T}
    wind_W::Vector{T}
    Δt::T
    
    # Bonus / Target
    x_d_W::Vector{T}
    R_d_W::Matrix{T}
    covered_line::T
    previously_covered_line::T
end

In [7]:
# define a keyword-based constructor for the type declared in the mutable struct typedef. 
# It could also be done with the macro Base.@kwdef.
function VtolEnv(;
     
    #continuous = true,
    rng = Random.GLOBAL_RNG, # Random number generation
    name = "vtol",
    visualization = false,
    realtime = false,
    kwargs... # let the function take an arbitrary number of keyword arguments 
)
    
    T = Float64; # explicit type which is used e.g. in state. Cannot be altered due to the poor matrix defininon.

    #action_space = Base.OneTo(21) # 21 discrete positions for the flaps
    
    action_space = Space(
        ClosedInterval{T}[
            0.0..2.0, # propeller 1
            0.0..2.0, # propeller 2
            ], 
    )

    
    state_space = Space( # Three continuous values in state space.
        ClosedInterval{T}[
            
            # If you are not flying horizontally, you can later switch gravitation 
            # back on and counteract it with the rotors as well.
            # In addition, once the drone has flown over its target, 
            # it can "fall down" and does not have to turn around.
            
            # orientate yourself on the state space from the paper
            typemin(T)..typemax(T), # position along x
            typemin(T)..typemax(T), # position along y
            typemin(T)..typemax(T), # orientation along x: REALLY NEEDED?
            typemin(T)..typemax(T), # orientation along z
            typemin(T)..typemax(T), # velocity along x BODY coordinates
            typemin(T)..typemax(T), # velocity along y BODY coordinates
            typemin(T)..typemax(T), # rotational velocity along z BODY coordinates
            
            typemin(T)..typemax(T), # position error along x
            typemin(T)..typemax(T), # position error along y
            # Not used in Paper!!!
            typemin(T)..typemax(T), # target rotation along x (better than angle for neural networks)
            typemin(T)..typemax(T), # target rotation along z (better than angle for neural networks)
            typemin(T)..typemax(T), # The distance along the connecting line which has been covered
            typemin(T)..typemax(T), # The distance along the connecting line which has been previously covered
            
            ], 
    )

    # Calculate random vector & angle

    #################################
    
    if visualization
        create_VTOL(name, actuators = true, color_vec=[1.0; 1.0; 0.6; 1.0]);
    end

    environment = VtolEnv(
        action_space,
        state_space,
        zeros(T, 10), # current state, needs to be extended. 
        rand(action_space),
        false, # episode done ?
        0.0, # time
        rng, # random number generator  
        name,
        visualization,
        realtime,
        zeros(T, 3), # x_W
        zeros(T, 3), # v_B
        #Matrix(UnitQuaternion((RotX(pi)))),
        [1.0 0.0 0.0; 0.0 -1.0 0.0; 0.0 0.0 -1.0], # R_W Float64... so T needs to be Float64
        zeros(T, 3), # ω_B
        zeros(T, 3), # wind_W
        T(0.025), # Δt 
        # TODO Random
        DESIRED_x, # desired distance 
        DESIRED_R, # desired orientation
        0.0,
        0.0,
    )
    
    
    reset!(environment)
    
    return environment
    
end;

Just for explanation:

1. A mutable Struct is created. A struct is a constructor and a constructor is a function that creates new objects.
2. A outer keyword-based constructor method is added for the type declared in the mutable struct typedef before.

So now we have a function with two methods. Julia will decide which method to call by multiple dispatch.

In [8]:
methods(VtolEnv)

# Define the RL interface

In [9]:
Random.seed!(env::VtolEnv, seed) = Random.seed!(env.rng, seed)
RLBase.action_space(env::VtolEnv) = env.action_space
RLBase.state_space(env::VtolEnv) = env.observation_space
RLBase.is_terminated(env::VtolEnv) = env.done
RLBase.state(env::VtolEnv) = env.state

In [10]:
function computeReward(env::VtolEnv{A,T}) where {A,T}
        
    # TODO: Add tolerance for VTOL-Drone
    if norm(env.x_W - env.x_d_W)<1
        distance_goal = exp(-norm(env.x_W - env.x_d_W))*1000
    else
        distance_goal = 0
    end

    limit_rotation = abs(env.ω_B[3])*10

    progress = abs(env.covered_line-norm(env.x_d_W))*10#exp((-(env.covered_line-norm(env.x_d_W))^2)/10)*100
    
    new_progress = (env.covered_line-env.previously_covered_line)*10

    # TODO: Make yourself comfortable with what this is
    difference_angle = 0#exp(-sum((env.R_W[:,1] - env.R_d_W[:,1]).^2))#*10.0 #0 was good
    
    #    difference_angle = exp(-sum(((env.R_W[:,1] - env.R_d_W[:,1]).^2)))*10#*10.0 #0 was good
      
    #print("\ncovered: \n")
    #print(env.covered_line)
    #print("\nnew: \n")
    #print(env.previously_covered_line)
    #print("\n")
    #print("\n")

    #env.previously_covered_line = env.covered_line

    #print("distance_goal: \n")
    #print(distance_goal)
    #print("\nlimit_rotation: \n")
    #print(limit_rotation)
    #print("\nprogress: \n")
    #print(progress)
    #print("\nnew_progress: \n")
    #print(new_progress)
    #print("\ndifference_angle: \n")
    #print(difference_angle)
    #print("\n")
    #print("\n")

    return progress + new_progress + distance_goal - limit_rotation - difference_angle
end


RLBase.reward(env::VtolEnv{A,T}) where {A,T} = computeReward(env)

In [11]:
function RLBase.reset!(env::VtolEnv{A,T}) where {A,T}
    
    # Visualize initial state
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, [0.0; 0.0; 0.0; 0.0])
    end
    
    env.x_W = [0.0; 0.0; 0.0];
    env.v_B = [0.0; 0.0; 0.0];
    env.R_W = Matrix(UnitQuaternion(RotX(pi)))#Matrix(UnitQuaternion(RotZ(-pi/2.0)*RotY(-pi/2.0)*RotX(pi)));
    #env.R_W = Matrix(UnitQuaternion(RotX(pi)));
    env.ω_B = [0.0; 0.0; 0.0];
    env.wind_W = [0.0; 0.0; 0.0];

    env.x_d_W = [rand(Uniform(0,10)), rand(Uniform(-10,10)), 0.0]
    #env.angle_d_W = calculateAngle([1.0 ,0.0, 0.0], env.x_d_W) # desired angle
    #x_vec = [1.0; 0.0; 0.0]
    #env.angle_d_W = calculateAngle(x_vec, env.x_d_W) # Should the drone arrive at the target like this or is there another idea behind it?
    angle = calculateAngle([1.0 ,0.0, 0.0], env.x_d_W)
    env.R_d_W = Matrix(UnitQuaternion(RotZ(angle)*RotX(pi)))

    env.covered_line = 0.0
    env.previously_covered_line = 0.0
    
    if env.visualization
        create_VTOL("fixgoal", actuators = false, color_vec=[1.0; 0.0; 0.0; 1.0]);
        set_transform("fixgoal", env.x_d_W ,QuatRotation(env.R_d_W)); 
    end

    env.state = [env.x_W[1];
                 env.x_W[2];
                 env.R_W[1,1];
                 env.R_W[3,1];
                 env.v_B[1];
                 env.v_B[2];
                 env.ω_B[3];
                 env.x_W[1] - env.x_d_W[1];
                 env.x_W[2] - env.x_d_W[2]; # CHANGED TO Y
                 env.R_d_W[1,1];
                 env.R_d_W[3,1];
                 env.covered_line;
                 env.previously_covered_line] 
    
    env.t = 0.0
    env.action = [0.0, 0.0]
    env.done = false
    nothing

end;

In [12]:
R_W = Matrix(UnitQuaternion(RotX(pi)))
Rotations.params(RotYXZ(R_W))


3-element StaticArraysCore.SVector{3, Float64} with indices SOneTo(3):
 3.141592653589793
 1.2246467991473532e-16
 3.141592653589793

In [13]:
# defines a methods for a callable object.
# So when a VtolEnv object is created, it has this method that can be called
function (env::VtolEnv)(a)

    # set the propeller trust and the two flaps 2D case
    next_action = [a[1], a[2], 0.0, 0.0]
   
    _step!(env, next_action)
end

In [14]:
env = VtolEnv();

In [15]:
methods(env) # Just to explain which methods the object has

In [16]:
function _step!(env::VtolEnv, next_action)
    
    # caluclate wind impact
    v_in_wind_B = vtol_add_wind(env.v_B, env.R_W, env.wind_W)
    # caluclate aerodynamic forces
    torque_B, force_B = vtol_model(v_in_wind_B, next_action, eth_vtol_param);
    # Limit to 2D
    force_B[3] = 0.0; # Body Z
    env.v_B[3] = 0.0;
    torque_B[1] = 0.0; torque_B[2] = 0.0;  # Body X and Y
    env.ω_B[1] = 0.0; env.ω_B[2] = 0.0;
    # integrate rigid body dynamics for Δt
    env.x_W, env.v_B, env.R_W, env.ω_B, time = rigid_body_simple(torque_B, force_B, env.x_W, env.v_B, env.R_W, env.ω_B, env.t, env.Δt, eth_vtol_param)
    env.covered_line = dot(env.x_W, env.x_d_W)/(norm(env.x_d_W))
    
    if env.realtime
        sleep(env.Δt) # TODO: just a dirty hack. this is of course slower than real time.
    end

    # Visualize the new state 
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, next_action)
    end
 
    env.t += env.Δt
    
    # State space
    #rot = Rotations.params(RotYXZ(env.R_W))[3]
    #env.state[1] = env.x_W[1] # world position in x
    #env.state[2] = env.ω_B[2] # world position in y
    #env.state[3] = rot # rotation around z
    #rot = Rotations.params(RotYXZ(env.R_W))[1]
    
    
    env.state[1] = env.x_W[1]; # position along x
    env.state[2] = env.x_W[2];  # position along y
    env.state[3] = env.R_W[1,1];    # orientation along x
    env.state[4] = env.R_W[3,1];   # orientation along z
    env.state[5] = env.v_B[1];  # velocity along x BODY coordinates
    env.state[6] = env.v_B[2];  # velocity along y BODY coordinates
    env.state[7] = env.ω_B[3];  # rotational velocity along z BODY coordinates
    env.state[8] = env.x_W[1] - env.x_d_W[1];   # position error along x
    env.state[9] = env.x_W[2] - env.x_d_W[2];   # position error along y
    env.state[10] = env.R_d_W[1,1];  # target rotation along x (better than angle for neural networks)
    env.state[11] = env.R_d_W[3,1];    # target orientation along z
    env.state[12] = env.covered_line    # Covered distance along line after step
    env.state[13] = env.previously_covered_line # Covered distance along line before step
    
    #print("\nSTEP: \n")
    # Termination criteria
    # TODO: Use many termination criteria so that you do not train unnecessarily in wrong areas
    env.done = #true

        # TODO Zu lang
        # TODO Zu weit weg
        # TODO Ziel erreicht

        # After time... How fast is drone+Range of desired point
        # After reaching position (circle of r_tol)
        norm(env.ω_B) > 50.0 || 
        norm(env.v_B) > 50.0 || # stop if body is too fast
        #env.x_W[3] < -10.0 || # stop if body is below -10m
        #0.0 > rot || # Stop if the drone is pitched 90°.
        #rot > pi || # Stop if the drone is pitched 90°.
        sum((env.x_W - env.x_d_W).^2) < 0.1 ||
        sum((env.x_W - env.x_d_W).^2) > 20.0||
        env.t > 5.0 # stop after 10s
    nothing
end;

In [17]:
RLBase.test_runnable!(env)

[0m[1mTest Summary:              | [22m[32m[1mPass  [22m[39m[36m[1mTotal  [22m[39m[0m[1mTime[22m
random policy with VtolEnv | [32m2000  [39m[36m 2000  [39m[0m0.9s


Test.DefaultTestSet("random policy with VtolEnv", Any[], 2000, false, false, true, 1.668960204513955e9, 1.668960205461051e9)

Show an overview of the environment.

# Setup of a reinforcement learning experiment.

In [18]:
seed = 123    
rng = StableRNG(seed)
    N_ENV = 8
    UPDATE_FREQ = 1024
    
    
    # define multiple environments for parallel training
    env = MultiThreadEnv([
        # use different names for the visualization
        VtolEnv(; rng = StableRNG(hash(seed+i)), name = "vtol$i") for i in 1:N_ENV
    ])

MultiThreadEnv(8 x VtolEnv)

In [19]:
# Define the function approximator
    ns, na = length(state(env[1])), length(action_space(env[1]))
    approximator = ActorCritic(
                actor = GaussianNetwork(
                    pre = Chain(
                    Dense(ns, 64, relu; initW = glorot_uniform(rng)),#
                    Dense(64, 64, relu; initW = glorot_uniform(rng)),
                    ),
                    μ = Chain(Dense(64, na; initW = glorot_uniform(rng))),
                    logσ = Chain(Dense(64, na; initW = glorot_uniform(rng))),
                ),
                critic = Chain(
                    Dense(ns, 64, relu; initW = glorot_uniform(rng)),
                    Dense(64, 64, relu; initW = glorot_uniform(rng)),
                    Dense(64, 1; initW = glorot_uniform(rng)),
                ),
                optimizer = ADAM(1e-3),
            );

In [20]:
    agent = Agent( # A wrapper of an AbstractPolicy
        # AbstractPolicy: the policy to use
        policy = PPOPolicy(;
                    approximator = approximator |> gpu,
                    update_freq=UPDATE_FREQ,
                    dist = Normal,
                    # For parameters visit the docu: https://juliareinforcementlearning.org/docs/rlzoo/#ReinforcementLearningZoo.PPOPolicy
                    ),
        
        # AbstractTrajectory: used to store transitions between an agent and an environment source
        trajectory = PPOTrajectory(;
            capacity = UPDATE_FREQ,
            state = Matrix{Float64} => (ns, N_ENV),
            action = Matrix{Float64} => (na, N_ENV),
            action_log_prob = Vector{Float64} => (N_ENV,),
            reward = Vector{Float64} => (N_ENV,),
            terminal = Vector{Bool} => (N_ENV,),
        ),
    );


┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/leonardoigler/.julia/packages/Flux/7nTyc/src/functor.jl:187


In [21]:
function saveModel(t, agent, env)
    model = cpu(agent.policy.approximator)   
    f = joinpath("./RL_models/", "vtol_2D_ppo_$t.bson")
    @save f model
    println("parameters at step $t saved to $f")
end;

In [22]:
function loadModel()
    f = joinpath("./RL_models/", "vtol_2D_ppo_3100000.bson")
    @load f model
    return model
end;

In [23]:
function validate_policy(t, agent, env)
    run(agent.policy, test_env, StopAfterEpisode(1), episode_test_reward_hook)
    # the result of the hook
    println("test reward at step $t: $(mean(episode_test_reward_hook.rewards))")
    
end;

episode_test_reward_hook = TotalRewardPerEpisode(;is_display_on_exit=false)
# create a env only for reward test
test_env = VtolEnv(;name = "testVTOL", visualization = true, realtime = true);

In [24]:
#agent.policy.approximator = loadModel();

In [25]:
ReinforcementLearning.run(
    agent,
    env,
    StopAfterStep(4_000_000),
    ComposedHook(
        DoEveryNStep(saveModel, n=100_000_000), 
        DoEveryNStep(validate_policy, n=10_000)),
)


[32mProgress:   0%|                                         |  ETA: 78.12 days[39m[K

[32mProgress:   0%|▏                                        |  ETA: 3:47:08[39m[K

test reward at step 10000: 10567.193777839386


[32mProgress:   0%|▏                                        |  ETA: 3:17:33[39m[K

[32mProgress:   0%|▏                                        |  ETA: 2:56:49[39m[K

[32mProgress:   0%|▏                                        |  ETA: 2:39:11[39m[K

[32mProgress:   0%|▏                                        |  ETA: 2:25:57[39m[K

[32mProgress:   0%|▎                                        |  ETA: 2:14:25[39m[K

test reward at step 20000: 5449.713787500369


[32mProgress:   1%|▎                                        |  ETA: 2:27:56[39m[K

[32mProgress:   1%|▎                                        |  ETA: 2:17:28[39m[K

[32mProgress:   1%|▎                                        |  ETA: 2:10:39[39m[K

[32mProgress:   1%|▎                                        |  ETA: 2:04:51[39m[K

[32mProgress:   1%|▎                                        |  ETA: 1:58:01[39m[K

test reward at step 30000: -4025.6537761373747


[32mProgress:   1%|▍                                        |  ETA: 2:04:54[39m[K

[32mProgress:   1%|▍                                        |  ETA: 1:57:30[39m[K

[32mProgress:   1%|▍                                        |  ETA: 1:52:42[39m[K

[32mProgress:   1%|▍                                        |  ETA: 1:48:18[39m[K

test reward at step 40000: -2169.722446095856


[32mProgress:   1%|▍                                        |  ETA: 1:53:19[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:47:15[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:44:06[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:41:13[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:38:36[39m[K

test reward at step 50000: -4831.700910216838


[32mProgress:   1%|▌                                        |  ETA: 1:43:12[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:41:10[39m[K

[32mProgress:   1%|▌                                        |  ETA: 1:38:50[39m[K

[32mProgress:   1%|▋                                        |  ETA: 1:36:41[39m[K

[32mProgress:   1%|▋                                        |  ETA: 1:34:38[39m[K

test reward at step 60000: -2304.4149952804587


[32mProgress:   2%|▋                                        |  ETA: 1:38:35[39m[K

[32mProgress:   2%|▋                                        |  ETA: 1:36:51[39m[K

[32mProgress:   2%|▋                                        |  ETA: 1:35:11[39m[K

[32mProgress:   2%|▋                                        |  ETA: 1:33:13[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:31:28[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:29:59[39m[K

test reward at step 70000: -798.2247619153748


[32mProgress:   2%|▊                                        |  ETA: 1:33:30[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:31:55[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:30:33[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:29:14[39m[K

[32mProgress:   2%|▊                                        |  ETA: 1:27:49[39m[K

test reward at step 80000: 1601.4912796864662


[32mProgress:   2%|▉                                        |  ETA: 1:30:51[39m[K

[32mProgress:   2%|▉                                        |  ETA: 1:29:18[39m[K

[32mProgress:   2%|▉                                        |  ETA: 1:28:04[39m[K

[32mProgress:   2%|▉                                        |  ETA: 1:26:41[39m[K

[32mProgress:   2%|▉                                        |  ETA: 1:25:31[39m[K

test reward at step 90000: 3258.363225487565


[32mProgress:   2%|▉                                        |  ETA: 1:28:17[39m[K

[32mProgress:   2%|█                                        |  ETA: 1:27:18[39m[K

[32mProgress:   2%|█                                        |  ETA: 1:26:24[39m[K

[32mProgress:   2%|█                                        |  ETA: 1:25:05[39m[K

[32mProgress:   2%|█                                        |  ETA: 1:24:04[39m[K

test reward at step 100000: 2897.3758116126314


[32mProgress:   3%|█                                        |  ETA: 1:26:43[39m[K

[32mProgress:   3%|█                                        |  ETA: 1:25:51[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:24:53[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:24:01[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:23:02[39m[K

test reward at step 110000: 4785.995665511516


[32mProgress:   3%|█▏                                       |  ETA: 1:25:22[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:25:02[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:24:11[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 1:23:21[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:22:40[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:22:04[39m[K

test reward at step 120000: 6866.510203392728


[32mProgress:   3%|█▎                                       |  ETA: 1:24:20[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:23:47[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:23:01[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:22:11[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 1:21:30[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 1:23:26[39m[K

test reward at step 130000: 9862.796890304991


[32mProgress:   3%|█▍                                       |  ETA: 1:22:53[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 1:22:18[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 1:21:32[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 1:20:49[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 1:20:06[39m[K

test reward at step 140000: 10193.435030515075


[32mProgress:   4%|█▌                                       |  ETA: 1:22:01[39m[K

[32mProgress:   4%|█▌                                       |  ETA: 1:21:19[39m[K

[32mProgress:   4%|█▌                                       |  ETA: 1:20:39[39m[K

[32mProgress:   4%|█▌                                       |  ETA: 1:19:57[39m[K

[32mProgress:   4%|█▌                                       |  ETA: 1:19:17[39m[K

test reward at step 150000: 10507.216011132616


[32mProgress:   4%|█▌                                       |  ETA: 1:21:04[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:20:50[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:20:17[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:19:40[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:19:09[39m[K

test reward at step 160000: 10546.59152295955


[32mProgress:   4%|█▋                                       |  ETA: 1:20:59[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:20:14[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 1:19:42[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 1:19:15[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 1:18:40[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 1:20:15[39m[K

test reward at step 170000: 10796.6006717445


[32mProgress:   4%|█▊                                       |  ETA: 1:19:25[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 1:18:53[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 1:18:31[39m[K

[32mProgress:   4%|█▉                                       |  ETA: 1:18:00[39m[K

test reward at step 180000: 10951.277751196845


[32mProgress:   5%|█▉                                       |  ETA: 1:19:32[39m[K

[32mProgress:   5%|█▉                                       |  ETA: 1:19:00[39m[K

[32mProgress:   5%|█▉                                       |  ETA: 1:18:30[39m[K

[32mProgress:   5%|█▉                                       |  ETA: 1:17:58[39m[K

[32mProgress:   5%|█▉                                       |  ETA: 1:17:30[39m[K

test reward at step 190000: 11576.550250427606


[32mProgress:   5%|██                                       |  ETA: 1:18:54[39m[K

[32mProgress:   5%|██                                       |  ETA: 1:18:18[39m[K

[32mProgress:   5%|██                                       |  ETA: 1:17:46[39m[K

[32mProgress:   5%|██                                       |  ETA: 1:17:23[39m[K

[32mProgress:   5%|██                                       |  ETA: 1:16:54[39m[K

test reward at step 200000: 13729.058620175749


[32mProgress:   5%|██                                       |  ETA: 1:18:19[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 1:17:55[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 1:17:29[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 1:17:09[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 1:16:48[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 1:16:28[39m[K

test reward at step 210000: 12322.024876876012


[32mProgress:   5%|██▏                                      |  ETA: 1:17:45[39m[K

[32mProgress:   5%|██▎                                      |  ETA: 1:17:13[39m[K

[32mProgress:   5%|██▎                                      |  ETA: 1:16:45[39m[K

[32mProgress:   5%|██▎                                      |  ETA: 1:16:19[39m[K

[32mProgress:   6%|██▎                                      |  ETA: 1:17:30[39m[K

test reward at step 220000: 12788.916696850343


[32mProgress:   6%|██▎                                      |  ETA: 1:17:09[39m[K

[32mProgress:   6%|██▎                                      |  ETA: 1:16:55[39m[K

[32mProgress:   6%|██▎                                      |  ETA: 1:16:29[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 1:16:08[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 1:15:42[39m[K

test reward at step 230000: 12320.359525124602


[32mProgress:   6%|██▍                                      |  ETA: 1:16:51[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 1:16:34[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 1:16:08[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 1:15:48[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 1:15:28[39m[K

test reward at step 240000: 11784.6664958969


[32mProgress:   6%|██▌                                      |  ETA: 1:16:32[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 1:16:18[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 1:15:54[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 1:15:34[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 1:15:11[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 1:16:11[39m[K

test reward at step 250000: 12073.40538126666


[32mProgress:   6%|██▋                                      |  ETA: 1:15:36[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 1:15:16[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 1:14:51[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 1:14:32[39m[K

test reward at step 260000: 12464.533683785


[32mProgress:   7%|██▋                                      |  ETA: 1:15:31[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:15:16[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:14:58[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:14:39[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:14:18[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:15:14[39m[K

test reward at step 270000: 12701.39491451577


[32mProgress:   7%|██▊                                      |  ETA: 1:14:57[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 1:14:44[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:14:26[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:14:15[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:14:07[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:13:58[39m[K

test reward at step 280000: 12461.17735290215


[32mProgress:   7%|██▉                                      |  ETA: 1:15:00[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:14:41[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 1:14:28[39m[K

[32mProgress:   7%|███                                      |  ETA: 1:14:11[39m[K

[32mProgress:   7%|███                                      |  ETA: 1:13:52[39m[K

test reward at step 290000: 12522.863054667348


[32mProgress:   7%|███                                      |  ETA: 1:14:46[39m[K

[32mProgress:   7%|███                                      |  ETA: 1:14:22[39m[K

[32mProgress:   7%|███                                      |  ETA: 1:13:59[39m[K

[32mProgress:   7%|███                                      |  ETA: 1:13:41[39m[K

[32mProgress:   7%|███▏                                     |  ETA: 1:13:25[39m[K

test reward at step 300000: 13011.939549889299


[32mProgress:   8%|███▏                                     |  ETA: 1:14:15[39m[K

[32mProgress:   8%|███▏                                     |  ETA: 1:14:01[39m[K

[32mProgress:   8%|███▏                                     |  ETA: 1:13:46[39m[K

[32mProgress:   8%|███▏                                     |  ETA: 1:13:29[39m[K

[32mProgress:   8%|███▏                                     |  ETA: 1:13:10[39m[K

test reward at step 310000: 13209.189244140543


[32mProgress:   8%|███▏                                     |  ETA: 1:14:00[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 1:13:40[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 1:13:21[39m

[K

[32mProgress:   8%|███▎                                     |  ETA: 1:13:04[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 1:12:47[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 1:13:33[39m[K

test reward at step 320000: 13550.633826728324


[32mProgress:   8%|███▎                                     |  ETA: 1:13:18[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:13:03[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:12:47[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:12:30[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:12:13[39m[K

test reward at step 330000: 13923.175799392584


[32mProgress:   8%|███▍                                     |  ETA: 1:12:57[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:12:44[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 1:12:29[39m[K

[32mProgress:   8%|███▌                                     |  ETA: 1:12:14[39m[K

[32mProgress:   8%|███▌                                     |  ETA: 1:11:56[39m[K

test reward at step 340000: 14459.321370334852


[32mProgress:   9%|███▌                                     |  ETA: 1:12:40[39m[K

[32mProgress:   9%|███▌                                     |  ETA: 1:12:23[39m[K

[32mProgress:   9%|███▌                                     |  ETA: 1:12:04[39m[K

[32mProgress:   9%|███▌                                     |  ETA: 1:11:49[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 1:11:34[39m[K

test reward at step 350000: 14560.093521057306


[32mProgress:   9%|███▋                                     |  ETA: 1:12:17[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 1:12:05[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 1:11:53[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 1:11:36[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 1:11:22[39m[K

test reward at step 360000: 14691.448280291066


[32mProgress:   9%|███▊                                     |  ETA: 1:12:04[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 1:11:53[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 1:11:41[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 1:11:27[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 1:11:12[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 1:10:57[39m[K

test reward at step 370000: 15100.079411723202


[32mProgress:   9%|███▊                                     |  ETA: 1:11:37[39m[K

[32mProgress:   9%|███▉                                     |  ETA: 1:11:28[39m[K

[32mProgress:   9%|███▉                                     |  ETA: 1:11:13[39m[K

[32mProgress:   9%|███▉                                     |  ETA: 1:11:00[39m[K

[32mProgress:   9%|███▉                                     |  ETA: 1:10:44[39m[K

test reward at step 380000: 15559.51199553688


[32mProgress:  10%|███▉                                     |  ETA: 1:11:24[39m[K

[32mProgress:  10%|███▉                                     |  ETA: 1:11:09[39m[K

[32mProgress:  10%|████                                     |  ETA: 1:10:54[39m[K

[32mProgress:  10%|████                                     |  ETA: 1:10:39[39m[K

[32mProgress:  10%|████                                     |  ETA: 1:10:27[39m[K

test reward at step 390000: 15448.33028922845


[32mProgress:  10%|████                                     |  ETA: 1:11:10[39m[K

[32mProgress:  10%|████                                     |  ETA: 1:10:52[39m[K

[32mProgress:  10%|████                                     |  ETA: 1:10:38[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 1:10:24[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 1:10:09[39m[K

test reward at step 400000: 15677.159465713896


[32mProgress:  10%|████▏                                    |  ETA: 1:10:47[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 1:10:33[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 1:10:19[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 1:10:05[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 1:09:51[39m[K

test reward at step 410000: 15792.709604012618


[32mProgress:  10%|████▎                                    |  ETA: 1:10:27[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 1:10:16[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 1:10:06[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 1:09:52[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 1:09:37[39m[K

test reward at step 420000: 15983.690604684889


[32mProgress:  11%|████▍                                    |  ETA: 1:10:13[39m[K

[32mProgress:  11%|████▍                                    |  ETA: 1:09:57[39m[K

[32mProgress:  11%|████▍                                    |  ETA: 1:09:47[39m[K

[32mProgress:  11%|████▍                                    |  ETA: 1:09:37[39m[K

[32mProgress:  11%|████▍                                    |  ETA: 1:09:26[39m[K

test reward at step 430000: 16096.540446977066


[32mProgress:  11%|████▍                                    |  ETA: 1:10:02[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:47[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:34[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:23[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:16[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:52[39m[K

test reward at step 440000: 16126.385105661966


[32mProgress:  11%|████▌                                    |  ETA: 1:09:40[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 1:09:31[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:09:20[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:09:11[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:08:58[39m[K

test reward at step 450000: 15825.348937304903


[32mProgress:  11%|████▋                                    |  ETA: 1:09:31[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:09:24[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:09:14[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 1:09:03[39m[K

[32mProgress:  11%|████▊                                    |  ETA: 1:08:50[39m[K

test reward at step 460000: 16003.563306123879


[32mProgress:  12%|████▊                                    |  ETA: 1:09:21[39m[K

[32mProgress:  12%|████▊                                    |  ETA: 1:09:14[39m[K

[32mProgress:  12%|████▊                                    |  ETA: 1:09:05[39m[K

[32mProgress:  12%|████▊                                    |  ETA: 1:08:53[39m[K

[32mProgress:  12%|████▊                                    |  ETA: 1:08:40[39m[K

test reward at step 470000: 16654.83117263174


[32mProgress:  12%|████▉                                    |  ETA: 1:09:10[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 1:08:59[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 1:08:46[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 1:08:35[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 1:08:23[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 1:08:53[39m[K

test reward at step 480000: 16758.624699811935


[32mProgress:  12%|█████                                    |  ETA: 1:08:41[39m[K

[32mProgress:  12%|█████                                    |  ETA: 1:08:30[39m[K

[32mProgress:  12%|█████                                    |  ETA: 1:08:21[39m[K

[32mProgress:  12%|█████                                    |  ETA: 1:08:10[39m[K

[32mProgress:  12%|█████                                    |  ETA: 1:07:59[39m[K

test reward at step 490000: 17319.933493412453


[32mProgress:  12%|█████                                    |  ETA: 1:08:28[39m[K

[32mProgress:  12%|█████                                    |  ETA: 1:08:21[39m[K

[32mProgress:  12%|█████▏                                   |  ETA: 1:08:10[39m[K

[32mProgress:  12%|█████▏                                   |  ETA: 1:08:02[39m[K

[32mProgress:  12%|█████▏                                   |  ETA: 1:07:50[39m[K

test reward at step 500000: 17300.163630831565


[32mProgress:  13%|█████▏                                   |  ETA: 1:08:20[39m[K

[32mProgress:  13%|█████▏                                   |  ETA: 1:08:12[39m[K

[32mProgress:  13%|█████▏                                   |  ETA: 1:07:59[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 1:07:48[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 1:07:40[39m[K

test reward at step 510000: 17305.16045123329


[32mProgress:  13%|█████▎                                   |  ETA: 1:08:04[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 1:07:45[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 1:07:35[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 1:07:27[39m[K

[32mProgress:  13%|█████▍                                   |  ETA: 1:07:15[39m[K

test reward at step 520000: 18099.268780277293


[32mProgress:  13%|█████▍                                   |  ETA: 1:07:43[39m[K

[32mProgress:  13%|█████▍                                   |  ETA: 1:07:28[39m[K

[32mProgress:  13%|█████▍                                   |  ETA: 1:07:19[39m[K

[32mProgress:  13%|█████▍                                   |  ETA: 1:07:08[39m[K

test reward at step 530000: 18089.04885188008


[32mProgress:  13%|█████▍                                   |  ETA: 1:07:35[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 1:07:27[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 1:07:16[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 1:07:07[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 1:07:00[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 1:06:52[39m[K

test reward at step 540000: 18712.405187374417


[32mProgress:  14%|█████▌                                   |  ETA: 1:07:23[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 1:07:12[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 1:07:04[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 1:06:57[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 1:06:47[39m[K

test reward at step 550000: 18684.174019101476


[32mProgress:  14%|█████▋                                   |  ETA: 1:07:14[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 1:07:01[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:51[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:41[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:31[39m[K

test reward at step 560000: 19182.061492796853


[32mProgress:  14%|█████▊                                   |  ETA: 1:06:56[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:53[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:43[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 1:06:35[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:25[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:50[39m[K

test reward at step 570000: 19221.282474566273


[32mProgress:  14%|█████▉                                   |  ETA: 1:06:47[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:38[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:31[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:23[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 1:06:16[39m[K

[32mProgress:  15%|██████                                   |  ETA: 1:06:21[39m[K

test reward at step 580000: 19053.709148838425


[32mProgress:  15%|██████                                   |  ETA: 1:06:12[39m[K

[32mProgress:  15%|██████                                   |  ETA: 1:06:04[39m[K

[32mProgress:  15%|██████                                   |  ETA: 1:05:55[39m[K

[32mProgress:  15%|██████                                   |  ETA: 1:05:46[39m[K

[32mProgress:  15%|██████                                   |  ETA: 1:05:37[39m[K

test reward at step 590000: 19136.69861460725


[32mProgress:  15%|██████▏                                  |  ETA: 1:06:00[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 1:05:53[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 1:05:43[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 1:05:35[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 1:05:26[39m[K

test reward at step 600000: 19519.402008460496


[32mProgress:  15%|██████▏                                  |  ETA: 1:05:51[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 1:05:48[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:40[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:34[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:26[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:20[39m[K

test reward at step 610000: 19552.124439713007


[32mProgress:  15%|██████▎                                  |  ETA: 1:05:45[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:35[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 1:05:27[39m[K

[32mProgress:  15%|██████▍                                  |  ETA: 1:05:18[39m[K

[32mProgress:  15%|██████▍                                  |  ETA: 1:05:09[39m[K

test reward at step 620000: 20542.137776277905


[32mProgress:  16%|██████▍                                  |  ETA: 1:05:31[39m[K

[32mProgress:  16%|██████▍                                  |  ETA: 1:05:24[39m[K

[32mProgress:  16%|██████▍                                  |  ETA: 1:05:17[39m[K

[32mProgress:  16%|██████▍                                  |  ETA: 1:05:08[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 1:05:00[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 1:05:20[39m[K

test reward at step 630000: 21522.51679200048


[32mProgress:  16%|██████▌                                  |  ETA: 1:05:07[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 1:04:59[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 1:04:50[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 1:04:41[39m[K

test reward at step 640000: 22957.533914391784


[32mProgress:  16%|██████▋                                  |  ETA: 1:05:03[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 1:04:54[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 1:04:46[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 1:04:39[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 1:04:33[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 1:04:56[39m[K

test reward at step 650000: 24268.717375162367


[32mProgress:  16%|██████▋                                  |  ETA: 1:04:51[39m[K

[32mProgress:  16%|██████▊                                  |  ETA: 1:04:45[39m[K

[32mProgress:  16%|██████▊                                  |  ETA: 1:04:36[39m[K

[32mProgress:  16%|██████▊                                  |  ETA: 1:04:28[39m[K

[32mProgress:  16%|██████▊                                  |  ETA: 1:04:23[39m[K

[32mProgress:  17%|██████▊                                  |  ETA: 1:04:46[39m[K

test reward at step 660000: 24009.930202837044


[32mProgress:  17%|██████▊                                  |  ETA: 1:04:38[39m[K

[32mProgress:  17%|██████▊                                  |  ETA: 1:04:33[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 1:04:26[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 1:04:19[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 1:04:11[39m[K

test reward at step 670000: 24872.872115424576


[32mProgress:  17%|██████▉                                  |  ETA: 1:04:31[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 1:04:21[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 1:04:12[39m[K

[32mProgress:  17%|███████                                  |  ETA: 1:04:05[39m[K

[32mProgress:  17%|███████                                  |  ETA: 1:03:57[39m[K

test reward at step 680000: 24628.25814170765


[32mProgress:  17%|███████                                  |  ETA: 1:04:17[39m[K

[32mProgress:  17%|███████                                  |  ETA: 1:04:08[39m[K

[32mProgress:  17%|███████                                  |  ETA: 1:04:00[39m[K

[32mProgress:  17%|███████                                  |  ETA: 1:03:53[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 1:04:11[39m[K

test reward at step 690000: 24995.79598019414


[32mProgress:  17%|███████▏                                 |  ETA: 1:04:06[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 1:03:57[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 1:03:50[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 1:03:41[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 1:03:34[39m[K

test reward at step 700000: 24856.655998822553


[32mProgress:  18%|███████▎                                 |  ETA: 1:03:53[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 1:03:46[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 1:03:41[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 1:03:34[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 1:03:25[39m[K

test reward at step 710000: 25738.560265636013


[32mProgress:  18%|███████▎                                 |  ETA: 1:03:44[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 1:03:38[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:31[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:23[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:16[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:08[39m[K

test reward at step 720000: 26250.272882721805


[32mProgress:  18%|███████▍                                 |  ETA: 1:03:26[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:20[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 1:03:13[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 1:03:05[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 1:02:58[39m[K

test reward at step 730000: 26077.804216517205


[32mProgress:  18%|███████▌                                 |  ETA: 1:03:17[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 1:03:09[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 1:03:03[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 1:02:59[39m[K

[32mProgress:  18%|███████▋                                 |  ETA: 1:02:52[39m[K

[32mProgress:  19%|███████▋                                 |  ETA: 1:03:11[39m[K

test reward at step 740000: 26980.746181599523


[32mProgress:  19%|███████▋                                 |  ETA: 1:03:02[39m[K

[32mProgress:  19%|███████▋                                 |  ETA: 1:02:54[39m[K

[32mProgress:  19%|███████▋                                 |  ETA: 1:02:47[39m[K

[32mProgress:  19%|███████▋                                 |  ETA: 1:02:39[39m[K

test reward at step 750000: 26814.600865314613


[32mProgress:  19%|███████▊                                 |  ETA: 1:02:55[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 1:02:51[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 1:02:43[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 1:02:37[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 1:02:30[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 1:02:23[39m[K

test reward at step 760000: 28325.6089095522


[32mProgress:  19%|███████▊                                 |  ETA: 1:02:42[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 1:02:34[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 1:02:29[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 1:02:22[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 1:02:17[39m[K

test reward at step 770000: 28971.149238368944


[32mProgress:  19%|███████▉                                 |  ETA: 1:02:34[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 1:02:25[39m[K

[32mProgress:  19%|████████                                 |  ETA: 1:02:19[39m[K

[32mProgress:  19%|████████                                 |  ETA: 1:02:14[39m[K

[32mProgress:  19%|████████                                 |  ETA: 1:02:08[39m[K

[32mProgress:  20%|████████                                 |  ETA: 1:02:25[39m[K

test reward at step 780000: 30348.717770676485


[32mProgress:  20%|████████                                 |  ETA: 1:02:20[39m[K

[32mProgress:  20%|████████                                 |  ETA: 1:02:15[39m[K

[32mProgress:  20%|████████                                 |  ETA: 1:02:08[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 1:02:01[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 1:01:54[39m[K

test reward at step 790000: 31584.58882079313


[32mProgress:  20%|████████▏                                |  ETA: 1:02:10[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 1:02:03[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 1:01:57[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 1:01:53[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:47[39m[K

test reward at step 800000: 31256.320477065397


[32mProgress:  20%|████████▎                                |  ETA: 1:02:05[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:57[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:53[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:47[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:43[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 1:01:37[39m[K

test reward at step 810000: 32802.799481965056


[32mProgress:  20%|████████▎                                |  ETA: 1:01:53[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 1:01:49[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 1:01:42[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 1:01:35[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 1:01:28[39m[K

[32mProgress:  21%|████████▍                                |  ETA: 1:01:42[39m[K

test reward at step 820000: 33499.40776516114


[32mProgress:  21%|████████▍                                |  ETA: 1:01:36[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:32[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:26[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:19[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:12[39m[K

test reward at step 830000: 34649.27880244291


[32mProgress:  21%|████████▌                                |  ETA: 1:01:27[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:24[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 1:01:17[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 1:01:11[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 1:01:04[39m[K

test reward at step 840000: 35731.44728143884


[32mProgress:  21%|████████▋                                |  ETA: 1:01:18[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 1:01:14[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 1:01:06[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 1:01:00[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 1:00:54[39m[K

test reward at step 850000: 36788.10921195774


[32mProgress:  21%|████████▊                                |  ETA: 1:01:07[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 1:00:56[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 1:00:50[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 1:00:44[39m[K

In [None]:
plot(episode_test_reward_hook.rewards)

In [None]:
close_visualization(); # closes the MeshCat visualization