# Init Bionic VTOL

In [1]:
include("../Flyonic.jl");
using .Flyonic;

using Rotations; # used for initial position

using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Random;
using IntervalSets;
using LinearAlgebra;
using Distributions;

using Plots;
using Statistics;

using BSON: @save, @load # save mode

In [2]:
create_visualization();

┌ Info: MeshCat server started. You can open the visualizer by visiting the following URL in your browser:
│ http://127.0.0.1:8700
└ @ MeshCat /Users/leonardoigler/.julia/packages/MeshCat/Ax8pH/src/visualizer.jl:73


In [3]:
# indicates how many threads Julia was started with. This is important for the multi-threaded environment
Threads.nthreads()

1

In [4]:
eth_vtol_param["gravity"] = 9.81;

In [5]:
# TODO: All this stuff must be replaced later by your guiding paths.

DESIRED_x = [-4.0, 0.0, 4.0] # desired distance    
angle = calculateAngle([0.0 ,0.0, 1.0], DESIRED_x) # 
DESIRED_R = Matrix(UnitQuaternion(RotY(angle)*RotX(pi/2.0)*RotZ(pi/2.0)))

create_VTOL("fixgoal", actuators = false, color_vec=[0.0; 1.0; 0.0; 1.0]);
set_transform("fixgoal", DESIRED_x ,QuatRotation(DESIRED_R)); 

# Create Reinforcement Learning Environment

In [6]:
mutable struct VtolEnv{A,T,ACT,R<:AbstractRNG} <: AbstractEnv # Parametric Constructor for a subtype of AbstractEnv
    action_space::A
    observation_space::Space{Vector{ClosedInterval{T}}}
    state::Vector{T}
    action::ACT
    done::Bool
    t::T
    rng::R

    name::String #for multible environoments
    visualization::Bool
    realtime::Bool
    
    # Everything you need aditionaly can also go in here.
    x_W::Vector{T}
    v_B::Vector{T}
    R_W::Matrix{T}
    ω_B::Vector{T}
    wind_W::Vector{T}
    Δt::T
    
    # Bonus / Target
    x_d_W::Vector{T}
    R_d_W::Matrix{T}

    # NEW
    covered_line::T
    previously_covered_line::T
end

In [7]:
# define a keyword-based constructor for the type declared in the mutable struct typedef. 
# It could also be done with the macro Base.@kwdef.
function VtolEnv(;
     
    #continuous = true,
    rng = Random.GLOBAL_RNG, # Random number generation
    name = "vtol",
    visualization = false,
    realtime = false,
    kwargs... # let the function take an arbitrary number of keyword arguments 
)
    
    T = Float64; # explicit type which is used e.g. in state. Cannot be altered due to the poor matrix defininon.

    #action_space = Base.OneTo(21) # 21 discrete positions for the flaps
    
    action_space = Space(
        ClosedInterval{T}[
            0.0..2.0, # propeller 1
            0.0..2.0, # propeller 2
            ], 
    )

    
    state_space = Space( # Three continuous values in state space.
        ClosedInterval{T}[
            
            # If you are not flying horizontally, you can later switch gravitation 
            # back on and counteract it with the rotors as well.
            # In addition, once the drone has flown over its target, 
            # it can "fall down" and does not have to turn around.
            
            # orientate yourself on the state space from the paper
            typemin(T)..typemax(T), # position along x
            typemin(T)..typemax(T), # position along z
            typemin(T)..typemax(T), # orientation along x
            typemin(T)..typemax(T), # orientation along z
            typemin(T)..typemax(T), # velocity along x BODY coordinates
            typemin(T)..typemax(T), # velocity along y BODY coordinates
            typemin(T)..typemax(T), # rotational velocity along z BODY coordinates
            
            typemin(T)..typemax(T), # position error along x
            typemin(T)..typemax(T), # position error along z
            # Not used in Paper!!!
            typemin(T)..typemax(T), # target rotation along x (better than angle for neural networks)
            typemin(T)..typemax(T), # target rotation along z (better than angle for neural networks)
            
            # NEW 
            typemin(T)..typemax(T), # The distance along the connecting line which has been passed
            typemin(T)..typemax(T), # The distance along the connecting line which has been previously passed         
            ], 
    )
    
    if visualization
        create_VTOL(name, actuators = true, color_vec=[1.0; 1.0; 0.6; 1.0]);
    end

    environment = VtolEnv(
        action_space,
        state_space,
        zeros(T, 11), # current state, needs to be extended. 
        rand(action_space),
        false, # episode done ?
        0.0, # time
        rng, # random number generator  
        name,
        visualization,
        realtime,
        zeros(T, 3), # x_W
        zeros(T, 3), # v_B
        #Matrix(UnitQuaternion((RotX(pi)))),
        [1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 1.0], # Float64... so T needs to be Float64
        zeros(T, 3), # ω_B
        zeros(T, 3), # wind_W
        T(0.025), # Δt 
        # TODO Random
        DESIRED_x, # desired distance 
        [1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 1.0], # desired orientation
    
        0.0, # Covered line
        0.0,    # Previously covered line
    )
    
    
    reset!(environment)
    
    return environment
    
end;

In [8]:
# TODO Don't get that part. Ask next meeting
print(typeof(RotY(-pi/2.0)*RotX(pi)))
print(typeof(UnitQuaternion(RotY(-pi/2.0)*RotX(pi))))
print(typeof(QuatRotation(UnitQuaternion(RotY(-pi/2.0)*RotX(pi)))))

RotYX{Float64}QuatRotation{Float64}QuatRotation{Float64}

Just for explanation:

1. A mutable Struct is created. A struct is a constructor and a constructor is a function that creates new objects.
2. A outer keyword-based constructor method is added for the type declared in the mutable struct typedef before.

So now we have a function with two methods. Julia will decide which method to call by multiple dispatch.

In [9]:
methods(VtolEnv)

# Define the RL interface

In [10]:
Random.seed!(env::VtolEnv, seed) = Random.seed!(env.rng, seed)
RLBase.action_space(env::VtolEnv) = env.action_space
RLBase.state_space(env::VtolEnv) = env.observation_space
RLBase.is_terminated(env::VtolEnv) = env.done
RLBase.state(env::VtolEnv) = env.state

In [11]:
function computeReward(env::VtolEnv{A,T}) where {A,T}
    
    # TODO: Add tolerance for VTOL-Drone
    if norm(env.x_W - env.x_d_W)<2
        near_goal = exp(-norm(env.x_W - env.x_d_W))*10
    else
        near_goal = 0
    end

    distance_goal = norm(env.x_W - env.x_d_W) * 20
    
    limit_rotation = 0.1 * env.ω_B[3]^2 #* 10.0

    if env.covered_line > 1
        new_progress = -(env.covered_line-env.previously_covered_line)*100
        progress = - env.covered_line * 100#sign(env.covered_line)*abs(env.covered_line-norm(env.x_d_W))*10
    else
        new_progress = (env.covered_line-env.previously_covered_line)*100
        progress = env.covered_line * 100
    end
    # TODO: Make yourself comfortable with what this is
    #difference_angle = sum((env.R_W[:,1] - env.R_d_W[:,1]).^2)*10.0
    
    
    #difference_angle = abs(env.state[3])*50.0

    #distance_goal = norm(env.x_d_W-[env.state[1], env.state[2], 0])*100.0

    #difference_angle = abs(env.state[3]-env.angle_d_W)*50.0
    

    # TODO Save last position or last projection somewhere (env.last) --> Compare 
    # to current project along line
    #print(difference_angle)
    #not_upright_orientation = abs(env.state[1]-pi*0.5)*10.0
    #not_centered_position = abs(env.state[2])*10.0
    #hight = env.state[4]*100.0

    env.previously_covered_line = env.covered_line
    
    return near_goal - distance_goal + progress + new_progress - limit_rotation #- difference_angle
end


RLBase.reward(env::VtolEnv{A,T}) where {A,T} = computeReward(env)

In [12]:
x = [-4.0, 0.0, 4.0]
pos = [1.0, 0.0, 0.0]
dot(pos, x)/(norm(x)^2)

-0.12499999999999997

In [13]:
function RLBase.reset!(env::VtolEnv{A,T}) where {A,T}
    
    # Visualize initial state
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, [0.0; 0.0; 0.0; 0.0])
    end
    
    env.x_W = [0.0; 0.0; 0.0];
    env.v_B = [0.0; 0.0; 0.0];
    env.R_W = Matrix(UnitQuaternion(RotZ(-pi/2.0)*RotY(-pi/2.0)*RotX(pi)));
    #env.R_W = Matrix(UnitQuaternion(RotX(pi)));
    #DESIRED_R = Matrix(UnitQuaternion(env.R_W))

    env.ω_B = [0.0; 0.0; 0.0];
    env.wind_W = [0.0; 0.0; 0.0];


    #env.x_d_W = DESIRED_x
    env.x_d_W = [rand(Uniform(-5,5)), 0.0, rand(Uniform(0,5))]

    #env.R_d_W = DESIRED_R
    env.R_d_W = UnitQuaternion(RotY(calculateAngle([0.0 ,0.0, 1.0], env.x_d_W))*env.R_W)#Matrix(UnitQuaternion(RotZ(calculateAngle([0.0 ,0.0, 1.0], DESIRED_x))*RotZ(-pi/2.0)*RotY(-pi/2.0)*RotX(pi)))

    if env.visualization
        create_VTOL("fixgoal", actuators = false, color_vec=[0.0; 1.0; 0.0; 1.0]);
        set_transform("fixgoal", env.x_d_W ,QuatRotation(env.R_d_W)); 
    end

    env.covered_line = 0.0
    env.previously_covered_line = 0.0
    
    env.state = [env.x_W[1];
                 env.x_W[3];
                 env.R_W[1,1];
                 env.R_W[3,1];
                 env.v_B[1];
                 env.v_B[2];
                 env.ω_B[3];
                 env.x_W[1] - env.x_d_W[1];
                 env.x_W[3] - env.x_d_W[3]; 
                 env.R_d_W[1,1]; 
                 env.R_d_W[3,1];
                 env.covered_line;
                 env.previously_covered_line]
    
    env.t = 0.0
    env.action = [0.0, 0.0]
    env.done = false
    nothing

end;

In [14]:
# defines a methods for a callable object.
# So when a VtolEnv object is created, it has this method that can be called
function (env::VtolEnv)(a)

    # set the propeller trust and the two flaps 2D case
    next_action = [a[1], a[2], 0.0, 0.0]
   
    _step!(env, next_action)
end

In [15]:
env = VtolEnv();

In [16]:
methods(env) # Just to explain which methods the object has

In [17]:
function _step!(env::VtolEnv, next_action)
        
    # caluclate wind impact
    v_in_wind_B = vtol_add_wind(env.v_B, env.R_W, env.wind_W)
    # caluclate aerodynamic forces
    torque_B, force_B = vtol_model(v_in_wind_B, next_action, eth_vtol_param);
    # Limit to 2D
    force_B[3] = 0.0; # Body Z
    env.v_B[3] = 0.0;
    torque_B[1] = 0.0; torque_B[2] = 0.0;  # Body X and Y
    env.ω_B[1] = 0.0; env.ω_B[2] = 0.0;
    # integrate rigid body dynamics for Δt
    env.x_W, env.v_B, env.R_W, env.ω_B, time = rigid_body_simple(torque_B, force_B, env.x_W, env.v_B, env.R_W, env.ω_B, env.t, env.Δt, eth_vtol_param)
    
    # NEW    
    env.covered_line = dot(env.x_W, env.x_d_W)/(norm(env.x_d_W)^2)

    if env.realtime
        sleep(env.Δt) # TODO: just a dirty hack. this is of course slower than real time.
    end

    # Visualize the new state 
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, next_action)
    end
 
    env.t += env.Δt
    
    # State space
    #rot = Rotations.params(RotYXZ(env.R_W))[3]
    #env.state[1] = env.x_W[1] # world position in x
    #env.state[2] = env.ω_B[2] # world position in y
    #env.state[3] = rot # rotation around z
    #rot = Rotations.params(RotYXZ(env.R_W))[1]
    
    
    env.state[1] = env.x_W[1];
    env.state[2] = env.x_W[3];
    env.state[3] = env.R_W[1,1];
    env.state[4] = env.R_W[3,1];
    env.state[5] = env.v_B[1];
    env.state[6] = env.v_B[2];
    env.state[7] = env.ω_B[3];
    env.state[8] = env.x_W[1] - env.x_d_W[1];
    env.state[9] = env.x_W[3] - env.x_d_W[3]; 
    env.state[10] = env.R_d_W[1,1]; 
    env.state[11] = env.R_d_W[3,1];
    env.state[12] = env.covered_line;    # Covered distance along line after step
    env.state[13] = env.previously_covered_line; # Covered distance along line before step
    
    
    # Termination criteria
    # TODO: Use many termination criteria so that you do not train unnecessarily in wrong areas
    env.done = #true

        # After time... How fast is drone+Range of desired point
        # After reaching position (circle of r_tol)
        norm(env.ω_B) > 100.0 || 
        norm(env.v_B) > 100.0 || # stop if body is too fast
        env.x_W[3] < -10.0 || # stop if body is below -10m
        #0.0 > rot || # Stop if the drone is pitched 90°.
        #rot > pi || # Stop if the drone is pitched 90°.
        sum((env.x_W - env.x_d_W).^2) < 1.0 ||
        env.t > 10.0 # stop after 10s
    nothing
end;

In [18]:
RLBase.test_runnable!(env)

[0m[1mTest Summary:              | [22m[32m[1mPass  [22m[39m[36m[1mTotal  [22m[39m[0m[1mTime[22m
random policy with VtolEnv | [32m2000  [39m[36m 2000  [39m[0m0.8s


Test.DefaultTestSet("random policy with VtolEnv", Any[], 2000, false, false, true, 1.669073622562759e9, 1.669073623396665e9)

Show an overview of the environment.

# Setup of a reinforcement learning experiment.

In [19]:
seed = 123    
rng = StableRNG(seed)
    N_ENV = 8
    UPDATE_FREQ = 1024
    
    
    # define multiple environments for parallel training
    env = MultiThreadEnv([
        # use different names for the visualization
        VtolEnv(; rng = StableRNG(hash(seed+i)), name = "vtol$i") for i in 1:N_ENV
    ])

MultiThreadEnv(8 x VtolEnv)

In [20]:
# Define the function approximator
    ns, na = length(state(env[1])), length(action_space(env[1]))
    approximator = ActorCritic(
                actor = GaussianNetwork(
                    pre = Chain(
                    Dense(ns, 32, relu; initW = glorot_uniform(rng)),#
                    Dense(32, 32, relu; initW = glorot_uniform(rng)),
                    ),
                    μ = Chain(Dense(32, na; initW = glorot_uniform(rng))),
                    logσ = Chain(Dense(32, na; initW = glorot_uniform(rng))),
                ),
                critic = Chain(
                    Dense(ns, 32, relu; initW = glorot_uniform(rng)),
                    Dense(32, 32, relu; initW = glorot_uniform(rng)),
                    Dense(32, 1; initW = glorot_uniform(rng)),
                ),
                optimizer = ADAM(1e-3),
            );

In [21]:
    agent = Agent( # A wrapper of an AbstractPolicy
        # AbstractPolicy: the policy to use
        policy = PPOPolicy(;
                    approximator = approximator |> gpu,
                    update_freq=UPDATE_FREQ,
                    dist = Normal,
                    # For parameters visit the docu: https://juliareinforcementlearning.org/docs/rlzoo/#ReinforcementLearningZoo.PPOPolicy
                    ),
        
        # AbstractTrajectory: used to store transitions between an agent and an environment source
        trajectory = PPOTrajectory(;
            capacity = UPDATE_FREQ,
            state = Matrix{Float64} => (ns, N_ENV),
            action = Matrix{Float64} => (na, N_ENV),
            action_log_prob = Vector{Float64} => (N_ENV,),
            reward = Vector{Float64} => (N_ENV,),
            terminal = Vector{Bool} => (N_ENV,),
        ),
    );




┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/leonardoigler/.julia/packages/Flux/7nTyc/src/functor.jl:187


In [22]:
function saveModel(t, agent, env)
    model = cpu(agent.policy.approximator)   
    f = joinpath("./RL_models/", "vtol_2D_ppo_$t.bson")
    @save f model
    println("parameters at step $t saved to $f")
end;

In [23]:
function loadModel()
    f = joinpath("./RL_models/", "vtol_2D_ppo_2000000.bson") # TODO: evtl anpassen
    @load f model
    return model
end;

In [24]:
function validate_policy(t, agent, env)
    run(agent.policy, test_env, StopAfterEpisode(1), episode_test_reward_hook)
    # the result of the hook
    println("test reward at step $t: $(mean(episode_test_reward_hook.rewards))")
    
end;

episode_test_reward_hook = TotalRewardPerEpisode(;is_display_on_exit=false)
# create a env only for reward test
test_env = VtolEnv(;name = "testVTOL", visualization = true, realtime = true);

In [25]:
#agent.policy.approximator = loadModel();

In [26]:
ReinforcementLearning.run(
    agent,
    env,
    StopAfterStep(4_000_000),
    ComposedHook(
        DoEveryNStep(saveModel, n=100_000), 
        DoEveryNStep(validate_policy, n=10_000)),
)

[32mProgress:   0%|                                         |  ETA: 36.22 days[39m[K

[32mProgress:   0%|                                         |  ETA: 11:27:46[39m[K

[32mProgress:   0%|▏                                        |  ETA: 3:05:54[39m[K

[32mProgress:   0%|▏                                        |  ETA: 1:58:40[39m[K

[32mProgress:   0%|▎                                        |  ETA: 1:27:57[39m[K

test reward at step 10000: -29971.209736784487


[32mProgress:   1%|▎                                        |  ETA: 1:17:35[39m[K

[32mProgress:   1%|▎                                        |  ETA: 1:05:01[39m[K

[32mProgress:   1%|▍                                        |  ETA: 0:56:57[39m[K

[32mProgress:   1%|▍                                        |  ETA: 0:51:14[39m[K

test reward at step 20000: -29609.67629526103


[32mProgress:   1%|▌                                        |  ETA: 0:49:22[39m[K

[32mProgress:   1%|▌                                        |  ETA: 0:46:00[39m[K

[32mProgress:   1%|▌                                        |  ETA: 0:43:02[39m[K

[32mProgress:   1%|▋                                        |  ETA: 0:40:14[39m[K

test reward at step 30000: -30717.378542431747


[32mProgress:   2%|▋                                        |  ETA: 0:39:44[39m[K

[32mProgress:   2%|▊                                        |  ETA: 0:37:38[39m[K

[32mProgress:   2%|▊                                        |  ETA: 0:35:58[39m[K

[32mProgress:   2%|▉                                        |  ETA: 0:36:07[39m[K

test reward at step 40000: -28373.429341024483


[32mProgress:   2%|▉                                        |  ETA: 0:33:14[39m[K

[32mProgress:   2%|█                                        |  ETA: 0:32:02[39m[K

test reward at step 50000: -25155.005753579142


[32mProgress:   3%|█                                        |  ETA: 0:32:12[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 0:31:10[39m[K

[32mProgress:   3%|█▏                                       |  ETA: 0:30:17[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 0:29:27[39m[K

test reward at step 60000: -22773.648357775994


[32mProgress:   3%|█▎                                       |  ETA: 0:29:36[39m[K

[32mProgress:   3%|█▎                                       |  ETA: 0:28:53[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 0:28:11[39m[K

[32mProgress:   3%|█▍                                       |  ETA: 0:27:33[39m[K

test reward at step 70000: -21066.05328216015


[32mProgress:   4%|█▌                                       |  ETA: 0:27:44[39m[K

[32mProgress:   4%|█▌                                       |  ETA: 0:27:13[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 0:26:38[39m[K

[32mProgress:   4%|█▋                                       |  ETA: 0:26:09[39m[K

test reward at step 80000: -20119.31873006939


[32mProgress:   4%|█▋                                       |  ETA: 0:26:23[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 0:25:58[39m[K

[32mProgress:   4%|█▊                                       |  ETA: 0:25:30[39m[K

[32mProgress:   4%|█▉                                       |  ETA: 0:25:05[39m[K

test reward at step 90000: -19139.25967335423


[32mProgress:   5%|█▉                                       |  ETA: 0:25:17[39m[K

[32mProgress:   5%|██                                       |  ETA: 0:24:56[39m[K

[32mProgress:   5%|██                                       |  ETA: 0:24:32[39m[K

[32mProgress:   5%|██                                       |  ETA: 0:24:11[39m[K

parameters at step 100000 saved to ./RL_models/vtol_2D_ppo_100000.bson


test reward at step 100000: -18546.945317475045


[32mProgress:   5%|██▏                                      |  ETA: 0:24:49[39m[K

[32mProgress:   5%|██▏                                      |  ETA: 0:24:30[39m[K

[32mProgress:   5%|██▎                                      |  ETA: 0:24:08[39m[K

test reward at step 110000: -18005.893726476133


[32mProgress:   6%|██▎                                      |  ETA: 0:24:17[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 0:23:59[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 0:23:40[39m[K

[32mProgress:   6%|██▍                                      |  ETA: 0:23:24[39m[K

test reward at step 120000: -17598.484743765246


[32mProgress:   6%|██▌                                      |  ETA: 0:23:36[39m[K

[32mProgress:   6%|██▌                                      |  ETA: 0:23:17[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 0:23:03[39m[K

[32mProgress:   6%|██▋                                      |  ETA: 0:22:46[39m[K

test reward at step 130000: -17234.090906783775


[32mProgress:   7%|██▊                                      |  ETA: 0:22:56[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 0:22:41[39m[K

[32mProgress:   7%|██▊                                      |  ETA: 0:22:26[39m[K

[32mProgress:   7%|██▉                                      |  ETA: 0:22:13[39m[K

test reward at step 140000: -16832.85915305336


[32mProgress:   7%|██▉                                      |  ETA: 0:22:22[39m[K

[32mProgress:   7%|███                                      |  ETA: 0:22:11[39m[K

[32mProgress:   7%|███                                      |  ETA: 0:22:03[39m[K

[32mProgress:   7%|███▏                                     |  ETA: 0:21:53[39m[K

test reward at step 150000: -16426.890808344142


[32mProgress:   8%|███▏                                     |  ETA: 0:22:02[39m[K

[32mProgress:   8%|███▏                                     |  ETA: 0:21:51[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 0:21:40[39m[K

[32mProgress:   8%|███▎                                     |  ETA: 0:21:29[39m[K

test reward at step 160000: -15991.31658399578


[32mProgress:   8%|███▍                                     |  ETA: 0:21:36[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 0:21:26[39m[K

[32mProgress:   8%|███▍                                     |  ETA: 0:21:17[39m[K

[32mProgress:   8%|███▌                                     |  ETA: 0:21:07[39m[K

test reward at step 170000: -15645.372353112407


[32mProgress:   9%|███▌                                     |  ETA: 0:21:15[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 0:21:06[39m[K

[32mProgress:   9%|███▋                                     |  ETA: 0:20:57[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 0:20:48[39m[K

test reward at step 180000: -15454.70719804959


[32mProgress:   9%|███▊                                     |  ETA: 0:21:16[39m[K

[32mProgress:   9%|███▊                                     |  ETA: 0:21:07[39m[K

[32mProgress:   9%|███▉                                     |  ETA: 0:20:59[39m[K

test reward at step 190000: -15239.876046758038


[32mProgress:  10%|███▉                                     |  ETA: 0:21:11[39m[K

[32mProgress:  10%|████                                     |  ETA: 0:21:01[39m[K

[32mProgress:  10%|████                                     |  ETA: 0:20:51[39m[K

[32mProgress:  10%|████                                     |  ETA: 0:20:43[39m[K

parameters at step 200000 saved to ./RL_models/vtol_2D_ppo_200000.bson


test reward at step 200000: -15027.929703748141


[32mProgress:  10%|████▏                                    |  ETA: 0:20:48[39m[K

[32mProgress:  10%|████▏                                    |  ETA: 0:20:39[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 0:20:29[39m[K

[32mProgress:  10%|████▎                                    |  ETA: 0:20:21[39m[K

test reward at step 210000: -14841.141693994161


[32mProgress:  11%|████▍                                    |  ETA: 0:20:30[39m[K

[32mProgress:  11%|████▍                                    |  ETA: 0:20:20[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 0:20:12[39m[K

[32mProgress:  11%|████▌                                    |  ETA: 0:20:04[39m[K

test reward at step 220000: -14606.161037919255


[32mProgress:  11%|████▌                                    |  ETA: 0:20:11[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 0:20:04[39m[K

[32mProgress:  11%|████▋                                    |  ETA: 0:19:56[39m[K

[32mProgress:  11%|████▊                                    |  ETA: 0:19:48[39m[K

test reward at step 230000: -14504.573549596413


[32mProgress:  12%|████▊                                    |  ETA: 0:19:54[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 0:19:47[39m[K

[32mProgress:  12%|████▉                                    |  ETA: 0:19:39[39m[K

test reward at step 240000: -14308.252618100385


[32mProgress:  12%|████▉                                    |  ETA: 0:19:45[39m[K

[32mProgress:  12%|█████                                    |  ETA: 0:19:39[39m[K

[32mProgress:  12%|█████                                    |  ETA: 0:19:32[39m[K

[32mProgress:  12%|█████▏                                   |  ETA: 0:19:25[39m[K

test reward at step 250000: -14176.000375051384


[32mProgress:  13%|█████▏                                   |  ETA: 0:19:34[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 0:19:28[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 0:19:20[39m[K

[32mProgress:  13%|█████▎                                   |  ETA: 0:19:14[39m[K

test reward at step 260000: -14090.216645704639


[32mProgress:  13%|█████▍                                   |  ETA: 0:19:24[39m[K

[32mProgress:  13%|█████▍                                   |  ETA: 0:19:17[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 0:19:11[39m[K

[32mProgress:  13%|█████▌                                   |  ETA: 0:19:06[39m[K

test reward at step 270000: -13939.051215539088


[32mProgress:  14%|█████▌                                   |  ETA: 0:19:14[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 0:19:08[39m[K

[32mProgress:  14%|█████▋                                   |  ETA: 0:19:02[39m[K

[32mProgress:  14%|█████▊                                   |  ETA: 0:18:56[39m[K

test reward at step 280000: -13753.639827093537


[32mProgress:  14%|█████▊                                   |  ETA: 0:19:01[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 0:18:54[39m[K

[32mProgress:  14%|█████▉                                   |  ETA: 0:18:48[39m[K

[32mProgress:  14%|██████                                   |  ETA: 0:18:42[39m[K

test reward at step 290000: -13565.562086198592


[32mProgress:  15%|██████                                   |  ETA: 0:18:46[39m[K

[32mProgress:  15%|██████                                   |  ETA: 0:18:40[39m[K

[32mProgress:  15%|██████▏                                  |  ETA: 0:18:35[39m[K

parameters at step 300000 saved to ./RL_models/vtol_2D_ppo_300000.bson


test reward at step 300000: -13385.639845123967


[32mProgress:  15%|██████▏                                  |  ETA: 0:18:39[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 0:18:34[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 0:18:29[39m[K

[32mProgress:  15%|██████▎                                  |  ETA: 0:18:24[39m[K

[32mProgress:  16%|██████▍                                  |  ETA: 0:18:28[39m[K

test reward at step 310000: -13216.436893918773


[32mProgress:  16%|██████▍                                  |  ETA: 0:18:24[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 0:18:19[39m[K

[32mProgress:  16%|██████▌                                  |  ETA: 0:18:14[39m[K

test reward at step 320000: -13051.974885201236


[32mProgress:  16%|██████▌                                  |  ETA: 0:18:18[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 0:18:13[39m[K

[32mProgress:  16%|██████▋                                  |  ETA: 0:18:08[39m[K

[32mProgress:  16%|██████▊                                  |  ETA: 0:18:03[39m[K

test reward at step 330000: -12889.497866847249


[32mProgress:  17%|██████▊                                  |  ETA: 0:18:06[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 0:18:01[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 0:17:57[39m[K

[32mProgress:  17%|██████▉                                  |  ETA: 0:17:52[39m[K

test reward at step 340000: -12740.82631908961


[32mProgress:  17%|███████                                  |  ETA: 0:17:55[39m[K

[32mProgress:  17%|███████                                  |  ETA: 0:17:50[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 0:17:46[39m[K

[32mProgress:  17%|███████▏                                 |  ETA: 0:17:41[39m[K

test reward at step 350000: -12592.562383256925


[32mProgress:  18%|███████▎                                 |  ETA: 0:17:44[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 0:17:39[39m[K

[32mProgress:  18%|███████▎                                 |  ETA: 0:17:35[39m[K

[32mProgress:  18%|███████▍                                 |  ETA: 0:17:30[39m[K

test reward at step 360000: -12454.976470913662


[32mProgress:  18%|███████▍                                 |  ETA: 0:17:34[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 0:17:30[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 0:17:26[39m[K

[32mProgress:  18%|███████▌                                 |  ETA: 0:17:21[39m[K

test reward at step 370000: -12317.49420720302


[32mProgress:  19%|███████▋                                 |  ETA: 0:17:24[39m[K

[32mProgress:  19%|███████▋                                 |  ETA: 0:17:19[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 0:17:16[39m[K

[32mProgress:  19%|███████▊                                 |  ETA: 0:17:13[39m[K

test reward at step 380000: -12191.987092096862


[32mProgress:  19%|███████▊                                 |  ETA: 0:17:18[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 0:17:12[39m[K

[32mProgress:  19%|███████▉                                 |  ETA: 0:17:08[39m[K

[32mProgress:  19%|████████                                 |  ETA: 0:17:04[39m[K

test reward at step 390000: -12073.119967871964


[32mProgress:  20%|████████                                 |  ETA: 0:17:08[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 0:17:05[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 0:17:01[39m[K

[32mProgress:  20%|████████▏                                |  ETA: 0:16:57[39m[K

parameters at step 400000 saved to ./RL_models/vtol_2D_ppo_400000.bson


test reward at step 400000: -11961.01582227986


[32mProgress:  20%|████████▎                                |  ETA: 0:17:00[39m[K

[32mProgress:  20%|████████▎                                |  ETA: 0:16:55[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 0:16:51[39m[K

[32mProgress:  20%|████████▍                                |  ETA: 0:16:47[39m[K

test reward at step 410000: -11847.063143022897


[32mProgress:  21%|████████▍                                |  ETA: 0:16:50[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 0:16:46[39m[K

[32mProgress:  21%|████████▌                                |  ETA: 0:16:42[39m[K

[32mProgress:  21%|████████▋                                |  ETA: 0:16:38[39m[K

test reward at step 420000: -11743.782797564689


[32mProgress:  21%|████████▋                                |  ETA: 0:16:41[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 0:16:37[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 0:16:34[39m[K

[32mProgress:  21%|████████▊                                |  ETA: 0:16:30[39m[K

test reward at step 430000: -11645.253638873419


[32mProgress:  22%|████████▉                                |  ETA: 0:16:34[39m[K

[32mProgress:  22%|████████▉                                |  ETA: 0:16:30[39m[K

[32mProgress:  22%|█████████                                |  ETA: 0:16:27[39m[K

[32mProgress:  22%|█████████                                |  ETA: 0:16:24[39m[K

test reward at step 440000: -11550.412689660327


[32mProgress:  22%|█████████                                |  ETA: 0:16:26[39m[K

[32mProgress:  22%|█████████▏                               |  ETA: 0:16:23[39m[K

[32mProgress:  22%|█████████▏                               |  ETA: 0:16:19[39m[K

[32mProgress:  22%|█████████▎                               |  ETA: 0:16:16[39m[K

test reward at step 450000: -11461.903097615761


[32mProgress:  23%|█████████▎                               |  ETA: 0:16:18[39m[K

[32mProgress:  23%|█████████▎                               |  ETA: 0:16:15[39m[K

[32mProgress:  23%|█████████▍                               |  ETA: 0:16:11[39m[K

[32mProgress:  23%|█████████▍                               |  ETA: 0:16:07[39m[K

test reward at step 460000: -11376.270850802352


[32mProgress:  23%|█████████▌                               |  ETA: 0:16:09[39m[K

[32mProgress:  23%|█████████▌                               |  ETA: 0:16:05[39m[K

[32mProgress:  23%|█████████▋                               |  ETA: 0:16:02[39m[K

[32mProgress:  23%|█████████▋                               |  ETA: 0:15:58[39m[K

test reward at step 470000: -11290.299471721128


[32mProgress:  24%|█████████▋                               |  ETA: 0:16:00[39m[K

[32mProgress:  24%|█████████▊                               |  ETA: 0:15:57[39m[K

[32mProgress:  24%|█████████▊                               |  ETA: 0:15:53[39m[K

[32mProgress:  24%|█████████▉                               |  ETA: 0:15:50[39m[K

test reward at step 480000: -11210.259267273854


[32mProgress:  24%|█████████▉                               |  ETA: 0:15:52[39m[K

[32mProgress:  24%|█████████▉                               |  ETA: 0:15:48[39m[K

[32mProgress:  24%|██████████                               |  ETA: 0:15:45[39m[K

[32mProgress:  24%|██████████                               |  ETA: 0:15:42[39m[K

test reward at step 490000: -11134.444738328762


[32mProgress:  25%|██████████▏                              |  ETA: 0:15:44[39m[K

[32mProgress:  25%|██████████▏                              |  ETA: 0:15:41[39m[K

[32mProgress:  25%|██████████▎                              |  ETA: 0:15:38[39m[K

[32mProgress:  25%|██████████▎                              |  ETA: 0:15:34[39m[K

parameters at step 500000 saved to ./RL_models/vtol_2D_ppo_500000.bson


test reward at step 500000: -11057.0338741731


[32mProgress:  25%|██████████▎                              |  ETA: 0:15:35[39m[K

[32mProgress:  25%|██████████▍                              |  ETA: 0:15:32[39m[K

[32mProgress:  25%|██████████▍                              |  ETA: 0:15:30[39m[K

[32mProgress:  26%|██████████▌                              |  ETA: 0:15:31[39m[K

test reward at step 510000: -10982.902081904127


[32mProgress:  26%|██████████▌                              |  ETA: 0:15:26[39m[K

[32mProgress:  26%|██████████▋                              |  ETA: 0:15:23[39m[K

[32mProgress:  26%|██████████▋                              |  ETA: 0:15:19[39m[K

test reward at step 520000: -10916.037748712442


[32mProgress:  26%|██████████▊                              |  ETA: 0:15:22[39m[K

[32mProgress:  26%|██████████▊                              |  ETA: 0:15:18[39m[K

[32mProgress:  26%|██████████▊                              |  ETA: 0:15:15[39m[K

[32mProgress:  26%|██████████▉                              |  ETA: 0:15:12[39m[K

test reward at step 530000: -10846.060082946093


[32mProgress:  27%|██████████▉                              |  ETA: 0:15:14[39m[K

[32mProgress:  27%|███████████                              |  ETA: 0:15:11[39m[K

[32mProgress:  27%|███████████                              |  ETA: 0:15:08[39m[K

[32mProgress:  27%|███████████▏                             |  ETA: 0:15:04[39m[K

test reward at step 540000: -10785.502620712246


[32mProgress:  27%|███████████▏                             |  ETA: 0:15:06[39m[K

[32mProgress:  27%|███████████▏                             |  ETA: 0:15:03[39m[K

[32mProgress:  27%|███████████▎                             |  ETA: 0:15:00[39m[K

[32mProgress:  28%|███████████▎                             |  ETA: 0:15:01[39m[K

test reward at step 550000: -10720.71209128514


[32mProgress:  28%|███████████▍                             |  ETA: 0:14:58[39m[K

[32mProgress:  28%|███████████▍                             |  ETA: 0:14:54[39m[K

[32mProgress:  28%|███████████▌                             |  ETA: 0:14:51[39m[K

test reward at step 560000: -10656.932143378475


[32mProgress:  28%|███████████▌                             |  ETA: 0:14:53[39m[K

[32mProgress:  28%|███████████▌                             |  ETA: 0:14:50[39m[K

[32mProgress:  28%|███████████▋                             |  ETA: 0:14:47[39m[K

[32mProgress:  28%|███████████▋                             |  ETA: 0:14:44[39m[K

test reward at step 570000: -10597.416562333872


[32mProgress:  29%|███████████▊                             |  ETA: 0:14:46[39m[K

[32mProgress:  29%|███████████▊                             |  ETA: 0:14:43[39m[K

[32mProgress:  29%|███████████▊                             |  ETA: 0:14:40[39m[K

[32mProgress:  29%|███████████▉                             |  ETA: 0:14:37[39m[K

test reward at step 580000: -10542.25060851051


[32mProgress:  29%|███████████▉                             |  ETA: 0:14:38[39m[K

[32mProgress:  29%|████████████                             |  ETA: 0:14:35[39m[K

[32mProgress:  29%|████████████                             |  ETA: 0:14:33[39m[K

[32mProgress:  29%|████████████▏                            |  ETA: 0:14:30[39m[K

test reward at step 590000: -10487.585041611399


[32mProgress:  30%|████████████▏                            |  ETA: 0:14:31[39m[K

[32mProgress:  30%|████████████▏                            |  ETA: 0:14:28[39m[K

[32mProgress:  30%|████████████▎                            |  ETA: 0:14:25[39m[K

[32mProgress:  30%|████████████▎                            |  ETA: 0:14:22[39m[K

parameters at step 600000 saved to ./RL_models/vtol_2D_ppo_600000.bson


test reward at step 600000: -10432.509665082982


[32mProgress:  30%|████████████▍                            |  ETA: 0:14:23[39m[K

[32mProgress:  30%|████████████▍                            |  ETA: 0:14:20[39m[K

[32mProgress:  30%|████████████▌                            |  ETA: 0:14:17[39m[K

[32mProgress:  30%|████████████▌                            |  ETA: 0:14:14[39m[K

test reward at step 610000: -10380.245377052684


[32mProgress:  31%|████████████▌                            |  ETA: 0:14:15[39m[K

[32mProgress:  31%|████████████▋                            |  ETA: 0:14:12[39m[K

[32mProgress:  31%|████████████▋                            |  ETA: 0:14:09[39m[K

test reward at step 620000: -10330.969636812157


[32mProgress:  31%|████████████▊                            |  ETA: 0:14:10[39m[K

[32mProgress:  31%|████████████▊                            |  ETA: 0:14:07[39m[K

[32mProgress:  31%|████████████▉                            |  ETA: 0:14:04[39m[K

[32mProgress:  31%|████████████▉                            |  ETA: 0:14:02[39m[K

[32mProgress:  32%|████████████▉                            |  ETA: 0:14:04[39m[K

test reward at step 630000: -10282.41778862149


[32mProgress:  32%|█████████████                            |  ETA: 0:14:00[39m[K

[32mProgress:  32%|█████████████                            |  ETA: 0:13:57[39m[K

[32mProgress:  32%|█████████████▏                           |  ETA: 0:13:54[39m[K

test reward at step 640000: -10235.481986346795


[32mProgress:  32%|█████████████▏                           |  ETA: 0:13:56[39m[K

[32mProgress:  32%|█████████████▎                           |  ETA: 0:13:53[39m[K

[32mProgress:  32%|█████████████▎                           |  ETA: 0:13:50[39m[K

[32mProgress:  32%|█████████████▎                           |  ETA: 0:13:48[39m[K

test reward at step 650000: -10189.36283410217


[32mProgress:  33%|█████████████▍                           |  ETA: 0:13:49[39m[K

[32mProgress:  33%|█████████████▍                           |  ETA: 0:13:46[39m[K

[32mProgress:  33%|█████████████▌                           |  ETA: 0:13:44[39m[K

[32mProgress:  33%|█████████████▌                           |  ETA: 0:13:41[39m[K

test reward at step 660000: -10145.965662102737


[32mProgress:  33%|█████████████▋                           |  ETA: 0:13:41[39m[K

[32mProgress:  33%|█████████████▋                           |  ETA: 0:13:39[39m[K

[32mProgress:  33%|█████████████▋                           |  ETA: 0:13:37[39m[K

[32mProgress:  33%|█████████████▊                           |  ETA: 0:13:34[39m[K

test reward at step 670000: -10103.242311739177


[32mProgress:  34%|█████████████▊                           |  ETA: 0:13:35[39m[K

[32mProgress:  34%|█████████████▉                           |  ETA: 0:13:33[39m[K

[32mProgress:  34%|█████████████▉                           |  ETA: 0:13:30[39m[K

[32mProgress:  34%|█████████████▉                           |  ETA: 0:13:28[39m[K

test reward at step 680000: -10059.184399441054


[32mProgress:  34%|██████████████                           |  ETA: 0:13:29[39m[K

[32mProgress:  34%|██████████████                           |  ETA: 0:13:27[39m[K

[32mProgress:  34%|██████████████▏                          |  ETA: 0:13:24[39m[K

[32mProgress:  34%|██████████████▏                          |  ETA: 0:13:22[39m[K

test reward at step 690000: -10018.566766718832


[32mProgress:  35%|██████████████▏                          |  ETA: 0:13:23[39m[K

[32mProgress:  35%|██████████████▎                          |  ETA: 0:13:21[39m[K

[32mProgress:  35%|██████████████▎                          |  ETA: 0:13:18[39m[K

[32mProgress:  35%|██████████████▍                          |  ETA: 0:13:16[39m[K

parameters at step 700000 saved to ./RL_models/vtol_2D_ppo_700000.bson


test reward at step 700000: -9980.172490588368


[32mProgress:  35%|██████████████▍                          |  ETA: 0:13:17[39m[K

[32mProgress:  35%|██████████████▍                          |  ETA: 0:13:15[39m[K

[32mProgress:  35%|██████████████▌                          |  ETA: 0:13:12[39m[K

[32mProgress:  35%|██████████████▌                          |  ETA: 0:13:10[39m[K

test reward at step 710000: -9942.009849046333


[32mProgress:  36%|██████████████▋                          |  ETA: 0:13:11[39m[K

[32mProgress:  36%|██████████████▋                          |  ETA: 0:13:09[39m[K

[32mProgress:  36%|██████████████▋                          |  ETA: 0:13:06[39m[K

[32mProgress:  36%|██████████████▊                          |  ETA: 0:13:03[39m[K

test reward at step 720000: -9905.35989954765


[32mProgress:  36%|██████████████▊                          |  ETA: 0:13:04[39m[K

[32mProgress:  36%|██████████████▉                          |  ETA: 0:13:01[39m[K

[32mProgress:  36%|██████████████▉                          |  ETA: 0:12:59[39m[K

[32mProgress:  36%|███████████████                          |  ETA: 0:12:56[39m[K

test reward at step 730000: -9868.105618942553


[32mProgress:  37%|███████████████                          |  ETA: 0:12:57[39m[K

[32mProgress:  37%|███████████████                          |  ETA: 0:12:55[39m[K

[32mProgress:  37%|███████████████▏                         |  ETA: 0:12:52[39m[K

[32mProgress:  37%|███████████████▏                         |  ETA: 0:12:50[39m[K

test reward at step 740000: -9831.642036997295


[32mProgress:  37%|███████████████▎                         |  ETA: 0:12:51[39m[K

[32mProgress:  37%|███████████████▎                         |  ETA: 0:12:48[39m[K

[32mProgress:  37%|███████████████▍                         |  ETA: 0:12:45[39m[K

[32mProgress:  37%|███████████████▍                         |  ETA: 0:12:43[39m[K

test reward at step 750000: -9795.124986735142


[32mProgress:  38%|███████████████▍                         |  ETA: 0:12:44[39m[K

[32mProgress:  38%|███████████████▌                         |  ETA: 0:12:43[39m[K

[32mProgress:  38%|███████████████▌                         |  ETA: 0:12:41[39m[K

[32mProgress:  38%|███████████████▌                         |  ETA: 0:12:39[39m[K

test reward at step 760000: -9822.455979459171


[32mProgress:  38%|███████████████▋                         |  ETA: 0:12:41[39m[K

[32mProgress:  38%|███████████████▋                         |  ETA: 0:12:39[39m[K

[32mProgress:  38%|███████████████▊                         |  ETA: 0:12:37[39m[K

[32mProgress:  38%|███████████████▊                         |  ETA: 0:12:34[39m[K

test reward at step 770000: -9790.921785793533


[32mProgress:  39%|███████████████▊                         |  ETA: 0:12:35[39m[K

[32mProgress:  39%|███████████████▉                         |  ETA: 0:12:32[39m[K

[32mProgress:  39%|███████████████▉                         |  ETA: 0:12:30[39m[K

[32mProgress:  39%|████████████████                         |  ETA: 0:12:27[39m[K

test reward at step 780000: -9759.21258897706


[32mProgress:  39%|████████████████                         |  ETA: 0:12:28[39m[K

[32mProgress:  39%|████████████████                         |  ETA: 0:12:26[39m[K

[32mProgress:  39%|████████████████▏                        |  ETA: 0:12:24[39m[K

[32mProgress:  39%|████████████████▏                        |  ETA: 0:12:21[39m[K

[32mProgress:  40%|████████████████▎                        |  ETA: 0:12:22[39m[K

test reward at step 790000: -9727.599348296491


[32mProgress:  40%|████████████████▎                        |  ETA: 0:12:20[39m[K

[32mProgress:  40%|████████████████▎                        |  ETA: 0:12:18[39m[K

[32mProgress:  40%|████████████████▍                        |  ETA: 0:12:15[39m[K

parameters at step 800000 saved to ./RL_models/vtol_2D_ppo_800000.bson


test reward at step 800000: -9693.214791801485


[32mProgress:  40%|████████████████▍                        |  ETA: 0:12:16[39m[K

[32mProgress:  40%|████████████████▌                        |  ETA: 0:12:13[39m[K

[32mProgress:  40%|████████████████▌                        |  ETA: 0:12:11[39m[K

[32mProgress:  40%|████████████████▋                        |  ETA: 0:12:08[39m[K

test reward at step 810000: -9661.469521296925


[32mProgress:  41%|████████████████▋                        |  ETA: 0:12:09[39m[K

[32mProgress:  41%|████████████████▋                        |  ETA: 0:12:06[39m[K

[32mProgress:  41%|████████████████▊                        |  ETA: 0:12:04[39m[K

[32mProgress:  41%|████████████████▊                        |  ETA: 0:12:02[39m[K

test reward at step 820000: -9626.06244515685


[32mProgress:  41%|████████████████▉                        |  ETA: 0:12:02[39m[K

[32mProgress:  41%|████████████████▉                        |  ETA: 0:12:00[39m[K

[32mProgress:  41%|████████████████▉                        |  ETA: 0:11:58[39m[K

[32mProgress:  41%|█████████████████                        |  ETA: 0:11:55[39m[K

test reward at step 830000: -9592.645433098827


[32mProgress:  42%|█████████████████                        |  ETA: 0:11:56[39m[K

[32mProgress:  42%|█████████████████▏                       |  ETA: 0:11:55[39m[K

[32mProgress:  42%|█████████████████▏                       |  ETA: 0:11:53[39m[K

[32mProgress:  42%|█████████████████▏                       |  ETA: 0:11:50[39m[K

[32mProgress:  42%|█████████████████▎                       |  ETA: 0:11:51[39m[K

test reward at step 840000: -9561.25246411115


[32mProgress:  42%|█████████████████▎                       |  ETA: 0:11:49[39m[K

[32mProgress:  42%|█████████████████▍                       |  ETA: 0:11:47[39m[K

[32mProgress:  42%|█████████████████▍                       |  ETA: 0:11:44[39m[K

test reward at step 850000: -9530.595123028806


[32mProgress:  43%|█████████████████▍                       |  ETA: 0:11:45[39m[K

[32mProgress:  43%|█████████████████▌                       |  ETA: 0:11:42[39m[K

[32mProgress:  43%|█████████████████▌                       |  ETA: 0:11:39[39m[K

[32mProgress:  43%|█████████████████▋                       |  ETA: 0:11:37[39m[K

test reward at step 860000: -9500.932189030655


[32mProgress:  43%|█████████████████▋                       |  ETA: 0:11:38[39m[K

[32mProgress:  43%|█████████████████▊                       |  ETA: 0:11:35[39m[K

[32mProgress:  43%|█████████████████▊                       |  ETA: 0:11:33[39m[K

[32mProgress:  43%|█████████████████▊                       |  ETA: 0:11:32[39m[K

test reward at step 870000: -9503.078389596389


[32mProgress:  44%|█████████████████▉                       |  ETA: 0:11:32[39m[K

[32mProgress:  44%|█████████████████▉                       |  ETA: 0:11:30[39m[K

[32mProgress:  44%|██████████████████                       |  ETA: 0:11:28[39m[K

[32mProgress:  44%|██████████████████                       |  ETA: 0:11:26[39m[K

[32mProgress:  44%|██████████████████                       |  ETA: 0:11:25[39m[K

test reward at step 880000: -9503.424386953324


[32mProgress:  44%|██████████████████▏                      |  ETA: 0:11:26[39m[K

[32mProgress:  44%|██████████████████▏                      |  ETA: 0:11:23[39m[K

[32mProgress:  44%|██████████████████▎                      |  ETA: 0:11:21[39m[K

test reward at step 890000: -9489.489267004008


[32mProgress:  45%|██████████████████▎                      |  ETA: 0:11:21[39m[K

[32mProgress:  45%|██████████████████▍                      |  ETA: 0:11:18[39m[K

[32mProgress:  45%|██████████████████▍                      |  ETA: 0:11:16[39m[K

[32mProgress:  45%|██████████████████▍                      |  ETA: 0:11:14[39m[K

parameters at step 900000 saved to ./RL_models/vtol_2D_ppo_900000.bson


test reward at step 900000: -9469.442145679546


[32mProgress:  45%|██████████████████▌                      |  ETA: 0:11:15[39m[K

[32mProgress:  45%|██████████████████▌                      |  ETA: 0:11:13[39m[K

[32mProgress:  45%|██████████████████▋                      |  ETA: 0:11:10[39m[K

[32mProgress:  45%|██████████████████▋                      |  ETA: 0:11:08[39m[K

test reward at step 910000: -9444.355857121736


[32mProgress:  46%|██████████████████▋                      |  ETA: 0:11:08[39m[K

[32mProgress:  46%|██████████████████▊                      |  ETA: 0:11:06[39m[K

[32mProgress:  46%|██████████████████▊                      |  ETA: 0:11:04[39m[K

[32mProgress:  46%|██████████████████▉                      |  ETA: 0:11:02[39m[K

test reward at step 920000: -9415.612617883417


[32mProgress:  46%|██████████████████▉                      |  ETA: 0:11:02[39m[K

[32mProgress:  46%|██████████████████▉                      |  ETA: 0:11:00[39m[K

[32mProgress:  46%|███████████████████                      |  ETA: 0:10:58[39m[K

[32mProgress:  46%|███████████████████                      |  ETA: 0:10:56[39m[K

test reward at step 930000: -9387.393530755631


[32mProgress:  47%|███████████████████▏                     |  ETA: 0:10:56[39m[K

[32mProgress:  47%|███████████████████▏                     |  ETA: 0:10:54[39m[K

[32mProgress:  47%|███████████████████▎                     |  ETA: 0:10:52[39m[K

[32mProgress:  47%|███████████████████▎                     |  ETA: 0:10:50[39m[K

test reward at step 940000: -9357.947418799298


[32mProgress:  47%|███████████████████▎                     |  ETA: 0:10:50[39m[K

[32mProgress:  47%|███████████████████▍                     |  ETA: 0:10:48[39m[K

[32mProgress:  47%|███████████████████▍                     |  ETA: 0:10:46[39m[K

[32mProgress:  47%|███████████████████▌                     |  ETA: 0:10:43[39m[K

test reward at step 950000: -9329.119180865027


[32mProgress:  48%|███████████████████▌                     |  ETA: 0:10:44[39m[K

[32mProgress:  48%|███████████████████▋                     |  ETA: 0:10:41[39m[K

[32mProgress:  48%|███████████████████▋                     |  ETA: 0:10:39[39m[K

[32mProgress:  48%|███████████████████▋                     |  ETA: 0:10:37[39m[K

test reward at step 960000: -9300.919941258708


[32mProgress:  48%|███████████████████▊                     |  ETA: 0:10:37[39m[K

[32mProgress:  48%|███████████████████▊                     |  ETA: 0:10:35[39m[K

[32mProgress:  48%|███████████████████▉                     |  ETA: 0:10:33[39m[K

[32mProgress:  48%|███████████████████▉                     |  ETA: 0:10:31[39m[K

test reward at step 970000: -9273.26497794507


[32mProgress:  49%|███████████████████▉                     |  ETA: 0:10:31[39m[K

[32mProgress:  49%|████████████████████                     |  ETA: 0:10:29[39m[K

[32mProgress:  49%|████████████████████                     |  ETA: 0:10:27[39m[K

test reward at step 980000: -9246.060881101357


[32mProgress:  49%|████████████████████▏                    |  ETA: 0:10:27[39m[K

[32mProgress:  49%|████████████████████▎                    |  ETA: 0:10:22[39m[K

[32mProgress:  49%|████████████████████▎                    |  ETA: 0:10:19[39m[K

test reward at step 990000: -9219.589833459884


[32mProgress:  50%|████████████████████▍                    |  ETA: 0:10:20[39m[K

[32mProgress:  50%|████████████████████▍                    |  ETA: 0:10:18[39m[K

[32mProgress:  50%|████████████████████▍                    |  ETA: 0:10:15[39m[K

[32mProgress:  50%|████████████████████▌                    |  ETA: 0:10:13[39m[K

parameters at step 1000000 saved to ./RL_models/vtol_2D_ppo_1000000.bson


test reward at step 1000000: -9193.645001217637


[32mProgress:  50%|████████████████████▌                    |  ETA: 0:10:14[39m[K

[32mProgress:  50%|████████████████████▋                    |  ETA: 0:10:11[39m[K

[32mProgress:  50%|████████████████████▋                    |  ETA: 0:10:09[39m[K

[32mProgress:  50%|████████████████████▋                    |  ETA: 0:10:08[39m[K

test reward at step 1010000: -9170.68314968883


[32mProgress:  51%|████████████████████▊                    |  ETA: 0:10:08[39m[K

[32mProgress:  51%|████████████████████▊                    |  ETA: 0:10:06[39m[K

[32mProgress:  51%|████████████████████▉                    |  ETA: 0:10:04[39m[K

[32mProgress:  51%|████████████████████▉                    |  ETA: 0:10:02[39m[K

test reward at step 1020000: -9148.036103407763


[32mProgress:  51%|████████████████████▉                    |  ETA: 0:10:02[39m[K

[32mProgress:  51%|█████████████████████                    |  ETA: 0:10:00[39m[K

[32mProgress:  51%|█████████████████████                    |  ETA: 0:09:58[39m[K

[32mProgress:  51%|█████████████████████▏                   |  ETA: 0:09:56[39m[K

test reward at step 1030000: -9125.155574415585


[32mProgress:  52%|█████████████████████▏                   |  ETA: 0:09:56[39m[K

[32mProgress:  52%|█████████████████████▏                   |  ETA: 0:09:54[39m[K

[32mProgress:  52%|█████████████████████▎                   |  ETA: 0:09:52[39m[K

[32mProgress:  52%|█████████████████████▎                   |  ETA: 0:09:49[39m[K

test reward at step 1040000: -9105.606391900783


[32mProgress:  52%|█████████████████████▍                   |  ETA: 0:09:49[39m[K

[32mProgress:  52%|█████████████████████▍                   |  ETA: 0:09:47[39m[K

[32mProgress:  52%|█████████████████████▌                   |  ETA: 0:09:45[39m[K

[32mProgress:  52%|█████████████████████▌                   |  ETA: 0:09:43[39m[K

test reward at step 1050000: -9083.271679842217


[32mProgress:  53%|█████████████████████▌                   |  ETA: 0:09:43[39m[K

[32mProgress:  53%|█████████████████████▋                   |  ETA: 0:09:41[39m[K

[32mProgress:  53%|█████████████████████▋                   |  ETA: 0:09:39[39m[K

[32mProgress:  53%|█████████████████████▊                   |  ETA: 0:09:37[39m[K

test reward at step 1060000: -9062.376929994629


[32mProgress:  53%|█████████████████████▊                   |  ETA: 0:09:36[39m[K

[32mProgress:  53%|█████████████████████▊                   |  ETA: 0:09:34[39m[K

[32mProgress:  53%|█████████████████████▉                   |  ETA: 0:09:32[39m[K

[32mProgress:  53%|█████████████████████▉                   |  ETA: 0:09:30[39m[K

test reward at step 1070000: -9041.555738473895


[32mProgress:  54%|██████████████████████                   |  ETA: 0:09:30[39m[K

[32mProgress:  54%|██████████████████████                   |  ETA: 0:09:28[39m[K

[32mProgress:  54%|██████████████████████▏                  |  ETA: 0:09:26[39m[K

[32mProgress:  54%|██████████████████████▏                  |  ETA: 0:09:24[39m[K

test reward at step 1080000: -9019.751889172563


[32mProgress:  54%|██████████████████████▏                  |  ETA: 0:09:24[39m[K

[32mProgress:  54%|██████████████████████▎                  |  ETA: 0:09:22[39m[K

[32mProgress:  54%|██████████████████████▎                  |  ETA: 0:09:20[39m[K

[32mProgress:  54%|██████████████████████▍                  |  ETA: 0:09:18[39m[K

test reward at step 1090000: -8997.651380754434


[32mProgress:  55%|██████████████████████▍                  |  ETA: 0:09:18[39m[K

[32mProgress:  55%|██████████████████████▌                  |  ETA: 0:09:15[39m[K

[32mProgress:  55%|██████████████████████▌                  |  ETA: 0:09:13[39m[K

[32mProgress:  55%|██████████████████████▌                  |  ETA: 0:09:11[39m[K

parameters at step 1100000 saved to ./RL_models/vtol_2D_ppo_1100000.bson


test reward at step 1100000: -8977.745550540823


[32mProgress:  55%|██████████████████████▋                  |  ETA: 0:09:11[39m[K

[32mProgress:  55%|██████████████████████▋                  |  ETA: 0:09:09[39m[K

[32mProgress:  55%|██████████████████████▊                  |  ETA: 0:09:07[39m[K

test reward at step 1110000: -8957.55045455693


[32mProgress:  56%|██████████████████████▊                  |  ETA: 0:09:07[39m[K

[32mProgress:  56%|██████████████████████▉                  |  ETA: 0:09:04[39m[K

[32mProgress:  56%|██████████████████████▉                  |  ETA: 0:09:02[39m[K

[32mProgress:  56%|██████████████████████▉                  |  ETA: 0:09:00[39m[K

test reward at step 1120000: -8936.676694382468


[32mProgress:  56%|███████████████████████                  |  ETA: 0:09:00[39m[K

[32mProgress:  56%|███████████████████████                  |  ETA: 0:08:58[39m[K

[32mProgress:  56%|███████████████████████▏                 |  ETA: 0:08:56[39m[K

[32mProgress:  56%|███████████████████████▏                 |  ETA: 0:08:55[39m[K

test reward at step 1130000: -8918.517885367224


[32mProgress:  57%|███████████████████████▏                 |  ETA: 0:08:54[39m[K

[32mProgress:  57%|███████████████████████▎                 |  ETA: 0:08:52[39m[K

[32mProgress:  57%|███████████████████████▎                 |  ETA: 0:08:50[39m[K

[32mProgress:  57%|███████████████████████▍                 |  ETA: 0:08:49[39m[K

[32mProgress:  57%|███████████████████████▍                 |  ETA: 0:08:49[39m[K

test reward at step 1140000: -8900.342157319355


[32mProgress:  57%|███████████████████████▍                 |  ETA: 0:08:47[39m[K

[32mProgress:  57%|███████████████████████▌                 |  ETA: 0:08:45[39m[K

[32mProgress:  57%|███████████████████████▌                 |  ETA: 0:08:43[39m[K

test reward at step 1150000: -8880.631789419207


[32mProgress:  58%|███████████████████████▋                 |  ETA: 0:08:42[39m[K

[32mProgress:  58%|███████████████████████▋                 |  ETA: 0:08:40[39m[K

[32mProgress:  58%|███████████████████████▊                 |  ETA: 0:08:38[39m[K

[32mProgress:  58%|███████████████████████▊                 |  ETA: 0:08:36[39m[K

test reward at step 1160000: -8861.208431164823


[32mProgress:  58%|███████████████████████▊                 |  ETA: 0:08:36[39m[K

[32mProgress:  58%|███████████████████████▉                 |  ETA: 0:08:34[39m[K

[32mProgress:  58%|███████████████████████▉                 |  ETA: 0:08:32[39m[K

[32mProgress:  58%|████████████████████████                 |  ETA: 0:08:30[39m[K

test reward at step 1170000: -8842.003288370308


[32mProgress:  59%|████████████████████████                 |  ETA: 0:08:30[39m[K

[32mProgress:  59%|████████████████████████                 |  ETA: 0:08:28[39m[K

[32mProgress:  59%|████████████████████████▏                |  ETA: 0:08:26[39m[K

[32mProgress:  59%|████████████████████████▏                |  ETA: 0:08:24[39m[K

test reward at step 1180000: -8823.245935623192


[32mProgress:  59%|████████████████████████▎                |  ETA: 0:08:24[39m[K

In [None]:
plot(episode_test_reward_hook.rewards)

In [None]:
close_visualization(); # closes the MeshCat visualization