# Init Bionic VTOL

In [1]:
include("../Flyonic.jl");
using .Flyonic;

using Rotations; # used for initial position

using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Random;
using IntervalSets;
using LinearAlgebra;
using Distributions;

using Plots;
using Statistics;

using BSON: @save, @load # save mode

In [2]:
R_TOL = 0.5
N_WAYPOINTS = 4
SLOW_MODE = true

true

TODO:
evtl einfügen, dass wenn man über ziel drüber fliegt trotzdem den current point updatet 

In [3]:
create_visualization();

┌ Info: MeshCat server started. You can open the visualizer by visiting the following URL in your browser:
│ http://127.0.0.1:8701
└ @ MeshCat /Users/leonardoigler/.julia/packages/MeshCat/Ax8pH/src/visualizer.jl:73


In [4]:
# indicates how many threads Julia was started with. This is important for the multi-threaded environment
Threads.nthreads()

1

# Create Reinforcement Learning Environment

In [5]:
mutable struct VtolEnv{A,T,ACT,R<:AbstractRNG} <: AbstractEnv # Parametric Constructor for a subtype of AbstractEnv
    action_space::A # action space
    observation_space::Space{Vector{ClosedInterval{T}}} # observation space
    state::Vector{T} # current state space
    action::ACT # action space
    done::Bool # done
    t::T # time
    rng::R # random number generator

    name::String # for multible environoments
    visualization::Bool # visualization
    realtime::Bool # realtime
    
    # Everything you need aditionaly can also go in here.
    x_W::Vector{T} # current position
    v_B::Vector{T} # velocity
    R_W::Matrix{T} # current rotation
    ω_B::Vector{T} # rotation velocitiy
    wind_W::Vector{T} # wind
    Δt::T # Δ time
    
    # Current Bonus / Target
    num_waypoints::Int # includig start point
    waypoints::Vector{Vector{T}}
    reached_goal::BitVector
    
    progress::T
    progress_prev::T
    current_point::Int
    reached_goal_in_step::Bool
    
    r_tol::T
    projected_Position::Vector{T}

    slow_mode::Bool
end

In [6]:
# define a keyword-based constructor for the type declared in the mutable struct typedef. 
# It could also be done with the macro Base.@kwdef.
function VtolEnv(;
    rng = Random.GLOBAL_RNG, # Random number generation
    name = "vtol",
    visualization = false,
    realtime = false,
    kwargs... # let the function take an arbitrary number of keyword arguments 
)
    
    T = Float64; # explicit type which is used e.g. in state. Cannot be altered due to the poor matrix defininon.

    
    action_space = Space(
        ClosedInterval{T}[
            0.0..2.0, # propeller 1
            0.0..2.0, # propeller 2
            ], 
    )

    
    state_space = Space( # Three continuous values in state space.
        ClosedInterval{T}[
            # orientate yourself on the state space from the paper
            typemin(T)..typemax(T), # position along x
            typemin(T)..typemax(T), # position along z
            
            typemin(T)..typemax(T), # orientation along x
            typemin(T)..typemax(T), # orientation along z
            
            typemin(T)..typemax(T), # velocity along x BODY coordinates
            typemin(T)..typemax(T), # velocity along y BODY coordinates
            
            typemin(T)..typemax(T), # rotational velocity along z BODY coordinates
            
            typemin(T)..typemax(T), # position error along x (next gate - current position)
            typemin(T)..typemax(T), # position error along z (next gate - current position)
            
            typemin(T)..typemax(T), # way to next next gate x (next next gate - next gate)
            typemin(T)..typemax(T), # way to next next gate z (next next gate - next gate)
            # TODO: more points?
            ], 
    )
    
    num_waypoints = N_WAYPOINTS # number of waypoints, includig start point
    waypoints = generate_trajectory(num_waypoints) # trajectory with num_waypoints waypoints (+ start point) 
    reached_goal = falses(num_waypoints)
    
    if visualization #visualizes VTOL and waypoints
        create_VTOL(name, actuators = true, color_vec=[1.0; 1.0; 0.6; 1.0]);
        visualize_waypoints(waypoints, 0.15)
    end

    environment = VtolEnv(
        action_space,
        state_space, 
        zeros(T, length(state_space)), # current state, needs to be extended
        rand(action_space), #initialization action
        false, # episode done 
        0.0, # time
        rng, # random number generator  
        
        name,
        visualization,
        realtime,
        
        zeros(T, 3), # x_W, current position
        zeros(T, 3), # v_B, velocity
        [1.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 1.0], # R_W, current rotation, Float64... so T needs to be Float64
        zeros(T, 3), # ω_B
        zeros(T, 3), # wind_W
        T(0.025), # Δt 
        
        num_waypoints, # includig start point
        waypoints, 
        reached_goal,
        
        0.0, # progress
        0.0, # progress_prev
        2, # current point
        false, # reached_goal_in_step
        
        R_TOL, # r_tol
        zeros(T, 3),

        SLOW_MODE
    )
    
    
    reset!(environment)
    
    return environment
    
end;

Just for explanation:

1. A mutable Struct is created. A struct is a constructor and a constructor is a function that creates new objects.
2. A outer keyword-based constructor method is added for the type declared in the mutable struct typedef before.

So now we have a function with two methods. Julia will decide which method to call by multiple dispatch.

In [7]:
methods(VtolEnv)

# Define the RL interface

In [8]:
Random.seed!(env::VtolEnv, seed) = Random.seed!(env.rng, seed)
RLBase.action_space(env::VtolEnv) = env.action_space
RLBase.state_space(env::VtolEnv) = env.observation_space
RLBase.is_terminated(env::VtolEnv) = env.done
RLBase.state(env::VtolEnv) = env.state

In [9]:
function scale_for_slowmode(slow_mode::Bool, v_min::T, v_max::T, d_max::T, x_W::Vector{T}, projected_Position::Vector{T}, v_B::Vector{T}) where T
    
    if slow_mode == false
        return 1
    else
        if norm(v_B) > v_max
            s_vmax = 10^(v_max - norm(v_B))
        else
            s_vmax = 1
        end

        if norm(v_B) < v_min
            s_vmin = 10^(norm(v_B) - v_min)
        else
            s_vmin = 1
        end

        if norm(x_W - projected_Position) > d_max
            s_gd = exp(-norm(x_W - projected_Position) + d_max)
        else
            s_gd = 1
        end
        s = s_vmax * s_vmin * s_gd
    end
    return s
end



scale_for_slowmode (generic function with 1 method)

In [10]:
function computeReward(env::VtolEnv{A,T}) where {A,T}
    
    norm_way = 0.0 # DEBUG: save in environment
    for i in 1:(env.num_waypoints - 1)
        norm_way += norm(env.waypoints[i] - env.waypoints[i + 1])
    end

    # Slow 1
    #v_min = 1.0 # min velocity
    #v_max = 2.0 # max velocity
    #d_max = 0.1

    # Slow 2
    v_min = 4.0 # min velocity
    v_max = 7.0 # max velocity
    d_max = 0.25


    s = scale_for_slowmode(env.slow_mode, v_min, v_max, d_max, env.x_W, env.projected_Position, env.v_B)
    

    k_p = 5.0 * s#env.num_waypoints / norm_way;# factor for progress (between current position and last position) reward 
    r_p = (env.progress - env.progress_prev); # reward for progress (between current position and last position)

    k_s = s * (2 * v_max * env.Δt)/norm_way #5.0 # factor for reached distance (overall) reward, TODO later add factor as in paper (p. 4)
    r_s = env.progress # reward for reached distance (overall)
    
    k_wp = 10.0 * env.num_waypoints # factor for reached gate reward
    r_wp = 0.0 # reward for reached gate, TODO: change to gates later (when gates != waypoints)
    if env.reached_goal_in_step
        r_wp = exp(-norm(env.x_W - env.waypoints[env.current_point - 1])/env.r_tol)
    end 

    # Slow 1
    #k_ω = 0.01 # factor for too high body rate penalty
    # Slow 2
    k_ω = 0.0 # factor for too high body rate penalty
    norm_ω = norm(env.ω_B[3]) # penalty for body rate

    if env.x_W[3]<-2
        fall = 1
    else
        fall = 0
    end

    return - env.t*0.22 + k_p * r_p + k_s * r_s + k_wp * r_wp - k_ω * norm_ω -fall# - k_v * norm_v
    #return k_p * r_p + k_s * r_s + k_wp * r_wp - k_ω * norm_ω -fall# - k_v * norm_v


end


RLBase.reward(env::VtolEnv{A,T}) where {A,T} = computeReward(env)

In [11]:
function RLBase.reset!(env::VtolEnv{A,T}) where {A,T}
    # Visualize initial state
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, [0.0; 0.0; 0.0; 0.0]);
    end
    
    env.x_W = [0.0; 0.0; 0.0];
    env.v_B = [0.0; 0.0; 0.0];
    env.R_W = Matrix(UnitQuaternion(RotZ(-pi/2.0)*RotY(-pi/2.0)*RotX(pi)));

    env.ω_B = [0.0; 0.0; 0.0];
    env.wind_W = [0.0; 0.0; 0.0];
    
    #env.num_waypoints = 4; # includig start point
    env.waypoints = generate_trajectory(env.num_waypoints);
    env.reached_goal = falses(env.num_waypoints);
    env.reached_goal[1] = true; # set first point to reached (start point)
    
    env.current_point = 2;
    env.reached_goal_in_step = false;
    #env.r_tol = 0.3;
    
    if env.visualization
        visualize_waypoints(env.waypoints, 0.15); # debug: other radius?
    end
    

    env.progress = 0.0;
    env.progress_prev = 0.0;
    
    
    env.state = [env.x_W[1]; # position along x
                 env.x_W[3]; # position along z
        
                 env.R_W[1,1]; # orientation along x
                 env.R_W[3,1]; # orientation along z
        
                 env.v_B[1]; # velocity along x BODY coordinates
                 env.v_B[2]; # velocity along y BODY coordinates  
        
                 env.ω_B[3]; # rotational velocity along z BODY coordinates
        
                 env.waypoints[2][1] - env.x_W[1]; # position error along x
                 env.waypoints[2][3] - env.x_W[3]; # position error along z
                 
                 0.0; # way to next next gate x (next next gate - next gate)
                 0.0] # way to next next gate z (next next gate - next gate)
    
    if env.num_waypoints >= 3
        env.state[10] = env.waypoints[3][1] - env.waypoints[2][1]; # way to next next gate x (next next gate - next gate)
        env.state[11] = env.waypoints[3][3] - env.waypoints[2][1]; # way to next next gate z (next next gate - next gate)
    end
        
    env.t = 0.0;
    env.action = [0.0, 0.0];
    env.done = false;

    env.projected_Position = [0; 0; 0]
    
    nothing

end;

In [12]:
# defines a methods for a callable object.
# So when a VtolEnv object is created, it has this method that can be called
function (env::VtolEnv)(a)
    # TODO: set flaps later in 3D
    # set the propeller trust and the two flaps 2D case
    next_action = [a[1], a[2], 0.0, 0.0]
   
    _step!(env, next_action)
end

In [13]:
env = VtolEnv();

In [14]:
methods(env) # Just to explain which methods the object has

In [15]:
function _step!(env::VtolEnv, next_action)
    # caluclate wind impact
    v_in_wind_B = vtol_add_wind(env.v_B, env.R_W, env.wind_W)
    # caluclate aerodynamic forces
    torque_B, force_B = vtol_model(v_in_wind_B, next_action, eth_vtol_param);
    # Limit to 2D
    force_B[3] = 0.0; # Body Z
    env.v_B[3] = 0.0;
    torque_B[1] = 0.0; torque_B[2] = 0.0;  # Body X and Y
    env.ω_B[1] = 0.0; env.ω_B[2] = 0.0;
    # integrate rigid body dynamics for Δt
    env.x_W, env.v_B, env.R_W, env.ω_B, time = rigid_body_simple(torque_B, force_B, env.x_W, env.v_B, env.R_W, env.ω_B, env.t, env.Δt, eth_vtol_param)
    
    
    env.reached_goal_in_step = false;
    if norm(env.x_W - env.waypoints[env.current_point]) < env.r_tol
        env.reached_goal_in_step = true;
        env.reached_goal[env.current_point] = true;
        env.current_point += 1;
    end
        
            
    # calculate progress on trajectory
    env.progress_prev = env.progress
    
    current_progress = 0.0
    line_segment, env.projected_Position = calculate_progress(env.waypoints, env.x_W)
    
    #env.current_point = line_segment + 1

    for i in 2:(line_segment)
       current_progress +=  norm(env.waypoints[i] - env.waypoints[i - 1])  
    end
    current_progress += norm(env.waypoints[line_segment] - env.projected_Position)
    
    env.progress = current_progress
    

    if env.realtime
        sleep(env.Δt) # TODO: just a dirty hack. this is of course slower than real time.
    end

    # Visualize the new state 
    if env.visualization
        set_transform(env.name, env.x_W, QuatRotation(env.R_W));
        set_actuators(env.name, next_action)
        
        for i in eachindex(env.reached_goal)
            if env.reached_goal[i]
                create_sphere("fixgoal_$i", 0.2, color=RGBA{Float32}(1.0, 0.0, 0.0, 1.0));
                set_transform("fixgoal_$i", env.waypoints[i]);
            end
        end
    end
 

    env.t += env.Δt
    
    env.state[1] = env.x_W[1]; # position along x
    env.state[2] = env.x_W[3]; # position along z
    
    env.state[3] = env.R_W[1,1]; # orientation along x
    env.state[4] = env.R_W[3,1]; # orientation along z
    
    env.state[5] = env.v_B[1]; # velocity along x BODY coordinates
    env.state[6] = env.v_B[2]; # velocity along y BODY coordinates
    
    env.state[7] = env.ω_B[3];  # rotational velocity along z BODY coordinates
    
    
    if env.current_point <= env.num_waypoints
        env.state[8] = (env.waypoints[env.current_point][1] - env.x_W[1]); # position error along x
        env.state[9] = (env.waypoints[env.current_point][3] - env.x_W[3]); # position error along z
        
        if env.current_point <= env.num_waypoints - 1
            env.state[10] = (env.waypoints[env.current_point + 1][1] - env.x_W[1]); #env.waypoints[env.current_point][1]; # way to next next gate x (next next gate - next gate)
            env.state[11] = (env.waypoints[env.current_point + 1][3] - env.x_W[3]); #env.waypoints[env.current_point][3]; # way to next next gate z (next next gate - next gate)
        else
            env.state[10] = 0.0 # way to next next gate x (next next gate - next gate)
            env.state[11] = 0.0 # way to next next gate z (next next gate - next gate)
        end
    else
        env.state[8] = 0.0; # position error along x
        env.state[9] = 0.0; # position error along z
        env.state[10] = 0.0 # way to next next gate x (next next gate - next gate)
        env.state[11] = 0.0 # way to next next gate z (next next gate - next gate)
    end
        
    
    # Termination criteria
    # TODO: Use many termination criteria so that you do not train unnecessarily in wrong areas
    env.done = #true
        # After time... How fast is drone+Range of desired point
        # After reaching position (circle of r_tol)
        norm(env.ω_B) > 100.0 || 
        norm(env.v_B) > 100.0 || # stop if body is too fast # TODO: set higher later in fast training phase
        env.x_W[3] < -5.0 || # stop if body is below -5m
        env.t > env.num_waypoints * 10.0 ||# stop after 10s per point
        norm(env.x_W - env.projected_Position) > 5.0 || # too far off the path 
        env.current_point > env.num_waypoints && #||# all points reached
        norm(env.x_W - env.waypoints[end])<env.r_tol

    nothing
end;

changed to 10s (5s before) per point and 5.0m too far off path (2.0 before)

In [16]:
RLBase.test_runnable!(env)

[0m[1mTest Summary:              | [22m[32m[1mPass  [22m[39m[36m[1mTotal  [22m[39m[0m[1mTime[22m
random policy with VtolEnv | [32m2000  [39m[36m 2000  [39m[0m0.9s


Test.DefaultTestSet("random policy with VtolEnv", Any[], 2000, false, false, true, 1.673957549414223e9, 1.673957550329323e9)

Show an overview of the environment.

# Setup of a reinforcement learning experiment.

In [17]:
seed = 123    
rng = StableRNG(seed)
    N_ENV = 8
    UPDATE_FREQ = 1024
    
    
    # define multiple environments for parallel training
    env = MultiThreadEnv([
        # use different names for the visualization
        VtolEnv(; rng = StableRNG(hash(seed+i)), name = "vtol$i") for i in 1:N_ENV
    ])

MultiThreadEnv(8 x VtolEnv)

In [18]:
# Define the function approximator
# TODO: change architecture eventually 
    ns, na = length(state(env[1])), length(action_space(env[1]))
    approximator = ActorCritic(
                actor = GaussianNetwork(
                    pre = Chain(
                    Dense(ns, 128, tanh; initW = glorot_uniform(rng)),#
                    Dense(128, 128, tanh; initW = glorot_uniform(rng)),
                    ),
                    μ = Chain(Dense(128, na; initW = glorot_uniform(rng))),
                    logσ = Chain(Dense(128, na; initW = glorot_uniform(rng))),
                ),
                critic = Chain(
                    Dense(ns, 128, tanh; initW = glorot_uniform(rng)),
                    Dense(128, 128, tanh; initW = glorot_uniform(rng)),
                    Dense(128, 1; initW = glorot_uniform(rng)),
                ),
                optimizer = ADAM(1e-3),
            );

In [19]:
    agent = Agent( # A wrapper of an AbstractPolicy
        # AbstractPolicy: the policy to use
        # TODO: change eventually
        policy = PPOPolicy(;
                    approximator = approximator |> gpu,
                    update_freq=UPDATE_FREQ,
                    dist = Normal,
                    # For parameters visit the docu: https://juliareinforcementlearning.org/docs/rlzoo/#ReinforcementLearningZoo.PPOPolicy
                    ),
        
        # AbstractTrajectory: used to store transitions between an agent and an environment source
        trajectory = PPOTrajectory(;
            capacity = UPDATE_FREQ,
            state = Matrix{Float64} => (ns, N_ENV),
            action = Matrix{Float64} => (na, N_ENV),
            action_log_prob = Vector{Float64} => (N_ENV,),
            reward = Vector{Float64} => (N_ENV,),
            terminal = Vector{Bool} => (N_ENV,),
        ),
    );


┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/leonardoigler/.julia/packages/Flux/7nTyc/src/functor.jl:187


In [20]:
function saveModel(t, agent, env)
    model = cpu(agent.policy.approximator)   
    f = joinpath("./RL_models_fast/", "vtol_2D_ppo_$t.bson") # TODO: evtl anpassen
    @save f model
    println("parameters at step $t saved to $f")
end;

In [21]:
function loadModel()
    f = joinpath("./RL_models_slow/", "vtol_2D_ppo_2000000.bson") # TODO: evtl anpassen
    @load f model
    return model
end;

In [22]:
function validate_policy(t, agent, env)
    run(agent.policy, test_env, StopAfterEpisode(1), episode_test_reward_hook)
    # the result of the hook
    println("test reward at step $t: $(episode_test_reward_hook.rewards[end])")
    
end;

episode_test_reward_hook = TotalRewardPerEpisode(;is_display_on_exit=false)
# create a env only for reward test
test_env = VtolEnv(;name = "testVTOL", visualization = true, realtime = true);

In [23]:
agent.policy.approximator = loadModel(); # TODO: un/comment

In [24]:
ReinforcementLearning.run(
    agent,
    env,
    StopAfterStep(2_000_000),
    ComposedHook(
        DoEveryNStep(saveModel, n=100_000), 
        DoEveryNStep(validate_policy, n=10_000)),
)

[32mProgress:   1%|▎                                        |  ETA: 1:54:53[39m[K[K

test reward at step 10000: 57.99675418730234


[32mProgress:   1%|▍                                        |  ETA: 1:09:35[39m[K

test reward at step 20000: 108.18970356331639


[32mProgress:   1%|▋                                        |  ETA: 0:57:53[39m[K

test reward at step 30000: 114.13655176281785


[32mProgress:   2%|▉                                        |  ETA: 0:51:41[39m[K

test reward at step 40000: 109.83573847082019


[32mProgress:   2%|█                                        |  ETA: 0:50:39[39m[K

test reward at step 50000: 100.69767807034424


[32mProgress:   3%|█▎                                       |  ETA: 0:46:10[39m[K

test reward at step 60000: 100.32719635721898


[32mProgress:   3%|█▍                                       |  ETA: 0:43:26[39m[K

test reward at step 70000: 114.03754393825311


[32mProgress:   4%|█▋                                       |  ETA: 0:41:54[39m[K

test reward at step 80000: 120.66071554619612


[32mProgress:   4%|█▉                                       |  ETA: 0:40:23[39m[K

test reward at step 90000: 101.96861854732619


[32mProgress:   5%|██                                       |  ETA: 0:39:26[39m[K

parameters at step 100000 saved to ./RL_models_fast/vtol_2D_ppo_100000.bson


[32mProgress:   5%|██                                       |  ETA: 0:40:33[39m[K

test reward at step 100000: 86.68857243188815


[32mProgress:   5%|██▎                                      |  ETA: 0:38:37[39m[K

test reward at step 110000: 97.33688961152927


[32mProgress:   6%|██▍                                      |  ETA: 0:37:47[39m[K

test reward at step 120000: 88.36050758521847


[32mProgress:   6%|██▋                                      |  ETA: 0:37:29[39m[K

test reward at step 130000: 108.80242739453696


[32mProgress:   7%|██▉                                      |  ETA: 0:36:39[39m[K

test reward at step 140000: 44.14933307363161


[32mProgress:   7%|███                                      |  ETA: 0:36:30[39m[K

test reward at step 150000: 79.95660412276735


[32mProgress:   8%|███▎                                     |  ETA: 0:36:13[39m[K

test reward at step 160000: 92.76288773946567


[32mProgress:   9%|███▌                                     |  ETA: 0:36:12[39m[K

test reward at step 170000: 103.87285761850107


[32mProgress:   9%|███▋                                     |  ETA: 0:35:04[39m[K

test reward at step 180000: 125.08790966253213


[32mProgress:   9%|███▉                                     |  ETA: 0:34:44[39m[K

test reward at step 190000: 110.98119417006663


[32mProgress:  10%|████                                     |  ETA: 0:34:27[39m[K

parameters at step 200000 saved to ./RL_models_fast/vtol_2D_ppo_200000.bson


[32mProgress:  10%|████▏                                    |  ETA: 0:34:53[39m[K

test reward at step 200000: 124.47358136352489


[32mProgress:  10%|████▎                                    |  ETA: 0:33:59[39m[K

test reward at step 210000: 98.32799455802005


[32mProgress:  11%|████▌                                    |  ETA: 0:33:42[39m[K

test reward at step 220000: 116.72627434473725


[32mProgress:  11%|████▊                                    |  ETA: 0:33:17[39m[K

test reward at step 230000: 106.48421937839207


[32mProgress:  12%|████▉                                    |  ETA: 0:33:00[39m[K

test reward at step 240000: 117.188366115304


[32mProgress:  12%|█████▏                                   |  ETA: 0:32:53[39m[K

test reward at step 250000: 101.63729705028166


[32mProgress:  13%|█████▎                                   |  ETA: 0:32:36[39m[K

test reward at step 260000: 130.02929248755595


[32mProgress:  13%|█████▌                                   |  ETA: 0:32:13[39m[K

test reward at step 270000: 111.83151051714032


[32mProgress:  14%|█████▊                                   |  ETA: 0:31:52[39m[K

test reward at step 280000: 103.83760128989343


[32mProgress:  14%|█████▉                                   |  ETA: 0:31:35[39m[K

test reward at step 290000: 81.03386643676356


[32mProgress:  15%|██████▏                                  |  ETA: 0:31:20[39m[K

parameters at step 300000 saved to ./RL_models_fast/vtol_2D_ppo_300000.bson
test reward at step 300000: 99.36619461026993


[32mProgress:  16%|██████▍                                  |  ETA: 0:31:13[39m[K

test reward at step 310000: 82.45253652464727


[32mProgress:  16%|██████▌                                  |  ETA: 0:30:41[39m[K

test reward at step 320000: 131.94490913523876


[32mProgress:  17%|██████▊                                  |  ETA: 0:30:45[39m[K

test reward at step 330000: 122.14456210096472


[32mProgress:  17%|███████                                  |  ETA: 0:30:11[39m[K

test reward at step 340000: -180.3074027741668


[32mProgress:  17%|███████▏                                 |  ETA: 0:30:29[39m[K

test reward at step 350000: 79.21415569180604


[32mProgress:  18%|███████▍                                 |  ETA: 0:30:04[39m[K

test reward at step 360000: 103.74650554583327


[32mProgress:  18%|███████▌                                 |  ETA: 0:29:54[39m[K

test reward at step 370000: 98.46697677280613


[32mProgress:  19%|███████▊                                 |  ETA: 0:29:38[39m[K

test reward at step 380000: 92.26888563044555


[32mProgress:  20%|████████                                 |  ETA: 0:29:29[39m[K

test reward at step 390000: 94.85984739132961


[32mProgress:  20%|████████▎                                |  ETA: 0:28:59[39m[K

parameters at step 400000 saved to ./RL_models_fast/vtol_2D_ppo_400000.bson
test reward at step 400000: 104.39216605873743


[32mProgress:  21%|████████▍                                |  ETA: 0:28:56[39m[K

test reward at step 410000: 109.55408996669124


[32mProgress:  21%|████████▋                                |  ETA: 0:28:25[39m[K

test reward at step 420000: 130.68968888758695


[32mProgress:  21%|████████▊                                |  ETA: 0:28:10[39m[K

test reward at step 430000: 120.60349321419854


[32mProgress:  22%|█████████                                |  ETA: 0:27:57[39m[K

test reward at step 440000: 130.2520237546723


[32mProgress:  23%|█████████▎                               |  ETA: 0:27:59[39m[K

test reward at step 450000: 51.017197858770814


[32mProgress:  23%|█████████▍                               |  ETA: 0:27:33[39m[K

test reward at step 460000: 9.893740226241137


[32mProgress:  23%|█████████▋                               |  ETA: 0:27:29[39m[K

test reward at step 470000: 105.89334370406613


[32mProgress:  24%|█████████▉                               |  ETA: 0:27:08[39m[K

test reward at step 480000: 98.92337046715225


[32mProgress:  24%|██████████                               |  ETA: 0:26:57[39m[K

test reward at step 490000: 119.56278482300425


[32mProgress:  25%|██████████▎                              |  ETA: 0:26:35[39m[K

parameters at step 500000 saved to ./RL_models_fast/vtol_2D_ppo_500000.bson
test reward at step 500000: 105.63079738989744


[32mProgress:  25%|██████████▌                              |  ETA: 0:26:20[39m[K

test reward at step 510000: 89.32643452890152


[32mProgress:  26%|██████████▋                              |  ETA: 0:26:09[39m[K

test reward at step 520000: -4.2645014300775905


[32mProgress:  26%|██████████▉                              |  ETA: 0:26:05[39m[K

test reward at step 530000: 71.83840206799816


[32mProgress:  27%|███████████                              |  ETA: 0:25:55[39m[K

test reward at step 540000: 111.0988211705981


[32mProgress:  28%|███████████▎                             |  ETA: 0:25:52[39m[K

test reward at step 550000: 102.47341859377612


[32mProgress:  28%|███████████▌                             |  ETA: 0:25:31[39m[K

test reward at step 560000: 122.93778239789832


[32mProgress:  29%|███████████▊                             |  ETA: 0:25:26[39m[K

test reward at step 570000: 122.86462932304471


[32mProgress:  29%|███████████▉                             |  ETA: 0:25:07[39m[K

test reward at step 580000: 78.50746968673332


[32mProgress:  29%|████████████▏                            |  ETA: 0:24:55[39m[K

test reward at step 590000: 128.23964379419422


[32mProgress:  30%|████████████▎                            |  ETA: 0:24:45[39m[K

parameters at step 600000 saved to ./RL_models_fast/vtol_2D_ppo_600000.bson
test reward at step 600000: 97.27513111661324


[32mProgress:  30%|████████████▌                            |  ETA: 0:24:35[39m[K

test reward at step 610000: 112.69364946091063


[32mProgress:  31%|████████████▊                            |  ETA: 0:24:25[39m[K

test reward at step 620000: 106.84649821625348


[32mProgress:  31%|████████████▉                            |  ETA: 0:24:17[39m[K

test reward at step 630000: 81.20350338050363


[32mProgress:  32%|█████████████▏                           |  ETA: 0:24:07[39m[K

test reward at step 640000: 112.67659597954706


[32mProgress:  32%|█████████████▍                           |  ETA: 0:23:53[39m[K

test reward at step 650000: 86.48749291567167


[32mProgress:  33%|█████████████▌                           |  ETA: 0:23:43[39m[K

test reward at step 660000: 93.8437820052193


[32mProgress:  34%|█████████████▊                           |  ETA: 0:23:38[39m[K

test reward at step 670000: 110.58220718244266


[32mProgress:  34%|██████████████                           |  ETA: 0:23:27[39m[K

test reward at step 680000: 114.19624135231311


[32mProgress:  34%|██████████████▏                          |  ETA: 0:23:09[39m[K

test reward at step 690000: 111.70433832988955


[32mProgress:  35%|██████████████▍                          |  ETA: 0:22:57[39m[K

parameters at step 700000 saved to ./RL_models_fast/vtol_2D_ppo_700000.bson
test reward at step 700000: 85.52097372252123


[32mProgress:  35%|██████████████▌                          |  ETA: 0:22:49[39m[K

test reward at step 710000: 93.9078591067583


[32mProgress:  36%|██████████████▊                          |  ETA: 0:22:40[39m[K

test reward at step 720000: 115.05687431256683


[32mProgress:  37%|███████████████                          |  ETA: 0:22:38[39m[K

test reward at step 730000: 116.41430102190773


[32mProgress:  37%|███████████████▏                         |  ETA: 0:22:21[39m[K

test reward at step 740000: 99.67821449952187


[32mProgress:  38%|███████████████▍                         |  ETA: 0:22:17[39m[K

test reward at step 750000: 74.99388744404102


[32mProgress:  38%|███████████████▌                         |  ETA: 0:22:02[39m[K

test reward at step 760000: 114.6559750658511


[32mProgress:  38%|███████████████▊                         |  ETA: 0:21:51[39m[K

test reward at step 770000: 72.404910390244


[32mProgress:  39%|████████████████                         |  ETA: 0:21:38[39m[K

test reward at step 780000: 104.86532380711824


[32mProgress:  39%|████████████████▏                        |  ETA: 0:21:27[39m[K

test reward at step 790000: 128.94394302126577


[32mProgress:  40%|████████████████▍                        |  ETA: 0:21:18[39m[K

parameters at step 800000 saved to ./RL_models_fast/vtol_2D_ppo_800000.bson
test reward at step 800000: 124.23243185904147


[32mProgress:  40%|████████████████▋                        |  ETA: 0:21:05[39m[K

test reward at step 810000: 128.96003493416862


[32mProgress:  41%|████████████████▊                        |  ETA: 0:20:59[39m[K

test reward at step 820000: 100.7465112108391


[32mProgress:  41%|█████████████████                        |  ETA: 0:20:42[39m[K

test reward at step 830000: 116.3730687020046


[32mProgress:  42%|█████████████████▎                       |  ETA: 0:20:30[39m[K

test reward at step 840000: 121.24151449879449


[32mProgress:  43%|█████████████████▍                       |  ETA: 0:20:22[39m[K

test reward at step 850000: 100.29781693474045


[32mProgress:  43%|█████████████████▋                       |  ETA: 0:20:06[39m[K

test reward at step 860000: 125.30181694423945


[32mProgress:  43%|█████████████████▊                       |  ETA: 0:19:57[39m[K

test reward at step 870000: 135.66161941226434


[32mProgress:  44%|██████████████████                       |  ETA: 0:19:44[39m[K

test reward at step 880000: 123.29809950371948


[32mProgress:  44%|██████████████████▎                      |  ETA: 0:19:35[39m[K

test reward at step 890000: -30.22159592578809


[32mProgress:  45%|██████████████████▍                      |  ETA: 0:19:30[39m[K

parameters at step 900000 saved to ./RL_models_fast/vtol_2D_ppo_900000.bson
test reward at step 900000: 107.17719553749292


[32mProgress:  45%|██████████████████▋                      |  ETA: 0:19:17[39m[K

test reward at step 910000: 92.59302416398275


[32mProgress:  46%|██████████████████▉                      |  ETA: 0:19:07[39m[K

test reward at step 920000: 121.29775132137163


[32mProgress:  46%|███████████████████                      |  ETA: 0:18:55[39m[K

test reward at step 930000: 72.06715706391432


[32mProgress:  47%|███████████████████▎                     |  ETA: 0:18:42[39m[K

test reward at step 940000: 96.16945160026052


[32mProgress:  47%|███████████████████▌                     |  ETA: 0:18:28[39m[K

test reward at step 950000: 106.70737476652045


[32mProgress:  48%|███████████████████▋                     |  ETA: 0:18:16[39m[K

test reward at step 960000: 90.1491073391054


[32mProgress:  48%|███████████████████▉                     |  ETA: 0:18:06[39m[K

test reward at step 970000: 108.48178309479407


[32mProgress:  49%|████████████████████                     |  ETA: 0:17:55[39m[K

test reward at step 980000: 93.19794580985469


[32mProgress:  49%|████████████████████▎                    |  ETA: 0:17:42[39m[K

test reward at step 990000: 99.35730586228343


[32mProgress:  50%|████████████████████▌                    |  ETA: 0:17:30[39m[K

parameters at step 1000000 saved to ./RL_models_fast/vtol_2D_ppo_1000000.bson
test reward at step 1000000: 94.76697907628295


[32mProgress:  50%|████████████████████▋                    |  ETA: 0:17:19[39m[K

test reward at step 1010000: 113.45429024416963


[32mProgress:  51%|████████████████████▉                    |  ETA: 0:17:09[39m[K

test reward at step 1020000: 123.86744665939746


[32mProgress:  52%|█████████████████████▏                   |  ETA: 0:17:05[39m[K

test reward at step 1030000: -221.3367874731361


[32mProgress:  52%|█████████████████████▎                   |  ETA: 0:16:52[39m[K

test reward at step 1040000: 108.56290115924538


[32mProgress:  52%|█████████████████████▌                   |  ETA: 0:16:41[39m[K

test reward at step 1050000: 79.32244428708708


[32mProgress:  53%|█████████████████████▊                   |  ETA: 0:16:30[39m[K

test reward at step 1060000: 103.99589351919788


[32mProgress:  53%|█████████████████████▉                   |  ETA: 0:16:18[39m[K

test reward at step 1070000: 100.41111559619966


[32mProgress:  54%|██████████████████████▏                  |  ETA: 0:16:05[39m[K

test reward at step 1080000: 104.94200854138053


[32mProgress:  54%|██████████████████████▎                  |  ETA: 0:15:55[39m[K

test reward at step 1090000: 87.09941447416608


[32mProgress:  55%|██████████████████████▌                  |  ETA: 0:15:45[39m[K

parameters at step 1100000 saved to ./RL_models_fast/vtol_2D_ppo_1100000.bson


[32mProgress:  55%|██████████████████████▌                  |  ETA: 0:15:45[39m[K

test reward at step 1100000: 106.32070416694054


[32mProgress:  55%|██████████████████████▊                  |  ETA: 0:15:32[39m[K

test reward at step 1110000: 119.76649673745544


[32mProgress:  56%|██████████████████████▉                  |  ETA: 0:15:23[39m[K

test reward at step 1120000: 127.57452035316517


[32mProgress:  56%|███████████████████████▏                 |  ETA: 0:15:10[39m[K

test reward at step 1130000: 131.50140145848988


[32mProgress:  57%|███████████████████████▍                 |  ETA: 0:15:01[39m[K

test reward at step 1140000: 129.04222562241173


[32mProgress:  57%|███████████████████████▌                 |  ETA: 0:14:50[39m[K

test reward at step 1150000: 118.6095135565034


[32mProgress:  58%|███████████████████████▊                 |  ETA: 0:14:39[39m[K

test reward at step 1160000: 121.57811726128756


[32mProgress:  58%|████████████████████████                 |  ETA: 0:14:29[39m[K

test reward at step 1170000: 101.4974351705707


[32mProgress:  59%|████████████████████████▏                |  ETA: 0:14:18[39m[K

test reward at step 1180000: 114.24211598688085


[32mProgress:  60%|████████████████████████▍                |  ETA: 0:14:06[39m[K

test reward at step 1190000: 109.24330885904732


[32mProgress:  60%|████████████████████████▋                |  ETA: 0:13:55[39m[K

parameters at step 1200000 saved to ./RL_models_fast/vtol_2D_ppo_1200000.bson
test reward at step 1200000: 118.3496467177548


[32mProgress:  60%|████████████████████████▊                |  ETA: 0:13:43[39m[K

test reward at step 1210000: 108.51711504175154


[32mProgress:  61%|█████████████████████████                |  ETA: 0:13:32[39m[K

test reward at step 1220000: 91.0255942419634


[32mProgress:  61%|█████████████████████████▎               |  ETA: 0:13:22[39m[K

test reward at step 1230000: 117.73113829317094


[32mProgress:  62%|█████████████████████████▍               |  ETA: 0:13:13[39m[K

test reward at step 1240000: 123.62145479527571


[32mProgress:  62%|█████████████████████████▋               |  ETA: 0:12:59[39m[K

test reward at step 1250000: 115.38230686748288


[32mProgress:  63%|█████████████████████████▊               |  ETA: 0:12:49[39m[K

test reward at step 1260000: 106.70067457249274


[32mProgress:  63%|██████████████████████████               |  ETA: 0:12:38[39m[K

test reward at step 1270000: 75.29852234349111


[32mProgress:  64%|██████████████████████████▎              |  ETA: 0:12:29[39m[K

test reward at step 1280000: 130.40244185722577


[32mProgress:  65%|██████████████████████████▌              |  ETA: 0:12:19[39m[K

test reward at step 1290000: 123.10246176204355


[32mProgress:  65%|██████████████████████████▋              |  ETA: 0:12:07[39m[K

parameters at step 1300000 saved to ./RL_models_fast/vtol_2D_ppo_1300000.bson
test reward at step 1300000: 100.43005070039322


[32mProgress:  65%|██████████████████████████▉              |  ETA: 0:11:57[39m[K

test reward at step 1310000: 83.11752720457937


[32mProgress:  66%|███████████████████████████              |  ETA: 0:11:46[39m[K

test reward at step 1320000: 82.14616305655977


[32mProgress:  66%|███████████████████████████▎             |  ETA: 0:11:36[39m[K

test reward at step 1330000: 93.81752726443861


[32mProgress:  67%|███████████████████████████▌             |  ETA: 0:11:25[39m[K

test reward at step 1340000: 121.3202023943622


[32mProgress:  67%|███████████████████████████▋             |  ETA: 0:11:15[39m[K

test reward at step 1350000: 87.19359354303718


[32mProgress:  68%|███████████████████████████▉             |  ETA: 0:11:04[39m[K

test reward at step 1360000: 120.02791928578169


[32mProgress:  68%|████████████████████████████             |  ETA: 0:10:55[39m[K

test reward at step 1370000: 99.96574670690987


[32mProgress:  69%|████████████████████████████▎            |  ETA: 0:10:44[39m[K

test reward at step 1380000: 95.82876896519089


[32mProgress:  69%|████████████████████████████▌            |  ETA: 0:10:33[39m[K

test reward at step 1390000: 133.30809963365618


[32mProgress:  70%|████████████████████████████▋            |  ETA: 0:10:23[39m[K

parameters at step 1400000 saved to ./RL_models_fast/vtol_2D_ppo_1400000.bson
test reward at step 1400000: 115.82207490217087


[32mProgress:  70%|████████████████████████████▉            |  ETA: 0:10:12[39m[K

test reward at step 1410000: 61.636504877425295


[32mProgress:  71%|█████████████████████████████▏           |  ETA: 0:10:02[39m[K

test reward at step 1420000: 127.0589142534098


[32mProgress:  71%|█████████████████████████████▎           |  ETA: 0:09:51[39m[K

test reward at step 1430000: 105.31114967361536


[32mProgress:  72%|█████████████████████████████▌           |  ETA: 0:09:42[39m[K

test reward at step 1440000: 70.82706550159615


[32mProgress:  72%|█████████████████████████████▊           |  ETA: 0:09:31[39m[K

test reward at step 1450000: 115.87755226772396


[32mProgress:  73%|█████████████████████████████▉           |  ETA: 0:09:20[39m[K

test reward at step 1460000: 108.80496789528443


[32mProgress:  73%|██████████████████████████████▏          |  ETA: 0:09:09[39m[K

test reward at step 1470000: 108.91423769543115


[32mProgress:  74%|██████████████████████████████▍          |  ETA: 0:09:00[39m[K

test reward at step 1480000: 95.55529061945839


[32mProgress:  74%|██████████████████████████████▌          |  ETA: 0:08:50[39m[K

test reward at step 1490000: 121.66902314485817


[32mProgress:  75%|██████████████████████████████▊          |  ETA: 0:08:38[39m[K

parameters at step 1500000 saved to ./RL_models_fast/vtol_2D_ppo_1500000.bson
test reward at step 1500000: 116.98723344482866


[32mProgress:  76%|███████████████████████████████          |  ETA: 0:08:29[39m[K

test reward at step 1510000: 20.112060367964546


[32mProgress:  76%|███████████████████████████████▏         |  ETA: 0:08:17[39m[K

test reward at step 1520000: 118.97053698018982


[32mProgress:  76%|███████████████████████████████▍         |  ETA: 0:08:07[39m[K

test reward at step 1530000: 119.69984812009542


[32mProgress:  77%|███████████████████████████████▌         |  ETA: 0:07:57[39m[K

test reward at step 1540000: 108.97933839059796


[32mProgress:  77%|███████████████████████████████▊         |  ETA: 0:07:47[39m[K

test reward at step 1550000: 113.12984724667903


[32mProgress:  78%|████████████████████████████████         |  ETA: 0:07:35[39m[K

test reward at step 1560000: 112.05277986819385


[32mProgress:  78%|████████████████████████████████▏        |  ETA: 0:07:25[39m[K

test reward at step 1570000: 85.57313375649326


[32mProgress:  79%|████████████████████████████████▍        |  ETA: 0:07:15[39m[K

test reward at step 1580000: 94.55304076349981


[32mProgress:  79%|████████████████████████████████▋        |  ETA: 0:07:05[39m[K

test reward at step 1590000: 100.24319148788996


[32mProgress:  80%|████████████████████████████████▊        |  ETA: 0:06:55[39m[K

parameters at step 1600000 saved to ./RL_models_fast/vtol_2D_ppo_1600000.bson


[32mProgress:  80%|████████████████████████████████▊        |  ETA: 0:06:54[39m[K

test reward at step 1600000: 112.82778832928858


[32mProgress:  80%|█████████████████████████████████        |  ETA: 0:06:44[39m[K

test reward at step 1610000: 120.74248790495352


[32mProgress:  81%|█████████████████████████████████▎       |  ETA: 0:06:33[39m[K

test reward at step 1620000: 107.80069697063996


[32mProgress:  81%|█████████████████████████████████▍       |  ETA: 0:06:22[39m[K

test reward at step 1630000: 90.69757009897134


[32mProgress:  82%|█████████████████████████████████▋       |  ETA: 0:06:13[39m[K

test reward at step 1640000: 126.04490949227284


[32mProgress:  82%|█████████████████████████████████▉       |  ETA: 0:06:01[39m[K

test reward at step 1650000: 72.63088037107642


[32mProgress:  83%|██████████████████████████████████       |  ETA: 0:05:51[39m[K

test reward at step 1660000: 131.9267663293801


[32mProgress:  83%|██████████████████████████████████▎      |  ETA: 0:05:41[39m[K

test reward at step 1670000: 102.32275813402092


[32mProgress:  84%|██████████████████████████████████▍      |  ETA: 0:05:32[39m[K

test reward at step 1680000: 128.5779882549977


[32mProgress:  84%|██████████████████████████████████▋      |  ETA: 0:05:20[39m[K

test reward at step 1690000: 65.68468172753407


[32mProgress:  85%|██████████████████████████████████▉      |  ETA: 0:05:11[39m[K

parameters at step 1700000 saved to ./RL_models_fast/vtol_2D_ppo_1700000.bson
test reward at step 1700000: 132.2528259390977


[32mProgress:  85%|███████████████████████████████████      |  ETA: 0:05:00[39m[K

test reward at step 1710000: 120.4009106456948


[32mProgress:  86%|███████████████████████████████████▎     |  ETA: 0:04:50[39m[K

test reward at step 1720000: 117.66832726414417


[32mProgress:  86%|███████████████████████████████████▌     |  ETA: 0:04:39[39m[K

test reward at step 1730000: 120.37953389259556


[32mProgress:  87%|███████████████████████████████████▋     |  ETA: 0:04:29[39m[K

test reward at step 1740000: 103.87038634711246


[32mProgress:  87%|███████████████████████████████████▉     |  ETA: 0:04:20[39m[K

test reward at step 1750000: 103.61329229274007


[32mProgress:  88%|████████████████████████████████████▏    |  ETA: 0:04:09[39m[K

test reward at step 1760000: 117.74186987433835


[32mProgress:  88%|████████████████████████████████████▎    |  ETA: 0:03:58[39m[K

test reward at step 1770000: 109.46714862103715


[32mProgress:  89%|████████████████████████████████████▌    |  ETA: 0:03:48[39m[K

test reward at step 1780000: 131.0485864218125


[32mProgress:  89%|████████████████████████████████████▋    |  ETA: 0:03:38[39m[K

test reward at step 1790000: 124.6856323886459


[32mProgress:  90%|████████████████████████████████████▉    |  ETA: 0:03:28[39m[K

parameters at step 1800000 saved to ./RL_models_fast/vtol_2D_ppo_1800000.bson
test reward at step 1800000: 70.24373462823901


[32mProgress:  90%|█████████████████████████████████████▏   |  ETA: 0:03:18[39m[K

test reward at step 1810000: 133.82031680767645


[32mProgress:  91%|█████████████████████████████████████▎   |  ETA: 0:03:06[39m[K

test reward at step 1820000: 111.21612919690537


[32mProgress:  91%|█████████████████████████████████████▌   |  ETA: 0:02:57[39m[K

test reward at step 1830000: 75.05354713326652


[32mProgress:  92%|█████████████████████████████████████▋   |  ETA: 0:02:47[39m[K

test reward at step 1840000: 122.88455948227309


[32mProgress:  92%|█████████████████████████████████████▉   |  ETA: 0:02:35[39m[K

test reward at step 1850000: 100.4513015440927


[32mProgress:  93%|██████████████████████████████████████▏  |  ETA: 0:02:26[39m[K

test reward at step 1860000: 107.4770344387765


[32mProgress:  93%|██████████████████████████████████████▎  |  ETA: 0:02:16[39m[K

test reward at step 1870000: 114.53142575498734


[32mProgress:  94%|██████████████████████████████████████▌  |  ETA: 0:02:06[39m[K

test reward at step 1880000: 113.80211067828743


[32mProgress:  94%|██████████████████████████████████████▊  |  ETA: 0:01:55[39m[K

test reward at step 1890000: 122.08488621967972


[32mProgress:  95%|███████████████████████████████████████  |  ETA: 0:01:43[39m[K

parameters at step 1900000 saved to ./RL_models_fast/vtol_2D_ppo_1900000.bson
test reward at step 1900000: 13.827332216010351


[32mProgress:  95%|███████████████████████████████████████▏ |  ETA: 0:01:34[39m[K

test reward at step 1910000: 84.32728086415838


[32mProgress:  96%|███████████████████████████████████████▍ |  ETA: 0:01:24[39m[K

test reward at step 1920000: 110.95688807858161


[32mProgress:  97%|███████████████████████████████████████▋ |  ETA: 0:01:12[39m[K

test reward at step 1930000: 103.85799870001637


[32mProgress:  97%|███████████████████████████████████████▊ |  ETA: 0:01:02[39m[K

test reward at step 1940000: 118.14238399849178


[32mProgress:  97%|████████████████████████████████████████ |  ETA: 0:00:53[39m[K

test reward at step 1950000: 122.57995068634682


[32mProgress:  98%|████████████████████████████████████████▏|  ETA: 0:00:41[39m[K

test reward at step 1960000: 95.26209660125623


[32mProgress:  99%|████████████████████████████████████████▍|  ETA: 0:00:31[39m[K

test reward at step 1970000: -19.52344091128349


[32mProgress:  99%|████████████████████████████████████████▋|  ETA: 0:00:21[39m[K

test reward at step 1980000: 122.98445674374081


[32mProgress:  99%|████████████████████████████████████████▊|  ETA: 0:00:11[39m[K

test reward at step 1990000: 98.90788902408799


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:34:29[39m[K


parameters at step 2000000 saved to ./RL_models_fast/vtol_2D_ppo_2000000.bson
test reward at step 2000000: 116.3734651135977


In [25]:
plot([1:171]*10000,episode_test_reward_hook.rewards, xlabel="Steps", ylabel="Reward", legend = false, xformatter = :scientific)
savefig("Reward_fast.png")

BoundsError: BoundsError: attempt to access 171-element StepRangeLen{Int64, Int64, Int64, Int64} at index [1:200]

In [26]:
close_visualization(); # closes the MeshCat visualization