# Init Bionic VTOL

In [1]:
include("../Flyonic.jl");
using .Flyonic;

using Rotations; # used for initial position

using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Random;
using IntervalSets;
using LinearAlgebra;
using Distributions;

using Plots;
using Statistics;

using BSON: @save, @load # save mode

In [2]:
calculateAngle([1.0 ,0, 0], [0.0 ,1.0, 0.0])

1.5707963267948966

In [3]:
create_visualization();

┌ Info: MeshCat server started. You can open the visualizer by visiting the following URL in your browser:
│ http://127.0.0.1:8700
└ @ MeshCat /Users/leonardoigler/.julia/packages/MeshCat/Ax8pH/src/visualizer.jl:73


In [4]:
# indicates how many threads Julia was started with. This is important for the multi-threaded environment
Threads.nthreads()

1

# Create Reinforcement Learning Environment

In [5]:
mutable struct VtolEnv{A,T,ACT,R<:AbstractRNG} <: AbstractEnv # Parametric Constructor for a subtype of AbstractEnv
    action_space::A
    observation_space::Space{Vector{ClosedInterval{T}}}
    state::Vector{T}
    action::ACT
    done::Bool
    t::T
    rng::R

    name::String #for multible environoments
    
    # Everything you need aditionaly can also go in here.
    x_W::Vector{T}
    v_B::Vector{T}
    R_W::Matrix{T}
    ω_B::Vector{T}
    wind_W::Vector{T}
    Δt::T
    # Bonus
    x_d_W::Vector{T}
    angle_d_W::T
end

In [6]:
# define a keyword-based constructor for the type declared in the mutable struct typedef. 
# It could also be done with the macro Base.@kwdef.
function VtolEnv(;
     
    #continuous = true,
    rng = Random.GLOBAL_RNG, # Random number generation
    name = "vtol",
    kwargs... # let the function take an arbitrary number of keyword arguments 
)
    
    T = Float64; # explicit type which is used e.g. in state. Cannot be altered due to the poor matrix defininon.

    #action_space = Base.OneTo(21) # 21 discrete positions for the flaps
    
    action_space = Space(
        ClosedInterval{T}[
            0.0..2.0, # propeller 1
            0.0..2.0, # propeller 2
            ], 
    )

    
    state_space = Space( # Three continuous values in state space.
        ClosedInterval{T}[
            # TODO: Maybe add rotation velovity around z-axis...
            typemin(T)..typemax(T), # world position along x
            typemin(T)..typemax(T), # world position along y
            typemin(T)..typemax(T), # rotation arround z
            #########
            #typemin(T)..typemax(T), # GARBAGE
            #########
            ], 
    )

    # Calculate random vector & angle

    #################################
    
    create_VTOL(name, actuators = true, color_vec=[1.0; 1.0; 0.6; 1.0]);
    # TODO Why exactly give a matrix as input + Output is not quaternion...
    #set_transform(name, [0.0; 0.0; 0.0] ,QuatRotation(UnitQuaternion(RotY(-pi/2.0)*RotX(pi))));
    set_transform(name, [0.0; 0.0; 0.0] ,QuatRotation(UnitQuaternion(RotX(pi))));
    set_actuators(name, [0.0; 0.0])#; 0.0; 0.0]) 
    # TODO: Set desired position

    environment = VtolEnv(
        action_space,
        state_space,
        zeros(T, 3), # current state, needs to be extended.
        rand(action_space),
        false, # episode done ?
        0.0, # time
        rng, # random number generator  
        name,
        zeros(T, 3), # x_W
        zeros(T, 3), # v_B
        Matrix(UnitQuaternion((RotX(pi)))),
        #Matrix(UnitQuaternion(RotY(-pi/2.0)*RotX(pi))), # Float64... so T needs to be Float64
        zeros(T, 3), # ω_B
        zeros(T, 3), # wind_W
        T(0.025), # Δt 
        # TODO Random
        [1.0, 1.0, 0.0], # desired distance 
        calculateAngle([1.0 ,0.0, 0.0], [1.0, 1.0, 0.0]), # desired angle
    )
    
    
    reset!(environment)
    
    return environment
    
end;

In [7]:
# TODO Don't get that part. Ask next meeting
print(RotY(-pi/2.0)*RotX(pi))
print(UnitQuaternion(RotY(-pi/2.0)*RotX(pi)))
print(QuatRotation(UnitQuaternion(RotY(-pi/2.0)*RotX(pi))))

[6.123233995736766e-17 -1.2246467991473532e-16 1.0; 0.0 -1.0 -1.2246467991473532e-16; 1.0 7.498798913309288e-33 -6.123233995736766e-17]

[0.0 -1.224646799147353e-16 0.9999999999999998; 0.0 -0.9999999999999998 -1.224646799147353e-16; 0.9999999999999998 0.0 0.0][0.0 -1.224646799147353e-16 0.9999999999999998; 0.0 -0.9999999999999998 -1.224646799147353e-16; 0.9999999999999998 0.0 0.0]

Just for explanation:

1. A mutable Struct is created. A struct is a constructor and a constructor is a function that creates new objects.
2. A outer keyword-based constructor method is added for the type declared in the mutable struct typedef before.

So now we have a function with two methods. Julia will decide which method to call by multiple dispatch.

In [8]:
methods(VtolEnv)

# Define the RL interface

In [9]:
Random.seed!(env::VtolEnv, seed) = Random.seed!(env.rng, seed)
RLBase.action_space(env::VtolEnv) = env.action_space
RLBase.state_space(env::VtolEnv) = env.observation_space
RLBase.is_terminated(env::VtolEnv) = env.done
RLBase.state(env::VtolEnv) = env.state

In [10]:
a = [1,0,0]
b = [-1,1,0]
acos(clamp(a⋅b/(norm(a)*norm(b)), -1, 1))

2.356194490192345

In [11]:
function computeReward(env::VtolEnv{A,T}) where {A,T}
    
    stay_alive = 3.0

    distance_goal = norm(env.x_d_W-[env.state[1], env.state[2], 0])*100.0

    difference_angle = abs(env.state[3]-env.angle_d_W)*100.0
    

    # TODO Save last position or last projection somewhere (env.last) --> Compare 
    # to current project along line

    #not_upright_orientation = abs(env.state[1]-pi*0.5)*10.0
    #not_centered_position = abs(env.state[2])*10.0
    #hight = env.state[4]*100.0
    
    #return stay_alive - not_upright_orientation - not_centered_position #+ hight
    return stay_alive - distance_goal - difference_angle
end


RLBase.reward(env::VtolEnv{A,T}) where {A,T} = computeReward(env)

In [12]:
function RLBase.reset!(env::VtolEnv{A,T}) where {A,T}
    
    # Visualize initial state
    set_transform(env.name, env.x_W,QuatRotation(env.R_W));
    set_actuators(env.name, [0.0; 0.0])#; 0.0; 0.0])
    
    env.x_W = [0.0; 0.0; 0.0];
    env.v_B = [0.0; 0.0; 0.0];
    #env.R_W = Matrix(UnitQuaternion(RotY(-pi/2.0)*RotX(pi)));
    env.R_W = Matrix(UnitQuaternion(RotX(pi)));
    env.ω_B = [0.0; 0.0; 0.0];
    env.wind_W = [0.0; 0.0; 0.0];


    env.x_d_W = [1.0, 1.0, 0.0] # desired distance 
    env.angle_d_W = calculateAngle([1.0 ,0.0, 0.0], env.x_d_W) # desired angle
    
    # TODO: Check why NaN
    env.state = [env.x_W[1]; env.x_W[2]; Rotations.params(RotYXZ(env.R_W))[3]]
    env.t = 0.0
    env.action = [0.0, 0.0]
    env.done = false
    nothing
end;

In [13]:
R_W = Matrix(UnitQuaternion(RotX(pi)))
Rotations.params(RotYXZ(R_W))


3-element StaticArraysCore.SVector{3, Float64} with indices SOneTo(3):
 3.141592653589793
 1.2246467991473532e-16
 3.141592653589793

In [14]:
# defines a methods for a callable object.
# So when a VtolEnv object is created, it has this method that can be called
function (env::VtolEnv)(a)

    # set the propeller trust and the two flaps 2D case
    next_action = [a[1], a[2]]
   
    _step!(env, next_action)
end

In [15]:
env = VtolEnv()

# VtolEnv

## Traits

| Trait Type        |                  Value |
|:----------------- | ----------------------:|
| NumAgentStyle     |          SingleAgent() |
| DynamicStyle      |           Sequential() |
| InformationStyle  | ImperfectInformation() |
| ChanceStyle       |           Stochastic() |
| RewardStyle       |           StepReward() |
| UtilityStyle      |           GeneralSum() |
| ActionStyle       |     MinimalActionSet() |
| StateStyle        |     Observation{Any}() |
| DefaultStateStyle |     Observation{Any}() |

## Is Environment Terminated?

No

## State Space

`Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])`

## Action Space

`Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[0.0..2.0, 0.0..2.0])`

## Current State

```
[0.0, 0.0, 3.141592653589793]
```


In [16]:
methods(env) # Just to explain which methods the object has

In [17]:
function _step!(env::VtolEnv, next_action)
        
    # caluclate wind impact
    v_in_wind_B = vtol_add_wind(env.v_B, env.R_W, env.wind_W)
    # caluclate aerodynamic forces
    torque_B, force_B = vtol_model(v_in_wind_B, next_action, eth_vtol_param);
    # integrate rigid body dynamics for Δt
    env.x_W, env.v_B, env.R_W, env.ω_B, time = rigid_body_simple(torque_B, force_B, env.x_W, env.v_B, env.R_W, env.ω_B, env.t, env.Δt, eth_vtol_param)


    # Visualize the new state 
    # TODO: Can be removed for real trainings
    set_transform(env.name, env.x_W, QuatRotation(env.R_W));
    set_actuators(env.name, next_action)
 
    env.t += env.Δt
    
    # State space
    rot = Rotations.params(RotYXZ(env.R_W))[3]
    env.state[1] = env.x_W[1] # world position in x
    env.state[2] = env.ω_B[2] # world position in y
    env.state[3] = rot # rotation around z    
    
    # Termination criteria
    env.done = #true

        # TODO Zu lang
        # TODO Zu weit weg
        # TODO Ziel erreicht

        # After time... How fast is drone+Range of desired point
        # After reaching position (circle of r_tol)

        #norm(env.v_B) > 2.0 || # stop if body is too fast
        #env.x_W[2] < -1.0 || # stop if body is below -1m
        #0.0 > rot || # Stop if the drone is pitched 90°.
        #rot > pi || # Stop if the drone is pitched 90°.
        env.t > 10 # stop after 10s
    nothing
end;

In [18]:
RLBase.test_runnable!(env)

random policy with VtolEnv: 

[91m[1mTest Failed[22m[39m at 

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [14.58722547660592, -1.21919822176656e230, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:


 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[

[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vecto

[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..I

[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][

[90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Res

[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [in

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[3

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[3


 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[

[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: 

[0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}

[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m


random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[3


 

[1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22

[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: 

[0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}

::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/

 [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runna

[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vect

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[3

[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m 

, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/R

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [-5.104359995692889, -1.4201859506962243e185, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{

[91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Ran

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[3

[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m 

, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/R

[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mT

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

[4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLear

[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m 

, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/R

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [9.985783404866831, -2.649883659647983e204, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Ve

[0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/R

[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[

[0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearning

[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[

[0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearning

[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m 

, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/R

[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mT


 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Flo

[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m::[0mInt64; [90mrng[39m::[0mRandom._GLOBAL_RNG[0m[1m)[22m
[90m   @ [39m[35mReinforcementLearningBase[39m [90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:267[24m[39m
random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[

random policy with VtolEnv: [91m[1mTest Failed[22m[39m at [39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, F

[39m[1m/Users/leonardoigler/.julia/packages/ReinforcementLearningBase/E7jI5/src/base.jl:280[22m
  Expression: s in S
   Evaluated: [NaN, NaN, NaN] in Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf])
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:464[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/.julia/packages/ReinforcementLearningBase/E7jI5/src/[39m[90m[4mbase.jl:280[24m[39m[90m [inlined][39m
 [3] [0m[1mmacro expansion[22m
[90m   @ [39m[90m/Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/[39m[90m[4mTest.jl:1357[24m[39m[90m [inlined][39m
 [4] [0m[1mtest_runnable![22m[0m[1m([22m[90menv[39m::[0mVtolEnv[90m{Space{Vector{ClosedInterval{Float64}}}, Float64, Vector{Float64}, Random._GLOBAL_RNG}[39m, [90mn[39m

Test.TestSetException: Some tests did not pass: 1627 passed, 373 failed, 0 errored, 0 broken.

Show an overview of the environment.

# Setup of a reinforcement learning experiment.

In [19]:
seed = 123    
rng = StableRNG(seed)
    N_ENV = 8
    UPDATE_FREQ = 1024
    
    
    # define multiple environments for parallel training
    env = MultiThreadEnv([
        # use different names for the visualization
        VtolEnv(; rng = StableRNG(hash(seed+i)), name = "vtol$i") for i in 1:N_ENV
    ])

MultiThreadEnv(8 x VtolEnv)

In [20]:
# Define the function approximator
    ns, na = length(state(env[1])), length(action_space(env[1]))
    approximator = ActorCritic(
                actor = GaussianNetwork(
                    pre = Chain(
                    Dense(ns, 16, relu; initW = glorot_uniform(rng)),#
                    Dense(16, 16, relu; initW = glorot_uniform(rng)),
                    ),
                    μ = Chain(Dense(16, na; initW = glorot_uniform(rng))),
                    logσ = Chain(Dense(16, na; initW = glorot_uniform(rng))),
                ),
                critic = Chain(
                    Dense(ns, 16, relu; initW = glorot_uniform(rng)),
                    Dense(16, 16, relu; initW = glorot_uniform(rng)),
                    Dense(16, 1; initW = glorot_uniform(rng)),
                ),
                optimizer = ADAM(1e-3),
            );

In [21]:
    agent = Agent( # A wrapper of an AbstractPolicy
        # AbstractPolicy: the policy to use
        policy = PPOPolicy(;
                    approximator = approximator |> gpu,
                    update_freq=UPDATE_FREQ,
                    dist = Normal,
                    # For parameters visit the docu: https://juliareinforcementlearning.org/docs/rlzoo/#ReinforcementLearningZoo.PPOPolicy
                    ),
        
        # AbstractTrajectory: used to store transitions between an agent and an environment source
        trajectory = PPOTrajectory(;
            capacity = UPDATE_FREQ,
            state = Matrix{Float64} => (ns, N_ENV),
            action = Matrix{Float64} => (na, N_ENV),
            action_log_prob = Vector{Float64} => (N_ENV,),
            reward = Vector{Float64} => (N_ENV,),
            terminal = Vector{Bool} => (N_ENV,),
        ),
    )


┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/leonardoigler/.julia/packages/Flux/7nTyc/src/functor.jl:187


typename(Agent)
├─ policy => typename(PPOPolicy)
│  ├─ approximator => typename(ActorCritic)
│  │  ├─ actor => typename(GaussianNetwork)
│  │  │  ├─ pre => typename(Chain)
│  │  │  │  └─ layers
│  │  │  │     ├─ 1
│  │  │  │     │  └─ typename(Dense)
│  │  │  │     │     ├─ weight => 16×3 Matrix{Float32}
│  │  │  │     │     ├─ bias => 16-element Vector{Float32}
│  │  │  │     │     └─ σ => typename(typeof(relu))
│  │  │  │     └─ 2
│  │  │  │        └─ typename(Dense)
│  │  │  │           ├─ weight => 16×16 Matrix{Float32}
│  │  │  │           ├─ bias => 16-element Vector{Float32}
│  │  │  │           └─ σ => typename(typeof(relu))
│  │  │  ├─ μ => typename(Chain)
│  │  │  │  └─ layers
│  │  │  │     └─ 1
│  │  │  │        └─ typename(Dense)
│  │  │  │           ├─ weight => 2×16 Matrix{Float32}
│  │  │  │           ├─ bias => 2-element Vector{Float32}
│  │  │  │           └─ σ => typename(typeof(identity))
│  │  │  ├─ logσ => typename(Chain)
│  │  │  │  └─ layers
│  │  │  │     └─ 1


In [22]:
function saveModel(t, agent, env)
    model = cpu(agent.policy.approximator)   
    f = joinpath("./RL_models/", "vtol_ppo_2_$t.bson")
    @save f model
    println("parameters at step $t saved to $f")
end

saveModel (generic function with 1 method)

In [23]:
function loadModel()
    f = joinpath("./RL_models/", "vtol_ppo_2_9320000.bson")
    @load f model
    return model
end

loadModel (generic function with 1 method)

In [24]:
#agent.policy.approximator = loadModel();

In [25]:
run(
           agent,
           env,
           StopAfterStep(100_000),
           DoEveryNStep(saveModel, n=40_000)
       )

[32mProgress:   0%|                                         |  ETA: 1 days, 20:34:18[39m[K

ArgumentError: ArgumentError: Normal: the condition σ >= zero(σ) is not satisfied.

In [26]:
close_visualization(); # closes the MeshCat visualization