In [1]:
using Pkg
if isfile("Project.toml") && isfile("Manifest.toml")
    Pkg.activate(".")
end

# Init Bionic VTOL

In [2]:
include("../Flyonic.jl");
using .Flyonic;

using Rotations; # used for initial position

using ReinforcementLearning;
using StableRNGs;
using Flux;
using Flux.Losses;
using Random;
using IntervalSets;
using LinearAlgebra;
using Distributions;

using Plots;
using Statistics;

using TensorBoardLogger
using Logging

using BSON: @save, @load # save mode

In [34]:
# TODO: set as desired
R_TOL = 0.5;
N_WAYPOINTS = 4; # including startpoint, >= 2
SLOW_MODE = true;
TRAINING = true;
WAYPOINTS = generate_trajectory(N_WAYPOINTS + 1)
# EVALUATION = true;
# VIDEO = false;

5-element Vector{Vector{Float64}}:
 [0.0, 0.0, 0.0]
 [-0.3633798472271099, -0.7025872365085171, 2.812468506047548]
 [-1.2786859581833756, -3.588701739494832, 4.1325260146927425]
 [0.15082554683236182, -5.66033683324271, 5.537634378972934]
 [1.2366869264048796, -4.104417979735828, 8.141000288055723]

In [35]:
create_remote_visualization();

[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mMeshCat server started. You can open the visualizer by visiting the following URL in your browser:
[36m[1m└ [22m[39mhttp://192.168.178.67:8702


In [36]:
# TensorBoard
logger = TBLogger("tensorboard_SAC", tb_increment)

TBLogger:
	- Log level     : Info
	- Current step  : 0
	- Output        : /home/larissa/Documents/Projects/ADLR/ADLR_project/src/RL_SAC_quadcopter_3D/tensorboard_SAC_1
	- open files    : 1


In [37]:
# indicates how many threads Julia was started with. This is important for the multi-threaded environment
Threads.nthreads()

1

In [38]:
Δt = 0.025;

# Create Reinforcement Learning Environment

In [39]:
mutable struct VtolEnv{A,T,ACT,R<:AbstractRNG} <: AbstractEnv # Parametric Constructor for a subtype of AbstractEnv
    action_space::A # All possible actions the agent can take
    observation_space::Space{Vector{ClosedInterval{T}}} # All possible states that the agent can observe.
    state::Vector{T} # Current state
    action::ACT # next action the agent wants to apply in the environment.
    done::Bool # shows whether a terminal condition has been reached.
    t::T # time
    rng::R # random number generator

    name::String # for different naming of multible environoments
    visualization::Bool # activate visualisation (Faster computation without visualisation)
    realtime::Bool # visualization in "real-world" time (only for watching or filming).
    
    # Overall state of the environment. This does not correspond to the observation space of the agent but contains all states that describe the environment.
    x_W::Vector{T} # Position in World frame
    v_B::Vector{T} # Velocity in Body frame
    R_W::Matrix{T} # Rotation (matrix) in World frame
    ω_B::Vector{T} # Rotation velocity in Body frame
    wind_W::Vector{T} # Externel linear velocity acting on the drone
    Δt::T # Time step for physics simulation in seconds

    ###NEW###
    # Current Bonus / Target
    num_waypoints::Int # includig start point
    waypoints::Vector{Vector{T}}
    reached_goal::BitVector
    
    norm_way::T
    progress::T
    progress_prev::T
    current_point::Int
    reached_goal_in_step::Bool
    
    r_tol::T
    projected_position::Vector{T}

    slow_mode::Bool
    ######
end

In [40]:
# define a keyword-based constructor for the type declared in the mutable struct typedef. 
# It could also be done with the macro Base.@kwdef.
function VtolEnv(;
    rng = Random.GLOBAL_RNG, # Random number generation
    name = "Crazyflie",
    visualization = false,
    realtime = false,
    kwargs... # let the function take an arbitrary number of keyword arguments
)
    
    T = Float64; # explicit type which is used e.g. in state. Cannot be altered due to the poor matrix defininon.
    
    # final PWM Values for Crazyflie. The interval definition has no effect in the current implementation.
    action_space = Space(
        ClosedInterval{T}[
            0..1#0..65535, # motor 1
            0..1#0..65535, # motor 2
            0..1#0..65535, # motor 3
            0..1#0..65535, # motor 4
            ], 
    )

    
    state_space = Space( # Three continuous values in state space.
        ClosedInterval{T}[#todo
            typemin(T)..typemax(T), # 1 x
            typemin(T)..typemax(T), # 2 y
            typemin(T)..typemax(T), # 3 z

            typemin(T)..typemax(T), # 4  World Vector UP x
            typemin(T)..typemax(T), # 5  World Vector UP y
            typemin(T)..typemax(T), # 6  World Vector UP z

            typemin(T)..typemax(T), # 7  World Vector FRONT x
            typemin(T)..typemax(T), # 8  World Vector FRONT y
            typemin(T)..typemax(T), # 9  World Vector FRONT z
            
            typemin(T)..typemax(T), # 10 Body velocity along x
            typemin(T)..typemax(T), # 11 Body velocity along y
            typemin(T)..typemax(T), # 12 Body velocity along z
            
            typemin(T)..typemax(T), # 13 Body rotational velocity around x
            typemin(T)..typemax(T), # 14 Body rotational velocity around y
            typemin(T)..typemax(T), # 15 Body rotational velocity around z
            
            ###NEW###
            typemin(T)..typemax(T), # 16 position error along x (next gate - current position)
            typemin(T)..typemax(T), # 17 position error along y (next gate - current position)
            typemin(T)..typemax(T), # 18 position error along z (next gate - current position)
            
            typemin(T)..typemax(T), # 19 way to next next gate x (next next gate - next gate)
            typemin(T)..typemax(T), # 20 way to next next gate y (next next gate - next gate)
            typemin(T)..typemax(T), # 21 way to next next gate z (next next gate - next gate)
            ######
            ], 
    )

    ###NEW###
    num_waypoints = N_WAYPOINTS # number of waypoints, includig start point
    waypoints = WAYPOINTS#generate_trajectory(num_waypoints + 1) # trajectory with num_waypoints waypoints (+ start point), (with dummy points) 
    reached_goal = falses(num_waypoints)
    
    norm_way = 0.0 
    for i in 1:(num_waypoints - 1)
        norm_way += norm(waypoints[i] - waypoints[i + 1])
    end
    ######
    
    if visualization
        create_Crazyflie(name, actuators = true);
        visualize_waypoints(waypoints[1:num_waypoints], 0.05)

        set_Crazyflie_actuators(name, [0.0; 0.0; 0.0; 0.0]);
        set_transform(name, [0.0; 0.0; 0.0] ,one(QuatRotation));
        set_arrow(string(name, "vel"), color_vec=[0.0; 1.0; 0.0; 1.0]);
        transform_arrow(string(name, "vel"), [0.0; 0.0; 0.0], [0.0; 0.0; 1.0], max_head_radius=0.05)
#         set_arrow(string(name, "_vel_current"), color_vec=[1.0; 0.0; 0.0; 1.0]);
#         transform_arrow(string(name, "_vel_current"), [0.0; 0.0; 0.0], [0.0; 0.0; 1.0], max_head_radius=0.02)                  
    end
    


    environment = VtolEnv(
        action_space,
        state_space,
        zeros(T, length(state_space)), # current state, needs to be extended.
        [1.0; 1.0; 1.0; 1.0],#rand(action_space), #todo test with random
        false, # episode done ?
        0.0, # time
        rng, # random number generator  

        name,
        visualization,
        realtime,

        zeros(T, 3), # x_W
        zeros(T, 3), # v_B
        Matrix(one(QuatRotation)), # Float64... so T needs to be Float64
        zeros(T, 3), # ω_B
        zeros(T, 3), # wind_W
        Δt, # Δt

        num_waypoints, # includig start point
        waypoints, 
        reached_goal,

        norm_way, # norm_way
        0.0, # progress
        0.0, # progress_prev
        2, # current point
        false, # reached_goal_in_step
        
        R_TOL, # r_tol
        zeros(T, 3), # projected_position

        SLOW_MODE # slow_mode

        )
    
    
    RLBase.reset!(environment)
    
    return environment
    
end;

Just for explanation:

1. A mutable Struct is created. A struct is a constructor and a constructor is a function that creates new objects.
2. A outer keyword-based constructor method is added for the type declared in the mutable struct typedef before.

So now we have a function with two methods. Julia will decide which method to call by multiple dispatch.

# Define the RL interface

In [41]:
Random.seed!(env::VtolEnv, seed) = Random.seed!(env.rng, seed)
RLBase.action_space(env::VtolEnv) = env.action_space
RLBase.state_space(env::VtolEnv) = env.observation_space
RLBase.is_terminated(env::VtolEnv) = env.done
RLBase.state(env::VtolEnv) = env.state

In [42]:
function scale_for_slowmode(slow_mode::Bool, v_min::T, v_max::T, d_max::T, x_W::Vector{T}, projected_position::Vector{T}, v_B::Vector{T}) where T
    
    if slow_mode == false
        return 1
    else
        if norm(v_B) > v_max
            s_vmax = 10^(v_max - norm(v_B))
        else
            s_vmax = 1
        end

        if norm(v_B) < v_min
            s_vmin = 10^(norm(v_B) - v_min)
        else
            s_vmin = 1
        end

        if norm(x_W - projected_position) > d_max
            s_gd = exp(-norm(x_W - projected_position) + d_max)
        else
            s_gd = 1
        end
        s = s_vmax * s_vmin * s_gd
    end
    return s
end;

In [43]:
function computeReward(env::VtolEnv{A,T}) where {A,T}
    
    
    if env.slow_mode
        v_min = 1.0 # min velocity
        v_max = 3.0 # max velocity
        d_max = 0.5 
    else
        v_min = 4.0 # min velocity
        v_max = 50.0 # max velocity
        d_max = 1.0 
    end


    s = scale_for_slowmode(true, v_min, v_max, d_max, env.x_W, env.projected_position, env.v_B)
    
    # TODO: test norming
    k_p = 5.0 * s #/ env.norm_way # factor for progress (between current position and last position) reward 
    r_p = (env.progress - env.progress_prev); # reward for progress (between current position and last position)

    k_s = s * (2 * v_max * env.Δt) / env.norm_way # factor for reached distance (overall) reward
    r_s = env.progress # reward for reached distance (overall)
    
    k_wp = 50.0 # factor for reached gate reward
    r_wp = 0.0 # reward for reached gate
    if env.reached_goal_in_step
        r_wp = exp(-norm(env.x_W - env.waypoints[env.current_point - 1])/env.r_tol)
    end 

    # factor for too high body rate penalty
    if env.slow_mode
        k_ω = 0.001
    else
        k_ω = 0.0001
    end
    #norm_ω = norm(env.ω_B[3]) # penalty for body rate
    norm_ω = norm(env.ω_B) # penalty for body rate

    if env.x_W[3] < 0
        fall = env.x_W[3]
    else
        fall = 0
    end
    
    if !env.slow_mode
        k_s /= env.norm_way
        k_p /= env.norm_way
    end
    
    return k_p * r_p + k_s * r_s + k_wp * r_wp - k_ω * norm_ω - fall
    
end


RLBase.reward(env::VtolEnv{A,T}) where {A,T} = computeReward(env)

In [44]:
function RLBase.reset!(env::VtolEnv{A,T}) where {A,T}
    
    env.x_W = [0.0; 0.0; 0.0];
    env.v_B = [0.0; 0.0; 0.0];
    env.R_W = Matrix(one(QuatRotation)); # Identity matrix (no rotation)
    env.ω_B = [0.0; 0.0; 0.0];
    env.wind_W = [0.0; 0.0; 0.0];


    env.waypoints = WAYPOINTS #generate_trajectory(env.num_waypoints + 1); # Dummy points
    env.reached_goal = falses(env.num_waypoints);
    env.reached_goal[1] = true; # set first point to reached (start point)
    
    env.current_point = 2;
    env.reached_goal_in_step = false;
    #env.r_tol = 0.3;
    
    if env.visualization
        visualize_waypoints(env.waypoints[1:env.num_waypoints], 0.05); 
    end
    
    norm_way = 0.0 
    for i in 1:(env.num_waypoints - 1)
        norm_way += norm(env.waypoints[i] - env.waypoints[i + 1])
    end
    
    env.norm_way = norm_way
    env.progress = 0.0;
    env.progress_prev = 0.0;
        
 #todo
    env.state = [env.x_W[1]; # 1 position along x
                 env.x_W[2]; # 2 position along y
                 env.x_W[3]; # 3 position along z
#todo right R_W?
                 env.R_W[1,1];
                 env.R_W[2,1];
                 env.R_W[3,1];
        
                 env.R_W[1,3]; # 4  World Vector UP x
                 env.R_W[2,3]; # 5  World Vector UP y
                 env.R_W[3,3]; # 6  World Vector UP z

#                  env.R_W[1,1]; # 7  World Vector FRONT x
#                  env.R_W[2,1]; # 8  World Vector FRONT y
#                  env.R_W[3,1]; # 9  World Vector FRONT z

                 env.v_B[1]; #  10 Body velocity along x
                 env.v_B[2]; #  11 Body velocity along y
                 env.v_B[3]; #  12 Body velocity along z

                 env.ω_B[1]; #  13  Body rotational velocity around x
                 env.ω_B[2]; #  14  Body rotational velocity around y
                 env.ω_B[3]; #  15  Body rotational velocity around z

                 env.waypoints[2][1] - env.x_W[1]; # 16 position error to next gate along x
                 env.waypoints[2][2] - env.x_W[2]; # 17 position error to next gate along z
                 env.waypoints[2][3] - env.x_W[3]; # 18 position error to next gate along z
                 
                 env.waypoints[3][1] - env.waypoints[2][1]; # 19 way to next next gate x 
                 env.waypoints[3][2] - env.waypoints[2][2]; # 20 way to next next gate y
                 env.waypoints[3][3] - env.waypoints[2][3]]  # 21 way to next next gate z 
    

    env.t = 0.0; # time 0s
    env.action = [1.0; 1.0; 1.0; 1.0] # normalized # todo try with 0.0
    #env.last_action = [0.255; 0.255; 0.255; 0.255] # normalized
    #env.current_action = [0.255; 0.255; 0.255; 0.255] # normalized

    env.done = false # reset termination

    env.projected_position = [0; 0; 0]
    
    if env.visualization
        # Visualize initial state
        set_transform(env.name, env.x_W,QuatRotation(env.R_W));
        set_Crazyflie_actuators(env.name, [0.0; 0.0; 0.0; 0.0]);
        #transform_arrow(string(env.name, "_vel"), env.x_W, env.v_W_target, max_head_radius=0.05) 
        transform_arrow(string(env.name, "vel"), env.x_W, [0.0; 0.0; 0.0], max_head_radius=0.05) 
    end
    
    nothing # return nothing
end;

In [45]:
# defines a methods for a callable object.
# So when a VtolEnv object is created, it has this method that can be called
function (env::VtolEnv)(a)


    # call the step on the environoment with the next action 
    _step!(env, a)
        
end

In [46]:
env = VtolEnv();

In [47]:
methods(env) # Just to explain which methods the object has

In [48]:
function scale_actions(next_action)
    return next_action*65535.0 #todo
end

scale_actions (generic function with 1 method)

In [49]:
function _step!(env::VtolEnv, next_action)
    

    #env.last_action = copy(env.current_action)
    #env.current_action[1] = next_action[1]
    #env.current_action[2] = next_action[2]
    #env.current_action[3] = next_action[3]
    #env.current_action[4] = next_action[4]
    
    
    
    scaled_actions = scale_actions.(next_action) # between 0 and 1 for neual network
    # caluclate wind impact
    v_in_wind_B = vtol_add_wind(env.v_B, env.R_W, env.wind_W)
    # caluclate aerodynamic forces
    torque_B, force_B = crazyflie_model(scaled_actions);
    # integrate rigid body dynamics for Δt
    env.x_W, env.v_B, env.R_W, env.ω_B, env.t = rigid_body_simple(torque_B, force_B, env.x_W, env.v_B, env.R_W, env.ω_B, env.t, env.Δt, crazyflie_param)

    
    env.reached_goal_in_step = false;
    if norm(env.x_W - env.waypoints[env.current_point]) < env.r_tol
        env.reached_goal_in_step = true;
        env.reached_goal[env.current_point] = true;
        env.current_point += 1;
    end
        
            
    # calculate progress on trajectory
    env.progress_prev = env.progress
    
    current_progress = 0.0
    line_segment, env.projected_position = calculate_progress(env.waypoints, env.x_W)
    
    #env.current_point = line_segment + 1

    for i in 2:(line_segment)
       current_progress +=  norm(env.waypoints[i] - env.waypoints[i - 1])  
    end
    current_progress += norm(env.waypoints[line_segment] - env.projected_position)
    
    env.progress = current_progress
    

    if env.realtime
        sleep(env.Δt) # TODO: just a dirty hack. this is of course slower than real time.
    end

    # env.t += env.Δt # todo reevaluate


    if env.visualization
        set_transform(env.name, env.x_W,QuatRotation(env.R_W));
        set_Crazyflie_actuators(env.name, next_action[1:4])
        #transform_arrow(string(env.name, "_vel"), env.x_W, env.v_W_target, max_head_radius=0.05)               
        transform_arrow(string(env.name, "vel"), env.x_W, env.R_W*env.v_B, max_head_radius=0.05) 
    
        for i in eachindex(env.reached_goal)
            if env.reached_goal[i]
                create_sphere("fixgoal_$i", 0.05, color=RGBA{Float32}(1.0, 0.0, 0.0, 1.0));
                set_transform("fixgoal_$i", env.waypoints[i]);
            end
        end

    end
    
    
    #v_B_target = transpose(env.R_W)*env.v_W_target
    
    
    # State space
    #todo
    env.state[1] = env.x_W[1];
    env.state[2] = env.x_W[2];
    env.state[3] = env.x_W[3];
    
    env.state[4] = env.R_W[1,1] # 1  World Vector UP x
    env.state[5] = env.R_W[2,1] # 2  World Vector UP y
    env.state[6] = env.R_W[3,1] # 3  World Vector UP z

    env.state[7] = env.R_W[1,3] # 4  World Vector FRONT x
    env.state[8] = env.R_W[2,3] # 5  World Vector FRONT y
    env.state[9] = env.R_W[3,3] # 6  World Vector FRONT z
        
    env.state[10] = env.v_B[1] # 7  Body velocity along x
    env.state[11] = env.v_B[2] # 8  Body velocity along y
    env.state[12] = env.v_B[3] # 9  Body velocity along z

    env.state[13] = env.ω_B[1] # 10 Body rotational velocity around x
    env.state[14] = env.ω_B[2] # 11 Body rotational velocity around y
    env.state[15] = env.ω_B[3] # 12 Body rotational velocity around z

    env.state[16] = env.waypoints[env.current_point][1] - env.x_W[1] # 13 position error to next gate along x
    env.state[17] = env.waypoints[env.current_point][2] - env.x_W[2]; # 14 position error to next gate along z
    env.state[18] = env.waypoints[env.current_point][3] - env.x_W[3]; # 15 position error to next gate along z
     #todo            
    if env.current_point <= env.num_waypoints
        env.state[19] = env.waypoints[env.current_point + 1][1] - env.waypoints[env.current_point][1] ; # 16 way to next next gate x (next next gate - next gate), dummy integriert
        env.state[20] = env.waypoints[env.current_point + 1][2] - env.waypoints[env.current_point][2]; # 17 way to next next gate y (next next gate - next gate), dummy integriert
        env.state[21] = env.waypoints[env.current_point + 1][3] - env.waypoints[env.current_point][3]; # 18 way to next next gate z (next next gate - next gate), dummy integriert
    end

    
    
    # Termination criteria
    env.done = #true
        # After time... How fast is drone+Range of desired point
        # After reaching position (circle of r_tol)
         norm(env.ω_B) > 100.0 || 
         norm(env.v_B) > 100.0 || # stop if body is too fast_point 
        env.x_W[3] < -0.1 || # stop if body is below -5m #todo fall is now useless
        env.t > env.num_waypoints * 3.0 ||# stop after 3s per point #todo set in fast learning phase
        norm(env.x_W - env.projected_position) > 5.0 || # too far off the path 
        env.reached_goal == trues(env.num_waypoints)

    nothing
end;

In [50]:
RLBase.test_runnable!(env)

[0m[1mTest Summary:              | [22m[32m[1mPass  [22m[39m[36m[1mTotal  [22m[39m[0m[1mTime[22m
random policy with VtolEnv | [32m2000  [39m[36m 2000  [39m[0m2.1s


Test.DefaultTestSet("random policy with VtolEnv", Any[], 2000, false, false, true, 1.677049810842222e9, 1.677049812970865e9)

Show an overview of the environment.

# Setup of a reinforcement learning experiment.

In [51]:
seed = 123    
rng = StableRNG(seed)
UPDATE_FREQ = 1024

# define multiple environments for parallel training
env = VtolEnv(; rng = StableRNG(hash(seed)), name = "cf_SAC") 

# VtolEnv

## Traits

| Trait Type        |                  Value |
|:----------------- | ----------------------:|
| NumAgentStyle     |          SingleAgent() |
| DynamicStyle      |           Sequential() |
| InformationStyle  | ImperfectInformation() |
| ChanceStyle       |           Stochastic() |
| RewardStyle       |           StepReward() |
| UtilityStyle      |           GeneralSum() |
| ActionStyle       |     MinimalActionSet() |
| StateStyle        |     Observation{Any}() |
| DefaultStateStyle |     Observation{Any}() |

## Is Environment Terminated?

No

## State Space

`Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[-Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf, -Inf..Inf])`

## Action Space

`Space{Vector{ClosedInterval{Float64}}}(ClosedInterval{Float64}[0.0..1.0, 0.0..1.0, 0.0..1.0, 0.0..1.0])`

## Current State

```
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3633798472271099, -0.7025872365085171, 2.812468506047548, -0.9153061109562657, -2.886114502986315, 1.3200575086451947]
```


In [52]:
# Define the function approximator
# TODO ?
    ns, na = length(state(env)), length(action_space(env))

create_policy_net() = NeuralNetworkApproximator(
        model = GaussianNetwork(
            pre = Chain(
                Dense(ns, 256, relu, init = glorot_uniform(rng)),
                Dense(256, 256, relu, init = glorot_uniform(rng)),
            ),
            μ = Chain(Dense(256, na, init = glorot_uniform(rng))),
            logσ = Chain(Dense(256, na, x -> clamp.(x, typeof(x)(-10), typeof(x)(2)), init = glorot_uniform(rng))),
        ),
        optimizer = ADAM(1e-2),
    )

create_q_net() = NeuralNetworkApproximator(
        model = Chain(
            Dense(ns + na, 256, relu; init = glorot_uniform(rng)),
            Dense(256, 256, relu; init = glorot_uniform(rng)),
            Dense(256, 1; init = glorot_uniform(rng)),
        ),
        optimizer = ADAM(1e-2),
    )

create_q_net (generic function with 1 method)

In [53]:
dataset_size = 1_000_000
start_steps = 1_000 # puffer size
#trajectory_num = dataset_size
#TODO ? 

1000

In [54]:
    agent = Agent( # A wrapper of an AbstractPolicy
     # TODO ?    
    
    policy = SACPolicy(
            policy = create_policy_net(),
            qnetwork1 = create_q_net(),
            qnetwork2 = create_q_net(),
            #target_qnetwork1 = create_q_net(),
            #target_qnetwork2 = create_q_net(),
            γ = 0.99f0,
            τ = 0.005f0,
            α = 0.2f0,
            batch_size = 256,
            start_steps = start_steps,
            start_policy = RandomPolicy(Space([0.90..1.0 for _ in 1:na]); rng = rng),
            update_after = start_steps,
            update_freq = UPDATE_FREQ,
            automatic_entropy_tuning = true,
            lr_alpha = 0.003f0,
            action_dims = na,
            rng = rng,
        ),
        trajectory = CircularArraySARTTrajectory(
            capacity = dataset_size+1,
            state = Vector{Float32} => (ns,),
            action = Vector{Float32} => (na,),
        ),

    
    );

In [55]:
function saveModel(t, agent, env)
    model = cpu(agent.policy)  
    if SLOW_MODE
        f = joinpath("./RL_models_slow/", "cf_sac_$t.bson")
    else
        f = joinpath("./RL_models_fast/", "cf_sac_$t.bson")
    end;
    @save f model
    println("parameters at step $t saved to $f")
end

saveModel (generic function with 1 method)

In [56]:
function loadModel()
    f = joinpath("./RL_models_slow/", "cf_sac_$(load_from_slow_step).bson")
    @load f model
    return model
end

loadModel (generic function with 1 method)

In [57]:
function validate_policy(t, agent, env)
    # for validation extract the policy from the agend
    run(agent.policy, test_env, StopAfterEpisode(1), 
        ComposedHook(
        episode_test_step_hook, 
        episode_test_reward_hook
    ),
        )
    # the result of the hook
    reward = round((episode_test_reward_hook.rewards[end]),digits = 3)
    length = episode_test_step_hook.steps[end-1]
    
    println("step: ", t, " reward : ",reward, " length: ", length)

    with_logger(logger) do
        @info "evaluating" avg_length = length  avg_reward = reward  log_step_increment = 0
    end
    end;

episode_test_reward_hook = TotalRewardPerEpisode( is_display_on_exit=false)
episode_test_step_hook = StepsPerEpisode()
# create a env only for reward test

test_env = VtolEnv(;name = "test_cf", visualization = true, realtime = true);

In [58]:
# number of steps
steps_slow = 80_000_000
steps_fast = 80_000_000
load_from_slow_step = 20_000_000 # TODO: choose slow model

save_freq = 500_000
validate_freq = 250_000

steps = 0
if SLOW_MODE
    steps = steps_slow
else
    steps = steps_fast
end

80000000

In [59]:
# Define hook which is called during the training
total_reward_per_episode = TotalRewardPerEpisode(is_display_on_exit = false)
hook = ComposedHook(
    #total_batch_reward_per_episode,
    DoEveryNStep(saveModel, n=save_freq),
    DoEveryNStep(validate_policy, n=validate_freq),
    #=
    DoEveryNStep() do t, agent, env
        p = agent.policy
        with_logger(logger) do
            @info "training" loss = mean(p.loss)  actor_loss = mean(p.actor_loss)  critic_loss = mean(p.critic_loss)
        end
    end,
    =#
    DoEveryNStep() do t, agent, env
        with_logger(logger) do
            if length(total_reward_per_episode.rewards) > 1
                @info "training" total_reward_per_episode.rewards[end]
            end
        end
    end,
    #=
    DoEveryNStep() do t, agent, env
        with_logger(logger) do
            @info "training" action_thrust_1 = env[1].action[1]  action_thrust_2 = env[1].action[2] action_thrust_3 = env[1].action[3] action_thrust_4 = env[1].action[4]
        end
    end,
    =#
);

In [60]:
#todo load model
if !SLOW_MODE
    agent.policy.approximator = loadModel(); 
end;

In [None]:
if TRAINING
    ReinforcementLearning.run(
        agent,
        env,
        StopAfterStep(steps),
        hook
    )
end

[32mProgress:   0%|▏                                        |  ETA: 10:21:15[39m22[39mm

step: 250000 reward : 0.312 length: 6


[32mProgress:   1%|▎                                        |  ETA: 7:19:40[39mm

parameters at step 500000 saved to ./RL_models_slow/cf_sac_500000.bson
step: 500000 reward : 0.022 length: 11


[32mProgress:   1%|▍                                        |  ETA: 6:29:39[39m

step: 750000 reward : -0.054 length: 10


[32mProgress:   1%|▌                                        |  ETA: 6:07:47[39m

parameters at step 1000000 saved to ./RL_models_slow/cf_sac_1000000.bson
step: 1000000 reward : -0.097 length: 9


[32mProgress:   1%|▋                                        |  ETA: 5:58:11[39m

step: 1250000 reward : 0.145 length: 9


[32mProgress:   2%|▊                                        |  ETA: 5:58:58[39m

parameters at step 1500000 saved to ./RL_models_slow/cf_sac_1500000.bson
step: 1500000 reward : 0.004 length: 7


[32mProgress:   2%|▉                                        |  ETA: 5:45:53[39m

step: 1750000 reward : 0.148 length: 8


[32mProgress:   2%|█                                        |  ETA: 5:36:21[39m

parameters at step 2000000 saved to ./RL_models_slow/cf_sac_2000000.bson
step: 2000000 reward : 0.122 length: 6


[32mProgress:   3%|█▏                                       |  ETA: 5:29:41[39m

step: 2250000 reward : 0.086 length: 9


[32mProgress:   3%|█▎                                       |  ETA: 5:23:10[39m

parameters at step 2500000 saved to ./RL_models_slow/cf_sac_2500000.bson
step: 2500000 reward : 0.316 length: 9


[32mProgress:   3%|█▍                                       |  ETA: 5:19:00[39m

step: 2750000 reward : -0.146 length: 8


[32mProgress:   4%|█▌                                       |  ETA: 5:20:10[39m

parameters at step 3000000 saved to ./RL_models_slow/cf_sac_3000000.bson
step: 3000000 reward : 0.011 length: 7


[32mProgress:   4%|█▋                                       |  ETA: 5:30:28[39m

step: 3250000 reward : 0.217 length: 7


[32mProgress:   4%|█▊                                       |  ETA: 5:36:44[39m

parameters at step 3500000 saved to ./RL_models_slow/cf_sac_3500000.bson
step: 3500000 reward : -0.003 length: 10


[32mProgress:   5%|█▉                                       |  ETA: 5:40:04[39m

step: 3750000 reward : -0.025 length: 10


[32mProgress:   5%|██                                       |  ETA: 5:42:33[39m

parameters at step 4000000 saved to ./RL_models_slow/cf_sac_4000000.bson
step: 4000000 reward : -0.062 length: 10


[32mProgress:   5%|██▏                                      |  ETA: 5:44:12[39m

step: 4250000 reward : 0.114 length: 8


[32mProgress:   6%|██▎                                      |  ETA: 5:44:46[39m

parameters at step 4500000 saved to ./RL_models_slow/cf_sac_4500000.bson
step: 4500000 reward : 0.128 length: 6


[32mProgress:   6%|██▍                                      |  ETA: 5:45:16[39m

step: 4750000 reward : -0.519 length: 12


[32mProgress:   6%|██▌                                      |  ETA: 5:45:11[39m

parameters at step 5000000 saved to ./RL_models_slow/cf_sac_5000000.bson
step: 5000000 reward : -0.598 length: 14


[32mProgress:   7%|██▊                                      |  ETA: 5:45:08[39m

step: 5250000 reward : -0.297 length: 7


[32mProgress:   7%|██▉                                      |  ETA: 5:49:55[39m

parameters at step 5500000 saved to ./RL_models_slow/cf_sac_5500000.bson
step: 5500000 reward : -0.029 length: 8


[32mProgress:   7%|███                                      |  ETA: 21:16:40[39m

step: 5750000 reward : -0.002 length: 8


[32mProgress:   7%|███▏                                     |  ETA: 20:48:50[39m

parameters at step 6000000 saved to ./RL_models_slow/cf_sac_6000000.bson
step: 6000000 reward : 0.135 length: 9


[32mProgress:   8%|███▎                                     |  ETA: 20:19:07[39m

step: 6250000 reward : 0.125 length: 7


[32mProgress:   8%|███▍                                     |  ETA: 19:53:26[39m

parameters at step 6500000 saved to ./RL_models_slow/cf_sac_6500000.bson
step: 6500000 reward : -0.003 length: 13


[32mProgress:   8%|███▌                                     |  ETA: 19:28:47[39m

step: 6750000 reward : 0.204 length: 8


[32mProgress:   9%|███▋                                     |  ETA: 19:01:55[39m

parameters at step 7000000 saved to ./RL_models_slow/cf_sac_7000000.bson
step: 7000000 reward : -0.109 length: 9


[32mProgress:   9%|███▊                                     |  ETA: 18:35:30[39m

step: 7250000 reward : -0.064 length: 9


[32mProgress:   9%|███▉                                     |  ETA: 18:11:11[39m

parameters at step 7500000 saved to ./RL_models_slow/cf_sac_7500000.bson
step: 7500000 reward : -0.03 length: 11


[32mProgress:  10%|████                                     |  ETA: 17:46:27[39m

step: 7750000 reward : 0.039 length: 7


[32mProgress:  10%|████▏                                    |  ETA: 17:24:52[39m

parameters at step 8000000 saved to ./RL_models_slow/cf_sac_8000000.bson
step: 8000000 reward : -0.265 length: 8


[32mProgress:  10%|████▎                                    |  ETA: 17:04:09[39m

step: 8250000 reward : 0.073 length: 11


[32mProgress:  11%|████▍                                    |  ETA: 16:45:42[39m

parameters at step 8500000 saved to ./RL_models_slow/cf_sac_8500000.bson
step: 8500000 reward : -0.46 length: 18


[32mProgress:  11%|████▌                                    |  ETA: 16:27:28[39m

step: 8750000 reward : 0.105 length: 9


[32mProgress:  11%|████▋                                    |  ETA: 16:12:43[39m

parameters at step 9000000 saved to ./RL_models_slow/cf_sac_9000000.bson


[32mProgress:  11%|████▋                                    |  ETA: 16:12:42[39m

step: 9000000 reward : 0.134 length: 7


[32mProgress:  12%|████▊                                    |  ETA: 15:56:11[39m

step: 9250000 reward : 0.175 length: 12


[32mProgress:  12%|████▉                                    |  ETA: 15:37:07[39m

parameters at step 9500000 saved to ./RL_models_slow/cf_sac_9500000.bson
step: 9500000 reward : -0.607 length: 15


[32mProgress:  12%|█████                                    |  ETA: 15:19:18[39m

step: 9750000 reward : 0.039 length: 9


[32mProgress:  12%|█████▏                                   |  ETA: 15:02:04[39m

parameters at step 10000000 saved to ./RL_models_slow/cf_sac_10000000.bson
step: 10000000 reward : 0.176 length: 10


[32mProgress:  13%|█████▎                                   |  ETA: 14:47:20[39m

step: 10250000 reward : 0.032 length: 9


[32mProgress:  13%|█████▍                                   |  ETA: 14:32:01[39m

parameters at step 10500000 saved to ./RL_models_slow/cf_sac_10500000.bson
step: 10500000 reward : -0.089 length: 8


[32mProgress:  13%|█████▌                                   |  ETA: 14:17:12[39m

step: 10750000 reward : -0.004 length: 7


[32mProgress:  14%|█████▋                                   |  ETA: 14:03:22[39m

parameters at step 11000000 saved to ./RL_models_slow/cf_sac_11000000.bson
step: 11000000 reward : 0.2 length: 7


[32mProgress:  14%|█████▊                                   |  ETA: 13:49:31[39m

step: 11250000 reward : 0.125 length: 11


[32mProgress:  14%|█████▉                                   |  ETA: 13:36:17[39m

parameters at step 11500000 saved to ./RL_models_slow/cf_sac_11500000.bson
step: 11500000 reward : 0.287 length: 6


[32mProgress:  15%|██████                                   |  ETA: 13:25:23[39m

step: 11750000 reward : 0.406 length: 12


[32mProgress:  15%|██████▏                                  |  ETA: 13:14:00[39m

parameters at step 12000000 saved to ./RL_models_slow/cf_sac_12000000.bson
step: 12000000 reward : -0.128 length: 8


[32mProgress:  15%|██████▎                                  |  ETA: 13:02:35[39m

step: 12250000 reward : 0.056 length: 7


[32mProgress:  16%|██████▍                                  |  ETA: 12:53:01[39m

parameters at step 12500000 saved to ./RL_models_slow/cf_sac_12500000.bson
step: 12500000 reward : 0.074 length: 10


[32mProgress:  16%|██████▌                                  |  ETA: 12:42:17[39m

step: 12750000 reward : 0.066 length: 7


[32mProgress:  16%|██████▋                                  |  ETA: 12:29:57[39m

parameters at step 13000000 saved to ./RL_models_slow/cf_sac_13000000.bson
step: 13000000 reward : 0.199 length: 7


[32mProgress:  17%|██████▊                                  |  ETA: 12:17:25[39m

step: 13250000 reward : -0.03 length: 9


[32mProgress:  17%|██████▉                                  |  ETA: 12:05:38[39m

parameters at step 13500000 saved to ./RL_models_slow/cf_sac_13500000.bson
step: 13500000 reward : 0.198 length: 8


[32mProgress:  17%|███████                                  |  ETA: 11:54:49[39m

step: 13750000 reward : 0.301 length: 6


[32mProgress:  17%|███████▏                                 |  ETA: 11:44:08[39m

parameters at step 14000000 saved to ./RL_models_slow/cf_sac_14000000.bson
step: 14000000 reward : 0.045 length: 7


[32mProgress:  18%|███████▎                                 |  ETA: 11:33:43[39m

step: 14250000 reward : -0.029 length: 11


[32mProgress:  18%|███████▍                                 |  ETA: 11:23:57[39m

parameters at step 14500000 saved to ./RL_models_slow/cf_sac_14500000.bson
step: 14500000 reward : 0.075 length: 9


[32mProgress:  18%|███████▌                                 |  ETA: 11:14:03[39m

step: 14750000 reward : 0.155 length: 8


[32mProgress:  19%|███████▋                                 |  ETA: 11:04:05[39m

parameters at step 15000000 saved to ./RL_models_slow/cf_sac_15000000.bson
step: 15000000 reward : 0.056 length: 9


[32mProgress:  19%|███████▉                                 |  ETA: 10:54:23[39m

step: 15250000 reward : 0.097 length: 8


[32mProgress:  19%|████████                                 |  ETA: 10:45:37[39m

parameters at step 15500000 saved to ./RL_models_slow/cf_sac_15500000.bson
step: 15500000 reward : 0.356 length: 7


[32mProgress:  20%|████████▏                                |  ETA: 10:36:51[39m

step: 15750000 reward : 0.196 length: 8


[32mProgress:  20%|████████▎                                |  ETA: 10:28:27[39m

parameters at step 16000000 saved to ./RL_models_slow/cf_sac_16000000.bson
step: 16000000 reward : 0.15 length: 8


[32mProgress:  20%|████████▍                                |  ETA: 10:20:02[39m

step: 16250000 reward : 0.299 length: 8


[32mProgress:  21%|████████▌                                |  ETA: 10:11:48[39m

parameters at step 16500000 saved to ./RL_models_slow/cf_sac_16500000.bson
step: 16500000 reward : -0.031 length: 9


[32mProgress:  21%|████████▋                                |  ETA: 10:03:23[39m

step: 16750000 reward : -0.372 length: 10


[32mProgress:  21%|████████▊                                |  ETA: 9:55:00[39mm

parameters at step 17000000 saved to ./RL_models_slow/cf_sac_17000000.bson
step: 17000000 reward : -0.195 length: 10


[32mProgress:  22%|████████▉                                |  ETA: 9:46:42[39m

step: 17250000 reward : -0.221 length: 14


[32mProgress:  22%|█████████                                |  ETA: 9:38:54[39m

parameters at step 17500000 saved to ./RL_models_slow/cf_sac_17500000.bson
step: 17500000 reward : -0.17 length: 8


[32mProgress:  22%|█████████▏                               |  ETA: 9:31:25[39m

step: 17750000 reward : -0.161 length: 10


[32mProgress:  22%|█████████▎                               |  ETA: 9:24:05[39m

parameters at step 18000000 saved to ./RL_models_slow/cf_sac_18000000.bson
step: 18000000 reward : -0.118 length: 7


[32mProgress:  23%|█████████▍                               |  ETA: 9:16:38[39m

step: 18250000 reward : 0.025 length: 8


[32mProgress:  23%|█████████▌                               |  ETA: 9:09:41[39m

parameters at step 18500000 saved to ./RL_models_slow/cf_sac_18500000.bson
step: 18500000 reward : -0.255 length: 10


[32mProgress:  23%|█████████▋                               |  ETA: 9:02:54[39m

step: 18750000 reward : -0.157 length: 9


[32mProgress:  24%|█████████▊                               |  ETA: 8:56:15[39m

parameters at step 19000000 saved to ./RL_models_slow/cf_sac_19000000.bson
step: 19000000 reward : 0.007 length: 10


[32mProgress:  24%|█████████▉                               |  ETA: 8:50:05[39m

step: 19250000 reward : -0.134 length: 9


[32mProgress:  24%|██████████                               |  ETA: 8:43:51[39m

parameters at step 19500000 saved to ./RL_models_slow/cf_sac_19500000.bson
step: 19500000 reward : -0.114 length: 7


[32mProgress:  25%|██████████▏                              |  ETA: 8:37:39[39m

step: 19750000 reward : -0.026 length: 7


[32mProgress:  25%|██████████▎                              |  ETA: 8:31:39[39m

parameters at step 20000000 saved to ./RL_models_slow/cf_sac_20000000.bson
step: 20000000 reward : -0.202 length: 8


[32mProgress:  25%|██████████▍                              |  ETA: 8:25:42[39m

step: 20250000 reward : 0.08 length: 8


[32mProgress:  26%|██████████▌                              |  ETA: 8:19:59[39m

parameters at step 20500000 saved to ./RL_models_slow/cf_sac_20500000.bson
step: 20500000 reward : 0.028 length: 9


[32mProgress:  26%|██████████▋                              |  ETA: 8:14:32[39m

step: 20750000 reward : -0.239 length: 11


[32mProgress:  26%|██████████▊                              |  ETA: 8:09:05[39m

parameters at step 21000000 saved to ./RL_models_slow/cf_sac_21000000.bson
step: 21000000 reward : -0.072 length: 9


[32mProgress:  27%|██████████▉                              |  ETA: 8:03:40[39m

step: 21250000 reward : -0.162 length: 12


[32mProgress:  27%|███████████                              |  ETA: 7:58:19[39m

parameters at step 21500000 saved to ./RL_models_slow/cf_sac_21500000.bson
step: 21500000 reward : -0.023 length: 8


[32mProgress:  27%|███████████▏                             |  ETA: 7:53:07[39m

step: 21750000 reward : -0.007 length: 8


[32mProgress:  27%|███████████▎                             |  ETA: 7:48:10[39m

parameters at step 22000000 saved to ./RL_models_slow/cf_sac_22000000.bson
step: 22000000 reward : -0.046 length: 12


[32mProgress:  28%|███████████▍                             |  ETA: 7:43:13[39m

step: 22250000 reward : 0.105 length: 9


[32mProgress:  28%|███████████▌                             |  ETA: 7:38:22[39m

parameters at step 22500000 saved to ./RL_models_slow/cf_sac_22500000.bson
step: 22500000 reward : -0.066 length: 13


[32mProgress:  28%|███████████▋                             |  ETA: 7:33:38[39m

step: 22750000 reward : 0.157 length: 8


[32mProgress:  29%|███████████▊                             |  ETA: 7:29:00[39m

parameters at step 23000000 saved to ./RL_models_slow/cf_sac_23000000.bson
step: 23000000 reward : 0.139 length: 7


[32mProgress:  29%|███████████▉                             |  ETA: 7:24:20[39m

step: 23250000 reward : -0.014 length: 9


[32mProgress:  29%|████████████                             |  ETA: 7:20:23[39m

parameters at step 23500000 saved to ./RL_models_slow/cf_sac_23500000.bson
step: 23500000 reward : -0.121 length: 8


[32mProgress:  30%|████████████▏                            |  ETA: 7:18:27[39m

step: 23750000 reward : 0.028 length: 10


[32mProgress:  30%|████████████▎                            |  ETA: 7:14:09[39m

parameters at step 24000000 saved to ./RL_models_slow/cf_sac_24000000.bson
step: 24000000 reward : 0.158 length: 8


[32mProgress:  30%|████████████▍                            |  ETA: 7:10:08[39m

step: 24250000 reward : -0.326 length: 8


[32mProgress:  31%|████████████▌                            |  ETA: 7:06:14[39m

parameters at step 24500000 saved to ./RL_models_slow/cf_sac_24500000.bson
step: 24500000 reward : 0.179 length: 8


[32mProgress:  31%|████████████▋                            |  ETA: 7:02:06[39m

step: 24750000 reward : 0.167 length: 9


[32mProgress:  31%|████████████▊                            |  ETA: 6:58:01[39m

parameters at step 25000000 saved to ./RL_models_slow/cf_sac_25000000.bson
step: 25000000 reward : -0.257 length: 13


[32mProgress:  32%|█████████████                            |  ETA: 6:54:20[39m

step: 25250000 reward : 0.132 length: 11


[32mProgress:  32%|█████████████▏                           |  ETA: 6:50:13[39m

parameters at step 25500000 saved to ./RL_models_slow/cf_sac_25500000.bson
step: 25500000 reward : 0.121 length: 9


[32mProgress:  32%|█████████████▎                           |  ETA: 6:46:11[39m

step: 25750000 reward : 0.113 length: 8


[32mProgress:  32%|█████████████▍                           |  ETA: 6:42:19[39m

parameters at step 26000000 saved to ./RL_models_slow/cf_sac_26000000.bson
step: 26000000 reward : 0.01 length: 8


[32mProgress:  33%|█████████████▌                           |  ETA: 6:38:21[39m

step: 26250000 reward : 0.002 length: 9


[32mProgress:  33%|█████████████▋                           |  ETA: 6:34:27[39m

parameters at step 26500000 saved to ./RL_models_slow/cf_sac_26500000.bson
step: 26500000 reward : 0.017 length: 10


[32mProgress:  33%|█████████████▊                           |  ETA: 6:30:39[39m

step: 26750000 reward : -0.102 length: 8


[32mProgress:  34%|█████████████▉                           |  ETA: 6:26:57[39m

parameters at step 27000000 saved to ./RL_models_slow/cf_sac_27000000.bson
step: 27000000 reward : -0.249 length: 8


[32mProgress:  34%|██████████████                           |  ETA: 6:23:30[39m

step: 27250000 reward : 0.175 length: 7


[32mProgress:  34%|██████████████▏                          |  ETA: 6:20:03[39m

parameters at step 27500000 saved to ./RL_models_slow/cf_sac_27500000.bson
step: 27500000 reward : 0.137 length: 7


[32mProgress:  35%|██████████████▎                          |  ETA: 6:16:34[39m

step: 27750000 reward : -0.15 length: 9


[32mProgress:  35%|██████████████▍                          |  ETA: 6:13:08[39m

parameters at step 28000000 saved to ./RL_models_slow/cf_sac_28000000.bson
step: 28000000 reward : 0.001 length: 10


[32mProgress:  35%|██████████████▌                          |  ETA: 6:09:35[39m

step: 28250000 reward : 0.137 length: 7


[32mProgress:  36%|██████████████▋                          |  ETA: 6:06:09[39m

parameters at step 28500000 saved to ./RL_models_slow/cf_sac_28500000.bson


[32mProgress:  36%|██████████████▋                          |  ETA: 6:06:07[39m

step: 28500000 reward : -0.034 length: 9


[32mProgress:  36%|██████████████▊                          |  ETA: 6:02:48[39m

step: 28750000 reward : 0.027 length: 8


[32mProgress:  36%|██████████████▉                          |  ETA: 5:59:50[39m

parameters at step 29000000 saved to ./RL_models_slow/cf_sac_29000000.bson
step: 29000000 reward : -0.301 length: 7


[32mProgress:  37%|███████████████                          |  ETA: 5:56:36[39m

step: 29250000 reward : -0.21 length: 17


[32mProgress:  37%|███████████████▏                         |  ETA: 5:53:30[39m

parameters at step 29500000 saved to ./RL_models_slow/cf_sac_29500000.bson
step: 29500000 reward : -0.067 length: 9


[32mProgress:  37%|███████████████▎                         |  ETA: 5:50:48[39m

step: 29750000 reward : 0.123 length: 7


[32mProgress:  37%|███████████████▍                         |  ETA: 5:47:44[39m

parameters at step 30000000 saved to ./RL_models_slow/cf_sac_30000000.bson
step: 30000000 reward : -0.32 length: 12


[32mProgress:  38%|███████████████▌                         |  ETA: 5:44:40[39m

step: 30250000 reward : 0.083 length: 7


[32mProgress:  38%|███████████████▋                         |  ETA: 5:41:34[39m

parameters at step 30500000 saved to ./RL_models_slow/cf_sac_30500000.bson
step: 30500000 reward : 0.074 length: 9


[32mProgress:  38%|███████████████▊                         |  ETA: 5:38:46[39m

step: 30750000 reward : 0.032 length: 11


[32mProgress:  39%|███████████████▉                         |  ETA: 5:36:37[39m

parameters at step 31000000 saved to ./RL_models_slow/cf_sac_31000000.bson


[32mProgress:  39%|███████████████▉                         |  ETA: 5:36:36[39m

step: 31000000 reward : 0.313 length: 9


[32mProgress:  39%|████████████████                         |  ETA: 5:35:06[39m

step: 31250000 reward : -0.054 length: 9


[32mProgress:  39%|████████████████▏                        |  ETA: 5:33:40[39m

parameters at step 31500000 saved to ./RL_models_slow/cf_sac_31500000.bson
step: 31500000 reward : 0.122 length: 8


[32mProgress:  40%|████████████████▎                        |  ETA: 5:31:30[39m

step: 31750000 reward : -0.055 length: 11


[32mProgress:  40%|████████████████▍                        |  ETA: 5:29:40[39m

parameters at step 32000000 saved to ./RL_models_slow/cf_sac_32000000.bson
step: 32000000 reward : 0.143 length: 8


[32mProgress:  40%|████████████████▌                        |  ETA: 5:28:29[39m

step: 32250000 reward : 0.035 length: 10


[32mProgress:  41%|████████████████▋                        |  ETA: 5:26:42[39m

parameters at step 32500000 saved to ./RL_models_slow/cf_sac_32500000.bson
step: 32500000 reward : 0.056 length: 8


[32mProgress:  41%|████████████████▊                        |  ETA: 5:24:25[39m

step: 32750000 reward : 0.124 length: 7


[32mProgress:  41%|████████████████▉                        |  ETA: 5:22:28[39m

parameters at step 33000000 saved to ./RL_models_slow/cf_sac_33000000.bson
step: 33000000 reward : 0.155 length: 8


[32mProgress:  42%|█████████████████                        |  ETA: 5:20:26[39m

step: 33250000 reward : -0.013 length: 8


[32mProgress:  42%|█████████████████▏                       |  ETA: 5:18:54[39m

parameters at step 33500000 saved to ./RL_models_slow/cf_sac_33500000.bson
step: 33500000 reward : -0.125 length: 9


[32mProgress:  42%|█████████████████▎                       |  ETA: 5:17:20[39m

step: 33750000 reward : 0.065 length: 8


[32mProgress:  42%|█████████████████▍                       |  ETA: 5:16:12[39m

parameters at step 34000000 saved to ./RL_models_slow/cf_sac_34000000.bson


[32mProgress:  43%|█████████████████▍                       |  ETA: 5:16:12[39m

step: 34000000 reward : 0.139 length: 8


[32mProgress:  43%|█████████████████▌                       |  ETA: 5:15:05[39m

step: 34250000 reward : -0.105 length: 9


[32mProgress:  43%|█████████████████▋                       |  ETA: 5:14:11[39m

parameters at step 34500000 saved to ./RL_models_slow/cf_sac_34500000.bson
step: 34500000 reward : -0.139 length: 9


[32mProgress:  43%|█████████████████▊                       |  ETA: 5:12:46[39m

step: 34750000 reward : -0.069 length: 8


[32mProgress:  44%|█████████████████▉                       |  ETA: 5:11:23[39m

parameters at step 35000000 saved to ./RL_models_slow/cf_sac_35000000.bson
step: 35000000 reward : 0.195 length: 9


[32mProgress:  44%|██████████████████▏                      |  ETA: 5:10:09[39m

step: 35250000 reward : -0.043 length: 7


[32mProgress:  44%|██████████████████▎                      |  ETA: 5:08:35[39m

parameters at step 35500000 saved to ./RL_models_slow/cf_sac_35500000.bson
step: 35500000 reward : -0.018 length: 7


[32mProgress:  45%|██████████████████▍                      |  ETA: 5:07:15[39m

step: 35750000 reward : 0.166 length: 10


[32mProgress:  45%|██████████████████▌                      |  ETA: 5:06:02[39m

parameters at step 36000000 saved to ./RL_models_slow/cf_sac_36000000.bson
step: 36000000 reward : 0.271 length: 10


[32mProgress:  45%|██████████████████▋                      |  ETA: 5:04:43[39m

step: 36250000 reward : 0.258 length: 8


[32mProgress:  46%|██████████████████▊                      |  ETA: 5:03:16[39m

parameters at step 36500000 saved to ./RL_models_slow/cf_sac_36500000.bson


[32mProgress:  46%|██████████████████▊                      |  ETA: 5:03:16[39m

step: 36500000 reward : -0.162 length: 14


[32mProgress:  46%|██████████████████▉                      |  ETA: 5:02:03[39m

step: 36750000 reward : 0.062 length: 9


[32mProgress:  46%|███████████████████                      |  ETA: 5:00:42[39m

parameters at step 37000000 saved to ./RL_models_slow/cf_sac_37000000.bson


[32mProgress:  46%|███████████████████                      |  ETA: 5:00:42[39m

step: 37000000 reward : -0.354 length: 8


[32mProgress:  47%|███████████████████▏                     |  ETA: 4:59:12[39m

step: 37250000 reward : -0.423 length: 8


[32mProgress:  47%|███████████████████▎                     |  ETA: 4:57:57[39m

parameters at step 37500000 saved to ./RL_models_slow/cf_sac_37500000.bson
step: 37500000 reward : -0.083 length: 7


[32mProgress:  47%|███████████████████▍                     |  ETA: 4:56:03[39m

step: 37750000 reward : 0.051 length: 8


[32mProgress:  47%|███████████████████▌                     |  ETA: 4:53:44[39m

parameters at step 38000000 saved to ./RL_models_slow/cf_sac_38000000.bson
step: 38000000 reward : 0.085 length: 9


[32mProgress:  48%|███████████████████▋                     |  ETA: 4:51:31[39m

step: 38250000 reward : -0.026 length: 10


[32mProgress:  48%|███████████████████▊                     |  ETA: 4:49:20[39m

parameters at step 38500000 saved to ./RL_models_slow/cf_sac_38500000.bson


[32mProgress:  48%|███████████████████▊                     |  ETA: 4:49:18[39m

step: 38500000 reward : 0.016 length: 10


[32mProgress:  48%|███████████████████▉                     |  ETA: 4:46:59[39m

step: 38750000 reward : -0.011 length: 7


[32mProgress:  49%|████████████████████                     |  ETA: 4:44:27[39m

parameters at step 39000000 saved to ./RL_models_slow/cf_sac_39000000.bson
step: 39000000 reward : 0.228 length: 8


[32mProgress:  49%|████████████████████▏                    |  ETA: 4:41:56[39m

step: 39250000 reward : -0.308 length: 14


[32mProgress:  49%|████████████████████▎                    |  ETA: 4:39:24[39m

parameters at step 39500000 saved to ./RL_models_slow/cf_sac_39500000.bson
step: 39500000 reward : -0.024 length: 9


[32mProgress:  50%|████████████████████▍                    |  ETA: 4:37:16[39m

step: 39750000 reward : 0.144 length: 7


[32mProgress:  50%|████████████████████▌                    |  ETA: 4:35:31[39m

parameters at step 40000000 saved to ./RL_models_slow/cf_sac_40000000.bson
step: 40000000 reward : 0.031 length: 7


[32mProgress:  50%|████████████████████▋                    |  ETA: 4:33:24[39m

step: 40250000 reward : -0.01 length: 13


[32mProgress:  51%|████████████████████▊                    |  ETA: 4:30:58[39m

parameters at step 40500000 saved to ./RL_models_slow/cf_sac_40500000.bson
step: 40500000 reward : 0.078 length: 7


[32mProgress:  51%|████████████████████▉                    |  ETA: 4:28:29[39m

step: 40750000 reward : 0.085 length: 7


[32mProgress:  51%|█████████████████████                    |  ETA: 4:26:07[39m

parameters at step 41000000 saved to ./RL_models_slow/cf_sac_41000000.bson
step: 41000000 reward : -0.098 length: 12


[32mProgress:  52%|█████████████████████▏                   |  ETA: 4:23:46[39m

step: 41250000 reward : -0.455 length: 11


[32mProgress:  52%|█████████████████████▎                   |  ETA: 4:21:22[39m

parameters at step 41500000 saved to ./RL_models_slow/cf_sac_41500000.bson
step: 41500000 reward : -0.03 length: 12


[32mProgress:  52%|█████████████████████▍                   |  ETA: 4:19:01[39m

step: 41750000 reward : 0.102 length: 8


[32mProgress:  52%|█████████████████████▌                   |  ETA: 4:16:39[39m

parameters at step 42000000 saved to ./RL_models_slow/cf_sac_42000000.bson
step: 42000000 reward : 0.214 length: 8


[32mProgress:  53%|█████████████████████▋                   |  ETA: 4:14:19[39m

step: 42250000 reward : 0.168 length: 10


[32mProgress:  53%|█████████████████████▊                   |  ETA: 4:12:00[39m

parameters at step 42500000 saved to ./RL_models_slow/cf_sac_42500000.bson
step: 42500000 reward : 0.123 length: 8


[32mProgress:  53%|█████████████████████▉                   |  ETA: 4:09:39[39m

step: 42750000 reward : -0.07 length: 9


[32mProgress:  54%|██████████████████████                   |  ETA: 4:07:16[39m

parameters at step 43000000 saved to ./RL_models_slow/cf_sac_43000000.bson
step: 43000000 reward : 0.017 length: 12


[32mProgress:  54%|██████████████████████▏                  |  ETA: 4:04:53[39m

step: 43250000 reward : 0.162 length: 11


[32mProgress:  54%|██████████████████████▎                  |  ETA: 4:02:32[39m

parameters at step 43500000 saved to ./RL_models_slow/cf_sac_43500000.bson
step: 43500000 reward : -0.162 length: 10


[32mProgress:  55%|██████████████████████▍                  |  ETA: 4:00:11[39m

step: 43750000 reward : -0.093 length: 9


[32mProgress:  55%|██████████████████████▌                  |  ETA: 3:58:09[39m

parameters at step 44000000 saved to ./RL_models_slow/cf_sac_44000000.bson
step: 44000000 reward : 0.054 length: 8


[32mProgress:  55%|██████████████████████▋                  |  ETA: 3:56:24[39m

step: 44250000 reward : 0.165 length: 9


[32mProgress:  56%|██████████████████████▊                  |  ETA: 3:54:37[39m

parameters at step 44500000 saved to ./RL_models_slow/cf_sac_44500000.bson
step: 44500000 reward : -0.097 length: 7


[32mProgress:  56%|██████████████████████▉                  |  ETA: 3:52:52[39m

step: 44750000 reward : -0.119 length: 8


[32mProgress:  56%|███████████████████████                  |  ETA: 3:51:06[39m

parameters at step 45000000 saved to ./RL_models_slow/cf_sac_45000000.bson
step: 45000000 reward : 0.165 length: 7


[32mProgress:  57%|███████████████████████▎                 |  ETA: 3:49:21[39m

step: 45250000 reward : 0.128 length: 8


[32mProgress:  57%|███████████████████████▍                 |  ETA: 3:47:23[39m

parameters at step 45500000 saved to ./RL_models_slow/cf_sac_45500000.bson
step: 45500000 reward : -0.322 length: 8


[32mProgress:  57%|███████████████████████▌                 |  ETA: 3:45:19[39m

step: 45750000 reward : 0.135 length: 8


[32mProgress:  57%|███████████████████████▋                 |  ETA: 3:43:19[39m

parameters at step 46000000 saved to ./RL_models_slow/cf_sac_46000000.bson
step: 46000000 reward : -0.105 length: 9


[32mProgress:  58%|███████████████████████▊                 |  ETA: 3:41:28[39m

step: 46250000 reward : -0.099 length: 11


[32mProgress:  58%|███████████████████████▉                 |  ETA: 3:39:39[39m

parameters at step 46500000 saved to ./RL_models_slow/cf_sac_46500000.bson
step: 46500000 reward : 0.064 length: 9


[32mProgress:  58%|████████████████████████                 |  ETA: 3:37:42[39m

step: 46750000 reward : 0.066 length: 8


[32mProgress:  59%|████████████████████████▏                |  ETA: 3:35:41[39m

parameters at step 47000000 saved to ./RL_models_slow/cf_sac_47000000.bson
step: 47000000 reward : -0.043 length: 11


[32mProgress:  59%|████████████████████████▎                |  ETA: 3:33:33[39m

step: 47250000 reward : 0.155 length: 10


[32mProgress:  59%|████████████████████████▍                |  ETA: 3:31:29[39m

parameters at step 47500000 saved to ./RL_models_slow/cf_sac_47500000.bson
step: 47500000 reward : -0.029 length: 11


[32mProgress:  60%|████████████████████████▌                |  ETA: 3:29:26[39m

step: 47750000 reward : -0.221 length: 6


[32mProgress:  60%|████████████████████████▋                |  ETA: 3:27:22[39m

parameters at step 48000000 saved to ./RL_models_slow/cf_sac_48000000.bson
step: 48000000 reward : 0.019 length: 8


[32mProgress:  60%|████████████████████████▊                |  ETA: 3:25:19[39m

step: 48250000 reward : 0.161 length: 7


[32mProgress:  61%|████████████████████████▉                |  ETA: 3:23:16[39m

parameters at step 48500000 saved to ./RL_models_slow/cf_sac_48500000.bson
step: 48500000 reward : 0.098 length: 8


[32mProgress:  61%|█████████████████████████                |  ETA: 3:21:17[39m

step: 48750000 reward : -0.053 length: 9


[32mProgress:  61%|█████████████████████████▏               |  ETA: 3:19:16[39m

parameters at step 49000000 saved to ./RL_models_slow/cf_sac_49000000.bson
step: 49000000 reward : -0.005 length: 7


[32mProgress:  62%|█████████████████████████▎               |  ETA: 3:17:19[39m

step: 49250000 reward : 0.002 length: 10


[32mProgress:  62%|█████████████████████████▍               |  ETA: 3:15:21[39m

parameters at step 49500000 saved to ./RL_models_slow/cf_sac_49500000.bson
step: 49500000 reward : -0.044 length: 8


[32mProgress:  62%|█████████████████████████▌               |  ETA: 3:13:20[39m

step: 49750000 reward : 0.047 length: 9


[32mProgress:  62%|█████████████████████████▋               |  ETA: 3:11:21[39m

parameters at step 50000000 saved to ./RL_models_slow/cf_sac_50000000.bson
step: 50000000 reward : -0.293 length: 10


[32mProgress:  63%|█████████████████████████▊               |  ETA: 3:09:25[39m

step: 50250000 reward : -0.398 length: 14


[32mProgress:  63%|█████████████████████████▉               |  ETA: 3:07:30[39m

parameters at step 50500000 saved to ./RL_models_slow/cf_sac_50500000.bson
step: 50500000 reward : -0.203 length: 14


[32mProgress:  63%|██████████████████████████               |  ETA: 3:05:34[39m

step: 50750000 reward : -0.253 length: 9


[32mProgress:  64%|██████████████████████████▏              |  ETA: 3:03:35[39m

parameters at step 51000000 saved to ./RL_models_slow/cf_sac_51000000.bson
step: 51000000 reward : -0.265 length: 10


[32mProgress:  64%|██████████████████████████▎              |  ETA: 3:01:42[39m

step: 51250000 reward : 0.004 length: 8


[32mProgress:  64%|██████████████████████████▍              |  ETA: 3:00:08[39m

parameters at step 51500000 saved to ./RL_models_slow/cf_sac_51500000.bson
step: 51500000 reward : -0.007 length: 8


[32mProgress:  65%|██████████████████████████▌              |  ETA: 2:58:35[39m

step: 51750000 reward : 0.2 length: 7


[32mProgress:  65%|██████████████████████████▋              |  ETA: 2:57:02[39m

parameters at step 52000000 saved to ./RL_models_slow/cf_sac_52000000.bson
step: 52000000 reward : -0.056 length: 10


[32mProgress:  65%|██████████████████████████▊              |  ETA: 2:55:16[39m

step: 52250000 reward : 0.353 length: 8


[32mProgress:  66%|██████████████████████████▉              |  ETA: 2:53:30[39m

parameters at step 52500000 saved to ./RL_models_slow/cf_sac_52500000.bson
step: 52500000 reward : -0.002 length: 10


[32mProgress:  66%|███████████████████████████              |  ETA: 2:51:43[39m

step: 52750000 reward : 0.203 length: 7


[32mProgress:  66%|███████████████████████████▏             |  ETA: 2:49:57[39m

parameters at step 53000000 saved to ./RL_models_slow/cf_sac_53000000.bson
step: 53000000 reward : 0.027 length: 12


[32mProgress:  67%|███████████████████████████▎             |  ETA: 2:48:14[39m

step: 53250000 reward : -0.071 length: 10


[32mProgress:  67%|███████████████████████████▍             |  ETA: 2:46:36[39m

parameters at step 53500000 saved to ./RL_models_slow/cf_sac_53500000.bson
step: 53500000 reward : 0.157 length: 9


[32mProgress:  67%|███████████████████████████▌             |  ETA: 2:44:43[39m

step: 53750000 reward : -0.188 length: 10


[32mProgress:  67%|███████████████████████████▋             |  ETA: 2:42:49[39m

parameters at step 54000000 saved to ./RL_models_slow/cf_sac_54000000.bson
step: 54000000 reward : -0.008 length: 9


[32mProgress:  68%|███████████████████████████▊             |  ETA: 2:40:53[39m

step: 54250000 reward : -0.223 length: 10


[32mProgress:  68%|███████████████████████████▉             |  ETA: 2:39:13[39m

parameters at step 54500000 saved to ./RL_models_slow/cf_sac_54500000.bson


[32mProgress:  68%|███████████████████████████▉             |  ETA: 2:39:13[39m

step: 54500000 reward : -0.313 length: 13


[32mProgress:  68%|████████████████████████████             |  ETA: 2:37:25[39m

step: 54750000 reward : 0.106 length: 11


[32mProgress:  69%|████████████████████████████▏            |  ETA: 2:35:36[39m

parameters at step 55000000 saved to ./RL_models_slow/cf_sac_55000000.bson
step: 55000000 reward : 0.221 length: 9


[32mProgress:  69%|████████████████████████████▍            |  ETA: 2:33:50[39m

step: 55250000 reward : 0.139 length: 8


[32mProgress:  69%|████████████████████████████▌            |  ETA: 2:32:18[39m

parameters at step 55500000 saved to ./RL_models_slow/cf_sac_55500000.bson
step: 55500000 reward : -0.083 length: 11


[32mProgress:  70%|████████████████████████████▋            |  ETA: 2:30:42[39m

step: 55750000 reward : 0.077 length: 9


[32mProgress:  70%|████████████████████████████▊            |  ETA: 2:29:14[39m

parameters at step 56000000 saved to ./RL_models_slow/cf_sac_56000000.bson
step: 56000000 reward : -0.014 length: 9


[32mProgress:  70%|████████████████████████████▉            |  ETA: 2:27:47[39m

step: 56250000 reward : -0.036 length: 11


[32mProgress:  71%|█████████████████████████████            |  ETA: 2:26:21[39m

parameters at step 56500000 saved to ./RL_models_slow/cf_sac_56500000.bson
step: 56500000 reward : 0.04 length: 11


[32mProgress:  71%|█████████████████████████████▏           |  ETA: 2:24:55[39m

step: 56750000 reward : -0.565 length: 12


[32mProgress:  71%|█████████████████████████████▎           |  ETA: 2:23:33[39m

parameters at step 57000000 saved to ./RL_models_slow/cf_sac_57000000.bson


[32mProgress:  71%|█████████████████████████████▎           |  ETA: 2:23:33[39m

step: 57000000 reward : 0.163 length: 12


[32mProgress:  71%|█████████████████████████████▎           |  ETA: 2:22:59[39m

In [None]:
if TRAINING
    plot(episode_test_reward_hook.rewards)
end

In [None]:
if TRAINING
    plot(episode_test_step_hook.steps[1:2:end])
end

In [33]:
close_visualization(); # closes the MeshCat visualization