In [4]:
import os
import time
from datetime import datetime
import argparse
import gymnasium as gym
import numpy as np
import torch as th
import pandas as pd
import csv

from gym_pybullet_drones.utils.Logger import Logger
from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary
from gym_pybullet_drones.utils.utils import sync, str2bool
from gym_pybullet_drones.utils.enums import ObservationType, ActionType, Physics

from policies import GaussianMLPPolicy
from server import Federated_RL

# Simulation defaults. Of these, only DEFAULT_OBS and DEFAULT_ACT are used by
# the later cells shown in this notebook; the remaining DEFAULT_* values are
# not referenced there.
DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = True
DEFAULT_OUTPUT_FOLDER = 'results'
DEFAULT_COLAB = False
DEFAULT_DYNAMICS = Physics('pyb_gnd_drag_dw') # pyb: Pybullet dynamics; dyn: Explicit Dynamics specified in BaseAviary.py
DEFAULT_WIND = np.array([0, 0.05, 0]) # units are in induced newtons
DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
DEFAULT_ACT = ActionType('rpm') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' or 'one_d_pid'
DEFAULT_AGENTS = 2
DEFAULT_MA = False

# Domain-randomization (DR) settings; these are replicated per agent in the
# setup cell and handed to Federated_RL.
DR = True
# [min, max] mass bounds; presumably kilograms (the span of 0.015 matches the
# 15 g payload note) -- TODO confirm against the environment.
MASS_RANGE = [0.027, 0.042] # Maximum recommended payload is 15g
# The training log reports this value as the maximum wind magnitude in Newtons.
WIND_RANGE = 0.005 # Inspired by literature

In [5]:
# One agent is created per entry in `algorithms`; every agent gets the same
# environment class and the same observation/action configuration.
algorithms = ['FedSVRPG-M', 'PPO', 'SAC', 'TD3']
num_agents = len(algorithms)
envs = [HoverAviary for _ in range(num_agents)]
env_kwargs = [dict(obs = DEFAULT_OBS, act = DEFAULT_ACT) for _ in range(num_agents)]
agent_names = algorithms

# Per-agent domain-randomization settings. These are always defined, so that
# turning DR off no longer raises NameError in the Federated_RL call below;
# the per-agent flag list carries the on/off state.
# NOTE(review): presumably Federated_RL ignores the thresholds for agents whose
# DR flag is False -- confirm in server.Federated_RL.
domain_randomizations = [DR for _ in range(num_agents)]
DR_episode_thresholds = [.5 for _ in range(num_agents)] # Probability of DR at each episode
DR_step_thresholds = [.3 for _ in range(num_agents)] # If DR episode, probability of wind at each step

mass_ranges = [MASS_RANGE for _ in range(num_agents)]
wind_ranges = [WIND_RANGE for _ in range(num_agents)]

# A throwaway environment is built only to read the space shapes; it is closed
# afterwards so its simulator connection is not left open for the whole run.
env_example = HoverAviary(**env_kwargs[0])
try:
    # Get the state size
    # assumes the spaces are 2-D with the feature width on axis 1 -- TODO confirm
    state_space = env_example.observation_space
    state_size = state_space.shape[1]
    # Get the action size
    action_space = env_example.action_space
    action_size = action_space.shape[1]
finally:
    env_example.close()

layers = [512, 512, 256, 128]
value_layers = [32, 32]
# Maintain consistent network structures
policy_kwargs = dict(activation_fn=th.nn.Tanh,
                     net_arch=dict(pi=layers, qf=value_layers))

print("State size:", state_size)
print("Action size:", action_size)

policy = GaussianMLPPolicy(input_size=state_size, output_size=action_size, hidden_layers=layers) # Will need some smarter way to initialize the policy within the model in the future
# ASSUMING ONE ALGORITHM SO FAR. WILL IMPLEMENT GENERAL STRUCTURE FOR DIVERSIFIED ALGORITHMS LATER

#### Train the model #######################################
model = Federated_RL(policy = policy,
                     envs = envs,
                     env_kwargs = env_kwargs,
                     num_agents = num_agents,
                     global_iterations = 20,
                     state_size = state_size,
                     action_size = action_size,
                     local_step_size = 1e-3,
                     policy_kwargs = policy_kwargs,
                     critic_net_aggregation = False,
                     critic_net = value_layers,
                     local_iterations = 10,
                     max_episode_length=2048,
                     agent_names = agent_names,
                     DR = domain_randomizations,
                     DR_episode_th = DR_episode_thresholds,
                     DR_step_th = DR_step_thresholds,
                     mass_ranges = mass_ranges,
                     wind_ranges = wind_ranges,
                     algorithms = algorithms)

State size: 72
Action size: 4


In [6]:
# Run the training loop of the Federated_RL instance built above; progress is
# printed to the cell output as it runs.
# NOTE(review): presumably the return value is the aggregated server-side
# model -- confirm in server.Federated_RL.learn. It is not used by the later
# cells shown in this notebook.
serverModel = model.learn()


Training agent FedSVRPG-M

Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.035672848348311186
GLOBAL ITERATION: 0
LOCAL ITERATION: 0

Episode Reward: 43.6539754487209

Importance sampling weight: 0.0010000000474974513

GLOBAL ITERATION: 0
LOCAL ITERATION: 1

Episode Reward: 73.76353250134098

Importance sampling weight: 0.0010000000474974513

Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.033335569654417904
GLOBAL ITERATION: 0
LOCAL ITERATION: 2

Episode Reward: 72.88044028650097

Importance sampling weight: 1.7999999523162842

Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.028158931993396037
GLOBAL ITERATION: 0
LOCAL ITERATIO



Eval num_timesteps=2048, episode_reward=22.25 +/- 0.53
Episode length: 16.20 +/- 0.40
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.02956938218724693
Eval num_timesteps=4096, episode_reward=24.64 +/- 0.00
Episode length: 18.00 +/- 0.00
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.039798587683300836
Eval num_timesteps=6144, episode_reward=26.03 +/- 0.01
Episode length: 19.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8192, episode_reward=31.55 +/- 0.00
Episode length: 23.00 +/- 0.00
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.038966393228548606
Eval 



Eval num_timesteps=2048, episode_reward=41.83 +/- 0.41
Episode length: 32.20 +/- 0.40
New best mean reward!
Eval num_timesteps=4096, episode_reward=37.27 +/- 0.00
Episode length: 28.00 +/- 0.00
Eval num_timesteps=6144, episode_reward=33.55 +/- 0.01
Episode length: 25.00 +/- 0.00
Eval num_timesteps=8192, episode_reward=31.08 +/- 0.00
Episode length: 23.00 +/- 0.00
Eval num_timesteps=10240, episode_reward=29.83 +/- 0.01
Episode length: 22.00 +/- 0.00
Eval num_timesteps=12288, episode_reward=29.91 +/- 0.00
Episode length: 22.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03211406489473048
Eval num_timesteps=14336, episode_reward=28.69 +/- 0.00
Episode length: 21.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03478060344604218



Eval num_timesteps=2048, episode_reward=44.34 +/- 0.16
Episode length: 35.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4096, episode_reward=43.88 +/- 0.38
Episode length: 33.80 +/- 0.40
Eval num_timesteps=6144, episode_reward=42.78 +/- 0.41
Episode length: 32.80 +/- 0.40
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.028824476545506482
Eval num_timesteps=8192, episode_reward=41.55 +/- 0.10
Episode length: 32.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.038730671769257326
Eval num_timesteps=10240, episode_reward=42.43 +/- 0.39
Episode length: 51.00 +/- 1.10
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03



Eval num_timesteps=2048, episode_reward=45.18 +/- 0.29
Episode length: 52.80 +/- 1.17
New best mean reward!
Eval num_timesteps=4096, episode_reward=46.57 +/- 0.04
Episode length: 53.80 +/- 0.40
New best mean reward!
Eval num_timesteps=6144, episode_reward=48.51 +/- 0.02
Episode length: 54.80 +/- 0.40
New best mean reward!
Eval num_timesteps=8192, episode_reward=50.16 +/- 0.05
Episode length: 56.80 +/- 0.40
New best mean reward!
Eval num_timesteps=10240, episode_reward=50.62 +/- 0.08
Episode length: 56.40 +/- 0.49
New best mean reward!
Eval num_timesteps=12288, episode_reward=52.80 +/- 0.05
Episode length: 56.00 +/- 0.00
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03969679666782459
Eval num_timesteps=14336, episode_reward=50.04 +/- 0.03
Episode length: 53.00 +/- 0.00
Eval num_timesteps=16384, episode_reward=44.48 +/- 0.01
Episode length: 34.0



Eval num_timesteps=2048, episode_reward=50.16 +/- 0.29
Episode length: 63.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4096, episode_reward=47.13 +/- 0.13
Episode length: 58.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.04189890483312694
Eval num_timesteps=6144, episode_reward=44.23 +/- 0.14
Episode length: 54.20 +/- 0.98
Eval num_timesteps=8192, episode_reward=41.99 +/- 0.03
Episode length: 34.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03360396938427751
Eval num_timesteps=10240, episode_reward=37.99 +/- 0.39
Episode length: 28.80 +/- 0.40
Eval num_timesteps=12288, episode_reward=33.67 +/- 0.00
Episode length: 25.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of



Eval num_timesteps=2048, episode_reward=53.96 +/- 0.47
Episode length: 69.80 +/- 1.17
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.033696055867941666
Eval num_timesteps=4096, episode_reward=57.76 +/- 0.13
Episode length: 75.20 +/- 0.40
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.0409682422068948
Eval num_timesteps=6144, episode_reward=58.06 +/- 0.14
Episode length: 67.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8192, episode_reward=59.07 +/- 0.05
Episode length: 66.00 +/- 0.00
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.02704309358573324
Eval nu



Eval num_timesteps=2048, episode_reward=83.88 +/- 5.15
Episode length: 136.80 +/- 18.77
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.027945751034272374
Eval num_timesteps=4096, episode_reward=55.07 +/- 0.56
Episode length: 82.20 +/- 1.47
Eval num_timesteps=6144, episode_reward=48.38 +/- 0.27
Episode length: 60.60 +/- 0.49
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03389824438770567
Eval num_timesteps=8192, episode_reward=44.02 +/- 0.17
Episode length: 58.20 +/- 1.17
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.02847816956137159
Eval num_timesteps=10240, episode_reward=40.63 +/- 0.06
Episode length



Eval num_timesteps=2048, episode_reward=111.17 +/- 8.37
Episode length: 156.80 +/- 30.97
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.032115428085806925
Eval num_timesteps=4096, episode_reward=109.97 +/- 15.23
Episode length: 220.80 +/- 16.65
Eval num_timesteps=6144, episode_reward=59.42 +/- 0.30
Episode length: 158.20 +/- 30.37
Eval num_timesteps=8192, episode_reward=51.14 +/- 0.15
Episode length: 108.80 +/- 13.85
Eval num_timesteps=10240, episode_reward=45.92 +/- 0.21
Episode length: 53.60 +/- 0.49
Eval num_timesteps=12288, episode_reward=41.99 +/- 0.34
Episode length: 47.80 +/- 1.60
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.0281197821087625
Eval num_timesteps=14336, episode_reward=34.79 +/- 0.48
Episode length: 25.



Eval num_timesteps=2048, episode_reward=132.28 +/- 5.86
Episode length: 164.00 +/- 11.26
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.038845973775824515
Eval num_timesteps=4096, episode_reward=114.38 +/- 17.22
Episode length: 195.60 +/- 27.75
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03929411019699246
Eval num_timesteps=6144, episode_reward=52.61 +/- 0.27
Episode length: 157.20 +/- 47.00
Eval num_timesteps=8192, episode_reward=47.62 +/- 0.08
Episode length: 67.20 +/- 1.33
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.04134340930783214
Eval num_timesteps=10240, episode_reward=45.49 +/- 0.10
Episode



Eval num_timesteps=2048, episode_reward=139.03 +/- 6.79
Episode length: 129.40 +/- 15.40
New best mean reward!
Eval num_timesteps=4096, episode_reward=140.87 +/- 6.54
Episode length: 173.40 +/- 39.57
New best mean reward!
Eval num_timesteps=6144, episode_reward=157.76 +/- 29.41
Episode length: 237.60 +/- 8.80
New best mean reward!
Eval num_timesteps=8192, episode_reward=163.45 +/- 34.15
Episode length: 215.20 +/- 41.06
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03366994913878317
Eval num_timesteps=10240, episode_reward=133.18 +/- 31.18
Episode length: 191.00 +/- 58.14
Eval num_timesteps=12288, episode_reward=73.19 +/- 0.71
Episode length: 98.00 +/- 7.67
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.039165013266999366
Ev



Eval num_timesteps=2048, episode_reward=170.55 +/- 0.72
Episode length: 122.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4096, episode_reward=202.16 +/- 3.53
Episode length: 154.80 +/- 2.64
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.04058706952965551
Eval num_timesteps=6144, episode_reward=203.11 +/- 19.37
Episode length: 242.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8192, episode_reward=144.30 +/- 15.77
Episode length: 202.00 +/- 49.01
Eval num_timesteps=10240, episode_reward=135.01 +/- 13.67
Episode length: 186.00 +/- 49.48
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.041211721122320355
Eval num_timesteps=12288, episode_reward=102.20 +/- 35.46
Episode length: 166.20 +/- 46.07
Eval num_timesteps=143



Eval num_timesteps=2048, episode_reward=210.25 +/- 16.19
Episode length: 149.20 +/- 8.84
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.030343992000936057
Eval num_timesteps=4096, episode_reward=218.10 +/- 8.29
Episode length: 154.20 +/- 4.31
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03757724406740829
Eval num_timesteps=6144, episode_reward=187.67 +/- 2.21
Episode length: 137.20 +/- 0.98
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03710554699754581
Eval num_timesteps=8192, episode_reward=178.02 +/- 12.15
Episode length: 178.60 +/- 38.74
Episode has activated Domain Randomizat



Eval num_timesteps=2048, episode_reward=214.17 +/- 16.32
Episode length: 151.40 +/- 8.55
New best mean reward!
Eval num_timesteps=4096, episode_reward=218.42 +/- 20.08
Episode length: 159.80 +/- 12.01
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03045438335940904
Eval num_timesteps=6144, episode_reward=249.65 +/- 17.11
Episode length: 242.00 +/- 0.00
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03489832536246885
Eval num_timesteps=8192, episode_reward=193.27 +/- 13.68
Episode length: 242.00 +/- 0.00
Eval num_timesteps=10240, episode_reward=212.73 +/- 11.66
Episode length: 242.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitu



Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03370525189405489
Eval num_timesteps=2048, episode_reward=379.32 +/- 23.40
Episode length: 225.40 +/- 11.62
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.034486263849011795
Eval num_timesteps=4096, episode_reward=413.88 +/- 2.13
Episode length: 242.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6144, episode_reward=416.25 +/- 1.29
Episode length: 241.80 +/- 0.40
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.035311964609514246
Eval num_timesteps=8192, episode_reward=382.52 +/- 11.29
Episode length: 233.40 +/- 1.85
Episode has acti



Eval num_timesteps=2048, episode_reward=395.74 +/- 9.11
Episode length: 234.80 +/- 5.56
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03327176272609233
Eval num_timesteps=4096, episode_reward=395.88 +/- 17.82
Episode length: 235.60 +/- 8.87
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.02717819253667524
Eval num_timesteps=6144, episode_reward=343.25 +/- 21.02
Episode length: 214.20 +/- 8.13
Eval num_timesteps=8192, episode_reward=297.14 +/- 18.53
Episode length: 192.60 +/- 8.48
Eval num_timesteps=10240, episode_reward=246.29 +/- 16.46
Episode length: 165.80 +/- 7.08
Eval num_timesteps=12288, episode_reward=180.52 +/- 2.13
Episode length: 130.60 +/- 1.36
Episode has activated Domain Randomization. Wind



Eval num_timesteps=2048, episode_reward=382.57 +/- 21.18
Episode length: 228.00 +/- 10.53
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03409092001661646
Eval num_timesteps=4096, episode_reward=350.07 +/- 23.84
Episode length: 215.00 +/- 10.02
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.039318410940778106
Eval num_timesteps=6144, episode_reward=324.38 +/- 16.68
Episode length: 200.80 +/- 6.58
Eval num_timesteps=8192, episode_reward=285.52 +/- 16.43
Episode length: 178.80 +/- 7.25
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03295863188219151
Eval num_timesteps=10240, episode_reward=236.56 +/- 1.85
E



Eval num_timesteps=2048, episode_reward=348.25 +/- 17.83
Episode length: 209.00 +/- 8.37
New best mean reward!
Eval num_timesteps=4096, episode_reward=372.41 +/- 16.41
Episode length: 221.40 +/- 9.05
New best mean reward!
Eval num_timesteps=6144, episode_reward=387.42 +/- 16.89
Episode length: 232.80 +/- 7.55
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.029752390280610313
Eval num_timesteps=8192, episode_reward=371.98 +/- 4.45
Episode length: 229.20 +/- 2.32
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.0300569914621096
Eval num_timesteps=10240, episode_reward=272.07 +/- 1.47
Episode length: 186.80 +/- 2.40
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude 



Eval num_timesteps=2048, episode_reward=361.61 +/- 20.27
Episode length: 214.60 +/- 8.87
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03944756252667968
Eval num_timesteps=4096, episode_reward=355.29 +/- 22.78
Episode length: 214.40 +/- 10.87
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03501887452037218
Eval num_timesteps=6144, episode_reward=336.49 +/- 0.67
Episode length: 210.20 +/- 0.40
Eval num_timesteps=8192, episode_reward=282.17 +/- 14.48
Episode length: 183.00 +/- 6.45
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03277660430414321
Eval num_timesteps=10240, episode_reward=248.39 +/- 2.05
Epis



Eval num_timesteps=2048, episode_reward=353.26 +/- 16.52
Episode length: 210.60 +/- 8.04
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.03806569177045957
Eval num_timesteps=4096, episode_reward=329.15 +/- 11.43
Episode length: 200.40 +/- 4.54
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.0341194998935455
Eval num_timesteps=6144, episode_reward=315.23 +/- 6.53
Episode length: 193.80 +/- 1.94
Eval num_timesteps=8192, episode_reward=268.39 +/- 3.21
Episode length: 173.40 +/- 0.80
Eval num_timesteps=10240, episode_reward=245.45 +/- 15.38
Episode length: 161.40 +/- 7.14
Eval num_timesteps=12288, episode_reward=189.65 +/- 14.30
Episode length: 184.60 +/- 49.53
Episode has activated Domain Randomization. Wind will be applied with 



Eval num_timesteps=2048, episode_reward=361.83 +/- 18.24
Episode length: 214.80 +/- 7.63
New best mean reward!
Eval num_timesteps=4096, episode_reward=385.96 +/- 28.36
Episode length: 229.20 +/- 13.08
New best mean reward!
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.031217581621207222
Eval num_timesteps=6144, episode_reward=414.40 +/- 0.66
Episode length: 242.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8192, episode_reward=411.85 +/- 0.67
Episode length: 242.00 +/- 0.00
Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.030676564656532224
Eval num_timesteps=10240, episode_reward=404.08 +/- 2.90
Episode length: 242.00 +/- 0.00
Eval num_timesteps=12288, episode_reward=268.31 +/- 32.15
Episode length: 205.60 +/- 28.71
Episode has activated D

In [10]:
# Fetch the reward history collected during training and persist it as CSV
# before any further processing.
rewards = model.get_rewards()
np.savetxt(
    fname="TotalRewards_NoCritic.csv",
    X=rewards,
    delimiter=",",
)

# Keep an ndarray copy for the per-algorithm split, and show its shape as a
# quick sanity check.
evaluated_rewards = np.array(rewards)
print(evaluated_rewards.shape)

(80, 10)


In [11]:
# One bucket per algorithm; the bucket order must match the order in which the
# agents' rows appear in `evaluated_rewards`.
FEDSVRPG_M_rewards = []
PPO_rewards = []
SAC_rewards = []
TD3_rewards = []
reward_lists = [FEDSVRPG_M_rewards, PPO_rewards, SAC_rewards, TD3_rewards]

# Deal the rows out round-robin across the buckets.
# assumes the rows are interleaved by agent (agent 0, 1, 2, 3, 0, ...) --
# TODO confirm against Federated_RL.get_rewards.
# The modulus follows the number of buckets instead of a hard-coded 4, and the
# loop variable no longer overwrites the `rewards` result of the previous cell.
for row_index, reward_row in enumerate(evaluated_rewards):
    reward_lists[row_index % len(reward_lists)].append(reward_row)

In [12]:
# Write each algorithm's reward rows to its own comma-separated file, in the
# same order as the buckets were declared.
per_algorithm_outputs = (
    ('FEDSVRPG_M_rewards_without_value_aggregation.csv', FEDSVRPG_M_rewards),
    ('PPO_rewards_without_value_aggregation.csv', PPO_rewards),
    ('SAC_rewards_without_value_aggregation.csv', SAC_rewards),
    ('TD3_rewards_without_value_aggregation.csv', TD3_rewards),
)
for csv_name, reward_rows in per_algorithm_outputs:
    np.savetxt(csv_name, reward_rows, delimiter=',')