In [1]:
import os
import time
from datetime import datetime
import argparse
import gymnasium as gym
import numpy as np
import torch as th
import pandas as pd
import csv

from gym_pybullet_drones.utils.Logger import Logger
from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary
from gym_pybullet_drones.utils.utils import sync, str2bool
from gym_pybullet_drones.utils.enums import ObservationType, ActionType, Physics

from policies import GaussianMLPPolicy
from server import Federated_RL

# ---- Simulation / output defaults ----------------------------------------
DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = True
DEFAULT_COLAB = False
DEFAULT_OUTPUT_FOLDER = 'results'

# ---- Environment defaults ------------------------------------------------
# Physics backend, 'pyb': PyBullet dynamics; 'dyn': explicit dynamics
# specified in BaseAviary.py.
DEFAULT_DYNAMICS = Physics('pyb')
# Observation type: 'kin' or 'rgb'.
DEFAULT_OBS = ObservationType('kin')
# Action type: 'rpm', 'pid', 'vel', 'one_d_rpm' or 'one_d_pid'.
DEFAULT_ACT = ActionType('rpm')
# Three-component default wind; units are induced newtons.
DEFAULT_WIND = np.array([0.0, 0.05, 0.0])
DEFAULT_AGENTS = 2
DEFAULT_MA = False

# ---- Domain randomization (DR) -------------------------------------------
# Master switch for domain randomization.
DR = True
# [low, high] bounds for the randomized mass; the maximum recommended
# payload is 15g (values presumably in kg -- TODO confirm).
MASS_RANGE = [0.027, 0.042]
# Wind magnitude bound used for DR; value inspired by literature.
WIND_RANGE = 5e-3

pybullet build time: Jun 24 2024 15:23:59


In [2]:
# ---- Per-agent experiment configuration ----------------------------------
# One agent is created per entry in `algorithms`; every per-agent list below
# has exactly `num_agents` entries, index-aligned with `algorithms`.
algorithms = ['FedSVRPG-M', 'PPO', 'SAC', 'TD3']
num_agents = len(algorithms)
envs = [HoverAviary for _ in range(num_agents)]
env_kwargs = [dict(obs = DEFAULT_OBS, act = DEFAULT_ACT) for _ in range(num_agents)]
# Independent copy so that changing the agent names can never alter `algorithms`.
agent_names = list(algorithms)

# Domain-randomization settings are built unconditionally. They used to be
# assigned only inside `if DR == True:`, so switching DR off raised a NameError
# when they were passed to Federated_RL below. With DR disabled every agent now
# simply receives a False flag (the thresholds are presumably ignored by
# Federated_RL in that case -- TODO confirm).
domain_randomizations = [DR for _ in range(num_agents)]
DR_episode_thresholds = [.5 for _ in range(num_agents)] # Probability of DR at each episode
DR_step_thresholds = [.3 for _ in range(num_agents)] # If DR episode, probability of wind at each step

# Each agent gets its own copy of the mass bounds, so an in-place change for
# one agent cannot leak into the others or into MASS_RANGE itself.
mass_ranges = [list(MASS_RANGE) for _ in range(num_agents)]
wind_ranges = [WIND_RANGE for _ in range(num_agents)]

# ---- Probe the environment for its space sizes ---------------------------
# A throwaway environment is built only to read the space shapes; it is closed
# again right away so its simulator connection is not leaked.
env_example = HoverAviary(**env_kwargs[0])
try:
    # Get the state size (second dimension of the observation space shape)
    state_space = env_example.observation_space
    state_size = state_space.shape[1]
    # Get the action size (second dimension of the action space shape)
    action_space = env_example.action_space
    action_size = action_space.shape[1]
finally:
    env_example.close()

# ---- Network architecture ------------------------------------------------
layers = [512, 512, 256, 128]
value_layers = [32, 32]
# Maintain consistent network structures
policy_kwargs = dict(activation_fn=th.nn.Tanh,
                     net_arch=dict(pi=layers, qf=value_layers))

print("State size:", state_size)
print("Action size:", action_size)

policy = GaussianMLPPolicy(input_size=state_size, output_size=action_size, hidden_layers=layers) # Will need some smarter way to initialize the policy within the model in the future
# ASSUMING ONE ALGORITHM SO FAR. WILL IMPLEMENT GENERAL STRUCTURE FOR DIVERSIFIED ALGORITHMS LATER

#### Build the federated model (training itself starts with model.learn()) ####
model = Federated_RL(policy = policy,
                     envs = envs,
                     env_kwargs = env_kwargs,
                     num_agents = num_agents,
                     global_iterations = 50,
                     state_size = state_size,
                     action_size = action_size,
                     local_step_size = 1e-3,
                     policy_kwargs = policy_kwargs,
                     critic_net_aggregation = True,
                     critic_net = value_layers,
                     local_iterations = 50,
                     max_episode_length=2048,
                     agent_names = agent_names,
                     DR = domain_randomizations,
                     DR_episode_th = DR_episode_thresholds,
                     DR_step_th = DR_step_thresholds,
                     mass_ranges = mass_ranges,
                     wind_ranges = wind_ranges,
                     algorithms = algorithms)

State size: 72
Action size: 4


  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [3]:
# Run training on the Federated_RL instance built in the previous cell and keep
# whatever learn() returns. The return type is not visible here; the name
# suggests the aggregated server-side model -- TODO confirm against server.py.
serverModel = model.learn()

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")



Training agent FedSVRPG-M

Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.034346594379458965
GLOBAL ITERATION: 0
LOCAL ITERATION: 0

Episode Reward: 42.76640574785146

Importance sampling weight: 1.7999999523162842

GLOBAL ITERATION: 0
LOCAL ITERATION: 1

Episode Reward: 79.25004502368917

Importance sampling weight: 0.0010000000474974513

GLOBAL ITERATION: 0
LOCAL ITERATION: 2

Episode Reward: 68.0884672993511

Importance sampling weight: 1.7999999523162842

Episode has activated Domain Randomization. Wind will be applied with a probability of 0.3 with a maximum magnitude of 0.005 Newtons.
Mass parameter for next episode: 0.02934030353653905
GLOBAL ITERATION: 0
LOCAL ITERATION: 3

Episode Reward: 24.480286385966796

Importance sampling weight: 0.0010000000474974513

GLOBAL ITERATION: 0
LOCAL ITERATION: 4

Episode Reward: 22.641294199484406

Importance sampling wei

KeyboardInterrupt: 