In [1]:
import os
import time
from datetime import datetime
import argparse
import gymnasium as gym
import numpy as np
import torch as th
import pandas as pd
import csv

from gym_pybullet_drones.utils.Logger import Logger
from gym_pybullet_drones.envs.HoverAviary import HoverAviary
from gym_pybullet_drones.envs.MultiHoverAviary import MultiHoverAviary
from gym_pybullet_drones.utils.utils import sync, str2bool
from gym_pybullet_drones.utils.enums import ObservationType, ActionType, Physics

from policies import GaussianMLPPolicy
from server import Federated_RL

# ---- Experiment configuration -------------------------------------------------
# Of these constants, only DEFAULT_OBS, DEFAULT_ACT and DEFAULT_AGENTS are read by
# the cells visible in this notebook; the others are presumably consumed elsewhere
# or left over from an earlier script -- TODO confirm before relying on them.
DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = True
DEFAULT_OUTPUT_FOLDER = 'results'
DEFAULT_COLAB = False
DEFAULT_DYNAMICS = Physics('pyb') # pyb: Pybullet dynamics; dyn: Explicit Dynamics specified in BaseAviary.py
DEFAULT_WIND = np.array([0, 0.05, 0]) # units are in induced newtons
DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
DEFAULT_ACT = ActionType('one_d_rpm') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' or 'one_d_pid'
DEFAULT_AGENTS = 4  # number of environments / federated agents built by the training cell
DEFAULT_MA = False
DEFAULT_MASS = 0.037 # Actual default is 0.027

# NOTE(review): DR presumably toggles domain randomisation over the two ranges
# below -- nothing in the visible cells reads it, so confirm against the caller.
DR = False
MASS_RANGE = [0.027, 0.042] # Maximum recommended payload is 15g (0.027 + 0.015 = 0.042)
WIND_RANGE = 0.005 # Inspired by literature

pybullet build time: Jun 24 2024 15:23:59


In [7]:
# Build one HoverAviary per federated agent and train a shared Gaussian MLP policy.
# The commented-out line below is the classic-control alternative that can be
# swapped in for a quick smoke test.
envs = [HoverAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT) for _ in range(DEFAULT_AGENTS)]
#envs = [gym.make('MountainCarContinuous-v0') for _ in range(DEFAULT_AGENTS)]
env = envs[0]

# Use the LAST axis of each space as the feature size. For the 2-D spaces this
# notebook gets from HoverAviary (the run below reports 27 and 1) that is the same
# value as shape[1], and it also works for the 1-D Box spaces of the classic-control
# alternative, where shape[1] would raise IndexError.
state_space = env.observation_space
state_size = state_space.shape[-1]
action_space = env.action_space
action_size = action_space.shape[-1]

# Hidden-layer widths of the policy network.
layers = [512, 512, 256, 128]

print("State size:", state_size)
print("Action size:", action_size)

policy = GaussianMLPPolicy(input_size=state_size, output_size=action_size, hidden_layers=layers) # Will need some smarter way to initialize the policy within the model in the future
# ASSUMING ONE ALGORITHM SO FAR. WILL IMPLEMENT GENERAL STRUCTURE FOR DIVERSIFIED ALGORITHMS LATER

#### Train the model #######################################
# NOTE(review): the meaning of the per-agent `env_kwargs` value (2) is defined inside
# Federated_RL and is not visible from this notebook -- confirm before changing it.
model = Federated_RL(policy=policy,
                    envs=envs,
                    num_agents = DEFAULT_AGENTS,
                    global_iterations=10,
                    local_iterations=100,
                    env_kwargs=[2 for _ in range(DEFAULT_AGENTS)],
                    algorithms=['FedSVRPG-M' for _ in range(DEFAULT_AGENTS)],
                    max_episode_length=5,
                    global_step_size=1e-2,
                    local_step_size=1e-2
                    )

model.learn()

[INFO] BaseAviary.__init__() loaded parameters from the drone's .urdf:
[INFO] m 0.027000, L 0.039700,
[INFO] ixx 0.000014, iyy 0.000014, izz 0.000022,
[INFO] kf 0.000000, km 0.000000,
[INFO] t2w 2.250000, max_speed_kmh 30.000000,
[INFO] gnd_eff_coeff 11.368590, prop_radius 0.023135,
[INFO] drag_xy_coeff 0.000001, drag_z_coeff 0.000001,
[INFO] dw_coeff_1 2267.180000, dw_coeff_2 0.160000, dw_coeff_3 -0.110000
[INFO] BaseAviary.__init__() loaded parameters from the drone's .urdf:
[INFO] m 0.027000, L 0.039700,
[INFO] ixx 0.000014, iyy 0.000014, izz 0.000022,
[INFO] kf 0.000000, km 0.000000,
[INFO] t2w 2.250000, max_speed_kmh 30.000000,
[INFO] gnd_eff_coeff 11.368590, prop_radius 0.023135,
[INFO] drag_xy_coeff 0.000001, drag_z_coeff 0.000001,
[INFO] dw_coeff_1 2267.180000, dw_coeff_2 0.160000, dw_coeff_3 -0.110000
[INFO] BaseAviary.__init__() loaded parameters from the drone's .urdf:
[INFO] m 0.027000, L 0.039700,
[INFO] ixx 0.000014, iyy 0.000014, izz 0.000022,
[INFO] kf 0.000000, km 0.00

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


State size: 27
Action size: 1
GLOBAL ITERATION: 0
TRAINING AGENT: 1
LOCAL ITERATION: 0

Acquiring global rollout
Total Reward: 6.677060518795864
Episode length: 5

Acquiring local rollout
Total Reward: 6.755722343693469
Episode length: 5

Importance sampling weight: 0.023920558393001556

Step size: tensor(0.0265, grad_fn=<MulBackward0>)
GLOBAL ITERATION: 0
TRAINING AGENT: 1
LOCAL ITERATION: 1

Acquiring global rollout
Total Reward: 6.7610997690005785
Episode length: 5

Acquiring local rollout
Total Reward: 6.745528357764654
Episode length: 5

Importance sampling weight: 1.7999999523162842

Step size: tensor(0.0081, grad_fn=<MulBackward0>)
GLOBAL ITERATION: 0
TRAINING AGENT: 1
LOCAL ITERATION: 2

Acquiring global rollout
Total Reward: 6.7488756053098236
Episode length: 5

Acquiring local rollout
Total Reward: 6.766497152574669
Episode length: 5

Importance sampling weight: 1.7999999523162842

Step size: tensor(0.0081, grad_fn=<MulBackward0>)
GLOBAL ITERATION: 0
TRAINING AGENT: 1
LOCAL I

KeyboardInterrupt: 

Everything below this point is ad-hoc unit-testing and scratch code.

In [26]:
# Print every named parameter of the trained network and keep the tensors whose
# name contains 'fc' in `model_params`, then print the sizes of the kept tensors.
main_model = model.get_model()
model_params = []
for name, tensor in main_model.named_parameters():
    print(f"{name}: {tensor.shape}")
    if 'fc' in name:
        model_params.append(tensor)
print()
for tensor in model_params:
    print(tensor.size())


std_bias: torch.Size([])
fc1.weight: torch.Size([512, 27])
fc1.bias: torch.Size([512])
fc_layers.0.weight: torch.Size([512, 512])
fc_layers.0.bias: torch.Size([512])
fc_layers.1.weight: torch.Size([256, 512])
fc_layers.1.bias: torch.Size([256])
fc_layers.2.weight: torch.Size([128, 256])
fc_layers.2.bias: torch.Size([128])
fc2.weight: torch.Size([1, 128])
fc2.bias: torch.Size([1])

torch.Size([512, 27])
torch.Size([512])
torch.Size([512, 512])
torch.Size([512])
torch.Size([256, 512])
torch.Size([256])
torch.Size([128, 256])
torch.Size([128])
torch.Size([1, 128])
torch.Size([1])


In [100]:
from stable_baselines3 import SAC
# Load a saved SAC agent and average its two critic heads parameter-by-parameter.
# NOTE(review): the checkpoint path is absolute and machine-specific.
SAC_model = SAC.load('/Users/kevinhan/opt/anaconda3/envs/drones/lib/python3.12/site-packages/Federated_RL/SAC_test_run/final_model.zip')
params = SAC_model.get_parameters()['policy']

# Names of the parameters belonging to each critic head; the two lists are paired
# by iteration order of the parameter dict.
p1 = [key for key in params if 'critic.qf0' in key]
p2 = [key for key in params if 'critic.qf1' in key]
p3 = [(params[first] + params[second]) / 2 for first, second in zip(p1, p2)]

# Split the first averaged tensor column-wise: everything but the last column,
# and the last column on its own.
first_averaged = p3[0]
state_tensor = first_averaged[:, :-1]
action_tensor = first_averaged[:, -1:]
# len() of a 2-D tensor is its number of rows.
print(len(action_tensor))


32


In [9]:
from stable_baselines3 import TD3
from utils import TD3_policy_update
# Load a saved TD3 agent, list its parameters, and split them into actor ('actor.mu')
# and critic ('critic.qf') tensors before handing them to TD3_policy_update.
# NOTE(review): the checkpoint path is absolute and machine-specific.
TD3_model = TD3.load('/Users/kevinhan/opt/anaconda3/envs/drones/lib/python3.12/site-packages/Federated_RL/TD3_test_run/final_model.zip')
# Fetch the parameter dict once instead of on every loop iteration.
td3_params = TD3_model.get_parameters()['policy']
policy_params = []
value_params = []
# Single pass: the previous version nested a second `for i` loop inside this one,
# which re-scanned the whole dict for every parameter and therefore appended each
# matching tensor once per parameter (N-fold duplicates) while shadowing `i`.
for name, tensor in td3_params.items():
    print(name + ': ' + str(tensor.shape))
    if 'actor.mu' in name:
        policy_params.append(tensor)
    if 'critic.qf' in name:
        value_params.append(tensor)
test = TD3_policy_update(TD3_model, policy_params, value_params)


actor.mu.0.weight: torch.Size([512, 27])
actor.mu.0.bias: torch.Size([512])
actor.mu.2.weight: torch.Size([512, 512])
actor.mu.2.bias: torch.Size([512])
actor.mu.4.weight: torch.Size([256, 512])
actor.mu.4.bias: torch.Size([256])
actor.mu.6.weight: torch.Size([128, 256])
actor.mu.6.bias: torch.Size([128])
actor.mu.8.weight: torch.Size([1, 128])
actor.mu.8.bias: torch.Size([1])
actor_target.mu.0.weight: torch.Size([512, 27])
actor_target.mu.0.bias: torch.Size([512])
actor_target.mu.2.weight: torch.Size([512, 512])
actor_target.mu.2.bias: torch.Size([512])
actor_target.mu.4.weight: torch.Size([256, 512])
actor_target.mu.4.bias: torch.Size([256])
actor_target.mu.6.weight: torch.Size([128, 256])
actor_target.mu.6.bias: torch.Size([128])
actor_target.mu.8.weight: torch.Size([1, 128])
actor_target.mu.8.bias: torch.Size([1])
critic.qf0.0.weight: torch.Size([32, 28])
critic.qf0.0.bias: torch.Size([32])
critic.qf0.2.weight: torch.Size([32, 32])
critic.qf0.2.bias: torch.Size([32])
critic.qf0.4.w

In [5]:
from stable_baselines3 import PPO
# Load a saved PPO agent, list its parameters, and collect the policy-side tensors
# (the policy branch of the MLP extractor plus the action head) into PPO_params.
# NOTE(review): the checkpoint path is absolute and machine-specific.
PPO_params = []
PPO_model = PPO.load('/Users/kevinhan/opt/anaconda3/envs/drones/lib/python3.12/site-packages/Federated_RL/test_run/best_model.zip')
PPO_orig_params_OG = PPO_model.get_parameters()
PPO_orig_params = PPO_orig_params_OG['policy']
for name, tensor in PPO_orig_params.items():
    print(name + ': ' + str(tensor.shape))
    # `'mlp' and 'policy' in name` evaluated as `'policy' in name` because the
    # literal 'mlp' is always truthy; test both substrings explicitly, matching the
    # filter used when the tensors are written back into this dict.
    if ('mlp' in name and 'policy' in name) or 'action' in name:
        PPO_params.append(tensor)


log_std: torch.Size([1])
mlp_extractor.policy_net.0.weight: torch.Size([512, 27])
mlp_extractor.policy_net.0.bias: torch.Size([512])
mlp_extractor.policy_net.2.weight: torch.Size([512, 512])
mlp_extractor.policy_net.2.bias: torch.Size([512])
mlp_extractor.policy_net.4.weight: torch.Size([256, 512])
mlp_extractor.policy_net.4.bias: torch.Size([256])
mlp_extractor.policy_net.6.weight: torch.Size([128, 256])
mlp_extractor.policy_net.6.bias: torch.Size([128])
mlp_extractor.value_net.0.weight: torch.Size([32, 27])
mlp_extractor.value_net.0.bias: torch.Size([32])
mlp_extractor.value_net.2.weight: torch.Size([32, 32])
mlp_extractor.value_net.2.bias: torch.Size([32])
action_net.weight: torch.Size([1, 128])
action_net.bias: torch.Size([1])
value_net.weight: torch.Size([1, 32])
value_net.bias: torch.Size([1])


In [74]:
# Pairwise difference between the federated policy tensors and the PPO policy
# tensors (paired by position), followed by a shape check on the first result.
new_params = []
for mp, ppop in zip(model_params, PPO_params):
    new_params.append(mp - ppop)
print(new_params[0].shape)

torch.Size([512, 27])


In [75]:
# Overwrite the policy-side entries of the PPO parameter dict with `new_params`,
# pairing keys and tensors by position.
policy_keys = [
    key for key in PPO_orig_params
    if ('mlp' in key and 'policy' in key) or 'action' in key
]
for key, replacement in zip(policy_keys, new_params):
    PPO_orig_params[key] = replacement


In [77]:
from stable_baselines3.common.env_util import make_vec_env
# Put the edited policy dict back into the full parameter dict and load it into the
# PPO model, requiring every expected key to be present.
PPO_orig_params_OG['policy'] = PPO_orig_params
PPO_model.set_parameters(
    PPO_orig_params_OG,
    exact_match=True,
)

In [2]:
from utils import PPO_policy_update
# Run the project helper with a policy-side update only (no value-net update) and
# print whatever it returns.
new_params = PPO_policy_update(
    PPO_Model=PPO_model,
    policy_net_update=PPO_params,
    value_net_update=None,
)
print(new_params)

{'policy': OrderedDict({'log_std': tensor([0.0155]), 'mlp_extractor.policy_net.0.weight': tensor([[ 0.2983,  0.0939,  0.0582,  ..., -0.3148, -0.2203,  0.0149],
        [ 0.1385,  0.2232, -0.0835,  ..., -0.1183, -0.1324, -0.0246],
        [ 0.1947,  0.0075, -0.1628,  ..., -0.0387,  0.0062,  0.1038],
        ...,
        [-0.1292, -0.3942, -0.0594,  ...,  0.0531, -0.0747,  0.1528],
        [-0.0552,  0.2426,  0.0551,  ..., -0.0403,  0.0743, -0.1494],
        [ 0.0859,  0.0312,  0.1152,  ...,  0.1407, -0.0177, -0.1886]]), 'mlp_extractor.policy_net.0.bias': tensor([ 1.2528e-02, -9.4204e-03, -9.1568e-03,  3.4986e-03, -1.2750e-02,
        -9.8455e-05,  6.7938e-03,  1.2609e-02, -1.8640e-04,  5.3353e-03,
         8.9538e-03,  5.2348e-04,  4.0354e-03, -4.9570e-03, -1.4039e-02,
        -8.2345e-03,  1.2211e-02,  5.4926e-03,  1.4528e-02, -1.1743e-02,
         1.1887e-02,  3.5642e-03, -8.1238e-04,  6.3826e-03,  9.9020e-03,
        -1.2047e-02, -8.4845e-03, -9.1807e-03,  8.9473e-03, -1.9909e-04,
  

In [62]:
import torch.nn as nn
from collections import OrderedDict

def build_nested_module_params(module, param_dict):
    """Turn a flat ``{'a.b.weight': value}`` mapping into nested dictionaries.

    Each dotted name is split on ``'.'``; every component but the last becomes a
    nested ``OrderedDict`` level and the last component maps to the value.

    The previous version defined an inner helper that was never called and returned
    nothing, so the function had no effect. It also tried to store tensors in an
    ``nn.ModuleDict``, which accepts only ``nn.Module`` values and rejects keys that
    contain a dot.

    Args:
        module: Unused; kept so existing call sites keep working.
        param_dict: Mapping from (possibly dotted) parameter name to value.

    Returns:
        An ``OrderedDict`` mirroring the dotted structure of ``param_dict``. The
        first occurrence of a name wins, as in the original membership checks.

    Raises:
        ValueError: If a name needs to descend through an entry that is already a
            leaf value (e.g. both ``'a'`` and ``'a.b'`` are present, in that order).
    """
    nested_dict = OrderedDict()

    def add_param(container, name, param):
        # Peel off the first path component; `dot` is empty when none is left.
        head, dot, rest = name.partition('.')
        if not dot:
            # Leaf: keep the first value seen for this name.
            if head not in container:
                container[head] = param
            return
        if not rest:
            # Trailing dot with nothing after it: the original skipped these too.
            return
        child = container.setdefault(head, OrderedDict())
        if not isinstance(child, dict):
            raise ValueError(
                f"'{head}' is already a parameter and cannot also be a module prefix"
            )
        add_param(child, rest, param)

    for name, param in param_dict.items():
        add_param(nested_dict, name, param)
    return nested_dict

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network class
class SimpleNN(nn.Module):
    """Two chained scalar affine maps: ``y = w2 * (w1 * x + b1) + b2``.

    All four coefficients are scalar ``nn.Parameter`` objects, registered in the
    order w1, b1, w2, b2, with weights starting at 0.5 and biases at 0.0.
    """

    def __init__(self):
        super().__init__()
        # Registration order determines the order of named_parameters().
        self.w1 = nn.Parameter(torch.tensor(0.5))
        self.b1 = nn.Parameter(torch.tensor(0.0))
        self.w2 = nn.Parameter(torch.tensor(0.5))
        self.b2 = nn.Parameter(torch.tensor(0.0))

    def forward(self, x):
        """Apply the hidden affine map, then the output affine map."""
        hidden = self.w1 * x + self.b1
        return self.w2 * hidden + self.b2

# Instantiate the model
# NOTE(review): this rebinds the notebook-level name `model`, which the training
# cell assigns to the Federated_RL trainer and which a later cell reads through
# model.get_model(); after this cell runs, `model` is a SimpleNN instead.
model = SimpleNN()

# Define a loss function and optimizer
def criterion(y, t):
    """Return half the squared error between prediction ``y`` and target ``t``."""
    residual = y - t
    return 0.5 * residual ** 2


# Sample data
x = torch.tensor([1.0], requires_grad=False)  # Ensure x does not require gradients
t = torch.tensor([2.0])

# Forward pass
y = model(x)

# Compute loss
loss = criterion(y, t)

# Shift every parameter by +2 in place and print the new value. No backward pass
# is run, so there are no gradients to show here (the old comment said otherwise).
# no_grad() lets the in-place add bypass autograd. `torch` is used instead of the
# `th` alias because `torch` is what this cell imports.
with torch.no_grad():
    for name, param in model.named_parameters():
        param.add_(2)
        print(f"{name}: {param.item()}")

# The loss was computed before the shift above, so it reflects the initial weights.
print(f"Loss: {loss.item()}")


w1: 2.5
b1: 2.0
w2: 2.5
b2: 2.0
Loss: 1.53125


In [8]:
from utils import multiply_and_sum_tensors
import torch as th
def calculate_g(
    policy_params,
    log_probs: list[th.Tensor],
    returns: th.Tensor) -> list[th.Tensor]:
    """Differentiate each log-probability w.r.t. the policy parameters and combine.

    Args:
        policy_params: A tensor, or an iterable of tensors (a generator such as
            ``policy.parameters()`` is accepted), to differentiate with respect to.
        log_probs: One log-probability tensor per step; each is differentiated
            separately with ``th.autograd.grad``.
        returns: Passed to ``multiply_and_sum_tensors`` as ``scalar_tensor``.

    Returns:
        Whatever ``multiply_and_sum_tensors`` returns for the collected gradient
        tuples (presumably a return-weighted sum per parameter -- TODO confirm
        against ``utils``).
    """
    # A generator would be exhausted by the first autograd.grad call, leaving every
    # later call with no inputs; materialise it once. A single tensor is left alone
    # because list(tensor) would iterate over its first dimension.
    if not isinstance(policy_params, th.Tensor):
        policy_params = list(policy_params)

    grads = []
    for pi in log_probs:
        print(pi)
        grad_tuple = th.autograd.grad(outputs=pi, inputs=policy_params, grad_outputs=th.ones_like(pi))
        grads.append(grad_tuple)
    # Debug print of the last gradient tuple; skipped when log_probs is empty, where
    # the old unconditional print raised UnboundLocalError.
    if grads:
        print(grads[-1])
    return multiply_and_sum_tensors(scalar_tensor=returns, tensor_lists=grads)

In [9]:
import torch
import torch.nn as nn
from policies import GaussianMLPPolicy
import torch as th


# Set random seed for reproducibility
# (the statements below are order-sensitive if get_action draws random samples --
# presumably it does; TODO confirm against policies.GaussianMLPPolicy)
torch.manual_seed(42)

# Instantiate the model
# NOTE(review): this rebinds `policy`, the name the training cell passes to Federated_RL.
policy = GaussianMLPPolicy(input_size=2, output_size=1, hidden_layers=[1])

# Sample data
x = torch.tensor([1.0, 2.0])
x2 = torch.tensor([2.0, 3.0])
# action1, mean1 and std1 are reused by the manual log-density cells further down.
action1 = policy.get_action(x)[0]
print("action1:", action1)
mean1, std1 = policy.forward(x)
# forward() is evaluated a second time here just for printing.
print("mean and std:", policy.forward(x))
# Compute log probabilities
# (one for (x, action1), one for x2 with an action obtained on the spot)
log_probs = [policy.get_log_prob(x, action1), policy.get_log_prob(x2, policy.get_action(x2)[0])]

print("Log probabilities:", log_probs)
# One autograd.grad call per log-probability, each given a fresh policy.parameters()
# iterator; the result is a list of gradient tuples, one tuple per log-probability.
grad_list = [th.autograd.grad(outputs=pi, inputs=policy.parameters(), grad_outputs=th.ones_like(pi)) for pi in log_probs]
print(grad_list)
# Earlier single-call attempt, kept for reference (log_probs is a Python list here):
#grads = torch.autograd.grad(outputs=log_probs, inputs=policy.parameters(), grad_outputs=torch.ones_like(log_probs))

# Print every parameter tensor of the policy.
print(*policy.parameters())



action1: tensor([0.9623], grad_fn=<AddBackward0>)
mean and std: (tensor([0.6201], grad_fn=<ViewBackward0>), tensor([1.0505], grad_fn=<AddBackward0>))
Log probabilities: [tensor(-1.0213, grad_fn=<MulBackward0>), tensor(-1.3880, grad_fn=<MulBackward0>)]
[(tensor(-0.9519), tensor([[-0.0940, -0.1881]]), tensor([-0.0940]), tensor([[-0.5655]]), tensor([-0.6190])), (tensor(-0.9115), tensor([[-0.0209, -0.0314]]), tensor([-0.0105]), tensor([[-0.6015]]), tensor([-0.6072]))]
Parameter containing:
tensor(0., requires_grad=True) Parameter containing:
tensor([[0.5406, 0.5869]], requires_grad=True) Parameter containing:
tensor([-0.1657], requires_grad=True) Parameter containing:
tensor([[0.9186]], requires_grad=True) Parameter containing:
tensor([-0.2191], requires_grad=True)


In [40]:
from policies import GaussianMLPPolicy
# Build a fresh policy with the training-cell layer sizes, copy its parameters into
# a list, add a leading axis to the first two entries, and print how many named
# parameters remain after skipping the first two.
testModel = GaussianMLPPolicy(input_size=27, hidden_layers=[512, 512, 256, 128], output_size=1)
paramList = list(testModel.parameters())
for idx in (0, 1):
    paramList[idx] = paramList[idx].unsqueeze(dim=0)

param_names = [name for name, _ in testModel.named_parameters()]
print(len(param_names[2:]))

10


In [None]:
import numpy as np
# Element-by-element version of the diagonal-Gaussian log-density
#   -0.5 * ( sum_i [ (a_i - mu_i)^2 / sigma_i^2 + 2 * log(sigma_i) ] + k * log(2 * pi) )
# using action1 / mean1 / std1 from the policy check cell.
summationTerm = 0
print(len(action1))
for mean, std, act in zip(mean1, std1, action1):
    summationTerm += (act-mean)**2/std**2 + 2*torch.log(std)

# k is the action dimension; take it from the data instead of hard-coding 1 so this
# agrees with the vectorised formula in the next cell for any action size.
total = -0.5 * (summationTerm + len(action1) * np.log(2*np.pi))
print(total)

In [None]:
# Vectorised form of the same log-density, split into named pieces.
squared_term = (action1 - mean1) ** 2 / std1 ** 2
log_term = 2 * torch.log(std1)
total = -0.5 * (torch.sum(squared_term + log_term) + len(action1) * np.log(2 * np.pi))

In [None]:
# Sanity check: the sum over an element-wise sum of two integer tensors.
test1 = torch.tensor([1] * 4)
test2 = torch.tensor([2] * 4)
torch.sum(test2 + test1)

In [None]:
from utils import multiply_tensors_in_place, subtract_lists_of_tensors, add_lists_of_tensors
import torch
# Ragged list of integer tensors passed through the in-place multiply helper with
# a scalar of 1, then printed.
test = [
    torch.tensor([1, 2, 3]),
    torch.tensor([7, 8]),
    torch.tensor([9, 10, 22]),
]
multiply_tensors_in_place(test, scalar=1)
print(test)

[tensor([1, 2, 3]), tensor([7, 8]), tensor([ 9, 10, 22])]


In [None]:
# momentum_term = (u_r + otherG) - test, computed list-wise with the project helpers.
# Both operands are lists of all-ones integer tensors with lengths 3, 2 and 3.
u_r = [torch.tensor([1] * width) for width in (3, 2, 3)]
otherG = [torch.tensor([1] * width) for width in (3, 2, 3)]
summed = add_lists_of_tensors(u_r, otherG)
momentum_term = subtract_lists_of_tensors(summed, test)
print(momentum_term)

[tensor([ 1,  0, -1]), tensor([-5, -6]), tensor([ -7,  -8, -20])]


In [None]:
# Convex blend of each tensor in `test` with the matching momentum term.
beta = .5
u = [beta * gc + (1 - beta) * mt for gc, mt in zip(test, momentum_term)]

In [None]:
# Show the blended list `u` built from `test` and `momentum_term`.
print(u)

[tensor([1., 1., 1.]), tensor([1., 1.]), tensor([1., 1., 1.])]
