In [41]:
import flwr as fl
import torch
import torch.optim as optim
import pandas as pd
import numpy as np
# import gymnasium as env
from tensordict import TensorDict, TensorDictBase
from torchrl.envs import EnvBase, Transform
from tensordict.nn import TensorDictModule
from torchrl.data import BoundedTensorSpec, CompositeSpec, UnboundedContinuousTensorSpec
from torchrl.envs.utils import check_env_specs
from agent import PolicyNetwork
from gymnasium import Env, spaces
# from env import DroneEnv

In [18]:
def _step(self, tensordict):
    action = tensordict["action"]
    dx = int(action[0])
    dy = int(action[1])
    new_x = np.clip(self.pos[0] + dx, 0, self.map_size - 1)
    new_y = np.clip(self.pos[1] + dy, 0, self.map_size - 1)
    self.pos = np.array([new_x, new_y])
    already_covered = self.coverage_map[new_x, new_y]

    self.altitude = tensordict["params", "altitude"]
    self.battery = tensordict["params", "battery_voltage"] / 52000  # normalização do valor da bateria
    self.wind_speed = tensordict["params", "wind_speed"]
    self.wind_angle = tensordict["params", "wind_angle"] / 360

    # Recompensa adaptada
    alpha = 1.0      # incentivo por nova área
    beta = 0.5       # penalização por redundância
    gamma = 0.2      # penalização por vento
    delta = 0.2      # penalização por bateria baixa

    reward = 0.0

    if already_covered < 0.5:
        reward += alpha * (1 - already_covered)
    else:
        reward -= beta

    reward -= gamma * self.wind_speed

    if self.battery < 0.15:
        reward -= delta * (1 - self.battery)

    self.coverage_map[new_x, new_y] = 1.0
    done = self.battery <= 0

    out = TensorDict({
        "state": self._get_state(),
        "params": tensordict['params'],
        "reward": reward,
        "done": done,
    })

    return out

In [19]:
def _get_state(self):
    return torch.tensor([
        self.pos[0],
        self.pos[1],
        self.coverage_map[self.pos[0]/self.map_size, self.pos[1]/self.map_size],
        self.altitude,  # altitude
        self.battery,
        self.wind_speed,
        self.wind_angle,  # direção do vento
    ], dtype=torch.float32)

In [20]:
def _reset_env(self):
    self.coverage_map[:] = 0
    self.pos = np.random.randint(0, self.map_size, size=(2,))
    self.battery = 1.0
    self.wind_speed = 0.0
    self.wind_angle = 0
    self.altitude = 0

In [21]:
def _reset(self):
    self._reset_env()
    return self._get_state()

In [37]:
def _make_spec(self):
    # self.observation_spec = BoundedTensorSpec()
    # CompositeSpec(
        # pos=BoundedTensorSpec(
            # low=
        # )
    # )
    # self.observation_spec = CompositeSpec(observation=UnboundedContinuousTensorSpec(shape=(7,)))
    self.state_spec = CompositeSpec(BoundedTensorSpec(shape=(7,), low=0, high=1, dtype=torch.float32))
    self.action_spec = BoundedTensorSpec(shape=(2,), low=-1, high=1, dtype=torch.float32)
    self.reward_spec = UnboundedContinuousTensorSpec(shape=(), dtype=torch.float32)
    # self.done_spec = BoundedTensorSpec(shape=(), dtype=torch.bool)


In [None]:
def _set_seed(self, seed):
    rng = torch.manual_seed(seed)
    self.rng = rng


In [45]:
class DroneEnvG(Env):
    def __init__(self, map_size=50):
        super().__init__()
        self.map_size = map_size
        self.coverage_map = np.zeros((map_size, map_size))

        self.observation_space = spaces.Dict({
            "pos_x": spaces.Box(-np.inf, np.inf, shape=(), dtype=np.float32),
            "pos_y": spaces.Box(-np.inf, np.inf, shape=(), dtype=np.float32),
            "coverage": spaces.Box(0, 1.0, shape=(), dtype=np.float32),
            "battery": spaces.Box(0, np.inf, shape=(), dtype=np.float32),
            "wind_speed": spaces.Box(-np.inf, np.inf, shape=(), dtype=np.float32),
            "wind_angle": spaces.Box(0, 360, shape=(), dtype=np.int32),
            "altitude": spaces.Box(-np.inf, np.inf, shape=(), dtype=np.float32),
        })

        self.action_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)

        self.reset()
    

    def _get_obs(self):
        return {
            "pos_x": self.pos[0],
            "pos_y": self.pos[1],
            "coverage": self.coverage_map[self.pos[0] % self.map_size, self.pos[1] % self.map_size],
            "battery": self.battery,
            "wind_speed": self.wind_speed,
            "wind_angle": self.wind_angle,
            "altitude": self.altitude,
        }

    def _get_info(self):
        return {
            "total_covered": np.count_nonzero(self.coverage_map)
        }

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.coverage_map[:] = 0
        self.pos = np.random.randint(0, self.map_size, size=(2,))
        self.battery = 1.0
        self.wind_speed = 0.0
        self.wind_angle = 0
        self.altitude = 0.0
        obs = self._get_obs()
        info = self._get_info()
        return obs, info

    def _get_reward(self):
        # normalizacao da velocidade do vento
        wind_speed_normal = self.wind_speed / 100
        # se já mapeou regiao
        already_covered = self.coverage_map[self.pos[0], self.pos[1]]

        # Recompensa adaptada
        alpha = 1.0      # incentivo por nova área
        beta = 10.0       # penalização por cair / drone no chão
        gamma = 0.3      # penalização por vento
        delta = 0.5      # penalização por bateria baixa

        reward = 0.0

        reward += alpha * (1 - already_covered)
        reward -= gamma * (wind_speed_normal * np.cos(np.radians([self.wind_angle])))

        if self.battery < 0.15:
            reward -= delta * (1 - self.battery)
        
        if self.altitude <= 0:
            reward -= beta

        return reward

    def step(self, action, params):
        dx = int(action[0])
        dy = int(action[1])
        new_x = np.clip(self.pos[0] + dx, 0, self.map_size - 1)
        new_y = np.clip(self.pos[1] + dy, 0, self.map_size - 1)
        self.pos = np.array([new_x, new_y])

        self.altitude = params["altitude"]
        self.battery = params["battery_voltage"] / 50000  # normalização do valor da bateria
        self.wind_speed = params["wind_speed"]
        self.wind_angle = params["wind_angle"]

        reward = self._get_reward()
        self.coverage_map[new_x, new_y] = 1.0
        done = (self.battery <= 0 or np.all(self.coverage_map))

        obs = self._get_obs()
        info = self._get_info()

        return obs, reward, done, False, info
    

In [51]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split

In [52]:
csv_path ='../data/sensor_dataset.csv'

In [113]:
df = pd.read_csv(csv_path, sep=';')

In [114]:
df['position_x'] = df['position_x'].apply(lambda x : (x*10000) % 100)
df['position_y'] = df['position_y'].apply(lambda x : (x*10000) % 100)
df['battery_voltage'] = df['battery_voltage'].apply(lambda x : x/52000)
df['altitude'] = df['altitude'].mask(df['altitude'] < 0, 0)

In [132]:
df['battery_current']

0         1551
1         1551
2         1551
3         1551
4         1551
          ... 
111077    1734
111078    1734
111079    1734
111080    1734
111081    1730
Name: battery_current, Length: 111082, dtype: int64

In [121]:
pos_x = df['position_x']
pos_y = df['position_y']

In [128]:
df['position_x'] = (df['position_x']-df['position_x'].min())/(df['position_x'].max()-df['position_x'].min())

In [130]:
df['position_x'] = df['position_x'] * 100

count    111082.000000
mean          0.687213
std           0.237620
min           0.000000
25%           0.527144
50%           0.788090
75%           0.879157
max           1.000000
Name: position_x, dtype: float64

In [127]:
normal_x.describe()

count    111082.000000
mean         68.721271
std          23.762004
min           0.000000
25%          52.714410
50%          78.809011
75%          87.915655
max         100.000000
Name: position_x, dtype: float64

In [110]:
df['altitude'].describe()

count    111082.000000
mean         58.468777
std          25.542943
min           0.000000
25%          59.954132
50%          69.978600
75%          70.016907
max         102.755218
Name: altitude, dtype: float64

In [106]:
df['battery_voltage'].describe()

count    111082.000000
mean      45042.238806
std        1369.206312
min       42394.000000
25%       43976.000000
50%       44779.000000
75%       45847.000000
max       51630.000000
Name: battery_voltage, dtype: float64

In [107]:
df['altitude'].describe()

count    111082.000000
mean         58.468709
std          25.543098
min          -0.065979
25%          59.954132
50%          69.978600
75%          70.016907
max         102.755218
Name: altitude, dtype: float64

In [46]:
env_t = DroneEnvG()
# check_env_specs(env_t)

In [48]:
obs, info = env_t.reset()

In [50]:
info

{'total_covered': np.int64(0)}

In [None]:
df = pd.read_csv("../../data/sensor_dataset.csv", sep=';')

df = df.astype({
    'wind_speed': float,
    'wind_angle': float,
    'battery_voltage': float,
    'altitude': float,
    'yaw': float
})

env = DroneEnv(data=df.to_dict(orient="records"))


In [3]:
len(env.data)

111082

In [None]:
trajectories = []
for uid, group in df.groupby('uid'):
    group = group.sort_values('timestamp').reset_index(drop=True)
    for i in range(len(group) - 1):
        curr = group.iloc[i]
        next_ = group.iloc[i + 1]

        state = np.array([
            curr['position_x'],
            curr['position_y'],
            0, # coverage
            curr['battery_voltage'] / 20.0,  # normalização
            curr['wind_speed'] / 20.0,
            curr['wind_angle'] / 360.0,
            curr['altitude'] / 1000.0,
            curr['yaw'] / 360.0,
        ], dtype=np.float32)

        # Próximo estado
        next_state = np.array([
            next_['position_x'] / 1000.0,
            next_['position_y'] / 1000.0,
            0.0,
            next_['battery_voltage'] / 20.0,
            next_['wind_speed'] / 20.0,
            next_['wind_angle'] / 360.0,
            next_['altitude'] / 1000.0,
            next_['yaw'] / 360.0,
        ], dtype=np.float32)

        # Ação aproximada = delta posição
        dx = (next_['position_x'] - curr['position_x']) / 10.0
        dy = (next_['position_y'] - curr['position_y']) / 10.0
        action = np.array([dx, dy], dtype=np.float32)

        # Recompensa estimada baseada nos critérios usados no ambiente
        reward = 0
        if i > 0:
            redundancy = 1 if np.linalg.norm([dx, dy]) < 1e-2 else 0
            reward += 1 - redundancy
            reward -= 0.5 * (curr['wind_speed'] / 20.0)
            if curr['battery_voltage'] < 15:
                reward -= 0.3

        log_prob = 0  # Placeholder: pode ser ignorado no fine-tuning
        done = False  # Assumimos episódio contínuo para fine-tuning

        trajectories.append((state, action, reward, done, log_prob))

In [5]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
STATE_DIM = 7
ACTION_DIM = 2

In [6]:

class DroneClient(fl.client.NumPyClient):
    def __init__(self):
        self.model = PolicyNetwork(STATE_DIM, ACTION_DIM).to(DEVICE)

    def get_parameters(self, config):
        return [val.cpu().numpy() for val in self.model.state_dict().values()]

    def set_parameters(self, parameters):
        keys = list(self.model.state_dict().keys())
        state_dict = {k: torch.tensor(v) for k, v in zip(keys, parameters)}
        self.model.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        self.set_parameters(parameters)

        env = DroneEnv()
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        for episode in range(5):  # Local episodes
            state = torch.tensor(env.reset(), dtype=torch.float32).to(DEVICE)
            done = False
            while not done:
                action, log_prob = self.model.act(state)
                next_state, reward, done, _ = env.step(action.cpu().detach().numpy())
                next_state = torch.tensor(next_state, dtype=torch.float32).to(DEVICE)

                loss = -log_prob * reward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                state = next_state

        return self.get_parameters({}), 1, {}

    def evaluate(self, parameters, config):
        self.set_parameters(parameters)
        env = DroneEnv()

        total_reward = 0.0
        for _ in range(5):
            state = torch.tensor(env.reset(), dtype=torch.float32).to(DEVICE)
            done = False
            while not done:
                with torch.no_grad():
                    action, _ = self.model.act(state)
                next_state, reward, done, _ = env.step(action.cpu().numpy())
                total_reward += reward
                state = torch.tensor(next_state, dtype=torch.float32).to(DEVICE)

        avg_reward = total_reward / 5
        return -avg_reward, 1, {"reward": avg_reward}

In [7]:
fl.client.start_numpy_client(server_address="localhost:8080", client=DroneClient())


	Instead, use `flwr.client.start_client()` by ensuring you first call the `.to_client()` method as shown below: 
	flwr.client.start_client(
		server_address='<IP>:<PORT>',
		client=FlowerClient().to_client(), # <-- where FlowerClient is of type flwr.client.NumPyClient object
	)
	Using `start_numpy_client()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use the `flower-supernode` CLI command to start a SuperNode as shown below:

		$ flower-supernode --insecure --superlink='<IP>:<PORT>'

	To view all available options, run:

		$ flower-supernode --help

	Using `start_client()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        


_MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:8080: Failed to connect to remote host: connect: Connection refused (111)"
	debug_error_string = "UNKNOWN:Error received from peer  {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:8080: Failed to connect to remote host: connect: Connection refused (111)", grpc_status:14}"
>