<a href="https://colab.research.google.com/github/JimmyPlaysViolin/AI4Gov-UAV/blob/main/noisy_windy_environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyFlyt, the package that the import cell below pulls `Aviary` and
# `PyFlyt.gym_envs` from.
# NOTE(review): no version is pinned here — consider pinning for reproducibility.
!pip install pyflyt



In [None]:
import gymnasium as gym
import numpy as np
from PyFlyt.core import Aviary
import PyFlyt.gym_envs
import matplotlib.pyplot as plt


In [None]:
# Pin NumPy to version 1.25.2.
# NOTE(review): this cell runs after `import numpy as np` in the import cell
# above, so the already-imported NumPy presumably stays active until the
# runtime is restarted — confirm, and consider moving this pin ahead of the
# imports.
!pip3 install numpy==1.25.2



In [None]:
# Install Stable-Baselines3, the package that the following cells import
# `DQN`, `PPO` and `evaluate_policy` from.
# NOTE(review): no version is pinned here — consider pinning for reproducibility.
!pip3 install stable_baselines3



In [None]:
from stable_baselines3 import DQN
from stable_baselines3 import PPO

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
class NoisyObservationEnv(gym.Env):
    """Wrap an environment and add zero-mean Gaussian noise to its observations.

    The wrapper re-exposes the wrapped environment's ``observation_space`` and
    ``action_space`` and delegates ``step``, ``reset``, ``render`` and
    ``close`` to it. Noise is applied to every observation handed to the
    agent, including the initial one returned by ``reset``.

    Args:
        env: The environment to wrap. Its ``step`` must return the 5-tuple
            ``(obs, reward, terminated, truncated, info)`` and its ``reset``
            must return ``(obs, info)``.
        noise_scale: Standard deviation of the Gaussian noise added to each
            observation element.
    """

    def __init__(self, env, noise_scale=0.1):
        super().__init__()
        self.env = env
        self.observation_space = env.observation_space
        self.action_space = env.action_space
        self.noise_scale = noise_scale

    def _add_noise(self, obs):
        """Return ``obs`` with Gaussian noise of std ``noise_scale`` added."""
        obs = np.asarray(obs)
        noisy = obs + np.random.normal(0, self.noise_scale, size=obs.shape)
        # np.random.normal produces float64; keep a floating observation in
        # its own dtype so it is not silently upcast by the addition.
        if np.issubdtype(obs.dtype, np.floating):
            noisy = noisy.astype(obs.dtype, copy=False)
        return noisy

    def step(self, action):
        """Step the wrapped env and return its result with a noisy observation."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        return self._add_noise(obs), reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Reset the wrapped env and return ``(noisy_obs, info)``.

        The initial observation is perturbed as well, so the first
        observation of an episode is treated the same way as every
        observation returned by ``step``.
        """
        obs, info = self.env.reset(**kwargs)
        return self._add_noise(obs), info

    def render(self):
        """Delegate rendering to the wrapped environment."""
        return self.env.render()

    def close(self):
        """Close the wrapped environment so its resources are released."""
        self.env.close()

In [None]:
class WindDisturbanceEnv(gym.Env):
    """Wrap an environment and randomly override the agent's action.

    On each step, with probability ``disturb_prob``, the action chosen by the
    agent is discarded and replaced by a random sample from the action space.
    This is the notebook's stand-in for a drone being jerked around by wind.
    ``reset``, ``render`` and ``close`` are delegated to the wrapped
    environment.

    Args:
        env: The environment to wrap.
        force_scale: Stored on the instance but not used by any method of
            this class; it has no effect on the disturbance.
        disturb_prob: Probability, per step, that the agent's action is
            replaced by a random one.
    """

    def __init__(self, env, force_scale=1.0, disturb_prob=0.2):
        super().__init__()
        self.env = env
        self.observation_space = env.observation_space
        self.action_space = env.action_space
        self.force_scale = force_scale
        self.disturb_prob = disturb_prob

    def step(self, action):
        """Step the wrapped env, possibly with a random action substituted."""
        # Simulates wind pushing in a random direction: the commanded action
        # is thrown away entirely and a random one is executed instead.
        if np.random.rand() < self.disturb_prob:
            action = self.action_space.sample()
        return self.env.step(action)

    def reset(self, **kwargs):
        """Reset the wrapped environment and return its result unchanged."""
        return self.env.reset(**kwargs)

    def render(self):
        """Delegate rendering to the wrapped environment."""
        return self.env.render()

    def close(self):
        """Close the wrapped environment so its resources are released."""
        self.env.close()

In [None]:
# Install swig and the Box2D extra of Gymnasium.
# NOTE(review): none of the visible cells create a Box2D environment (only
# "PyFlyt/QuadX-Hover-v4" is used) — confirm these installs are still needed.
!pip install swig
!pip install "gymnasium[box2d]"



In [None]:
# Sweep over wind-disturbance probabilities: for each level, train a fresh PPO
# agent on the disturbed hover task, evaluate it on the same disturbed
# environment, and record (disturbance level, mean reward).
wind_levels = [0.0, 0.05, 0.1, 0.2, 0.3]
performance = []

for noise in wind_levels:
    print(f"\nTraining with noise level: {noise}")

    # create and wrap environment in varying levels
    base_env = gym.make("PyFlyt/QuadX-Hover-v4", render_mode=None)
    noisy_env = WindDisturbanceEnv(base_env, disturb_prob=noise)

    try:
        # training agent
        model = PPO("MlpPolicy", noisy_env, verbose=0)
        model.learn(total_timesteps=25000)
        model.save(f"ppo_noise_{noise:.2f}")

        # evaluating the model
        mean_reward, std_reward = evaluate_policy(model, noisy_env, n_eval_episodes=5)
    finally:
        # A new simulator-backed env is created on every iteration; close it
        # here (even if training fails) so instances do not pile up.
        base_env.close()

    performance.append((noise, mean_reward))
    print(f"Noise: {noise}, Mean Reward: {mean_reward}")

# plotting performance vs wind-disturbance results
noise_vals, rewards = zip(*performance)
plt.plot(noise_vals, rewards, marker='o')
# The swept quantity is the disturb_prob passed to WindDisturbanceEnv, not an
# observation-noise standard deviation, so the figure is labelled accordingly.
plt.title("Wind Disturbance vs. Performance")
plt.xlabel("Wind Disturbance Probability")
plt.ylabel("Mean Reward")
plt.grid(True)
plt.show()


Training with noise level: 0.0
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             



[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
Noise: 0.0, Mean Reward: -73.57070897594095

Training with noise level: 0.05
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                    

In [None]:
# # create the original environment
# base_env = gym.make("PyFlyt/QuadX-Hover-v4")  # Example environment
# # wrap it with the wind disturbance wrapper
# env = WindDisturbanceEnv(base_env, disturb_prob = .2)
# obs = env.reset()

[A                             [A


In [None]:
# # now you can use the environment with the added wind disturbance
# obs = env.reset()
# for _ in range(100):
#   action = env.action_space.sample()
#   obs, reward, terminated, truncated, info = env.step(action)
#   print(reward)
#   if terminated or truncated:
#     obs, _ = env.reset()

In [None]:
# from stable_baselines3 import PPO
# model = PPO("MlpPolicy", env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# model.learn(total_timesteps=100000)

[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                         

<stable_baselines3.ppo.ppo.PPO at 0x7a9b50404a10>

In [None]:
# mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
# print(f"Mean reward: {mean_reward} +/- {std_reward}")



[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
[A                             [A
Mean reward: -65.36227559765102 +/- 18.297199358127273


In [None]:
# Start again from an empty results list, discarding any (level, mean reward)
# pairs collected earlier in the session.
performance = list()

In [None]:
#env.render()
# .3 wind disturbance = Mean reward: -65.36227559765102 +/- 18.297199358127273

In [None]:

#wind environment data
# Training with noise level: 0.0


# Noise: 0.0, Mean Reward: -73.2433608725667

# Training with noise level: 0.05



# Noise: 0.05, Mean Reward: -71.67692965114256

# Training with noise level: 0.1



# Noise: 0.1, Mean Reward: -74.75786998979747

# Training with noise level: 0.2


# Noise: 0.2, Mean Reward: -77.9028786212206

# Training with noise level: 0.3


# Noise: 0.3, Mean Reward: -88.15445534883766
