# Wind Impact on PID Performance

How robust is the RL architecture, trained only on N (north) wind, to wind from all directions? Many functions here are the same as in the other `Wind Impact` notebooks.

In [1]:
from systems.long_multirotor import LongTrajEnv

from typing import Union, Iterable, List
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm.autonotebook import tqdm, trange
import optuna

from rl import learn_rl, transform_rl_policy, evaluate_rl, PPO, load_agent
from multirotor.simulation import Multirotor
from multirotor.helpers import DataLog
from multirotor.visualize import plot_datalog
from multirotor.controller import Controller
from multirotor.trajectories import Trajectory, GuidedTrajectory
from multirotor.controller.scurves import SCurveController
from multirotor.coords import body_to_inertial, inertial_to_body, direction_cosine_matrix, euler_to_angular_rate
from systems.multirotor import MultirotorTrajEnv, VP
from multirotor.controller import (
    AltController, AltRateController,
    PosController, AttController,
    VelController, RateController,
    Controller
)
from scripts.opt_pidcontroller import (
    get_controller, make_disturbance_fn,
    apply_params as apply_params_pid, get_study as get_study_pid
)
from scripts.opt_multirotorenv import apply_params, get_study, get_established_controller

In [2]:
def get_env(wind_ranges, scurve=False, **kwargs):
    """
    Build the base trajectory environment for a given wind setting.

    Parameters
    ----------
    wind_ranges
        forwarded unchanged to the environment as `wind_ranges`.
    scurve : bool
        accepted but not used by this function.
    **kwargs
        must contain 'steps_u' and 'scaling_factor' (a missing key raises
        KeyError); any other entries are ignored.

    Returns
    -------
    MultirotorTrajEnv
        an environment with a fixed seed of 0 and a safety radius and
        proximity of 5.
    """
    return MultirotorTrajEnv(
        safety_radius=5,
        vp=VP,
        get_controller_fn=lambda m: get_established_controller(m),
        steps_u=kwargs['steps_u'],
        scaling_factor=kwargs['scaling_factor'],
        wind_ranges=wind_ranges,
        proximity=5,
        seed=0,
    )

In [4]:
# Template of a study's log directory; '%s' is filled with the study name.
log_root_path = './tensorboard/MultirotorTrajEnv/optstudy/%s/'


def get_study_agent_params(name):
    """
    Load a study along with the agent and parameters of its best trial.

    Parameters
    ----------
    name : str
        the study name; also selects the log directory the agent is read from.

    Returns
    -------
    tuple
        (study, best_agent, best_params), where the agent is loaded from
        '<log root>/<trial number, zero-padded to 3 digits>/run_1/agent'.
    """
    study = get_study(name)
    trial_number = study.best_trial.number
    agent_path_template = log_root_path + '%03d/run_1/agent'
    best_agent = load_agent(agent_path_template % (name, trial_number))
    return study, best_agent, study.best_params

In [5]:
# Load the 'n@random5m' study plus the saved agent and hyperparameters of its best trial.
study, best_agent, best_params = get_study_agent_params('n@random5m')

[I 2023-08-17 15:45:32,772] Using an existing study with name 'n@random5m' instead of creating a new one.


In [6]:
# Show the best trial's hyperparameters; get_env reads 'steps_u' and 'scaling_factor' from these.
best_params

{'bounding_rect_length': 5,
 'steps_u': 21,
 'scaling_factor': 0.9500000000000001,
 'learning_rate': 4.399194315916977e-05,
 'n_epochs': 5,
 'n_steps': 144,
 'batch_size': 32,
 'training_interactions': 100000}

In [7]:
# Corner points of the square mission: 100 units per side at z = 0, ending back at the origin.
square_np = np.array([[100,0,0], [100,100,0], [0,100,0], [0,0,0]])
# The trajectory resolution is taken from the best trial's 'bounding_rect_length'.
square_traj = Trajectory(None, points=square_np, resolution=best_params['bounding_rect_length'])
# Waypoint list starting from the origin.
# NOTE(review): presumably this expands the corners into intermediate waypoints - confirm against Trajectory.
square_wpts = square_traj.generate_trajectory(curr_pos=np.array([0,0,0]))

In [143]:
def get_tte(initial_pos: tuple, waypoints: np.ndarray, x: np.ndarray, y:np.ndarray, z:np.ndarray) -> np.ndarray:
    """
    Calculates the trajectory tracking error.

    For every sample, this is the perpendicular distance between the current
    position and the line through the previous and the current reference
    waypoint, computed as ||v1 x v2|| / ||v1||. When the two reference points
    coincide (v1 has zero length) the line is undefined, so the plain distance
    to that point, ||v2||, is used instead of producing NaN.

    Parameters
    ----------
    initial_pos : tuple
        the initial position of the UAV; used as the "previous" point until
        the reference waypoint changes for the first time.
    waypoints : np.ndarray
        the reference positions at each point in time (one row per sample).
    x : np.ndarray
        the x positions of the UAV.
    y : np.ndarray
        the y positions of the UAV.
    z : np.ndarray
        the z positions of the UAV.

    Returns
    -------
    np.ndarray
        the trajectory tracking error at each point in time.
    """
    waypoints = np.asarray(waypoints)
    prev = np.asarray(initial_pos)
    ttes = []
    for i, waypoint in enumerate(waypoints):
        # The reference just switched: the waypoint that was active until now
        # becomes the start of the new segment.
        if i > 0 and not np.array_equal(waypoints[i-1], waypoint):
            prev = waypoints[i-1]

        v1 = waypoint - prev
        v2 = np.array([x[i], y[i], z[i]]) - prev
        segment_length = np.linalg.norm(v1)
        if segment_length > 0:
            tte = np.linalg.norm(np.cross(v1, v2)) / segment_length
        else:
            # Degenerate segment: fall back to the point-to-point distance so
            # the mean/sum of the errors are not poisoned by NaN.
            tte = np.linalg.norm(v2)
        ttes.append(tte)

    return np.array(ttes, dtype=float)

In [144]:
def toc(tte: np.ndarray, corridor: float = 5, rate: float = 100) -> float:
    """
    Time spent outside the tracking corridor.

    Counts the samples whose tracking error is strictly greater than
    `corridor` and divides that count by `rate`.

    Parameters
    ----------
    tte : np.ndarray
        the trajectory tracking error at each sample (any array-like works).
    corridor : float
        the error threshold above which a sample counts as outside.
    rate : float
        number of samples per unit of reported time. The default of 100 keeps
        the previously hard-coded divisor.
        NOTE(review): presumably the simulation's samples per second - confirm.

    Returns
    -------
    float
        the number of samples outside the corridor divided by `rate`.
    """
    errors = np.asarray(tte)
    samples_outside = int(np.count_nonzero(errors > corridor))
    return samples_outside / rate

In [145]:
def completed_mission(waypoints: np.ndarray, x: np.ndarray, y: np.ndarray, z: np.ndarray, radius: float = 0.65):
    """
    Check whether the flown path passed close to every waypoint.

    Parameters
    ----------
    waypoints : np.ndarray
        the waypoints that must be visited, one row per waypoint.
    x, y, z : np.ndarray
        the recorded positions of the UAV along each axis.
    radius : float
        a waypoint counts as reached when some recorded position lies within
        this distance of it (inclusive).

    Returns
    -------
    bool
        True if every waypoint was reached (also when there are no waypoints),
        False as soon as one waypoint was never approached closely enough.
    """
    def was_reached(target):
        # Stops at the first recorded position that is close enough.
        return any(
            np.linalg.norm(target - point) <= radius
            for point in zip(x, y, z)
        )

    return all(was_reached(target) for target in waypoints)

In [146]:
def run_trajectory(wind_ranges: np.ndarray, agent, params):
    """
    Fly the square mission once under the given wind and record it.

    Parameters
    ----------
    wind_ranges : np.ndarray
        wind setting handed to `get_env`.
    agent
        the policy; `agent.predict(state, deterministic=True)[0]` supplies
        the action at every step.
    params : dict
        keyword arguments forwarded to `get_env`.

    Returns
    -------
    tuple
        (log, info): the finished DataLog, which also records the per-step
        'reward', and the `info` returned by the final environment step.
    """
    base_env = get_env(wind_ranges, **params)
    env = LongTrajEnv(
        waypoints=square_wpts,
        base_env=base_env,
        initial_waypoints=square_np,
        random_cardinal_wind=False,
    )
    state = env.reset()
    log = DataLog(
        env.base_env.vehicle,
        env.base_env.ctrl,
        other_vars=('reward',),
    )

    # The episode always takes at least one step; stop once the env reports done.
    while True:
        action = agent.predict(state, deterministic=True)[0]
        state, reward, done, info = env.step(action)
        log.log(reward=reward)
        if done:
            break

    log.done_logging()
    return log, info

In [147]:
# Constant-wind test cases keyed by '<direction><speed>' ('zero' is the no-wind baseline).
# Each value is handed to the environment as `wind_ranges`: three pairs whose two
# entries are equal here (presumably (low, high) bounds per axis, so the wind is fixed).
# Cardinal cases put the full speed on one axis; diagonal cases put speed / sqrt(2)
# on each of the first two axes (e.g. 3.53553391 for speed 5).
# NOTE(review): judging by the key names the axis order is (east-west, north-south,
# vertical) - confirm against MultirotorTrajEnv.
wind_range_dict = {
    'zero': [(0,0), (0,0), (0,0)],
    'n5': [(0,0), (5,5), (0,0)],
    'n7': [(0,0), (7,7), (0,0)],
    'n10': [(0,0), (10,10), (0,0)],
    's5': [(0,0), (-5,-5), (0,0)],
    's7': [(0,0), (-7,-7), (0,0)],
    's10': [(0,0), (-10,-10), (0,0)],
    'e5': [(5,5), (0,0), (0,0)],
    'e7': [(7,7), (0,0), (0,0)],
    'e10': [(10,10), (0,0), (0,0)],
    'w5': [(-5,-5), (0,0), (0,0)],
    'w7': [(-7,-7), (0,0), (0,0)],
    'w10': [(-10,-10), (0,0), (0,0)],
    'nw5': [(-3.53553391,-3.53553391), (3.53553391,3.53553391), (0,0)],
    'nw7': [(-4.94974747,-4.94974747), (4.94974747,4.94974747), (0,0)],
    'nw10': [(-7.07106781,-7.07106781), (7.07106781,7.07106781), (0,0)],
    'sw5': [(-3.53553391,-3.53553391), (-3.53553391,-3.53553391), (0,0)],
    'sw7': [(-4.94974747,-4.94974747), (-4.94974747,-4.94974747), (0,0)],
    'sw10': [(-7.07106781,-7.07106781), (-7.07106781,-7.07106781), (0,0)],
    'ne5': [(3.53553391,3.53553391), (3.53553391,3.53553391), (0,0)],
    'ne7': [(4.94974747,4.94974747), (4.94974747,4.94974747), (0,0)],
    'ne10': [(7.07106781,7.07106781), (7.07106781,7.07106781), (0,0)],
    'se5': [(3.53553391,3.53553391), (-3.53553391,-3.53553391), (0,0)],
    'se7': [(4.94974747,4.94974747), (-4.94974747,-4.94974747), (0,0)],
    'se10': [(7.07106781,7.07106781), (-7.07106781,-7.07106781), (0,0)],
}

In [148]:
# Empty results table; run_wind_sweep adds one row per wind case using these column names.
wind_results = pd.DataFrame(columns=['Wind', 'Total TTE', 'Mean TTE', 'Completed Mission', 'Reward', 'Time Outside Corridor'])

In [149]:
def run_wind_sweep(results, wind_dict, agent, params):
    """
    Fly the square mission once per wind case and tabulate the metrics.

    The rows are collected in a list and turned into a DataFrame once, rather
    than concatenating inside the loop (quadratic copying, and concatenating
    onto an empty frame is deprecated in recent pandas and degrades dtypes).

    Parameters
    ----------
    results : pd.DataFrame
        existing results to append to; it is not modified.
    wind_dict : dict
        maps a wind label to the wind ranges passed to `run_trajectory`.
    agent
        the policy to evaluate.
    params : dict
        environment parameters forwarded to `run_trajectory`.

    Returns
    -------
    pd.DataFrame
        `results` followed by one new row per entry of `wind_dict`, with a
        fresh 0..n-1 index. If `results` has no rows, its column order is kept
        and the new columns get their naturally inferred dtypes. If
        `wind_dict` is empty, `results` is returned unchanged.
    """
    records = []
    for wind, wind_ranges in tqdm(wind_dict.items()):
        log, _ = run_trajectory(wind_ranges, agent, params)
        # NOTE(review): columns 12 onwards of the logged state are presumably the
        # reference waypoint at each step - confirm against the environment.
        traj_err = get_tte(np.array([0,0,0]), log.states[:,12:], log.x, log.y, log.z)
        records.append({
            'Wind': wind,
            'Mean TTE': np.mean(traj_err),
            'Total TTE': np.sum(traj_err),
            # A waypoint counts as reached within 5 units here, not the function's default radius.
            'Completed Mission': completed_mission(square_wpts, log.x, log.y, log.z, radius=5),
            'Reward': np.sum(log.reward),
            'Time Outside Corridor': toc(traj_err)
        })

    if not records:
        return results

    new_rows = pd.DataFrame(records)
    if len(results) == 0:
        # Nothing to append to: adopt the caller's column order without
        # concatenating an empty frame.
        extra_columns = [col for col in new_rows.columns if col not in results.columns]
        return new_rows.reindex(columns=list(results.columns) + extra_columns)

    return pd.concat([results, new_rows], ignore_index=True)

In [150]:
# Runs one full mission per entry of wind_range_dict (25 simulations).
# NOTE: `wind_results` is both input and output, so re-running this cell appends a
# second copy of every row; re-create the empty table first.
wind_results = run_wind_sweep(wind_results, wind_range_dict, best_agent, best_params)

  0%|          | 0/25 [00:00<?, ?it/s]

In [151]:
# NOTE(review): this writes to 'n_wind.csv', while the load cell reads './data/n_wind.csv' -
# confirm the intended path. to_csv also writes the index by default, which comes back as
# an 'Unnamed: 0' column on reload; pass index=False to avoid that.
# wind_results.to_csv('n_wind.csv')

In [8]:
# Load previously saved sweep results; this replaces any `wind_results` computed in this session.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the loaded frame look like
# index columns written by earlier saves - confirm and drop them if so.
wind_results = pd.read_csv('./data/n_wind.csv')

In [9]:
# Display the loaded per-wind results table.
wind_results

Unnamed: 0.1,Unnamed: 0,Wind,Total TTE,Mean TTE,Completed Mission,Reward,Time Outside Corridor
0,0,zero,1084.71105,1.347467,True,5974.0,0.02
1,1,n5,918.059637,1.169503,True,6000.0,0.0
2,2,n7,1006.376032,1.293542,True,6000.0,0.0
3,3,n10,2401.420841,2.56015,True,3413.0,1.23
4,4,s5,1705.118274,1.928867,True,4478.0,0.73
5,5,s7,2415.365889,2.537149,True,3183.0,1.34
6,6,s10,5157.495353,2.54189,False,-1279.0,0.61
7,7,e5,2679.461705,2.551868,True,5643.0,0.17
8,8,e7,5199.839365,3.722147,True,3012.0,1.44
9,9,e10,10217.492486,12.536801,False,-14841.0,7.08


In [154]:
# Averages across all wind cases; 'Completed Mission' is boolean, so its mean is the
# fraction of missions completed.
wind_results[['Mean TTE', 'Time Outside Corridor', 'Completed Mission']].mean()

Mean TTE                 2.5473
Time Outside Corridor    0.9148
Completed Mission        0.7600
dtype: float64

In [155]:
# Spread of the same three metrics across the wind cases (pandas' default sample standard deviation).
wind_results[['Mean TTE', 'Time Outside Corridor', 'Completed Mission']].std()

Mean TTE                 2.247544
Time Outside Corridor    2.043113
Completed Mission        0.435890
dtype: float64