# Evaluate Trained Cardinal Wind Agents

In [1]:
from systems.long_multirotor_sliding_error import LongTrajEnv

from typing import Union, Iterable, List
from copy import deepcopy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm.autonotebook import tqdm, trange
import optuna

from rl import learn_rl, transform_rl_policy, evaluate_rl, PPO, load_agent
from multirotor.simulation import Multirotor
from multirotor.helpers import DataLog
from multirotor.visualize import plot_datalog
from multirotor.controller import Controller
from multirotor.trajectories import Trajectory, GuidedTrajectory
from multirotor.controller.scurves import SCurveController
from systems.multirotor_sliding_error import MultirotorTrajEnv, VP
from multirotor.controller import (
    AltController, AltRateController,
    PosController, AttController,
    VelController, RateController,
    Controller
)
from scripts.opt_multirotorenv import get_established_controller
from utilities import get_agent
import pickle

In [None]:
# Load the artefacts for the 'midwind' configuration via the project helper.
# NOTE(review): presumably (tuning study, trained agent, best hyperparameter dict) — confirm in utilities.get_agent.
study, best_agent, best_params = get_agent('midwind')

In [None]:
# Override two loaded hyperparameters for this evaluation run.
# 'bounding_rect_length' is passed as the Trajectory `resolution` inside get_long_env.
best_params['bounding_rect_length']=200
# 'steps_u' is forwarded to the base environment by get_long_env / get_env.
best_params['steps_u']=50

In [None]:
# Echo the hyperparameters (including the overrides above) in the notebook output.
best_params

In [None]:
def get_env(wind_ranges, scurve=False, **kwargs):
    """
    Build the base trajectory-following environment.

    Parameters
    ----------
    wind_ranges
        Wind specification, forwarded unchanged to ``MultirotorTrajEnv``.
    scurve : bool
        Accepted for call compatibility; not used by this function.
    **kwargs
        Must contain ``safety_radius``, ``steps_u``, ``scaling_factor`` and
        ``seed`` (a missing key raises ``KeyError``). Any other keys are
        ignored.

    Returns
    -------
    MultirotorTrajEnv
        Environment using the module-level ``VP`` and the established
        controller factory.
    """
    return MultirotorTrajEnv(
        safety_radius=kwargs['safety_radius'],
        vp=VP,
        get_controller_fn=lambda m: get_established_controller(m),
        steps_u=kwargs['steps_u'],
        scaling_factor=kwargs['scaling_factor'],
        wind_ranges=wind_ranges,
        proximity=5,  # have to get within 5m of waypoint
        seed=kwargs['seed'],
    )

In [None]:
def get_long_env(best_params, wind_range):
    """
    Build a ``LongTrajEnv`` that flies a 100 x 100 square starting at the origin.

    Parameters
    ----------
    best_params : dict
        Must provide ``steps_u``, ``scaling_factor`` and
        ``bounding_rect_length`` (the latter is used as the trajectory
        ``resolution``).
    wind_range
        Wind specification passed to the base environment.

    Returns
    -------
    LongTrajEnv
        Wraps a base environment created by ``get_env`` with a 5 m safety
        radius and seed 0; direction randomisation is disabled.
    """
    # Settings for the base environment; the tuned values are read first so a
    # missing key fails before any trajectory is generated.
    base_settings = {
        'safety_radius': 5,  # safety radius of 5m
        'seed': 0,
        'get_controller_fn': lambda m: get_established_controller(m),
        'vp': VP,
        'steps_u': best_params['steps_u'],
        'scaling_factor': best_params['scaling_factor'],
    }

    # Corners of the square, visited in this order and ending back at the origin.
    corners = np.array([[100,0,0], [100,100,0], [0,100,0], [0,0,0]])
    trajectory = Trajectory(
        None,
        points=corners,
        resolution=best_params['bounding_rect_length'],
    )
    dense_waypoints = trajectory.generate_trajectory(curr_pos=np.array([0,0,0]))

    return LongTrajEnv(
        waypoints=dense_waypoints,
        base_env=get_env(wind_ranges=wind_range, **base_settings),
        initial_waypoints=corners,
        randomize_direction=False,
        # window_distance=best_params['window_distance']
    )

In [None]:
def get_tte(initial_pos: tuple, waypoints: np.ndarray, x: np.ndarray, y:np.ndarray, z:np.ndarray) -> np.ndarray:
    """
    Calculate the trajectory tracking error at every time step.

    The error is the perpendicular distance between the vehicle position and
    the line through the previous and the current reference waypoint,
    computed as ``||v1 x v2|| / ||v1||`` where ``v1`` points from the previous
    waypoint to the current one and ``v2`` from the previous waypoint to the
    vehicle.

    The "previous" waypoint starts as ``initial_pos`` and is updated to
    ``waypoints[i-1]`` whenever the reference changes between steps ``i-1``
    and ``i``.

    If the current segment has zero length (the reference coincides with the
    previous waypoint, e.g. the first reference equals ``initial_pos``) the
    line is undefined; the plain distance to that point is returned instead
    of NaN.

    Parameters
    ----------
    initial_pos : tuple
        the initial position of the UAV.
    waypoints : np.ndarray
        the reference positions at each point in time.
    x : np.ndarray
        the x positions of the UAV.
    y : np.ndarray
        the y positions of the UAV.
    z : np.ndarray
        the z positions of the UAV.

    Returns
    -------
    np.ndarray
        the trajectory tracking error at each point in time.
    """
    ttes = []
    segment_start = np.asarray(initial_pos, dtype=float)
    for i, waypoint in enumerate(waypoints):
        # A change of reference means the old reference becomes the start of
        # the segment currently being tracked.
        if i > 0 and not np.array_equal(waypoints[i-1], waypoint):
            segment_start = np.asarray(waypoints[i-1], dtype=float)

        v1 = waypoint - segment_start
        v2 = np.array([x[i], y[i], z[i]]) - segment_start
        segment_length = np.linalg.norm(v1)
        if segment_length == 0:
            # Degenerate segment: avoid 0/0 and measure distance to the point.
            ttes.append(np.linalg.norm(v2))
        else:
            ttes.append(np.linalg.norm(np.cross(v1, v2)) / segment_length)

    return np.array(ttes)

In [None]:
# Wind cases the agent is evaluated on. Each case holds three pairs, one per
# wind component; both entries of a pair are equal, so the wind is constant.
# NOTE(review): pairs are presumably (min, max) bounds per axis — confirm in the env.
#   index 0      : no wind
#   index 1-3    : +5 / +7 / +10 on the second component ("North" sections)
#   index 4-6    : -5 / -7 / -10 on the second component ("South" sections)
#   index 7-9    : +5 / +7 / +10 on the first component  ("East" sections)
#   index 10-12  : -5 / -7 / -10 on the first component  ("West" sections)
all_wind_ranges = [[(0, 0), (0, 0), (0, 0)]] + [
    [(first, first), (second, second), (0, 0)]
    for first, second in (
        (0, 5), (0, 7), (0, 10),
        (0, -5), (0, -7), (0, -10),
        (5, 0), (7, 0), (10, 0),
        (-5, 0), (-7, 0), (-10, 0),
    )
]

### 0 m/s wind

In [None]:
# No-wind baseline: case 0, all three wind pairs are (0, 0).
wind_range = all_wind_ranges[0]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Echo the hyperparameters used to build the environment above.
best_params

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward
# and collecting every action the agent emits.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
vels = []
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    # action[1] = action[1] - 1
    vels.append(action)
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

# Scale the raw actions by scaling_factor / 2 for plotting against velocities.
# NOTE(review): presumably converts normalised actions to velocity units — confirm.
vels = np.array(vels) * best_params['scaling_factor']/2
log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Component 0 of the logged velocity and of the logged target velocity,
# with the scaled agent actions (component 0) overlaid.
plt.plot(log.velocity[:,0], label='vel')
plt.plot(log.target.velocity[:,0], label='target vel')
plt.plot(vels[:,0], label='agent actions')
# plt.plot(log.target.position[:,0]) add in the change in waypoint
plt.legend()

In [None]:
# Component 1 of the logged velocity and of the logged target velocity,
# with the scaled agent actions (component 1) overlaid.
plt.plot(log.velocity[:,1], label='vel')
plt.plot(log.target.velocity[:,1], label='target vel')
plt.plot(vels[:,1], label='agent actions')
plt.legend()

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

## North

### 5 m/s wind

In [None]:
# Case 1: (5, 5) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[1]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 7 m/s wind

In [None]:
# Case 2: (7, 7) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[2]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 10 m/s wind

In [None]:
# Case 3: (10, 10) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[3]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

## South

### 5 m/s wind

In [None]:
# Case 4: (-5, -5) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[4]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 7 m/s wind

In [None]:
# Case 5: (-7, -7) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[5]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward
# and collecting every action the agent emits.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

vels = []
# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    vels.append(action)
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

# Raw (unscaled) actions stacked into one array, one row per step.
vels = np.array(vels)
log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Component 1 of the logged velocity and of the logged target velocity,
# with the agent's actions (component 1) overlaid.
plt.plot(log.velocity[:,1], label='vel')
plt.plot(log.target.velocity[:,1], label='target vel')
# Scale the raw actions by scaling_factor / 2, the factor the other action
# plots in this notebook use, rather than a hard-coded 15 that silently goes
# stale when the hyperparameter changes.
plt.plot(vels[:,1] * best_params['scaling_factor']/2, label='agent actions')
plt.legend()

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 10 m/s wind

In [None]:
# Case 6: (-10, -10) on the second wind component, zero elsewhere.
wind_range = all_wind_ranges[6]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward
# and collecting every action the agent emits (kept as a plain list here).
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
vels = []
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    vels.append(action)
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Component 1 of the agent's actions over the episode, scaled by
# scaling_factor / 2 (the factor the other action plots in this notebook use)
# rather than a hard-coded 15.
plt.plot(np.array(vels)[:,1] * best_params['scaling_factor']/2)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

## East

### 5 m/s wind

In [None]:
# Case 7: (5, 5) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[7]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 7 m/s wind

In [None]:
# Case 8: (7, 7) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[8]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 10 m/s wind

In [None]:
# Case 9: (10, 10) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[9]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

## West

### 5 m/s wind

In [None]:
# Case 10: (-5, -5) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[10]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()

In [None]:
# Flown path in the x-y plane.
plt.plot(log.x, log.y)

In [None]:
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 7 m/s wind

In [None]:
# Case 11: (-7, -7) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[11]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward
# and collecting every action the agent emits.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
vels = []

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    vels.append(action)
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

log.done_logging()
# Raw (unscaled) actions stacked into one array, one row per step.
vels = np.array(vels)

In [None]:
# Flown path in the x-y plane for the 7 m/s West case.
plt.title("Wind 7 m/s West")
plt.plot(log.x, log.y)

In [None]:
# NOTE(review): the title says "Agent Actions", but the data plotted here are
# the logged target positions and state columns 12/13.
plt.title("Agent Actions for 7 m/s Wind West")
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')
# plt.xlim(30, 37)

In [None]:
plt.title("Agent Actions for 7 m/s Wind West")
# plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Flown path coloured by the Euclidean norm of the raw (unscaled) action at each step.
# NOTE(review): no legend or colorbar is drawn in this cell, so the 'Speed'
# label and the colour scale are not shown on the figure.
plt.scatter(log.x, log.y, c=np.linalg.norm(vels, axis=1), label='Speed')

In [None]:
# Component 0 of the logged velocity over the episode, one sample per logged step.
plt.title("X Velocity 7 m/s Wind West")
plt.plot(log.velocity[:,0])
plt.xlabel("Timesteps")

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

### 10 m/s wind

In [None]:
# Case 12: (-10, -10) on the first wind component, zero elsewhere.
wind_range = all_wind_ranges[12]
# Build the long square-trajectory environment for this wind case.
env = get_long_env(best_params, wind_range)

In [None]:
# Run one episode with the deterministic policy, logging the per-step reward
# and collecting every action the agent emits.
state = env.reset()
# The log is created after reset(), from the env's current vehicle/controller.
log = DataLog(
    env.base_env.vehicle,
    env.base_env.ctrl,
    other_vars=('reward',),
)
vels = []

# NOTE(review): total_reward is initialised but never updated or read in the visible cells.
total_reward = 0
while True:
    action = best_agent.predict(state, deterministic=True)[0]
    vels.append(action)
    state, reward, done, info = env.step(action)
    log.log(reward=reward)
    if done:
        break

# Raw (unscaled) actions stacked into one array, one row per step.
vels = np.array(vels)
log.done_logging()

In [None]:
# Flown path in the x-y plane for the 10 m/s West case.
plt.title("Wind 10 m/s West")
plt.plot(log.x, log.y)

In [None]:
# NOTE(review): the title says "Agent Actions", but the data plotted here are
# the logged target positions and state columns 12/13.
plt.title("Agent Actions for 10 m/s Wind West")
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# Logged state columns 12 and 13 as red crosses.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,12], log.states[:,13], marker='x', color='red')
# Zoom: only x in [30, 37] and y in [-5, 100] are shown.
plt.ylim(-5,100)
plt.xlim(30, 37)

In [None]:
# Component 0 of the logged velocity over the episode.
plt.title("X Velocity 10 m/s West Wind")
plt.plot(log.velocity[:,0])

In [None]:
# Episode return: sum of the per-step rewards recorded in the log.
np.sum(log.reward)

In [None]:
# Component 0 of the logged velocity and of the logged target velocity, with
# the agent's actions (component 0) scaled by scaling_factor / 2 overlaid.
plt.plot(log.velocity[:,0], label='vel')
plt.plot(log.target.velocity[:,0], label='target vel')
plt.plot(vels[:,0]*(best_params['scaling_factor']/2), label='agent action')
plt.legend()

In [None]:
def calculate_safe_sliding_bound(reference_point, intersection_point, distance=5):
    """
    Return a point no further than ``distance`` from ``intersection_point``,
    heading towards ``reference_point``.

    Parameters
    ----------
    reference_point : array-like
        The point to move towards.
    intersection_point : array-like
        The point to start from.
    distance : float
        Maximum step length along the line towards the reference.

    Returns
    -------
    np.ndarray
        ``reference_point`` (as an array) when it lies within ``distance`` of
        ``intersection_point``; otherwise the point exactly ``distance`` away
        from ``intersection_point`` on the straight line to the reference.
    """
    goal = np.array(reference_point)
    start = np.array(intersection_point)

    # Offset from the start to the goal and its length.
    offset = goal - start
    gap = np.linalg.norm(offset)

    # The goal is already within reach: use it directly.
    if gap <= distance:
        return goal

    # Otherwise advance exactly `distance` along the direction of the goal.
    return start + (distance / gap) * offset

In [None]:
# Example: the reference is 90 units away along the first axis, so with the
# default distance of 5 the result is [15, 0, 0].
calculate_safe_sliding_bound([100,0,0], [10,0,0])

In [None]:
# Component 1 of the logged velocity and of the logged target velocity.
plt.plot(log.velocity[:,1], label='vel')
plt.plot(log.target.velocity[:,1], label='target vel')
plt.legend()

In [None]:
# NOTE(review): the title says "Agent Actions", but only the logged target
# positions are plotted here.
plt.title("Agent Actions for 10 m/s Wind West")
# Logged target positions (first two components), coloured by step index.
plt.scatter(log.target.position[:,0], log.target.position[:,1], c=range(len(log.target.position)))
# plt.scatter(log.x, log.y, c=log.target.velocity[:,0])
# plt.xlim(30, 37)

In [None]:
# Scatter of logged state column 18 against column 19.
# NOTE(review): meaning of these columns depends on the env's state layout — confirm.
plt.scatter(log.states[:,18], log.states[:,19])