In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from utilities import get_agent
from environments import OctorotorEnvSelector
from systems.pid_agent import PIDAgent
from multirotor.helpers import DataLog
from systems.long_blending import softmax

In [2]:
def get_tte(initial_pos: tuple, waypoints: np.ndarray, x: np.ndarray, y:np.ndarray, z:np.ndarray) -> np.ndarray:
        """
        Calculates the trajectory tracking error. 
        The distance between the current point and the vector between previous and next wp. Uses ||v1 x v2|| / ||v1||.

        Parameters
        ----------
        initial_pos : tuple  
            the initial position of the UAV.
        waypoints : np.ndarray 
            the reference positions at each point in time.
        x : np.ndarray 
            the x positions of the UAV.
        y : np.ndarray 
            the y positions of the UAV.
        z : np.ndarray
            the z positions of the UAV.

        Returns
        -------
        np.ndarray 
            the trajectory tracking error at each point in time.
        """
        ttes = []
        prev = initial_pos
        for i, waypoint in enumerate(waypoints):
            if i > 0 and not np.array_equal(waypoints[i-1], waypoints[i]):
                prev = waypoints[i-1]

            v1 = waypoint - prev
            v2 = np.array([x[i],y[i],z[i]]) - prev
            tte = np.linalg.norm(np.cross(v1, v2)) / (np.linalg.norm(v1) + 1e-6)
            ttes.append(tte)
                
        return np.array(ttes)

def toc(tte: np.ndarray):
    corridor = 5
    return len(tte[tte > corridor]) / 2

def completed_mission(waypoints: np.ndarray, x: np.ndarray, y: np.ndarray, z: np.ndarray, radius: float = 0.65):
        for waypoint in waypoints:
            reached_waypoint = False

            for position in zip(x,y,z):
                dist = np.linalg.norm(waypoint - position)

                if dist <= radius:
                    reached_waypoint = True
                    break

            if not reached_waypoint:
                return False
            
        return True

In [3]:
env_selector = OctorotorEnvSelector()
pid_sl_params = {'steps_u':50, 'scaling_factor':0, 'window_distance':10}
pid_sl_agent = PIDAgent()

pid_params = {'steps_u':50, 'scaling_factor':0, 'window_distance':1000}
pid_agent = PIDAgent()

study, blending_agent, blending_params = get_agent('blending@softmax@scaled', filepath='BlendingEnv')
blending_params['steps_u'] = 50
blending_params['bounding_rect_length']=1000
blending_params['window_distance']=10
blending_params['scaling_factor']=5

study, full_agent, full_params = get_agent('allwind')
full_params['steps_u'] = 50
full_params['bounding_rect_length']=1000
full_params['window_distance']=10

[I 2023-09-14 00:40:22,921] Using an existing study with name 'blending@softmax@scaled' instead of creating a new one.
[I 2023-09-14 00:40:25,210] Using an existing study with name 'allwind' instead of creating a new one.


In [4]:
all_agents = [pid_agent, pid_sl_agent, blending_agent, full_agent]
all_params = [pid_params, pid_sl_params, blending_params, full_params]
types = ["sliding", "sliding", "blending", "sliding"]
names = ['PID', 'PID SL', 'Blending', 'Full Agent']

In [5]:
nasa_wp = np.asarray([ # defines a real trajectory shown in a NASA paper
    [164.0146725649829, -0.019177722744643688, 0],
    [165.6418055187678, 111.5351051245816, 0],
    [127.3337449710234, 165.73576059611514, 0],
    [-187.28170707810204, 170.33217775914818, 10],
    [-192.03130502498243, 106.30660058604553, 10],
    [115.89920266153058, 100.8644210617058, 0],
    [114.81859536317643, 26.80923518165946, 0],
    [-21.459931490011513, 32.60508110653609, 0]
])

In [6]:
traj_len = 500 # trajectory seems to be around 500 seconds to complete
direction_changes = [1,2,3]
num_repeat = 25

In [7]:
def sample_wind():
    wind_vec = np.random.uniform(0,10,2)
    mag = np.linalg.norm(wind_vec)
    
    if mag > 10:
        wind_vec = wind_vec * (10/mag)

    return wind_vec

In [8]:
# Evaluates all saved agents with their params on a wind range
def wind_injection(agents, params, types, names, traj_len, direction_changes, num_repeat):
    results = pd.DataFrame(columns=['Agent', 'Changes', 'Total TTE', 'Mean TTE', 'Completed Mission', 'Reward', 'Time Outside Corridor'])
    for num_changes in tqdm(direction_changes):
        for agent, param, env_type, name in tqdm(zip(agents, params, types, names), total=len(agents)):
            for i in range(num_repeat):

                changes_triggered = 0
                split_time = traj_len // num_changes
                random_time = int(np.random.uniform(0,split_time))
                change_time = changes_triggered * split_time + random_time
                
                env = env_selector.get_env(env_type, param, [(0,0),(0,0),(0,0)], nasa_wp)
                done = False
                state = env.reset()
                state = np.array(state, dtype=np.float32)
                log = DataLog(env.base_env.vehicle, env.base_env.ctrl,
                                  other_vars=('reward',))
                index = 0
                while not done:
                    if index == change_time:
                        changes_triggered += 1
                        change_time = changes_triggered * split_time + random_time
                        wind_vec = sample_wind()
                        env.base_env.wind_x = wind_vec[0]
                        env.base_env.wind_y = wind_vec[1]
                        
                    action = agent.predict(state, deterministic=True)[0] 
                    state, reward, done, info = env.step(action)
                    state = np.array(state, dtype=np.float32)
                    log.log(reward=reward)
                    index += 1
                
                log.done_logging()
                traj_err = get_tte(np.array([0,0,0]), log.target.position, log.x, log.y, log.z)
                new_result = {
                    'Agent': name,
                    'Changes': num_changes,
                    'Mean TTE': np.mean(traj_err),
                    'Total TTE': np.sum(traj_err),
                    'Completed Mission': completed_mission(nasa_wp, log.x, log.y, log.z, radius=5),
                    'Reward': np.sum(log.reward),
                    'Time Outside Corridor': toc(traj_err)
                }
                results = pd.concat([results, pd.DataFrame([new_result])], ignore_index=True)

    return results

In [9]:
results = wind_injection(all_agents, all_params, types, names, traj_len, direction_changes, num_repeat)

  0%|                                                                                    | 0/3 [00:00<?, ?it/s]
  0%|                                                                                    | 0/4 [00:18<?, ?it/s][A
  0%|                                                                                    | 0/3 [00:18<?, ?it/s]


KeyboardInterrupt: 

In [None]:
results.to_csv('./data/wind_changing.csv')

In [5]:
results = pd.read_csv('./data/wind_changing.csv')

In [3]:
results

Unnamed: 0.1,Unnamed: 0,Agent,Changes,Total TTE,Mean TTE,Completed Mission,Reward,Time Outside Corridor
0,0,PID,1,1323.780828,6.214933,True,6713.8916,53.5
1,1,PID,1,1323.780828,6.214933,True,6713.8916,53.5
2,2,PID,1,1971.598059,5.257595,False,-17177.5310,23.5
3,3,PID,1,1323.780828,6.214933,True,6713.8916,53.5
4,4,PID,1,2421.128238,5.696772,False,-14201.9270,170.0
...,...,...,...,...,...,...,...,...
295,295,Full Agent,3,1283.938127,2.547496,True,7115.5474,10.0
296,296,Full Agent,3,1526.632160,2.987538,True,4758.1850,52.5
297,297,Full Agent,3,1194.769951,2.370575,True,8036.6934,5.0
298,298,Full Agent,3,1063.374760,2.017789,True,9343.0230,0.0


In [18]:
results[results['Agent']=='PID'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].mean()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1866.85698,7.512132,61.24,-399.898902,0.88
2,2131.920586,7.18664,80.68,-2010.116692,0.92
3,2122.899821,7.132749,89.82,-2079.172088,0.92


In [19]:
results[results['Agent']=='PID'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].std()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2114.44024,6.646919,30.742587,21861.914589,0.331662
2,2453.562,5.578039,46.321989,24727.997319,0.276887
3,1372.122008,3.890877,51.823394,14193.538891,0.276887


In [20]:
results[results['Agent']=='PID SL'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].mean()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,909.250552,1.686916,30.7,5062.919637,0.6
2,1385.137163,2.395555,55.1,-1162.687113,0.52
3,1699.285873,2.647801,59.92,-4768.383345,0.44


In [21]:
results[results['Agent']=='PID SL'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].std()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,516.690829,1.354105,55.600697,12035.901143,0.5
2,766.651298,1.750486,73.037662,13675.501022,0.509902
3,985.134173,1.519987,59.285798,12334.354208,0.506623


In [22]:
results[results['Agent']=='Full Agent'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].mean()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1340.131091,2.515391,18.44,5674.133728,0.92
2,1440.192572,2.711602,32.98,4588.723955,0.92
3,1495.557282,2.863008,29.56,5057.162308,1.0


In [23]:
results[results['Agent']=='Full Agent'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].std()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,460.491577,0.729965,24.1287,6266.763996,0.276887
2,423.804328,0.768083,40.132458,6824.512027,0.276887
3,367.683983,0.67397,21.272694,3637.285876,0.0


In [24]:
results[results['Agent']=='Blending'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].mean()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,388.477714,0.619835,0.0,16018.135,1.0
2,443.032185,0.694451,0.0,15472.86904,1.0
3,735.54307,1.151539,0.0,12585.49092,1.0


In [25]:
results[results['Agent']=='Blending'].groupby(by=['Changes'])[['Total TTE', 'Mean TTE', 'Time Outside Corridor', 'Reward', 'Completed Mission']].std()

Unnamed: 0_level_0,Total TTE,Mean TTE,Time Outside Corridor,Reward,Completed Mission
Changes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,11.217058,0.014832,0.0,111.964675,0.0
2,1.878943,0.002731,0.0,17.9927,0.0
3,6.46237,0.012387,0.0,64.6954,0.0


## Statistical Tests

In [7]:
from scipy import stats

In [8]:
t_stat, p_value = stats.ttest_ind(results[results['Agent']=='Full Agent']['Mean TTE'], results[results['Agent']=='PID SL']['Mean TTE'])

In [9]:
p_value

0.02578146377996868

In [10]:
t_stat, p_value = stats.ttest_ind(results[results['Agent']=='Full Agent']['Mean TTE'], results[results['Agent']=='Blending']['Mean TTE'])
p_value

1.2506478880032633e-46

In [11]:
t_stat, p_value = stats.ttest_ind(results[results['Agent']=='PID SL']['Time Outside Corridor'], results[results['Agent']=='PID']['Time Outside Corridor'])
p_value

0.0017317478029655992