# Wind Impact on Blending Agent Performance

How robust is the RL architecture trained on to blend other RL policies to all wind?

In [12]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from utilities import get_agent
from environments import OctorotorEnvSelector
from systems.pid_agent import PIDAgent
from multirotor.helpers import DataLog
from systems.long_blending import softmax

In [13]:
study, best_agent, best_params = get_agent('blending@softmax@scaled', filepath='BlendingEnv')

best_params['steps_u']=50
best_params['scaling_factor']=5
best_params['bounding_rect_length']=200
best_params['window_distance']=10

[I 2023-09-13 21:54:45,277] Using an existing study with name 'blending@softmax@scaled' instead of creating a new one.


In [14]:
env_selector = OctorotorEnvSelector()
square_wpts = np.array([[100,0,0], [100,100,0], [0,100,0], [0,0,0]])

In [15]:
def run_trajectory(env_selector, wind_ranges: np.ndarray, agent, params):
    env = env_selector.get_env("blending", params, wind_ranges, square_wpts)
    done = False
    state = np.array(env.reset(), dtype=np.float32)
    log = DataLog(env.base_env.vehicle, env.base_env.ctrl,
                      other_vars=('reward',))
    weights = []
    while not done:
        action = agent.predict(state, deterministic=True)[0]
        state, reward, done, info = env.step(action)
        weights.append(softmax(action*5))
        
        state = np.array(state, dtype=np.float32)
        log.log(reward=reward)

    log.done_logging()
    return log, info, np.mean(weights, axis=0), np.std(weights, axis=0)

In [16]:
average_weights = pd.DataFrame(columns=['Wind Speed', 'PID', 'PID Std', 'Mid', 'Mid Std', 'High', 'High Std'])

In [17]:
def run_wind_sweep(results, agent, params):
    for i in tqdm(range(10)):
        log, info, avg_weights, std_weights = run_trajectory(env_selector,[(0,0),(-(i+1),-(i+1)),(0,0)], agent, params)
        new_result = {
            'Wind Speed': i+1,
            'PID': avg_weights[0],
            'PID Std': std_weights[0],
            'Mid': avg_weights[1],
            'Mid Std': std_weights[1],
            'High': avg_weights[2],
            'High Std': std_weights[2],
        }
        results = pd.concat([results, pd.DataFrame([new_result])], ignore_index=True)

    return results

In [18]:
average_weights = run_wind_sweep(average_weights, best_agent, best_params)

  0%|                                                                                | 0/10 [00:00<?, ?it/s][I 2023-09-13 21:54:45,426] Using an existing study with name 'highwind' instead of creating a new one.
[I 2023-09-13 21:54:45,527] Using an existing study with name 'midwind' instead of creating a new one.
 10%|███████▏                                                                | 1/10 [00:05<00:49,  5.52s/it][I 2023-09-13 21:54:50,933] Using an existing study with name 'highwind' instead of creating a new one.
[I 2023-09-13 21:54:51,003] Using an existing study with name 'midwind' instead of creating a new one.
 20%|██████████████▍                                                         | 2/10 [00:11<00:44,  5.53s/it][I 2023-09-13 21:54:56,478] Using an existing study with name 'highwind' instead of creating a new one.
[I 2023-09-13 21:54:56,572] Using an existing study with name 'midwind' instead of creating a new one.
 30%|█████████████████████▌                            

In [19]:
average_weights.to_csv('./data/weights_scaled.csv')

In [20]:
average_weights = pd.read_csv('./data/weights_scaled.csv') # or read cardinal_wind.csv for the Cardinal High agent

In [21]:
average_weights

Unnamed: 0.1,Unnamed: 0,Wind Speed,PID,PID Std,Mid,Mid Std,High,High Std
0,0,1,0.297354,0.033309,0.357713,0.016013,0.344933,0.022447
1,1,2,0.298867,0.033471,0.355906,0.015982,0.345227,0.022602
2,2,3,0.301358,0.033232,0.352949,0.015546,0.345693,0.022816
3,3,4,0.304994,0.033677,0.348575,0.015625,0.346432,0.023001
4,4,5,0.309644,0.033575,0.34323,0.015192,0.347127,0.023194
5,5,6,0.315325,0.033531,0.336742,0.014953,0.347933,0.023372
6,6,7,0.321818,0.033367,0.329464,0.014629,0.348718,0.023463
7,7,8,0.328924,0.032585,0.321321,0.013939,0.349755,0.023289
8,8,9,0.336641,0.031924,0.312735,0.013529,0.350625,0.023051
9,9,10,0.345007,0.030424,0.303748,0.012922,0.351244,0.022306


In [22]:
softmax((np.array([0,0,0.25]))*5)

array([0.18213798, 0.18213798, 0.63572403])