In [1]:
from stable_baselines3 import SAC
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement
from stable_baselines3.common.monitor import Monitor

import gym
import numpy as np
from datetime import datetime

from KBMproject import ATLA
import KBMproject.utilities as utils

from citylearn.data import DataSet

Basic constants

In [2]:
# --- Data ---
# Dataset name handed to citylearn's DataSet.get_schema() when the environments are built.
DATASET_NAME = "citylearn_challenge_2022_phase_2"

# --- Output locations ---
# Prefix for the saved adversary model; the save cell is skipped when this is None.
SAVE_DIR = "Models/ATLA/"
# TensorBoard log directory given to both the loaded victim and the adversary.
LOG_DIR = "logs/Phase3/ATLA/"

# --- Attack strength ---
# Multiplier applied to the per-feature mean difference to bound the perturbation.
# (The identifier's spelling is kept because later cells reference it.)
PERTUBATION_SCALE = 0.5

# --- Runtime ---
# Verbosity forwarded to the SB3 models and the evaluation callback.
VERBOSITY = 0
#EVAL_VERBOSITY = 1
# Device string passed to SAC.
DEVICE = "cuda"

In [3]:
# NOTE(review): BINS and PRE_TRAINING_EPISODES are not read by the cells visible
# here — presumably shared with sibling experiments; confirm before removing.
BINS = 20
R_EXP = 3  # exponent passed to ATLA.NormScaleReward (norm-distance reward)
PRE_TRAINING_EPISODES = 0

# Number of evaluation rounds spread evenly over the adversary's training budget.
EVALS = 20

# Checkpoint of the victim agent that the adversary attacks.
# Forward slashes keep the path portable and avoid the invalid "\V" / "\S" escape
# sequences that the previous backslash-separated, non-raw literal contained.
PRE_TRAINED_AGENT = 'Models/Victim/SAC_citylearn_challenge_2022_phase_2_Building_6_default_rwd_MARLISA_hyperparams_500.zip'

# Adversary training budget, in episodes.
MAX_EPISODES = 200


Define SB3 environments. Note that the eval and training environments must be different objects.

In [4]:
# Both environments are built from the same dataset schema; only the seed differs.
kwargs = {'schema': DataSet.get_schema(DATASET_NAME)}

# Environment the adversary is trained on.
adv_env = utils.make_continuous_env(seed=0, **kwargs)

# Independent environment object used only for periodic evaluation.
adv_eval_env = utils.make_continuous_env(seed=42, **kwargs)

In [5]:
# Episode length in environment steps; later cells multiply by T to turn episode
# counts into SB3 timestep budgets.
# NOTE(review): the reason for the "- 1" is not shown here — presumably the final
# time step produces no transition; confirm against the environment.
T = adv_env.time_steps - 1
print(f'Each episode is {T} timesteps')

Each episode is 8759 timesteps


Define agent (could load/save pretrained agent)

In [6]:
# Restore the victim policy from disk. No environment is attached at load time
# (an `env=agent_env` argument was previously tried here and is disabled).
# `force_reset` is left at its default, which is True for continued training; ref:
# https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#stable_baselines3.ppo.PPO.load
agent = SAC.load(
    path=PRE_TRAINED_AGENT,
    device=DEVICE,
    tensorboard_log=LOG_DIR,
    verbose=VERBOSITY,
    print_system_info=True,  # prints current vs. saved-model system info
)

== CURRENT SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.25.1
- Gym: 0.21.0

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-10-10.0.19045-SP0 10.0.19045
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.23.5
- Gym: 0.21.0



The number of timesteps an agent has trained for is non-zero when it is loaded from storage; this must be added to the pause and total timesteps so training is not prematurely aborted.

In [7]:
# Snapshot of the timestep counter stored in the loaded victim checkpoint.
# NOTE(review): no cell visible here reads this value — presumably intended for
# offsetting training budgets as the accompanying markdown describes; confirm.
agent_n_ts = agent.num_timesteps

In [8]:
# Month-day-hour tag (no zero padding) that is appended to the run name.
now = datetime.now()
dtg = '-'.join(str(field) for field in (now.month, now.day, now.hour))

Name contains RL algorithm, episodes per alternation and total episodes, followed by the date-time with hour precision

Define adversary's reward

In [9]:
# Candidate adversary reward built on the training environment, using exponent R_EXP.
# NOTE(review): np.inf is passed positionally — presumably the norm order; confirm
# against ATLA.NormScaleReward. The wrapper cell keeps `adv_reward=rwd` commented
# out, so this object is not consumed by the cells visible here.
rwd = ATLA.NormScaleReward(adv_env, np.inf, exp=R_EXP)

Choose features which will be perturbed. The mask below leaves the temporal features unperturbed

In [10]:
# Zero-based indices 6..30, i.e. features 7-31 when counted from one, are perturbed;
# the six leading features are left untouched.
mask = np.arange(6, 31, 1)

Define an adversary action space in [-1,1] for ATLA.BScaledSumPrevProj, which scales a maximum perturbation

In [11]:
# Adversary action space: one value in [-1, 1] per perturbed feature.
# The bounds are created directly as float32 so they already match the Box dtype;
# float64 bounds get down-cast by gym, which emits a "Box bound precision lowered
# by casting to float32" warning on every run.
normalized_a_space = gym.spaces.Box(
    low=-np.ones(mask.shape, dtype=np.float32),
    high=np.ones(mask.shape, dtype=np.float32),
    dtype=np.float32,
)

  logger.warn(


##### Parameterize the B function
- The adversary adds a bounded perturbation to the current observation with B(s) as BScaledSum
- The max mean difference represents the largest change between two samples for each feature minus the mean difference. This will be the maximum perturbation size for our adversary. Using the max difference represents the worst-case scenario we expect to encounter based on our training data. Because this is derived from the difference between samples, we subtract the mean difference so that, on average, the inter-sample change will not exceed the max recorded value. This is our boundary for the adversary's perturbation.
see bline obs analysis.ipynb in the PPO 500 results

In [12]:
# Per-feature statistics (31 entries each) copied from the baseline observation
# analysis referenced in the accompanying markdown.
#
# max_mean_diff: per the markdown, the largest sample-to-sample change of each
# feature minus its mean difference.
# NOTE(review): not read by the cells visible here — the perturbation bound below
# is built from `mean_diff` instead; confirm which one is intended.
max_mean_diff = np.array([0.24977164, 0.24977164, 0.34341758, 0.69515118, 0.04606484,
                        0.04608573, 0.26690566, 0.26690266, 0.2669048 , 0.26690781,
                        0.62865948, 0.62865314, 0.62865568, 0.62865948, 0.52596206,
                        0.52596487, 0.52598294, 0.52596206, 0.75557218, 0.75558416,
                        0.75558188, 0.75557218, 0.28202381, 0.61189055, 0.00253725,
                        0.47459565, 0.0052361 , 0.89720221, 0.89720221, 0.89720221,
                        0.89720221])

# mean_diff: presumably the mean sample-to-sample change of each feature (TODO
# confirm against the analysis notebook). Indexed with `mask` and scaled by
# PERTUBATION_SCALE to form the adversary's maximum perturbation.
mean_diff = np.array([0.12511418, 0.12511418, 0.18184461, 0.35953119, 0.10637713,
                     0.10636668, 0.15978021, 0.15978171, 0.15978064, 0.15977914,
                     0.36344801, 0.36345118, 0.36344991, 0.36344801, 0.3260062 ,
                     0.3260048 , 0.32599576, 0.3260062 , 0.44802713, 0.44802114,
                     0.44802228, 0.44802713, 0.16781362, 0.36620854, 0.00152669,
                     0.31896562, 0.00326229, 0.52109586, 0.52109586, 0.52109586,
                     0.52109586])

In [13]:
# Per-feature perturbation bound: the mean difference of each masked feature,
# scaled by PERTUBATION_SCALE. Fancy indexing already yields a fresh float array
# with the same shape as `mask`.
B_params = {
    'max_perturbation': mean_diff[mask] * PERTUBATION_SCALE,
}

Define adversary's environment

In [14]:
# Wrapper settings shared by the training and evaluation environments.
kwargs = {
    # 'adv_reward': rwd,  # disabled: use default negative agent reward
    'victim': agent,
    'B': ATLA.BScaledSum,
    'action_space': normalized_a_space,  # [-1,1] box for the scaled B
    'feature_mask': mask,
    'B_kwargs': B_params,
}

# Evaluation environment: ATLA wrapper first, then SB3's Monitor on top.
adv_eval_env = Monitor(ATLA.AdversaryATLAWrapper(env=adv_eval_env, **kwargs))

# Training environment: only the ATLA wrapper is applied in this cell.
adv_env = ATLA.AdversaryATLAWrapper(env=adv_env, **kwargs)


In [15]:
# Run SB3's environment checker on the wrapped training environment.
# NOTE(review): the recorded run of this cell raised "ValueError: setting an array
# element with a sequence ... inhomogeneous shape ... (31,)". Presumably an
# observation contains a nested sequence that newer NumPy refuses to coerce (the
# checkpoint was saved under NumPy 1.23.5, this kernel reports 1.25.1) — confirm in
# AdversaryATLAWrapper before training.
check_env(adv_env,)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (31,) + inhomogeneous part.

Define adversary

In [None]:
# Fresh SAC adversary (net_arch=[256, 256]) trained on the Monitor-wrapped
# adversary environment; logging and device settings mirror the victim's.
policy_kwargs = {'net_arch': [256, 256]}
adversary = SAC(
    'MlpPolicy',
    Monitor(adv_env),
    device=DEVICE,
    policy_kwargs=policy_kwargs,
    tensorboard_log=LOG_DIR,
    verbose=VERBOSITY,
)

In [None]:
# Run name: adversary algorithm class, class of the wrapper's B function,
# perturbation scale, and the month-day-hour tag. Adjacent literals concatenate
# into the same single string as before.
adv_name = (
    f'{adversary.__class__.__name__} adversary '
    f'{adversary.env.get_attr("B")[0].__class__.__name__} '
    f'{PERTUBATION_SCALE}x mean diff {dtg}'
)

Define the adversary's perturbation function for the victim environment. We use a function which applies the corresponding B(s) to the adversary's prediction 

Define ATLA evaluation callbacks

In [None]:
# Early-stopping rule run after each evaluation (see the SB3 callbacks docs for
# the exact semantics of `max_no_improvement_evals` and `min_evals`).
adv_stopping = StopTrainingOnNoModelImprovement(
    max_no_improvement_evals=5,
    min_evals=10,
    verbose=2,  # report why training stopped
)

# Evaluate once every MAX_EPISODES // EVALS episodes, expressed in timesteps.
adv_eval_callback = EvalCallback(
    adv_eval_env,
    eval_freq=MAX_EPISODES // EVALS * T,
    callback_after_eval=adv_stopping,
    verbose=VERBOSITY,
)


Conduct ATLA. Note:
- the agents are not reset between iterations, this prevents attributes like scaled exploration and learning rates from resetting.
- A callback pauses training after a number of episodes has elapsed but before the max training budget is reached (does this work better than resetting?). 

In [None]:
# Train the adversary for up to MAX_EPISODES episodes; the evaluation callback
# may stop the run earlier.
adversary.learn(
    total_timesteps=MAX_EPISODES * T,
    callback=[
        adv_eval_callback,
        ATLA.AdvDistanceTensorboardCallback(),
        ATLA.HParamCallback(),
    ],
    tb_log_name=adv_name,
    reset_num_timesteps=False,  # lets training continue where it left off between .learn() calls
    progress_bar=True,  # note: the progress bar really slows cell execution
    log_interval=1,  # start logging after the first episode, useful for debugging
)

Output()

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (31,) + inhomogeneous part.

Save models

In [None]:
# Persist the trained adversary under SAVE_DIR, named after the run; setting
# SAVE_DIR to None disables saving.
if SAVE_DIR is not None:
    adversary.save(f'{SAVE_DIR}{adv_name}')

Training episodes before early stopping

In [None]:
# Whole episodes the adversary has trained for: total timesteps // episode length.
adversary.num_timesteps//T

200