In [1]:
from stable_baselines3 import SAC

from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper
from citylearn.data import DataSet

from art.estimators.classification import PyTorchClassifier as classifier
from art.attacks.evasion import AutoConjugateGradient as ACG

from torch.nn import CrossEntropyLoss, MSELoss
from torch import from_numpy

from tqdm import tqdm

from typing import List
import numpy as np

In [2]:
dataset_name = 'citylearn_challenge_2022_phase_1' #only action is electrical storage

In [3]:
schema = DataSet.get_schema(dataset_name)

In [4]:
def make_SB3_env(schema, bldg: list = ['Building_1'], single_agent: bool = True, seed:int =0):
    """Because ART's attacks are designed for supervised learning they one work with ANNs with a single label or head, using multiple buildings adds an action/head for each"""
    env = CityLearnEnv(schema, 
        central_agent=single_agent, 
        buildings=bldg, 
        random_seed=seed)
    env = NormalizedObservationWrapper(env)
    env = StableBaselines3Wrapper(env)
    return env

In [5]:
env = make_SB3_env(schema=schema)

Define vectorized training environment, seems that citylearn is not compatible: "TypeError: 'StableBaselines3Wrapper' object is not callable" is raised by 

num_cpu = cpu_count()

subproc_vec_env = DummyVecEnv([make_discrete_env(schema=schema, action_bins=20, seed=i) for i in range(num_cpu)])

Define RL agent

In [6]:
policy_kwargs = dict(net_arch=[256, 256])
agent = SAC('MlpPolicy', 
            env,
            device='cuda',
            policy_kwargs=policy_kwargs,
            tensorboard_log='logs/Phase1/SAC/',
            )

In [7]:
agent.critic

ContinuousCritic(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (qf0): Sequential(
    (0): Linear(in_features=32, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=1, bias=True)
  )
  (qf1): Sequential(
    (0): Linear(in_features=32, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=1, bias=True)
  )
)

In [8]:
agent.actor

Actor(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (latent_pi): Sequential(
    (0): Linear(in_features=31, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
  )
  (mu): Linear(in_features=256, out_features=1, bias=True)
  (log_std): Linear(in_features=256, out_features=1, bias=True)
)

Train victim agent (Python: Launch Tensorboard)

In [7]:
episodes = 20
T = env.time_steps - 1
agent_name = f'default_SAC_{dataset_name}_{episodes}'

try:
    agent = agent.load(path=f"Models/Victim/{agent_name}", env=env)
except:
    print("No saved agent found by that name")
    agent.learn(total_timesteps=int(T*episodes), tb_log_name=agent_name, progress_bar=True)
    agent.save(f"Models/Victim/{agent_name}")


In [8]:
def display_kpis(env):
    """displays the KPIs from the evnironment's most recent timestep.
    This function can be called after an agent runs in a test env to evaluate performance"""

    kpis = env.evaluate().pivot(index='cost_function', columns='name', values='value')
    kpis = kpis.dropna(how='all')
    display(kpis['District']) #the district values are all we need with a single building (cells are either repeated or NaN)

In [9]:
def eval_agent(env, agent):
    """displays the KPIs for each building and district
    ref quickstart"""
    observations = env.reset()

    while not env.done:
        actions, _ = agent.predict(observations, deterministic=True)
        observations, _, _, _ = env.step(actions)

    display_kpis(env)

Unperturbed agent performance

In [10]:
eval_agent(env,agent)

cost_function
annual_peak_average                      1.001616
carbon_emissions_total                   0.994202
cost_total                               0.969934
daily_one_minus_load_factor_average      0.993918
daily_peak_average                       1.008839
discomfort_delta_average                 0.000000
discomfort_delta_maximum                 0.000000
discomfort_delta_minimum                 0.000000
electricity_consumption_total            0.997156
monthly_one_minus_load_factor_average    0.997524
ramping_average                          1.232901
zero_net_energy                          1.046888
Name: District, dtype: float64

In [11]:
def eval_rand_attack(agent, env, eps=0.3):
    """displays the KPIs for each building and district withc random noise in the observations
    ref quickstart"""
    observations = env.reset()
    asr = 0

    while not env.done:
        noisey_obs = observations + np.random.rand(*observations.shape)*eps
        a_adv, _ = agent.predict(noisey_obs, deterministic=True)
        actions, _ = agent.predict(observations, deterministic=True)
        if a_adv!=actions: #check if the perturbation changed the agent's action
            asr+=1
        observations, _, _, _ = env.step(a_adv)

    asr/=env.time_steps
    print(f'The Adversarial success rate is: {asr}')
    display_kpis(env)

Model performance while observations are perturbed by random noise [0,1). Note that all the observation values are normalized to [0,1]

In [12]:
eval_rand_attack(agent, env, eps=0.3)

The Adversarial success rate is: 0.9998858447488584


cost_function
annual_peak_average                      1.188810
carbon_emissions_total                   1.039336
cost_total                               1.019354
daily_one_minus_load_factor_average      0.999066
daily_peak_average                       1.089598
discomfort_delta_average                 0.000000
discomfort_delta_maximum                 0.000000
discomfort_delta_minimum                 0.000000
electricity_consumption_total            1.042609
monthly_one_minus_load_factor_average    1.005882
ramping_average                          1.429770
zero_net_energy                          1.062409
Name: District, dtype: float64

Define our gradient based attack:

In [13]:
def extract_SAC_policy(agent):
    """Extracts the policy network from and SB3 actor critic algorithm as a pytorch seuqential network"""
    from copy import deepcopy
    policy_net = deepcopy(agent.actor.latent_pi) #copies shared net rather than referencing/changing the agent
    policy_net.add_module('output', agent.actor.mu)
    return policy_net

Does the victim policy need to be processed as ART regressor->BlackBoxClassifier->attack?

In [31]:
def define_attack(agent, ART_atk, loss_fn=CrossEntropyLoss(), nb_classes:int=10, **kwargs):
    """returns an ART attack function based on the input gym enviornment, SB3 Agent and ART attack class"""
    
    agent_policy = extract_SAC_policy(agent)

    #Treat the regressor as a classifier
    victim_policy = classifier(
        model=agent_policy,
        loss=loss_fn,
        nb_classes=nb_classes,
        input_shape=agent.observation_space.shape,
        )
        
    return ART_atk(victim_policy, verbose=False, **kwargs)

The black box classifier does not support cross entropy or DLR, and selecting neith results in an input type error. Maybe a differnet attack will work?

In [40]:
attack = define_attack(agent, ACG, **ACG_parameters)

In [41]:
def eval_untargeted_attack(agent, atk, time_steps:int=None, mask:list=None):
    """Evaluates an SB3 agent subject to untargeted observation perturbations generated by an ART evasion attack"""
    observations = env.reset()
    asr = 0
    failed_adv_exs = 0
    if time_steps is None:
        time_steps = env.time_steps - 1
    if mask is None:
        mask=np.ones(agent.observation_space.shape[0]) #1 for all features

    for i in tqdm(range(time_steps)):

        adv_obs = np.expand_dims(observations, axis=0) #ART atks expect a 2d array
        #would using the true label/action imporve the asr? it would hurt adversarial training: https://arxiv.org/abs/1611.01236
        adv_obs = atk.generate(adv_obs, mask=mask) #add a mask (0) for features like time where changes would be obvious
        adv_obs = np.squeeze(adv_obs) #CityLearn envs expect a 1d array
        
        a_adv, _ = agent.predict(adv_obs, deterministic=True)
        actions, _ = agent.predict(observations, deterministic=True)
        if a_adv!=actions: #check if the perturbation changed the agent's action
            asr+=1
        elif np.array_equal(adv_obs, observations): #when the victim's action is unchanged, check if an adv observation was crafted
            failed_adv_exs+=1
        observations, _, _, _ = env.step(a_adv)

        if env.done:
            break

    asr/=time_steps
    print(f'The Adversarial success rate is: {asr}')
    print(f'{failed_adv_exs} adversarial examples were produced but failed to change the victim\'s action')
    display_kpis(env)
    return asr

Before attacking the victim, we must understand what we are perturbing. So we need to see which observations/features are active in the environment:

It seems that all these observations are variable (changes to constant values would indicate perturbations), however perturbations to the date and time would be obvious to an analyst. We will mask these features in our attack to they will not be changed.

In [None]:
#This seems to be missing some observations, like building set-points, these might be added later
#env.observation_names was added in later version, but thatse break display kpis
#observation_mask = dict(zip(observation_names, np.ones(len(observation_names))))
#observation_mask['month'] = 0
#observation_mask['day_type'] = 0
#observation_mask['hour'] = 0
#mask=np.array(list(observation_mask.values()))

In [35]:
mask_time=np.ones(agent.observation_space.shape[0]) #permits attack on all features/observations
mask_time[0:3]=0 #masks the first three observations/features which correspond to the date/time

Using ZOO and Hop Skip Jump raise the same error as ACG, but during generation rather than attack definition. converting the sample to a tensor does remove the error, I suspect the issues is the input type for the classifier.

In [42]:
asr = eval_untargeted_attack(agent, attack, time_steps=20, mask=mask_time)

  0%|          | 0/20 [00:00<?, ?it/s]


RuntimeError: Expected floating point type for target with class probabilities, got Int

Evaluate agent with variable epsilon:
- Start at min value and increase in loop
- try multiple values in parallel
- return stat on the eps used, box plot?