In [42]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv

from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper, DiscreteActionWrapper
from citylearn.data import DataSet

from captum.attr import IntegratedGradients, FeaturePermutation

import torch

import pandas as pd
import numpy as np

%matplotlib inline

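**TODO:** add a separate test environment (possibly with a different random seed) so the agent is not evaluated on the environment it was trained on.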

In [2]:
# Name of the CityLearn dataset used throughout the notebook.
dataset_name = 'citylearn_challenge_2022_phase_2' # in this dataset the only action is electrical storage

In [3]:
# Fetch the schema for the selected dataset; it is passed to CityLearnEnv below.
schema = DataSet.get_schema(dataset_name)

In [4]:
def make_discrete_env(schema, action_bins: int = 10, bldg: list = None, single_agent: bool = True, seed:int = 0):
    """Build a CityLearn environment with a discrete action space, wrapped for SB3.

    Because ART's attacks are designed for supervised learning, they only work with
    ANNs that have a single label or head; using multiple buildings adds an
    action/head for each, so the default is a single building.

    Parameters
    ----------
    schema
        CityLearn schema passed straight to ``CityLearnEnv``.
    action_bins : int
        Number of bins used to discretize the ``electrical_storage`` action.
    bldg : list, optional
        Building names to include. ``None`` (the default) selects ``['Building_1']``.
    single_agent : bool
        Forwarded to ``CityLearnEnv`` as ``central_agent``.
    seed : int
        Forwarded to ``CityLearnEnv`` as ``random_seed``.

    Returns
    -------
    The environment wrapped, in order, by ``DiscreteActionWrapper``,
    ``NormalizedObservationWrapper`` and ``StableBaselines3Wrapper``.
    """
    # Resolve the default here rather than in the signature: a list literal used as a
    # default is created once and shared by every call (and by whatever keeps a
    # reference to it), so it must not live in the function definition.
    buildings = ['Building_1'] if bldg is None else bldg

    env = CityLearnEnv(schema,
        central_agent=single_agent,
        buildings=buildings,
        random_seed=seed)
    # Because ART attacks are made for classification tasks we need a discrete action space
    env = DiscreteActionWrapper(env, bin_sizes=[{'electrical_storage':action_bins}])
    # Calendar observations are periodically normalized, everything else is min/max normalized
    env = NormalizedObservationWrapper(env)
    # Provides an interface for SB3
    env = StableBaselines3Wrapper(env)
    return env

In [5]:
# Pick the first building listed in the schema and build a single-building,
# 10-bin discrete environment around it.
first_building = list(schema['buildings'])[0]
env = make_discrete_env(
    schema=schema,
    action_bins=10,
    bldg=[first_building],
)

Define RL agent

In [6]:
# Network architecture handed to the MLP policy.
policy_kwargs = dict(net_arch=[256, 256])
# 'auto' uses the GPU when one is available and falls back to the CPU otherwise,
# so the notebook no longer crashes here on machines without CUDA.
agent = PPO('MlpPolicy',
            env,
            device='auto',
            policy_kwargs=policy_kwargs,
            tensorboard_log='logs/Phase1/PPO/',
            )

Train victim agent (Python: Launch Tensorboard)

In [7]:
episodes = 300
T = env.time_steps - 1  # number of steps used per training episode
agent_name = f'default_PPO_{dataset_name}_{episodes}'

# Reuse a previously trained victim if one is stored under this name; train and
# save it otherwise.
try:
    agent = agent.load(path=f"Models/Victim/{agent_name}", env=env)
    print('Model loaded from storage')
except FileNotFoundError:
    # Only a missing checkpoint should trigger (expensive) training. Any other
    # failure -- an incompatible checkpoint, a keyboard interrupt, a typo -- now
    # surfaces instead of silently starting a fresh 300-episode run.
    print("No saved agent found by that name")
    agent.learn(total_timesteps=int(T*episodes), tb_log_name=agent_name, progress_bar=True)
    agent.save(f"Models/Victim/{agent_name}")


Model loaded from storage


In [8]:
def format_kpis(env):
    """Return the non-zero district-level KPIs reported by ``env.evaluate()``.

    The evaluation table is pivoted so that each cost function is a row and each
    name is a column; rows that are empty across all columns are dropped, the
    ``'District'`` column is selected, and entries equal to zero are filtered out.
    Call this after an agent has run in the environment to assess its performance.
    """
    table = env.evaluate().pivot(index='cost_function', columns='name', values='value')
    district = table.dropna(how='all')['District']
    nonzero = district != 0
    return district[nonzero]

In [23]:
def eval_agent(env, agent):
    """Run one deterministic episode and collect its trajectory.

    The environment is reset and stepped with the agent's deterministic
    predictions until ``env.done`` is set.

    Returns a tuple ``(kpis, observations, actions)`` where ``kpis`` comes from
    ``format_kpis(env)`` and the other two are numpy arrays. The observation
    array also holds the final observation, so it has one more entry than the
    action array.
    (Based on the CityLearn quickstart.)
    """
    trajectory_obs, trajectory_actions = [], []

    current_obs = env.reset()

    while not env.done:
        trajectory_obs.append(current_obs)
        chosen, _ = agent.predict(current_obs, deterministic=True)
        trajectory_actions.append(chosen)
        current_obs, _, _, _ = env.step(chosen)

    # The observation returned by the last step has no action associated with it
    trajectory_obs.append(current_obs)

    kpis = format_kpis(env)
    return kpis, np.array(trajectory_obs), np.array(trajectory_actions)

Unperturbed agent performance

In [25]:
# Roll the agent out once on the unperturbed environment and keep the KPIs,
# observations and actions as the baseline for later comparisons.
baseline_kpis, baseline_obs, baseline_a = eval_agent(env,agent)
display(baseline_kpis)

cost_function
annual_peak_average                      1.000000
carbon_emissions_total                   0.889286
cost_total                               0.820249
daily_one_minus_load_factor_average      1.057196
daily_peak_average                       0.912028
electricity_consumption_total            0.894755
monthly_one_minus_load_factor_average    0.986110
ramping_average                          1.171399
zero_net_energy                          1.093327
Name: District, dtype: float64

We'll build our own index of feature names, because CityLearn's observation names do not reflect that each periodic (calendar) feature is split into separate sine and cosine columns.

In [11]:
# Work on a copy: the inserts and suffixes below would otherwise modify the list
# returned by the environment in place, and re-running the cell would then keep
# stacking extra '_cos' / '_sin' entries onto it.
index = list(env.observation_names[0])
# Add a second entry for each periodic feature
# (assumes the first three observations are the periodic ones -- TODO confirm ordering)
for i in [1,3,5]:
    index.insert(i, index[i-1] + '_cos')
# Add the matching suffix to the original entry of each periodic feature
for i in [0,2,4]:
    index[i] += '_sin'

In [50]:
# Character length of the longest feature name
longest_idx = max(map(len, index))

Define our gradient-based attack:

In [12]:
def extract_actor(agent):
    """Extract the policy network from an SB3 actor-critic algorithm as a PyTorch sequential network.

    The returned module is ``agent.policy.mlp_extractor.policy_net`` followed by
    ``agent.policy.action_net`` (registered under the name ``'actions'``).
    Both parts are deep-copied, so the result is fully independent of the agent:
    modifying the returned network never changes the agent's own weights.
    """
    from copy import deepcopy
    policy_net = deepcopy(agent.policy.mlp_extractor.policy_net) # copies the net rather than referencing it
    # Copy the action head as well; attaching the original module would leave the
    # extracted actor sharing its final layer with the live agent.
    policy_net.add_module('actions', deepcopy(agent.policy.action_net))
    return policy_net

In [40]:
# Sequential copy of the agent's policy network, used as the model for attribution
actor = extract_actor(agent)

Exclude the last observation, because there's no corresponding action

In [31]:
# Put the observations on whatever device the actor lives on instead of assuming
# CUDA; a hard-coded device breaks as soon as the model was loaded onto the CPU.
# baseline_obs[:-1] drops the final observation, which has no matching action.
tensor_obs = torch.tensor(baseline_obs[:-1],
                          dtype=torch.float32,
                          device=next(actor.parameters()).device)

In [15]:
# Integrated Gradients attribution method wrapped around the extracted actor
ig = IntegratedGradients(actor)

In [18]:
# Exploratory: print Captum's documentation for attribute() to check its arguments
help(ig.attribute)

Help on method attribute in module captum.attr._core.integrated_gradients:

attribute(inputs: ~TensorOrTupleOfTensorsGeneric, baselines: Union[NoneType, torch.Tensor, int, float, Tuple[Union[torch.Tensor, int, float], ...]] = None, target: Union[NoneType, int, Tuple[int, ...], torch.Tensor, List[Tuple[int, ...]], List[int]] = None, additional_forward_args: Any = None, n_steps: int = 50, method: str = 'gausslegendre', internal_batch_size: Optional[int] = None, return_convergence_delta: bool = False) -> Union[~TensorOrTupleOfTensorsGeneric, Tuple[~TensorOrTupleOfTensorsGeneric, torch.Tensor]] method of captum.attr._core.integrated_gradients.IntegratedGradients instance
    This method attributes the output of the model with given target index
    (in case it is provided, otherwise it assumes that output is a
    scalar) to the inputs of the model using the approach described above.
    
    In addition to that it also returns, if `return_convergence_delta` is
    set to True, integral ap

In [61]:
# Attribute, for every observation, the output of the action the agent actually
# took (used as the target) to the input features, then move the result to numpy.
tensor_obs.requires_grad_()
attr = (
    ig.attribute(tensor_obs, target=baseline_a.flatten().tolist())
    .detach()
    .cpu()
    .numpy()
)

In [62]:
# Mean attribution per feature over all observations, labelled with the feature names
s_ig_avg = pd.Series(attr.mean(axis=0), index=index)

In [63]:
# Integrated Gradients attributions, largest first
s_ig_avg.sort_values(ascending=False)

electrical_storage_soc                        4.897409
hour_sin                                      1.983663
day_type_cos                                  1.180800
diffuse_solar_irradiance_predicted_24h        0.902080
electricity_pricing_predicted_12h             0.787581
diffuse_solar_irradiance                      0.510592
hour_cos                                      0.391376
outdoor_dry_bulb_temperature_predicted_6h     0.340412
outdoor_dry_bulb_temperature                  0.324295
day_type_sin                                  0.238612
outdoor_dry_bulb_temperature_predicted_12h    0.205808
outdoor_relative_humidity                     0.167938
outdoor_dry_bulb_temperature_predicted_24h    0.148229
direct_solar_irradiance_predicted_24h         0.045698
solar_generation                              0.004952
outdoor_relative_humidity_predicted_24h      -0.035277
electricity_pricing                          -0.070747
month_cos                                    -0.089793
diffuse_so

In [43]:
# Feature Permutation attribution method wrapped around the extracted actor
fp = FeaturePermutation(actor)

In [44]:
# Permutation-based attribution for the same observations and the same targets
# (the actions the agent took), converted to a numpy array.
attr = (
    fp.attribute(tensor_obs, target=baseline_a.flatten().tolist())
    .detach()
    .cpu()
    .numpy()
)

In [60]:
# Mean permutation attribution per feature over all observations
s_fp_avg = pd.Series(attr.mean(axis=0), index=index)

Permutation importances in descending order:

In [59]:
# Feature Permutation attributions, largest first
s_fp_avg.sort_values(ascending=False)

electrical_storage_soc                        3.121432
hour_sin                                      1.500013
electricity_pricing_predicted_12h             0.905066
day_type_sin                                  0.774473
day_type_cos                                  0.688497
non_shiftable_load                            0.595734
hour_cos                                      0.574666
direct_solar_irradiance_predicted_12h         0.448235
month_sin                                     0.424114
diffuse_solar_irradiance_predicted_12h        0.329114
diffuse_solar_irradiance_predicted_24h        0.301431
direct_solar_irradiance_predicted_6h          0.298853
direct_solar_irradiance_predicted_24h         0.279034
diffuse_solar_irradiance                      0.273284
electricity_pricing_predicted_24h             0.203601
electricity_pricing                           0.188364
carbon_intensity                              0.163813
outdoor_relative_humidity_predicted_24h       0.151998
diffuse_so