##### CityLearnART built using these commands
- conda create -n CityLearnART python=3.10 setuptools==66 wheel<0.40 numpy pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.6 ipywidgets -c pytorch -c conda-forge
- conda activate CityLearnART
- pip install adversarial-robustness-toolbox[pytorch] citylearn==2.0b5 stable-baselines3[extra]==1.8.0

restrictions (painful lessons):
- CityLearn only works with PyTorch v1
- CityLearn only works with SB3<2 ref: https://github.com/intelligent-environments-lab/CityLearn/issues/63, for this reason https://gymnasium.farama.org/content/migration-guide/#environment-reset
- SB3 <2 requires gym 0.21 
- gym requires setuptools==66 and wheel<0.40 ref:https://github.com/openai/gym/issues/3200, https://github.com/openai/gym/issues/3211, https://stackoverflow.com/questions/76129688/why-is-pip-install-gym-failing-with-python-setup-py-egg-info-did-not-run-succ 
- ART requires a version of numpy later than 1.21

In [1]:
from stable_baselines3 import A2C #Approaches SAC's performance in 2021 challenge benchmark, though PPO did well in this example: https://www.aicrowd.com/showcase/going-below-1-0-score-with-stablebaseline3

from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper, DiscreteActionWrapper
from citylearn.data import DataSet


import numpy as np

In [2]:
dataset_name = 'citylearn_challenge_2022_phase_1' #only action is electrical storage

In [3]:
schema = DataSet.get_schema(dataset_name)

In [4]:
list(schema['buildings'].keys())

['Building_1', 'Building_2', 'Building_3', 'Building_4', 'Building_5']

By removing active actions we ensure that there is only one device to control in each building, however multiple buildings will still result in the agent controlling multiple devices, so we mut remove all but one building.

In [5]:
#set env parameters
env = CityLearnEnv(schema, 
                   central_agent=True, #necessary for single agent
                   #simulation_end_time_step=1000
                   )
#wrap env
BINS = 10
env = DiscreteActionWrapper(env, 
                            bin_sizes=[{'electrical_storage':BINS}]*len(schema['buildings']) #this must be a list dicts for each action, see TabularQLearningWrapper example from https://colab.research.google.com/drive/1rZn6qLEIHMlu2iwNl1jKqvcEet8lS33A#scrollTo=1rkt9jnNuiZE
                            )
env = NormalizedObservationWrapper(env)
env = StableBaselines3Wrapper(env)
#Set RL algo parameters
policy_kwargs = dict(net_arch=[256, 256])
agent = A2C('MlpPolicy', 
            env,
            device='cuda',
            policy_kwargs=policy_kwargs)

In [6]:
env.observation_names

[['month',
  'day_type',
  'hour',
  'outdoor_dry_bulb_temperature',
  'outdoor_dry_bulb_temperature_predicted_6h',
  'outdoor_dry_bulb_temperature_predicted_12h',
  'outdoor_dry_bulb_temperature_predicted_24h',
  'outdoor_relative_humidity',
  'outdoor_relative_humidity_predicted_6h',
  'outdoor_relative_humidity_predicted_12h',
  'outdoor_relative_humidity_predicted_24h',
  'diffuse_solar_irradiance',
  'diffuse_solar_irradiance_predicted_6h',
  'diffuse_solar_irradiance_predicted_12h',
  'diffuse_solar_irradiance_predicted_24h',
  'direct_solar_irradiance',
  'direct_solar_irradiance_predicted_6h',
  'direct_solar_irradiance_predicted_12h',
  'direct_solar_irradiance_predicted_24h',
  'carbon_intensity',
  'non_shiftable_load',
  'solar_generation',
  'electrical_storage_soc',
  'net_electricity_consumption',
  'electricity_pricing',
  'electricity_pricing_predicted_6h',
  'electricity_pricing_predicted_12h',
  'electricity_pricing_predicted_24h',
  'non_shiftable_load',
  'solar_ge

Display the default reward function

In [7]:
agent.policy

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=63, out_features=256, bias=True)
      (1): Tanh()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=63, out_features=256, bias=True)
      (1): Tanh()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=256, out_features=50, bias=True)
  (value_net): Linear(in_features=256, out_features=1, bias=True)
)

In [8]:
agent.policy.mlp_extractor.policy_net

Sequential(
  (0): Linear(in_features=63, out_features=256, bias=True)
  (1): Tanh()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Tanh()
)

In [9]:
agent.policy.action_net

Linear(in_features=256, out_features=50, bias=True)

In [10]:
from copy import deepcopy
policy_net = deepcopy(agent.policy.mlp_extractor.policy_net) #copies shared net rather than referencing
policy_net.add_module('4', agent.policy.action_net)
policy_net

Sequential(
  (0): Linear(in_features=63, out_features=256, bias=True)
  (1): Tanh()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Tanh()
  (4): Linear(in_features=256, out_features=50, bias=True)
)

Below from Bing chat:

In [11]:
import torch
import torch.nn as nn

# Define a simple model
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        x = torch.relu(x)
        x = self.fc3(x)
        return x

# Create an instance of the model
model = MyModel()

# Define some example input data
x = torch.randn(1, 10)

# Get the original outputs from the model
original_outputs = model(x)

# Create a new model that wraps the original model and applies additional operations to its outputs
class NewModel(nn.Module):
    def __init__(self, original_model):
        super(NewModel, self).__init__()
        self.original_model = original_model

    def forward(self, x):
        x = self.original_model(x)
        return x[:, 0]

new_model = NewModel(model)

# Get the modified outputs from the new model
modified_outputs = new_model(x)

# Compare the original and modified outputs
print('Original outputs:', original_outputs)
print('Modified outputs:', modified_outputs)


Original outputs: tensor([[-0.5308, -0.2107]], grad_fn=<AddmmBackward0>)
Modified outputs: tensor([-0.5308], grad_fn=<SelectBackward0>)


In [15]:
class OutputSelectionWrapper(nn.Module):
    def __init__(self, original_model, low_slice=None, high_slice=None):
            super(OutputSelectionWrapper, self).__init__()
            self.original_model = original_model
            self.low_slice=low_slice
            self.high_slice=high_slice

    def forward(self, x):
        x = self.original_model(x)
        return x[:, self.low_slice:self.high_slice]

In [13]:
help(policy_net)

Help on Sequential in module torch.nn.modules.container object:

class Sequential(torch.nn.modules.module.Module)
 |  Sequential(*args)
 |  
 |  A sequential container.
 |  Modules will be added to it in the order they are passed in the
 |  constructor. Alternatively, an ``OrderedDict`` of modules can be
 |  passed in. The ``forward()`` method of ``Sequential`` accepts any
 |  input and forwards it to the first module it contains. It then
 |  "chains" outputs to inputs sequentially for each subsequent module,
 |  finally returning the output of the last module.
 |  
 |  The value a ``Sequential`` provides over manually calling a sequence
 |  of modules is that it allows treating the whole container as a
 |  single module, such that performing a transformation on the
 |  ``Sequential`` applies to each of the modules it stores (which are
 |  each a registered submodule of the ``Sequential``).
 |  
 |  What's the difference between a ``Sequential`` and a
 |  :class:`torch.nn.ModuleList`? 

In [16]:
building1_policy_net = OutputSelectionWrapper(policy_net,0,BINS-1) 

In [17]:
building1_policy_net

OutputSelectionWrapper(
  (original_model): Sequential(
    (0): Linear(in_features=63, out_features=256, bias=True)
    (1): Tanh()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): Tanh()
    (4): Linear(in_features=256, out_features=50, bias=True)
  )
)

Initialization specifically for the PyTorch-based implementation:
https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/estimators/classification.html#pytorch-classifier

In [29]:
from art.estimators.classification import PyTorchClassifier as classifier
from torch.nn import CrossEntropyLoss

victim_policy = classifier(
    model=building1_policy_net,
    loss=CrossEntropyLoss(), #most common func for classification
    nb_classes=BINS,
    input_shape=agent.observation_space.shape,
    device_type='gpu'
    )

In [30]:
from art.attacks.evasion import AutoProjectedGradientDescent as APGD
from art.attacks.evasion import AutoConjugateGradient as ACG

APGDatk = APGD(estimator=victim_policy)
ACGatk = ACG(estimator=victim_policy)

In [31]:
obs = env.reset()

In [32]:
agent.observation_space.shape == obs.shape

True

generate attack using initial observation to verify pipeline

Same issue as I had in sinergym, need a different branch to solve the issue ref: https://github.com/Trusted-AI/adversarial-robustness-toolbox/issues/2165

The input must have shape (n_sample, n_features), as obs is a 1d array of features with shape (20,) expand dims adds the n_samples for a shape of (1,20)

In [33]:
APGDatk.generate(np.expand_dims(obs, axis=0), verbose=False)

AutoPGD - restart:   0%|          | 0/5 [00:00<?, ?it/s]

AutoPGD - batch:   0%|          | 0/1 [00:00<?, ?it/s]

AutoPGD - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

array([[-0.2330127 , -0.05000001,  0.5535534 ,  0.44644663,  0.7       ,
         0.8       ,  0.2413534 ,  0.1774436 ,  0.94661653,  0.8413534 ,
         0.5222222 ,  0.48888886,  0.34444445,  1.0888889 , -0.3       ,
        -0.2754179 ,  0.6478859 ,  0.3       ,  0.3       ,  0.4049318 ,
         0.55519414,  0.3       ,  0.77462256,  0.5797812 , -0.3       ,
        -0.3       ,  0.6901262 ,  0.33030304, -0.26969698,  0.33030304,
         0.33030304,  0.01984614, -0.12999684,  0.1779058 ,  0.68919575,
         0.33030304,  0.33030304, -0.26969698, -0.26969698,  0.3       ,
        -0.3       ,  0.3       ,  1.2888019 , -0.26969698,  0.33030304,
        -0.26969698,  0.33030304,  0.11767554, -0.3       , -0.3       ,
         0.6902224 , -0.26969698,  0.33030304, -0.26969698, -0.26969698,
        -0.14380042, -0.3       ,  0.3       ,  0.68896556,  0.33030304,
         0.33030304,  0.33030304,  0.33030304]], dtype=float32)

In [34]:
adv_obs = ACGatk.generate(np.expand_dims(obs, axis=0), verbose=False)

ACG - restart:   0%|          | 0/5 [00:00<?, ?it/s]

ACG - batch:   0%|          | 0/1 [00:00<?, ?it/s]

ACG - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

ACG - batch:   0%|          | 0/1 [00:00<?, ?it/s]

ACG - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

ACG - batch:   0%|          | 0/1 [00:00<?, ?it/s]

ACG - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

ACG - batch:   0%|          | 0/1 [00:00<?, ?it/s]

ACG - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

ACG - batch:   0%|          | 0/1 [00:00<?, ?it/s]

ACG - iteration:   0%|          | 0/100 [00:00<?, ?it/s]

In [35]:
adv_obs-obs

array([[ 0.2570156 ,  0.3       , -0.3       , -0.00201663,  0.15979874,
        -0.05045426,  0.28522855,  0.30000004, -0.08188039,  0.3       ,
        -0.20640892,  0.3       , -0.17198583, -0.28007215, -0.3       ,
        -0.3       , -0.01677883, -0.0456723 , -0.24517198, -0.3       ,
        -0.23076278,  0.26577947,  0.3       ,  0.10684139, -0.3       ,
         0.3       , -0.2844051 , -0.26629826,  0.211675  , -0.11561083,
        -0.3       ,  0.1543653 ,  0.24009891,  0.06805073, -0.10612983,
         0.29267356,  0.3       , -0.08641784, -0.00577542,  0.19289085,
        -0.0520905 ,  0.01119445,  0.30000007, -0.10570316,  0.3       ,
        -0.3       , -0.3       ,  0.27844185,  0.07663569, -0.3       ,
        -0.3       , -0.3       ,  0.04294941, -0.11961664, -0.3       ,
        -0.1809127 ,  0.2595473 ,  0.15053672,  0.10703987, -0.3       ,
         0.07359464,  0.24528712, -0.3       ]], dtype=float32)

In [36]:
agent.predict(obs, deterministic=True)

(array([2, 1, 9, 4, 8], dtype=int64), None)

In [37]:
agent.predict(adv_obs, deterministic=True)

(array([[2, 2, 9, 7, 2]], dtype=int64), None)