In [1]:
from stable_baselines3 import PPO, SAC
from stable_baselines3.common.env_checker import check_env

import gym
import numpy as np
import pandas as pd

from KBMproject import ATLA
import KBMproject.utilities as utils

from citylearn.data import DataSet

Basic constants

In [2]:
# Dataset name handed to CityLearn's DataSet.get_schema
DATASET_NAME = 'citylearn_challenge_2022_phase_2'

# Saved models loaded further down (PPO victim, SAC adversary)
AGENT = 'Models/ATLA/PPO agent 100 alts over 1000+200 2-3-21.zip'
ADVERSARY = 'Models/Adversary/SAC adversary BScaledSum mean diff 1-15-15.zip'

# Output directory prefix and the label identifying this run in the saved files
SAVE_DIR = 'PPO agent 100 alts over 1000+200 2-3-21 results' + '/'
TEST_NAME = 'SAC adversary BScaledSum mean diff 1-15-15'

# Settings shared by the environments and the model loaders
BINS = 20        # passed as action_bins when building the environments
DEVICE = 'cuda'  # device used when loading both models
VERBOSITY = 0    # verbose level used when loading both models

Define the SB3 environments. Note that the eval and training environments must be different objects.

In [3]:
# One set of keyword arguments so both environments are configured identically.
kwargs = {
    'schema': DataSet.get_schema(DATASET_NAME),
    'action_bins': BINS,
    'seed': 42,
    # NOTE(review): per the original author's note, T was intended to shorten
    # evaluations but does not have that effect; it is left as None.
    'T': None,
}

# Two separate environment objects: one for the agent, one for the adversary.
agent_env = utils.make_discrete_env(**kwargs)
adv_env = utils.make_discrete_env(**kwargs)

In [4]:
# Steps per episode, taken as one less than the environment's time_steps attribute.
T = agent_env.time_steps - 1
print(f'Each episode is {T} timesteps')

Each episode is 8759 timesteps


Define agent (could load/save pretrained agent)

In [5]:
# Load the saved PPO policy and bind it to the agent environment
# (agent_env is still the unwrapped environment at this point).
agent = PPO.load(
    path=AGENT,
    env=agent_env,
    device=DEVICE,
    verbose=VERBOSITY,
    print_system_info=True,  # prints current vs. saved-model system info
)

== CURRENT SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.25.1
- Gym: 0.21.0

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.25.1
- Gym: 0.21.0



Choose features which will be perturbed. The mask below leaves the temporal features unperturbed

In [6]:
# 0-based observation indices 6..30, i.e. the 7th through 31st features are
# perturbed; indices 0-5 are left untouched.
mask = np.arange(6, 31)

Define an adversary action space in [-1, 1] for ATLA.BScaledSum, which scales a maximum perturbation

In [7]:
# Adversary action space: one value in [-1, 1] per masked feature.
# The bounds are created as float32 so they match the space dtype exactly;
# passing float64 bounds with a float32 dtype makes gym emit a
# "Box bound precision lowered by casting" warning.
normalized_a_space = gym.spaces.Box(
    low=-np.ones(mask.shape, dtype=np.float32),
    high=np.ones(mask.shape, dtype=np.float32),
    dtype=np.float32,
)

  logger.warn(


##### Parameterize the B function
- The adversary adds a bounded perturbation to the current observation with B(s) as BScaledSum
- The max mean difference represents the largest change between two samples for each feature minus the mean difference. This will be the maximum perturbation size for our adversary. Using the max difference represents the worst-case scenario we expect to encounter based on our training data. Because this is derived from the difference between samples, we subtract the mean difference so that, on average, the inter-sample change will not exceed the max recorded value. This is our boundary for the adversary's perturbation.
See `bline obs analysis.ipynb` in the PPO 500 results.

In [8]:
# Per-feature statistics, one entry per observation feature (31 values each).
# They are hard-coded here; the accompanying note points to a separate analysis
# notebook as their source.

# Max difference minus mean difference for each feature. Not referenced by the
# cells visible below; kept for reference.
max_mean_diff = np.array([0.24977164, 0.24977164, 0.34341758, 0.69515118, 0.04606484,
                        0.04608573, 0.26690566, 0.26690266, 0.2669048 , 0.26690781,
                        0.62865948, 0.62865314, 0.62865568, 0.62865948, 0.52596206,
                        0.52596487, 0.52598294, 0.52596206, 0.75557218, 0.75558416,
                        0.75558188, 0.75557218, 0.28202381, 0.61189055, 0.00253725,
                        0.47459565, 0.0052361 , 0.89720221, 0.89720221, 0.89720221,
                        0.89720221])

# Mean difference for each feature (presumably between successive samples --
# TODO confirm against the analysis notebook). Used to size the perturbation bound.
mean_diff = np.array([0.12511418, 0.12511418, 0.18184461, 0.35953119, 0.10637713,
                     0.10636668, 0.15978021, 0.15978171, 0.15978064, 0.15977914,
                     0.36344801, 0.36345118, 0.36344991, 0.36344801, 0.3260062 ,
                     0.3260048 , 0.32599576, 0.3260062 , 0.44802713, 0.44802114,
                     0.44802228, 0.44802713, 0.16781362, 0.36620854, 0.00152669,
                     0.31896562, 0.00326229, 0.52109586, 0.52109586, 0.52109586,
                     0.52109586])

Max perturbation reduced to mean diff divided by 2

In [9]:
# Keyword arguments for the B function: the per-feature perturbation cap is
# half of the mean difference, restricted to the masked features.
B_params = {
    # Alternative option left disabled by the author:
    #'clip_bound': np.ones(agent_env.observation_space.shape)*0.33,
    'max_perturbation': mean_diff[mask] / 2,
}

Define adversary's environment

In [10]:
# Settings for the adversary's wrapper. adv_reward is not passed, so the
# wrapper's default applies (described by the author as the negative agent reward).
kwargs = {
    #'adv_reward': rwd,
    'victim': agent,
    'B': ATLA.BScaledSum,
    'action_space': normalized_a_space,  # the [-1, 1] box defined earlier
    'feature_mask': mask,
    'B_kwargs': B_params,
}

# NOTE: adv_env is rebound to the wrapped environment, so re-running this cell
# wraps an already-wrapped environment. Re-create the base env before re-running.
adv_env = ATLA.AdversaryATLAWrapper(env=adv_env, **kwargs)


In [12]:
#check_env(adv_env,)

Define adversary

In [13]:
# Load the saved SAC adversary and bind it to the wrapped adversary environment.
# (A tensorboard_log=LOG_DIR option was left disabled by the author.)
adversary = SAC.load(
    path=ADVERSARY,
    env=adv_env,
    device=DEVICE,
    verbose=VERBOSITY,
    print_system_info=True,  # prints current vs. saved-model system info
)

== CURRENT SYSTEM INFO ==
- OS: Windows-10-10.0.22631-SP0 10.0.22631
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.25.1
- Gym: 0.21.0

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-10-10.0.19045-SP0 10.0.19045
- Python: 3.10.12
- Stable-Baselines3: 1.8.0
- PyTorch: 1.12.1
- GPU Enabled: True
- Numpy: 1.23.5
- Gym: 0.21.0



Define the adversary's perturbation function for the victim environment. We use a function which applies the corresponding B(s) to the adversary's prediction 

In [14]:
# Build the perturbation object from the loaded adversary; it is handed to the
# victim's wrapper in the next step.
perturbation = ATLA.sb3_perturbation(adversary,)

Wrap agent's environments for ATLA

In [15]:
# Wrap the agent's environment with the adversary's perturbation.
# NOTE: agent_env is rebound to the wrapped environment, so re-running this cell
# would wrap it a second time.
agent_env = ATLA.VictimATLAWrapper(agent_env,
                                   perturbation,)

In [16]:
# Run Stable-Baselines3's environment checker on the wrapped agent environment.
check_env(agent_env)

Replace the pre-training environment with the ATLA environment

In [17]:
# Point the loaded agent at the wrapped environment instead of the one it was
# loaded with.
agent.set_env(agent_env)

In [18]:
# Evaluate the agent in the wrapped environment. The three results are the KPIs
# (displayed here and saved below) and what are later stored as the
# observations and actions.
kpi, obs, a = utils.eval_agent(agent_env, agent)
display(kpi)

cost_function
annual_peak_average                      1.042875
carbon_emissions_total                   0.891042
cost_total                               0.796751
daily_one_minus_load_factor_average      1.033188
daily_peak_average                       0.926451
electricity_consumption_total            0.908205
monthly_one_minus_load_factor_average    0.985678
ramping_average                          1.124090
zero_net_energy                          1.103965
Name: District, dtype: float64

In [19]:
# Add this run's KPIs as a column named TEST_NAME in the shared KPI table,
# creating the table if it does not exist yet.
kpi_savename = SAVE_DIR+'KPIs.csv'
try:
    df_kpis = pd.read_csv(kpi_savename,
                          index_col=0)
except FileNotFoundError:
    # No table yet: start one with this run as its only column.
    # Only the missing-file case is handled here; any other error (unreadable
    # file, mismatched row count, ...) must surface instead of silently
    # overwriting previously saved results.
    kpi.rename(TEST_NAME).to_csv(kpi_savename)
    print(f'{kpi_savename} created')
else:
    # Values are assigned positionally, so the KPI order must match the stored rows.
    df_kpis[TEST_NAME] = kpi.values
    df_kpis.to_csv(kpi_savename)
    print(f'{kpi_savename} updated')

PPO agent 100 alts over 1000+200 2-3-21 results/KPIs.csv updated


In [20]:
# Save the evaluation trace: one column per observation name plus an 'actions'
# column, written next to the KPI file under this run's name.
# NOTE(review): assumes obs has one column per entry in observation_names and
# that a has one entry per row of obs -- confirm against utils.eval_agent.
df_sa = pd.DataFrame(obs)
df_sa.columns = agent_env.observation_names
df_sa['actions'] = a
df_sa.to_csv(SAVE_DIR + TEST_NAME + ' obs-a.csv')