In [2]:
# Dataset used throughout the notebook (only action is electrical storage).
DATASET_NAME = "citylearn_challenge_2022_phase_2"
# File stem of the saved victim model; the name embeds the dataset name.
AGENT_NAME = f"default_PPO_{DATASET_NAME}_Building_6_20_bins_0"

In [3]:
from stable_baselines3 import PPO

from citylearn.data import DataSet

from art.attacks.evasion import AutoConjugateGradient as ACG


import pandas as pd
import numpy as np

import KBMproject.utilities as utils
from tqdm import tqdm
from art.estimators.classification import PyTorchClassifier as classifier
from torch.nn import CrossEntropyLoss

%matplotlib inline

In [4]:
# Look up the CityLearn schema registered under DATASET_NAME; it is passed to
# utils.make_discrete_env further down to build the environment.
schema = DataSet.get_schema(DATASET_NAME)

Load the pre-trained RL agent (victim policy)

In [5]:
# Load the saved victim PPO policy from Models/Victim/.
# Only a missing file is reported as "not found"; the error is re-raised so the
# notebook stops here instead of failing later with a NameError on `agent`.
# Any other failure (e.g. a corrupt or incompatible archive) propagates with its
# own traceback rather than being mislabelled as a missing model.
try:
    agent = PPO.load(path=f"Models/Victim/{AGENT_NAME}")
except FileNotFoundError:
    print(f'Model: {AGENT_NAME} not found')
    raise
else:
    print('Model loaded from storage')

Model loaded from storage


In [6]:
# Take the bin count from the first entry of the loaded agent's action space so
# the environment is built with the same number of action bins.
bins = agent.action_space[0].n
# Build the discretised environment with a fixed seed of 42.
env = utils.make_discrete_env(schema=schema, action_bins=bins, seed=42)

In [7]:
# Read the observation masks from a CSV in the working directory. Its
# 'mask_time' column is later passed as `mask` to the attack evaluation
# (presumably it selects which observation features may be perturbed — TODO confirm).
observation_masks = pd.read_csv('observation_masks.csv')

In [18]:
# Untargeted Auto Conjugate Gradient (ACG) attack with eps = 0.05.
ACG_parameters = {
    'loss_type': 'difference_logits_ratio',
    'batch_size': 1,
    'eps': 0.05,  # default 0.3; this seems to be the smallest effective value
    'eps_step': 0.015,  # default 0.1; default ratio is eps : 3 * eps_step
    'nb_random_init': 5,  # default 5; lower values speed up crafting
    'max_iter': 100,  # iterations per restart
    'norm': 'inf',  # 'inf' -> l2 -> l1 (most restrictive)
}
attack = utils.define_attack(agent, env, ACG, **ACG_parameters)

# Evaluate the attack for env.time_steps - 1 steps, passing the 'mask_time'
# column as the mask, then show the resulting KPIs.
kpis_5e2, adv_obs_5e2 = utils.eval_untargeted_attack_with_action_distance(
    agent,
    env,
    attack,
    time_steps=env.time_steps - 1,
    mask=observation_masks['mask_time'].to_numpy(),
)
display(kpis_5e2)

100%|█████████▉| 8758/8759 [1:25:59<00:00,  1.70it/s]  


The Adversarial success rate is: 0.9227080716976824
The average distance between optinmal and adversarial actions is: [3.21931727]


cost_function
annual_peak_average                      1.018688
carbon_emissions_total                   0.930952
cost_total                               0.867422
daily_one_minus_load_factor_average      1.453252
daily_peak_average                       1.000841
electricity_consumption_total            0.941739
monthly_one_minus_load_factor_average    0.993985
ramping_average                          1.331808
zero_net_energy                          1.095370
Name: District, dtype: float64

In [19]:
# Untargeted Auto Conjugate Gradient (ACG) attack with eps = 0.04.
ACG_parameters = {
    'loss_type': 'difference_logits_ratio',
    'batch_size': 1,
    'eps': 0.04,  # default 0.3; this seems to be the smallest effective value
    'eps_step': 0.012,  # default 0.1; default ratio is eps : 3 * eps_step
    'nb_random_init': 5,  # default 5; lower values speed up crafting
    'max_iter': 100,  # iterations per restart
    'norm': 'inf',  # 'inf' -> l2 -> l1 (most restrictive)
}
attack = utils.define_attack(agent, env, ACG, **ACG_parameters)

# Evaluate the attack for env.time_steps - 1 steps, passing the 'mask_time'
# column as the mask, then show the resulting KPIs.
kpis_4e2, adv_obs_4e2 = utils.eval_untargeted_attack_with_action_distance(
    agent,
    env,
    attack,
    time_steps=env.time_steps - 1,
    mask=observation_masks['mask_time'].to_numpy(),
)
display(kpis_4e2)

100%|█████████▉| 8758/8759 [1:37:26<00:00,  1.50it/s]  


The Adversarial success rate is: 0.8664231076606919
The average distance between optinmal and adversarial actions is: [2.9740838]


cost_function
annual_peak_average                      1.018688
carbon_emissions_total                   0.921534
cost_total                               0.854414
daily_one_minus_load_factor_average      1.381039
daily_peak_average                       0.975951
electricity_consumption_total            0.931798
monthly_one_minus_load_factor_average    0.988122
ramping_average                          1.321135
zero_net_energy                          1.098077
Name: District, dtype: float64