In [1]:
# --- Experiment configuration ---
# Path of the saved victim agent. Written as a raw string so the Windows-style
# backslashes stay literal: '\V' and '\S' are invalid escape sequences in a
# normal string and trigger a SyntaxWarning on recent Python versions.
AGENT_NAME = r'Models\Victim\SAC_citylearn_challenge_2022_phase_2_Building_6_default_rwd_MARLISA_hyperparams_500.zip'
# Dataset whose schema is loaded for the environment.
DATASET_NAME = 'citylearn_challenge_2022_phase_2' #only action is electrical storage
# Prefix (directory) for every result file written by this notebook.
SAVE_DIR = 'default SAC 500 norm space results' + '/'
# Label for this attack; used as the CSV column name and as a file-name prefix.
ATK_NAME = 'untargeted_binary_myPGD_03_mask_time_REscale_solar_and_consumption_eps_clipped_adv_obs'
# Factors applied to the eps budget of the consumption / solar observations.
CONSUMPTION_SPREAD = 0.016
SOLAR_SPREAD = 0.004 #0.04 prev typo
# Bounds used to clip adversarial observations back into the observation space.
MIN_OBS = 0
MAX_OBS = 1

In [2]:
import torch
from torch import nn
import torch.nn.functional as F

from stable_baselines3 import SAC

from citylearn.data import DataSet

import pandas as pd
import numpy as np
import json

import KBMproject.utilities as utils

from tqdm import tqdm


In [3]:
# Fetch the schema of the dataset named by DATASET_NAME; it is passed to the environment factory below.
schema = DataSet.get_schema(DATASET_NAME)

In [4]:
# Load the saved SAC victim agent from the path stored in AGENT_NAME.
agent = SAC.load(path=AGENT_NAME)

In [5]:
from copy import deepcopy

# Build a stand-alone deterministic policy network for the attack:
# the actor's latent layers followed by its mean-action head.
# Both parts are deep-copied. Appending agent.actor.mu directly would share its
# parameters with the agent, so gradients computed during the attack would
# accumulate on the agent's own weights.
policy_net = deepcopy(agent.actor.latent_pi)
# Name the head after the next free index instead of hard-coding '4', so the
# head always lands last whatever the depth of latent_pi.
policy_net.add_module(str(len(policy_net)), deepcopy(agent.actor.mu))

In [6]:
# Build the environment from the schema with a fixed seed (project helper;
# presumably wraps CityLearn in a gym-style continuous-action interface — confirm in KBMproject.utilities).
env = utils.make_continuous_env(schema=schema,  
                        seed=42)
# Observation names reported by the environment; reused later as DataFrame column labels.
cols = env.observation_names

In [7]:
def broken_pgd_linf(model, X, y, loss_fn, epsilon:float=0.05, step:float=0.01, num_iter:int=100, 
             num_restarts:int=5, num_decay:int=0, decay_rate=1):
    """L-inf PGD with all random restarts evaluated in one batch (the name marks it as known-broken).

    Draws `num_restarts` perturbations uniformly in [-epsilon, epsilon], takes
    signed-gradient ascent steps on the loss, clamps each step back to
    [-epsilon, epsilon], and tries to keep the perturbation with the highest loss.
    This is iterative PGD, not single-step FGSM.

    Args:
        model: callable applied to `X + delta`.
        X: input tensor to perturb.
        y: target tensor; repeated `num_restarts` times along a new leading axis.
        loss_fn: loss class/factory, instantiated as `loss_fn(reduction='none')`.
        epsilon: L-inf bound on the perturbation.
        step: initial step size of each signed-gradient update.
        num_iter: number of update iterations.
        num_restarts: number of random starting perturbations.
        num_decay: number of step decays spread over the iterations (0 = none intended).
        decay_rate: multiplicative step decay, must be in (0, 1].

    Returns:
        `max_delta`, the tracked highest-loss perturbation.
        NOTE(review): it starts shaped like X but is rebuilt by broadcasting
        against the (num_restarts, ...) delta, so the returned shape probably
        carries the restart axis — confirm before use.
    """
    # NOTE(review): presumably assumes y is 1-D (y.shape[0] entries, and repeat(num_restarts, 1) below) — confirm.
    max_loss = torch.zeros([num_restarts, y.shape[0]]).to(y.device)
    max_delta = torch.zeros_like(X)

    assert 0 < decay_rate <= 1, 'decay rate must be between 0 and 1'

    # Interval (in iterations) between step decays.
    # NOTE: if num_decay > num_iter this is 0 and the modulo below raises ZeroDivisionError.
    if num_decay > 0: 
        decay_iters = num_iter//num_decay
    else: #no decay
        decay_iters = num_iter

    # Create a tensor to hold delta for all restarts at once
    delta = torch.rand(num_restarts, *X.shape, device=X.device, requires_grad=True)
    # Scale the random values to the range [-epsilon, epsilon]
    delta.data = delta.data * 2 * epsilon - epsilon

    for iter in range(num_iter):
        loss = loss_fn(reduction='none')(model(X + delta), y.unsqueeze(0).repeat(num_restarts, 1))
        # Non-scalar loss: backward with a ones tensor sums the per-element gradients.
        loss.backward(torch.ones_like(loss))

        # Perform the update on delta (via the data attribute to skip the gradient tracking)
        delta.data = (delta + step*delta.grad.detach().sign()).clamp(-epsilon, epsilon)
        delta.grad.zero_()
        
        #find the best delta for all restarts
        # NOTE(review): `loss` was computed before the update above, while `delta`
        # is already the updated value, so loss and stored delta are one step apart.
        is_max = loss.unsqueeze(-1).unsqueeze(-1) >= max_loss.unsqueeze(-1).unsqueeze(-1)
        max_delta = torch.where(is_max, delta.detach(), max_delta)
        max_loss = torch.where(is_max.squeeze(-1).unsqueeze(-1), loss, max_loss)

        # The condition is true at iter == 0, so the first decay fires right after
        # the first update (and once even in the "no decay" case).
        if(iter%decay_iters == 0):
            step *= decay_rate
        
    return max_delta


In [8]:
def my_pgd_linf(model, X, y=None, loss_fn=None, epsilon=0.05, step:float=0.01, num_iter:int=100,
                num_decay:int=0, decay_rate=1):
    """Projected gradient descent (L-inf) attack on a single sample X.

    Starts from a zero perturbation and repeatedly takes a signed-gradient
    ascent step on the loss, clamping the perturbation to [-epsilon, epsilon]
    after every step (no random restarts).
    ref: https://adversarial-ml-tutorial.org/adversarial_examples/

    Args:
        model: torch module mapping X to its output; switched to eval mode.
        X: input tensor for one sample (the y=None path assumes X is 1-D).
        y: target passed to the loss. If None, the clean prediction is used:
            as-is for a single-output model, or as a one-hot vector of the
            arg-max output when the model has several outputs.
        loss_fn: loss class/factory, instantiated as `loss_fn(reduction='none')`.
        epsilon: L-inf budget; a float or a tensor broadcastable to X
            (a zero entry leaves that feature unperturbed).
        step: initial step size.
        num_iter: number of iterations.
        num_decay: number of step decays over the run; 0 disables decay.
        decay_rate: multiplicative decay applied to `step`, in (0, 1].

    Returns:
        The perturbation `delta` (same shape as X), detached from the graph.

    Raises:
        AssertionError: if `decay_rate` is outside (0, 1] or `loss_fn` is None.
    """
    assert 0 < decay_rate <= 1, 'decay rate must be between 0 and 1'
    assert loss_fn is not None, 'Loss function must be provided'
    model.eval()

    if y is None:
        # The clean prediction is a fixed target. Computing it under no_grad keeps
        # it out of the autograd graph; otherwise a single-output model would fail
        # on the second iteration with "backward through the graph a second time".
        with torch.no_grad():
            y = model(X)
        n_out = y.shape[0]
        if n_out > 1: #multiple outputs, assumes X is 1d
            y = F.one_hot(torch.argmax(y, -1), num_classes=n_out)

    # Interval between step decays. None disables decay entirely; the floor of 1
    # avoids a modulo-by-zero when num_decay exceeds num_iter.
    decay_iters = max(1, num_iter // num_decay) if num_decay > 0 else None

    criterion = loss_fn(reduction='none')
    delta = torch.zeros_like(X, requires_grad=True)
    for i in range(num_iter):
        loss = criterion(model(X + delta), y)
        # Differentiate w.r.t. delta only, so no gradients accumulate on the model
        # parameters. The ones tensor sums the gradients of a non-scalar loss.
        (grad,) = torch.autograd.grad(loss, delta, grad_outputs=torch.ones_like(loss))

        # Signed ascent step, projected back onto the epsilon box.
        with torch.no_grad():
            delta.copy_((delta + step * grad.sign()).clamp(-epsilon, epsilon))

        # Schedule kept as in previous runs: the first decay fires right after
        # iteration 0, then every `decay_iters` iterations.
        if decay_iters is not None and i % decay_iters == 0:
            step *= decay_rate

    return delta.detach()


In [9]:
class RegressorLinearWrapper(nn.Module):
    """Turn a single-output regressor into a two-logit "classifier".

    The regressor output x is mapped through two affine functions,
    y = m*x + b, and the results are concatenated along the last dimension:
    logit 0 = m1*x + b1 and logit 1 = m2*x + b2. With the defaults
    (m1=1, m2=-1, b1=b2=0) logit 0 grows with x and logit 1 shrinks with it.

    Args:
        base_model: wrapped regressor module.
        m1, b1: slope and intercept of the first logit.
        m2, b2: slope and intercept of the second logit.
    """
    def __init__(self, base_model, m1=1.0, b1=0.0, m2=-1.0, b2=0.0):
        super().__init__()
        self.base_model = base_model
        self.m1 = m1
        self.m2 = m2
        self.b1 = b1
        self.b2 = b2


    def forward(self, input):
        """Return the two logits as a float tensor; the last dimension indexes the logit."""
        x = self.base_model(input)

        # Concatenate on the last axis: identical to the default axis for a single
        # 1-D sample, and keeps one row of logits per sample for batched input.
        logits = torch.cat((self.m1*x + self.b1, self.m2*x + self.b2), dim=-1).float()
        return logits

In [10]:
class DLLoss(nn.Module):
    """Carlini and Wagner or Difference Logits loss FOR UNTARGETED ATTACKS.

    The loss is the largest non-target logit minus the target (clean) logit,
    so maximizing it pushes the prediction away from the target class.

    Args:
        reduction: 'mean' or 'sum' to reduce the loss; any other value
            (including None and 'none') returns it unreduced.
    """
    def __init__(self, reduction=None):
        super().__init__()
        self.reduction = reduction

    def forward(self, logits, target_one_hot): #myPGD doesn't provide a 1 hot target...
        """Compute the loss along the last dimension.

        Args:
            logits: tensor whose last dimension indexes the classes.
            target_one_hot: one-hot tensor of the same shape marking the target class.

        Returns:
            A scalar for 1-D input, or one loss per sample for batched input
            (before the optional reduction).
        """
        target_logits = torch.sum(target_one_hot * logits, dim=-1)
        # Exclude the target with -inf rather than multiplying by 0: a zeroed slot
        # would win the max whenever every non-target logit is negative.
        non_target = logits.masked_fill(target_one_hot.bool(), float('-inf'))
        max_non_target_logits = non_target.max(dim=-1).values
        loss = max_non_target_logits - target_logits

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  #reduction is None
            return loss

Adjust $\epsilon$ per feature. The first six features (indices 0–5, i.e. the slice `[:6]`) are temporal features; setting their $\epsilon$ to 0 means they will not be perturbed, because `torch.clamp` then pins their perturbation to 0.

In [11]:
# Check which observation sits at index 24 (the cell output shows 'solar_generation').
env.observation_names[0][24]

'solar_generation'

In [12]:
# Look the index up by name instead of hard-coding 24: if the observation layout
# changes, this raises ValueError rather than silently rescaling the wrong feature.
solar_idx = list(env.observation_names[0]).index('solar_generation')

In [13]:
# Check which observation sits at index 26 (the cell output shows 'net_electricity_consumption').
env.observation_names[0][26]

'net_electricity_consumption'

In [14]:
# Look the index up by name instead of hard-coding 26: if the observation layout
# changes, this raises ValueError rather than silently rescaling the wrong feature.
consumption_idx = list(env.observation_names[0]).index('net_electricity_consumption')

In [15]:
# Per-feature L-inf budget: 0.03 for every observation dimension to start with.
eps_list = np.full(agent.observation_space.shape[0], 0.03)
# Mask the first six features: a zero budget means they are never perturbed.
eps_list[:6] = 0.0
# These two features are improperly normalized, so their eps must be rescaled accordingly.
eps_list[solar_idx] = eps_list[solar_idx] * SOLAR_SPREAD
eps_list[consumption_idx] = eps_list[consumption_idx] * CONSUMPTION_SPREAD


In [16]:
# Keyword arguments forwarded to my_pgd_linf at every time step of the attack loop.
kwargs = {
    # Policy network wrapped so its single output becomes two logits.
    'model': RegressorLinearWrapper(policy_net),
    # Per-feature budget as a float32 tensor on the agent's device.
    'epsilon': torch.tensor(eps_list, device=agent.device, dtype=torch.float32),
    'step': 0.01,
    'num_iter': 100,
    'num_decay': 4,
    'decay_rate': 0.5,
    # The class itself, not an instance: the attack instantiates it.
    'loss_fn': DLLoss,
}

In [17]:
# Sanity check: number of outputs of the last layer of the wrapped policy network (the cell output shows 1).
kwargs['model'].base_model[-1].out_features

1

In [18]:
# Run one episode in which every observation shown to the agent is perturbed
# by my_pgd_linf, recording clean/adversarial observations and actions.
time_steps = None  # None => run for the whole episode

obs_list = []
adv_obs_list = []
a_list = []
adv_a_list = []
mae = 0
n_features = agent.observation_space.shape[0]

observations = env.reset()
if time_steps is None:
    time_steps = env.time_steps - 1

steps_run = 0  # steps actually executed (the loop may stop early on env.done)
# A single progress bar, closed by the context manager even if a step raises.
with tqdm(total=time_steps) as pbar:
    for step in range(time_steps):

        obs_list.append(observations)
        # predict returns an (action, state) tuple, so actions[0] is the clean action array
        actions = agent.predict(observations, deterministic=True)
        a_list.append(actions[0])

        #try toggling the one hot target y based on odd/even steps to imitate the optimal adversarial policy
        delta = my_pgd_linf(X=torch.from_numpy(observations).to(agent.device),
                            **kwargs).cpu().detach().numpy()

        adv_obs = np.clip(observations + delta, MIN_OBS, MAX_OBS) #keep adv obs in obs space
        adv_obs_list.append(adv_obs)

        a_adv, _ = agent.predict(adv_obs, deterministic=True)
        # absolute difference between the adversarial and the clean action
        a_dist = abs(a_adv[0] - actions[0])[0]
        mae += a_dist

        adv_a_list.append(a_adv[0])
        # the environment is driven by the action taken on the adversarial observation
        observations, _, _, _ = env.step(a_adv)
        steps_run += 1

        #update progress bar including MAE
        pbar.update(1)
        pbar.set_postfix({'MAE': mae/steps_run}, refresh=True)
        if env.done:
            break

# Average over the steps that were actually run, not the planned count, so an
# early stop does not deflate the MAE.
mae /= max(steps_run, 1)


100%|█████████▉| 8758/8759 [23:04<00:00,  6.32it/s] MAE=0.109]
100%|██████████| 8759/8759 [23:04<00:00,  6.32it/s, MAE=0.109]


In [19]:
# Collect the KPIs from the environment after the attacked episode (project helper) and display them.
kpi = utils.format_kpis(env)
display(kpi)

cost_function
annual_peak_average                      1.046289
carbon_emissions_total                   0.958512
cost_total                               0.921895
daily_one_minus_load_factor_average      0.989155
daily_peak_average                       1.030163
electricity_consumption_total            0.965838
monthly_one_minus_load_factor_average    0.996660
ramping_average                          1.361771
zero_net_energy                          1.084689
Name: District, dtype: float64

In [20]:
# Add this attack's KPIs as a column of the shared KPIs.csv, creating the file
# on first use.
kpi_savename = SAVE_DIR+'KPIs.csv'
try:
    df_kpis = pd.read_csv(kpi_savename,
                          index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Only a missing (or empty) file means "start a new table". Any other error
    # must surface: swallowing it would overwrite the accumulated results.
    kpi.name = ATK_NAME
    kpi.to_csv(kpi_savename)
    print('KPIs.csv created')
else:
    df_kpis[ATK_NAME] = kpi.values
    df_kpis.to_csv(kpi_savename)
    print('KPIs.csv updated')

KPIs.csv updated


In [21]:
# Save the clean observations together with the clean actions, one row per time step.
df_obs = pd.DataFrame(obs_list)
df_obs.columns = cols
df_obs['a'] = np.asarray(a_list).ravel().tolist()
df_obs.to_csv(''.join((SAVE_DIR, ATK_NAME, '_obs-a.csv')))

In [22]:
# Save the adversarial observations together with the actions taken on them, one row per time step.
df_obs = pd.DataFrame(adv_obs_list)
df_obs.columns = cols
df_obs['a'] = np.asarray(adv_a_list).ravel().tolist()
df_obs.to_csv(''.join((SAVE_DIR, ATK_NAME, '_adv_obs-a.csv')))

In [23]:
# Add this attack's MAE as a column of the shared MAEs.csv, creating the file
# on first use.
asr_savename = SAVE_DIR+'MAEs.csv'
try:
    df_asrs = pd.read_csv(asr_savename,
                          index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Only a missing (or empty) file means "start a new table". Any other error
    # must surface: swallowing it would overwrite the accumulated results.
    asr = pd.Series([mae])
    asr.name = ATK_NAME
    asr.to_csv(asr_savename)
    print(f'{asr_savename} created')
else:
    df_asrs[ATK_NAME] = mae
    df_asrs.to_csv(asr_savename)
    print(f'{asr_savename} updated')

default SAC 500 norm space results/MAEs.csv updated


In [24]:
# List the attack parameters held in kwargs before serializing them.
kwargs.keys()

dict_keys(['model', 'epsilon', 'step', 'num_iter', 'num_decay', 'decay_rate', 'loss_fn'])

In [25]:
# Write the attack parameters next to the results as JSON.
kwargs_to_save = dict(kwargs)
kwargs_to_save.pop('model', None)  # the network itself is not JSON-serializable
kwargs_to_save['loss_fn'] = kwargs['loss_fn'].__name__  # store the loss by name
if not isinstance(kwargs_to_save['epsilon'], float):
    # tensors aren't JSON compatible: store the per-feature budgets as a plain list
    kwargs_to_save['epsilon'] = eps_list.tolist()
with open(SAVE_DIR+f'{ATK_NAME} parameters.json', 'w') as f:
    f.write(json.dumps(kwargs_to_save))