# Imports

In [22]:
import pandas as pd
import numpy as np
from numpy.linalg import norm
import random
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pickle
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from keras.models import load_model
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error
from tensorflow.keras.losses import MSE
tf.random.set_seed(7)

# Utils

In [23]:
def score(model, x_test, y_test):
    y_pred = model.predict(x_test)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    print(f"r2:{r2}; mae:{mae}; mse:{mse}")
    return [r2, mae, mse]

def sample_3Darray(a, perc):
    l = a.shape[0]
    # list of indexes
    b = random.sample(list(range(l)), int(l*perc))
    res = a[b,:,:]
    return res

def save_object(obj, filename):
    with open(filename, 'wb') as outp:  # Overwrites any existing file.
        pickle.dump(obj, outp)

def load_object(filename):
    with open(filename, 'rb') as inp:
        return pickle.load(inp)

# Load Datasets

## California Housing Prices

In [24]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
data = pd.DataFrame(housing.data)
data.columns = housing.feature_names
data['PRICE'] = housing.target

x_housing = data.drop(['PRICE'], axis = 1)
y_housing = data['PRICE']

x_housing_train, x_housing_test, y_housing_train, y_housing_test = train_test_split(x_housing,y_housing, test_size = 0.3, random_state = 4)
print(f"{type(x_housing_train)} {type(y_housing_train)} {type(x_housing_test)} {type(y_housing_test)}")
print(f"{x_housing_train.shape} {y_housing_train.shape} {x_housing_test.shape} {y_housing_test.shape}")

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'> <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>
(14448, 8) (14448,) (6192, 8) (6192,)


## Daily minimum temperatures in Melbourne

In [25]:
# https://www.kaggle.com/datasets/paulbrabban/daily-minimum-temperatures-in-melbourne
temperature_df = pd.read_csv("datasets/temperature.csv", usecols=[1])
dataset = temperature_df.values
dataset = dataset.astype('float32')

In [26]:
test_split = 0.3
train_size = int(len(dataset) * (1-test_split))
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

In [27]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [28]:
look_back = 10
x_temperature_train, y_temperature_train = create_dataset(train, look_back)
x_temperature_test, y_temperature_test = create_dataset(test, look_back)

x_temperature_train = np.reshape(x_temperature_train, (x_temperature_train.shape[0], 1, x_temperature_train.shape[1]))
x_temperature_test = np.reshape(x_temperature_test, (x_temperature_test.shape[0], 1, x_temperature_test.shape[1]))

print(f"{type(x_temperature_train)} {type(y_temperature_train)} {type(x_temperature_test)} {type(y_temperature_test)}")
print(f"{x_temperature_train.shape} {y_temperature_train.shape} {x_temperature_test.shape} {y_temperature_test.shape}")

<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
(2544, 1, 10) (2544,) (1084, 1, 10) (1084,)


# Generate Training adversarial examples

## FGSM

In [29]:
def fgsm(model, x, y, r):
    x = tf.cast(x, tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x)
        pred = model(x)
        loss = MSE(y, pred)
    gradient = tape.gradient(loss, x)
    signedGrad = tf.sign(gradient)
    x_f = (x + (signedGrad * r)).numpy()
    return x_f

## PGD

In [30]:
# suited for housing dataset
def pgd1(model, x, y, r, lr, iter):
    x = tf.cast(x, tf.float32)
    x_f = x
    for i in range(iter):
        with tf.GradientTape() as tape:
            tape.watch(x_f)
            y_pred = model(x_f)
            loss = MSE(y, y_pred)
        loss_gradient = tape.gradient(loss, x_f)
        x_f = (x_f + (loss_gradient * lr))
        if norm(x_f[0,:]-x[0,:],2)>r:
            x_f = x_f-((x_f-x)/norm(x_f[0,:]-x[0,:]))*abs(norm(x_f[0,:]-x[0,:])-r)
    return x_f

In [31]:
# suited for temperature dataset
def pgd2(model, x, y, r, lr, iter):
    x = tf.cast(x, tf.float32)
    x_f = x
    for i in range(iter):
        with tf.GradientTape() as tape:
            tape.watch(x_f)
            y_pred = model(x_f)
            loss = MSE(y, y_pred)
        loss_gradient = tape.gradient(loss, x_f)
        x_f = (x_f + (loss_gradient * lr))
        if norm(x_f[0,0,:]-x[0,0,:],2)>r:
            x_f = x_f-((x_f-x)/norm(x_f[0,0,:]-x[0,0,:]))*abs(norm(x_f[0,0,:]-x[0,0,:])-r)
    return x_f

## Prepare adversarial training set

In [32]:
# common parameters
percentages = [0, 25, 50, 75, 100]
attacks = ['fgsm', 'pgd']

r=0.5
lr=0.01
iterations=50

In [33]:
# # housing adversarial training sets
# m = load_model('models/model_housing_plain.h5')
#
# for p in percentages:
#     # qui c'è p% del training set campionato a caso
#     adv = x_housing_train.sample(frac=p / 100, random_state=1)
#     for a in attacks:
#         st = time.time()
#         x_housing_train_adv = x_housing_train.copy(deep=True)
#         y_housing_train_adv = y_housing_train.copy(deep=True)
#         # per ogni x del subset creo x_fooled
#         c = 0
#         for i, x in adv.iterrows():
#             print(f"Processing p:{p}%,\tattack:{a},\tx_{c}/{adv.shape[0]} ...")
#             x = np.expand_dims(x, 0)
#             if a == 'fgsm':
#                 x_f = fgsm(m, x, y_housing_train[i], r)
#             elif a == 'pgd':
#                 x_f = pgd1(m, x, y_housing_train[i], r, lr, iterations)
#
#             # ogni volta che creo un x_fooled lo aggiungo a un nuovo adversarial training set
#             # e aggiorno il rispettivo set di labels
#             x_housing_train_adv = np.append(x_housing_train_adv, x_f, 0)
#             y_housing_train_adv = np.append(y_housing_train_adv, y_housing_train[i])
#             c = c+1
#
#         et = time.time()
#         elapsed_time = et - st
#         # aggiungo la coppia x y adversarial training set al set degli adversarial training set
#         x_adv_name = f"x_housing_train_adv_{a}_{p}"
#         y_adv_name = f"y_housing_train_adv_{a}_{p}"
#         t_adv_name = f"housing_train_adv_{a}_{p}"
#
#         x_adv_training_sets[x_adv_name] = x_housing_train_adv
#         y_adv_training_sets[y_adv_name] = y_housing_train_adv
#         t_adv_training_sets[t_adv_name] = elapsed_time
#
#         save_object(x_housing_train_adv,f"adv_training_sets/{x_adv_name}")
#         save_object(y_housing_train_adv,f"adv_training_sets/{y_adv_name}")
#         save_object(elapsed_time,f"adv_training_sets/{t_adv_name}")


In [34]:
# # temperature adversarial training sets
# m = load_model('models/model_temperature_plain.h5')
#
# for p in percentages:
#     # qui c'è p% del training set campionato a caso
#     adv = sample_3Darray(x_temperature_train, p/100)
#     for a in attacks:
#         st = time.time()
#         x_temperature_train_adv = np.copy(x_temperature_train)
#         y_temperature_train_adv = np.copy(y_temperature_train)
#         # per ogni x del subset creo x_fooled
#         for i in range(adv.shape[0]):
#             print(f"Processing p:{p}%,\tattack:{a},\tx_{i}/{adv.shape[0]} ...")
#             x = np.expand_dims(adv[i], 0)
#             if a == 'fgsm':
#                 x_f = fgsm(m, x, y_temperature_train[i], r)
#             elif a == 'pgd':
#                 x_f = pgd2(m, x, y_temperature_train[i], r, lr, iterations)
#
#             # ogni volta che creo un x_fooled lo aggiungo a un nuovo adversarial training set
#             # e aggiorno il rispettivo set di labels
#             x_temperature_train_adv = np.append(x_temperature_train_adv, x_f, 0)
#             y_temperature_train_adv = np.append(y_temperature_train_adv, y_temperature_train[i])
#
#         et = time.time()
#         elapsed_time = et - st
#         # aggiungo la coppia x y adversarial training set al set degli adversarial training set
#         x_adv_name = f"x_temperature_train_adv_{a}_{p}"
#         y_adv_name = f"y_temperature_train_adv_{a}_{p}"
#         t_adv_name = f"temperature_train_adv_{a}_{p}"
#
#         x_adv_training_sets[x_adv_name] = x_temperature_train_adv
#         y_adv_training_sets[y_adv_name] = y_temperature_train_adv
#         t_adv_training_sets[t_adv_name] = elapsed_time
#
#         save_object(x_temperature_train_adv,f"adv_training_sets/{x_adv_name}")
#         save_object(y_temperature_train_adv,f"adv_training_sets/{y_adv_name}")
#         save_object(elapsed_time,f"adv_training_sets/{t_adv_name}")

# Train models with adversarial training set

In [35]:
# # train housing models
# housing_epochs=10
# housing_validation_split=0.2
# housing_batch_size=8
#
# for p in percentages:
#     for a in attacks:
#
#         st = time.time()
#
#         x_train = load_object(f"adv_training_sets/x_housing_train_adv_{a}_{p}")
#         y_train = load_object(f"adv_training_sets/y_housing_train_adv_{a}_{p}")
#
#         model_housing = Sequential()
#         model_housing.add(Dense(15, input_dim=8, activation='relu'))
#         model_housing.add(Dense(1, activation='linear'))
#         model_housing.compile(loss='mse', optimizer='adam')
#         model_housing.fit(
#             x_train,
#             y_train,
#             epochs=housing_epochs,
#             validation_split=housing_validation_split,
#             batch_size=housing_batch_size
#         )
#         et = time.time()
#         elapsed_time = et - st
#
#         model_name = f"model_housing_{a}_{p}.h5"
#         model_training_time_name = f"model_housing_{a}_{p}"
#         model_housing.save(f"models/{model_name}")
#         save_object(elapsed_time,f"models/{model_training_time_name}")
# print("Job terminated.")

In [36]:
# # train temperature models
#
# temperature_epochs=10
# temperature_validation_split=0.2
# temperature_batch_size=1
#
#
# for p in percentages:
#     for a in attacks:
#
#         st = time.time()
#
#         x_train = load_object(f"adv_training_sets/x_temperature_train_adv_{a}_{p}")
#         y_train = load_object(f"adv_training_sets/y_temperature_train_adv_{a}_{p}")
#
#         model_temperature = Sequential()
#         model_temperature.add(LSTM(4, input_shape=(1,look_back)))
#         model_temperature.add(Dense(1))
#         model_temperature.compile(loss='mean_squared_error', optimizer='adam')
#         model_temperature.fit(
#             x_train,
#             y_train,
#             epochs=temperature_epochs,
#             validation_split=temperature_validation_split,
#             batch_size=temperature_batch_size
#         )
#
#         et = time.time()
#         elapsed_time = et - st
#
#         model_name = f"model_temperature_{a}_{p}.h5"
#         model_training_time_name = f"model_temperature_{a}_{p}"
#         model_temperature.save(f"models/{model_name}")
#         save_object(elapsed_time,f"models/{model_training_time_name}")
# print("Job terminated.")

# Generate Test adversarial examples

In [37]:
attacks = ['fgsm', 'pgd']

r=0.5
lr=0.01
iterations=50

In [38]:
# # housing adversarial test sets
# m = load_model('models/model_housing_plain.h5')
#
# adv = x_housing_test.copy(deep=True)
# for a in attacks:
#     x_housing_test_adv = []
#     y_housing_test_adv = []
#     # per ogni x del subset creo x_fooled
#     c = 0
#     for i, x in adv.iterrows():
#         print(f"Processing \tattack:{a},\tx_{c}/{adv.shape[0]} ...")
#         x = np.expand_dims(x, 0)
#         if a == 'fgsm':
#             x_f = fgsm(m, x, y_housing_test[i], r)
#         elif a == 'pgd':
#             x_f = pgd1(m, x, y_housing_test[i], r, lr, iterations)
#
#         # ogni volta che creo un x_fooled lo aggiungo a un nuovo adversarial training set
#         # e aggiorno il rispettivo set di labels
#         x_housing_test_adv.append(x_f)
#         y_housing_test_adv.append(y_housing_test[i])
#         c = c+1
#
#     # aggiungo la coppia x y adversarial training set al set degli adversarial training set
#     x_adv_name = f"x_housing_test_adv_{a}"
#     y_adv_name = f"y_housing_test_adv_{a}"
#
#     x_adv_test_sets[x_adv_name] = pd.DataFrame(np.squeeze(x_housing_test_adv))
#     y_adv_test_sets[y_adv_name] = pd.DataFrame(y_housing_test_adv)


In [39]:
# # temperature adversarial test sets
# m = load_model('models/model_temperature_plain.h5')
# type(x_temperature_test)
# adv = np.copy(x_temperature_test)
#
# for a in attacks:
#     x_temperature_test_adv = []
#     y_temperature_test_adv = []
#     # per ogni x del subset creo x_fooled
#     for i in range(adv.shape[0]):
#         print(f"Processing \tattack:{a},\tx_{i}/{adv.shape[0]} ...")
#         x = np.expand_dims(adv[i], 0)
#         if a == 'fgsm':
#             x_f = fgsm(m, x, y_temperature_test[i], r)
#         elif a == 'pgd':
#             x_f = pgd2(m, x, y_temperature_test[i], r, lr, iterations)
#
#         # ogni volta che creo un x_fooled lo aggiungo a un nuovo adversarial training set
#         # e aggiorno il rispettivo set di labels
#         x_temperature_test_adv.append(x_f)
#         y_temperature_test_adv.append(y_temperature_test[i])
#
#     # aggiungo la coppia x y adversarial training set al set degli adversarial training set
#     x_adv_name = f"x_temperature_test_adv_{a}"
#     y_adv_name = f"y_temperature_test_adv_{a}"
#
#     x_adv_test_sets[x_adv_name] = np.squeeze(np.array(x_temperature_test_adv), axis=1)
#     y_adv_test_sets[y_adv_name] = np.array(y_temperature_test_adv)


In [40]:
# save_object(x_adv_test_sets,"adv_test_sets/x_adv_test_sets")
# save_object(y_adv_test_sets,"adv_test_sets/y_adv_test_sets")

# Test Models

In [41]:
# test house models
for p in percentages:
    for a1 in attacks:
        for a2 in attacks:
            m = load_model(f"models/model_housing_{a1}_{p}.h5")
            x_test = load_object("adv_test_sets/x_adv_test_sets")[f"x_housing_test_adv_{a2}"]
            y_test = load_object("adv_test_sets/y_adv_test_sets")[f"y_housing_test_adv_{a2}"]
            r = score(m,x_test, y_test)
            save_object(r,f"scores/score_housing_{p}_train_{a1}_test_{a2}")

r2:0.024904599662596394; mae:0.8722405255325493; mse:1.2741856358849204
r2:0.14531383457148217; mae:0.7964779364271202; mse:1.116843372250298
r2:-2.952390676021286; mae:1.4375039685750501; mse:5.16470432026366
r2:-2.3660961353089194; mae:1.2296297384256603; mse:4.398576122022802
r2:0.075440769837673; mae:0.8852450953921005; mse:1.2081485464807067
r2:0.3188060839368082; mae:0.686564726988061; mse:0.8901359834120661
r2:-0.21216953465419963; mae:0.9250486902510829; mse:1.5839773306070868
r2:0.09202487784118663; mae:0.6783813970240627; mse:1.1864776082374031
r2:-0.21438713308995894; mae:0.6509892061421915; mse:1.5868751312448786
r2:-0.1580750837401601; mae:0.6043442329571305; mse:1.51329053184678
r2:0.12689298902972745; mae:0.7554097323389756; mse:1.1409144290741093
r2:0.26455788547421244; mae:0.645543354793174; mse:0.9610236885840485
r2:0.531158580835116; mae:0.5485961895848924; mse:0.6126487742646374
r2:0.5049155638674548; mae:0.5718630313611751; mse:0.6469412909259894
r2:0.3367005455352

In [42]:
# test temperature models
for p in percentages:
    for a1 in attacks:
        for a2 in attacks:
            m = load_model(f"models/model_temperature_{a1}_{p}.h5")
            x_test = load_object("adv_test_sets/x_adv_test_sets")[f"x_temperature_test_adv_{a2}"]
            y_test = load_object("adv_test_sets/y_adv_test_sets")[f"y_temperature_test_adv_{a2}"]
            r = score(m,x_test, y_test)
            save_object(r,f"scores/score_temperature_{p}_train_{a1}_test_{a2}")

r2:0.4576411900087728; mae:2.4420413970947266; mse:8.645846366882324
r2:0.5114815474342751; mae:2.2665579319000244; mse:7.787567138671875
r2:0.44169495501480494; mae:2.4551479816436768; mse:8.900049209594727
r2:0.4819354757527017; mae:2.3256683349609375; mse:8.258567810058594
r2:0.5048322042220003; mae:2.3013525009155273; mse:7.893565654754639
r2:0.5595159714817654; mae:2.1243302822113037; mse:7.021841049194336
r2:0.46530772483132254; mae:2.397505521774292; mse:8.52363395690918
r2:0.520083421031156; mae:2.223153591156006; mse:7.650443077087402
r2:0.43790681298581813; mae:2.418644905090332; mse:8.960436820983887
r2:0.454706034461393; mae:2.372938871383667; mse:8.692636489868164
r2:0.43155628802599; mae:2.4302589893341064; mse:9.061671257019043
r2:0.44995786516287684; mae:2.384216547012329; mse:8.768328666687012
r2:0.400219938732727; mae:2.469149589538574; mse:9.561209678649902
r2:0.41361386014236057; mae:2.442878246307373; mse:9.347696304321289
r2:0.36204328826976606; mae:2.575749397277

# Final Analysis

In [69]:
percentages = [0, 25, 50, 75, 100]
attacks = ['fgsm','pgd']
datasets = ['housing','temperature']

results = pd.DataFrame(columns=[
    'dataset',
    'training_attack',
    'test_attack',
    'adv_training_percentage',
    'training_time',
    'R2',
    'MAE',
    'MSE',
    'R2/time',
    'MAE/time',
    'MSE/time'
])

In [73]:
c=0
for d in datasets:
    for tr in attacks:
        for te in attacks:
            for p in percentages:
                print(c)
                c=c+1
                t_adv = load_object(f"adv_training_sets/{d}_train_adv_{tr}_{p}")
                t_train = load_object(f"models/model_{d}_{tr}_{p}")
                t = t_adv+t_train
                print(t)
                score = load_object(f"scores/score_{d}_{p}_train_{tr}_test_{te}")

                new_row = {
                    'dataset':d,
                    'training_attack':tr,
                    'test_attack':te,
                    'adv_training_percentage':p,
                    'training_time':t,
                    'R2':score[0],
                    'MAE':score[1],
                    'MSE':score[2],
                    'R2/time':score[0]/t,
                    'MAE/time':score[1]/t,
                    'MSE/time':score[2]/t
                }
                print(new_row)
                results = results.append(new_row, ignore_index=True)

save_object(results,"results")

0
44.030863523483276
{'dataset': 'housing', 'training_attack': 'fgsm', 'test_attack': 'fgsm', 'adv_training_percentage': 0, 'training_time': 44.030863523483276, 'R2': 0.024904599662596394, 'MAE': 0.8722405255325493, 'MSE': 1.2741856358849204, 'R2/time': 0.0005656168802892965, 'MAE/time': 0.019809752880893452, 'MSE/time': 0.028938465746994727}
1
85.85035347938538
{'dataset': 'housing', 'training_attack': 'fgsm', 'test_attack': 'fgsm', 'adv_training_percentage': 25, 'training_time': 85.85035347938538, 'R2': 0.075440769837673, 'MAE': 0.8852450953921005, 'MSE': 1.2081485464807067, 'R2/time': 0.000878747340927234, 'MAE/time': 0.01031149039595589, 'MSE/time': 0.014072726523727251}
2
99.63143372535706
{'dataset': 'housing', 'training_attack': 'fgsm', 'test_attack': 'fgsm', 'adv_training_percentage': 50, 'training_time': 99.63143372535706, 'R2': -0.21438713308995894, 'MAE': 0.6509892061421915, 'MSE': 1.5868751312448786, 'R2/time': -0.002151802147913842, 'MAE/time': 0.006533974086297918, 'MSE/t

  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results = results.append(new_row, ignore_index=True)
  results 

In [74]:
results

Unnamed: 0,dataset,training_attack,test_attack,adv_training_percentage,training_time,R2,MAE,MSE,R2/time,MAE/time,MSE/time
0,housing,fgsm,fgsm,0,44.030864,0.024905,0.872241,1.274186,0.000566,0.01981,0.028938
1,housing,fgsm,fgsm,25,85.850353,0.075441,0.885245,1.208149,0.000879,0.010311,0.014073
2,housing,fgsm,fgsm,50,99.631434,-0.214387,0.650989,1.586875,-0.002152,0.006534,0.015927
3,housing,fgsm,fgsm,75,189.086894,0.531159,0.548596,0.612649,0.002809,0.002901,0.00324
4,housing,fgsm,fgsm,100,159.441691,0.094088,0.689123,1.183781,0.00059,0.004322,0.007425
5,housing,fgsm,pgd,0,44.030864,0.145314,0.796478,1.116843,0.0033,0.018089,0.025365
6,housing,fgsm,pgd,25,85.850353,0.318806,0.686565,0.890136,0.003714,0.007997,0.010368
7,housing,fgsm,pgd,50,99.631434,-0.158075,0.604344,1.513291,-0.001587,0.006066,0.015189
8,housing,fgsm,pgd,75,189.086894,0.504916,0.571863,0.646941,0.00267,0.003024,0.003421
9,housing,fgsm,pgd,100,159.441691,0.029747,0.734347,1.267858,0.000187,0.004606,0.007952
