# 1 model, 1 attack, and 1 plot simulation benchmark:
***

- **Data Set:** Air Quality

## Necessary Imports:
***

In [None]:
import copy
import json
import os
import uuid
from typing import Tuple

import jespipe.plugin.save as save
import numpy as np
import pandas as pd
import tensorflow as tf
from jespipe.plugin.attack.attack import Attack
from jespipe.plugin.manip.manip import Manipulation
from jespipe.plugin.train.build import Build
from jespipe.plugin.train.evaluate import Evaluate
from jespipe.plugin.train.fit import Fit
from jespipe.plugin.train.predict import Predict
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tqdm import trange

## Class Definition:
***

### Vanilla Manipulation Class:

In [None]:
class VanillaManip(Manipulation):
    def __init__(self, parameters: dict) -> None:
        """
        Manipulation plugin that leaves the data untouched ("vanilla").
        The dataset is read from a CSV file without a header row, and the
        target feature is taken to be the last column.

        ### Parameters:
        - :param parameters: Parameter dictionary; the "dataset" entry is the
          path of the CSV file to read.

        ### Methods:
        - public
          - manipulate (abstract): Perform vanilla manipulation on passed dataset.
        - private
          - _preproc_vanilla: Split the loaded dataset into features and labels.
        """
        self.dataset = pd.read_csv(parameters["dataset"], header=None)

    def manipulate(self) -> pd.DataFrame:
        """
        Perform vanilla manipulation on passed dataset.

        ### Returns:
        :return: DataFrame holding the feature columns followed by the label column.
        """
        feature_block, label_column = self._preproc_vanilla()

        # Features first, label last, glued together column-wise.
        frames = [pd.DataFrame(feature_block), pd.DataFrame(label_column)]
        return pd.concat(frames, axis=1)

    def _preproc_vanilla(self) -> Tuple[np.ndarray, np.ndarray]:
        """
        Split the loaded dataset into features and labels.

        ### Returns:
        :return: Tuple with dataset features at index 0 and labels at index 1.
        """
        values = np.array(self.dataset)

        # Every column except the last is a feature; the last column is the label.
        return values[:, :-1], values[:, -1]


### RNN LSTM Class:

In [None]:
class BuildLSTM(Build):
    def __init__(self, parameters: dict) -> None:
        """
        Build class that creates an uncompiled-to-compiled Sequential LSTM model
        together with its train/test split.

        ### Parameters:
        :param parameters: Parameter dictionary sent by Jespipe. The entries
        "dataset_name", "dataframe" and "model_params" are read.

        ### Methods:
        - public
          - build_model (abstract): Build LSTM RNN model using uncompromised data.
        - private
          - _load_data: Window the data and split it into training and testing sets.
        """
        self.dataset_name = parameters["dataset_name"]
        self.dataframe = parameters["dataframe"]
        self.model_params = parameters["model_params"]

    def build_model(self) -> Tuple[Sequential, Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
        """
        Build LSTM RNN model using uncompromised data.

        The network is five LSTM(30) layers that return full sequences, one
        LSTM(30) layer that returns only its final output, and a single-unit
        Dense head; every LSTM layer is followed by Dropout(0.1). The model is
        compiled with mean squared error as both loss and metric, optimized by Adam.

        ### Returns:
        :return: (model, (feat_train, label_train, feat_test, label_test))
        - Positional value of each index in the tuple:
          - 0: An unfitted Sequential LSTM model.
          - 1: The dataset split into training features, training labels,
          test features, and test labels.
        """
        hyper = self.model_params
        window = hyper["sequence_length"]
        # The last column of the dataframe is the label, so it is not counted as a feature.
        n_features = self.dataframe.shape[1] - 1
        step_size = hyper["learning_rate"]

        splits = self._load_data(self.dataframe, window, n_features)
        feat_train, label_train, feat_test, label_test = splits

        model = Sequential()

        # Stacked recurrent layers that pass the full sequence on to the next layer.
        for _ in range(5):
            model.add(LSTM(input_shape=feat_train.shape[1:], units=30, return_sequences=True))
            model.add(Dropout(0.1))

        # Final recurrent layer collapses the sequence to a single vector.
        model.add(LSTM(30, return_sequences=False))
        model.add(Dropout(0.1))
        model.add(Dense(units=1))

        model.compile(
            loss="mean_squared_error",
            optimizer=Adam(learning_rate=step_size),
            metrics=["mean_squared_error"],
        )

        return model, (feat_train, label_train, feat_test, label_test)

    def _load_data(self, data: pd.DataFrame, seq_len: int, feature_count: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Window the data and split it into training and testing sets.

        Overlapping windows of `seq_len` consecutive rows are built, shuffled
        with a fixed seed, and split 85% / 15%. The label of a window is the
        last column of the window's last row.

        NOTE: this reseeds NumPy's global random generator with 2020.

        ### Parameters:
        :param data: Passed dataset to split into training and testing features and labels.
        :param seq_len: Number of consecutive rows per window (LSTM hyperparameter).
        :param feature_count: Number of feature columns in the passed dataset (label excluded).

        ### Returns:
        :return: (x_train, y_train, x_test, y_test)
        - Positional value of each index in the tuple:
          - 0: Training features.
          - 1: Training labels.
          - 2: Test features.
          - 3: Test labels.
        """
        window_count = len(data) - seq_len
        values = np.asarray(data)
        windows = np.zeros((window_count, seq_len, feature_count + 1))

        # Rows inside one window always stay together
        # (e.g. with seq_len=6, six consecutive rows form one sample).
        for start in range(window_count):
            windows[start] = values[start:start + seq_len]

        # Fixed seed so the shuffle, and therefore the split, is reproducible.
        np.random.seed(2020)

        # Shuffle whole windows in place to avoid a second copy of the data.
        np.random.shuffle(windows)

        # Train: 85%; Test: 15%
        cut = int(len(windows) * 0.85)
        train_part = windows[:cut, :]
        test_part = windows[cut:, :]

        x_train = train_part[:, :, :-1]
        y_train = train_part[:, -1, -1]
        x_test = test_part[:, :, :-1]
        y_test = test_part[:, -1, -1]

        x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], feature_count)
        x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], feature_count)

        return x_train, y_train, x_test, y_test


class FitLSTM(Fit):
    def __init__(self, model: Sequential, feat_train: np.ndarray, 
                    label_train: np.ndarray, parameters: dict) -> None:
        """
        Fit class that trains a Sequential LSTM model on the training data.

        ### Parameters:
        :param model: Sequential LSTM model to fit to training data.
        :param feat_train: Training features.
        :param label_train: Training labels.
        :param parameters: Parameter dictionary sent by Jespipe. Its "model_params"
        entry must provide "batch_size", "epochs", "validation_split" and "verbose".

        ### Methods:
        - public
          - model_fit (abstract): Fit Sequential LSTM model using user-specified hyperparameters.
        """
        self.model = model
        self.feat_train = feat_train
        self.label_train = label_train

        hyper = parameters["model_params"]
        self.model_params = hyper
        self.batch_size = hyper["batch_size"]
        self.epochs = hyper["epochs"]
        self.validation_split = hyper["validation_split"]
        self.verbose = hyper["verbose"]

    def model_fit(self) -> None:
        """
        Fit Sequential LSTM model using user-specified hyperparameters.
        The model is trained in place; nothing is returned.
        """
        fit_options = {
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "validation_split": self.validation_split,
            "verbose": self.verbose,
        }
        self.model.fit(self.feat_train, self.label_train, **fit_options)


class PredictLSTM(Predict):
    def __init__(self, model: Sequential, predictee: np.ndarray) -> None:
        """
        Prediction class wrapping a Sequential LSTM model and the data it
        should make predictions on.

        ### Parameters:
        :param model: Sequential LSTM model to make predictions with.
        :param predictee: Data to make prediction on.

        ### Methods:
        - public
          - model_predict (abstract): Make prediction on data using Sequential LSTM model.
        """
        self.predictee = predictee
        self.model = model

    def model_predict(self) -> np.ndarray:
        """
        Make prediction on data using Sequential LSTM model.

        ### Returns:
        :return: Sequential LSTM model's prediction for the stored data.
        """
        return self.model.predict(self.predictee)


class EvaluateLSTM(Evaluate):
    def __init__(self, feature_test: np.ndarray, label_test: np.ndarray, 
                    model_to_eval: Sequential) -> None:
        """
        Evaluation class to facilitate evaluating predictions made by a fitted Sequential LSTM model.
        
        ### Parameters:
        :param feature_test: Test features.
        :param label_test: Test labels.
        :param model_to_eval: Sequential LSTM model to evaluate.
        ### Methods:
        - public
          - model_evaluate (abstract): Evaluate the mean squared error, root mean squared error,
          scatter index and mean absolute error of the Sequential LSTM model's prediction.
        - private:
          - _eval_mse: Internal method to evaluate the mean squared error 
          of the Sequential LSTM model's prediction.
          - _eval_rmse: Internal method to evaluate the root 
          mean squared error of the Sequential LSTM model's prediction.
          - _eval_scatter_index: Internal method to evaluate the scatter index
          of the Sequential LSTM model's prediction.
          - _eval_mean_absolute_error: Internal method to evaluate the mean
          absolute error of the Sequential LSTM model's prediction.
        """
        self.feature_test = feature_test
        self.label_test = label_test
        self.model_to_eval = model_to_eval

    def model_evaluate(self) -> Tuple[float, float, float, float]:
        """
        Evaluate the mean squared error, root mean squared error, scatter index
        and mean absolute error of the Sequential LSTM model's prediction.
        ### Returns:
        :return: (mse, rmse, scatter_index, mae)
        - Positional value of each index in the tuple:
          - 0: Mean squared error of model's prediction.
          - 1: Root mean squared error of model's prediction.
          - 2: Scatter index of model's prediction.
          - 3: Mean absolute error of model's prediction.
        """
        mse = self._eval_mse()
        rmse = self._eval_rmse(mse)
        scatter_index = self._eval_scatter_index(rmse)
        mae = self._eval_mean_absolute_error()
        return mse, rmse, scatter_index, mae

    def _eval_mse(self) -> float:
        """
        Internal method to evaluate the mean squared error 
        of the Sequential LSTM model's prediction.
        ### Returns:
        :return: Mean squared error of the Sequential LSTM model's prediction.
        """
        score = self.model_to_eval.evaluate(self.feature_test, self.label_test, verbose=0)

        # evaluate() returns [loss, *metrics]; index 1 is the first compiled metric.
        # This assumes the model was compiled with MSE as its first metric.
        return score[1]

    def _eval_rmse(self, mse: float) -> float:
        """
        Internal method to evaluate the root mean 
        squared error of the Sequential LSTM model's prediction.
        
        ### Parameters:
        :param mse: Mean squared error of the Sequential LSTM model's prediction.
        
        ### Returns:
        :return: Root mean squared error of the Sequential LSTM model's prediction.
        """
        return np.sqrt(mse)

    def _eval_scatter_index(self, rmse: float) -> float:
        """
        Internal method to evaluate the scatter index
        of the Sequential LSTM model's prediction.

        The scatter index is the RMSE expressed as a percentage of the mean of
        the observed values, i.e. the test labels.
        ### Parameters:
        :param rmse: Root mean squared error of the Sequential LSTM model's prediction.
        ### Returns:
        :return: Scatter index of the Sequential LSTM model's prediction.
        """
        # Normalise by the observations (labels); the input features are not
        # what the RMSE was measured against.
        return np.multiply(np.divide(rmse, np.mean(self.label_test)), 100)

    def _eval_mean_absolute_error(self) -> float:
        """
        Internal method to evaluate the mean absolute error
        of the Sequential LSTM model's prediction.
        ### Returns:
        :return: Mean absolute error of the Sequential LSTM model's prediction.
        """
        # Give labels and predictions the same (n, 1) shape. Subtracting (n, 1)
        # predictions from (n,) labels would broadcast to an (n, n) matrix and
        # average the error over every label/prediction pair.
        y_true = np.asarray(self.label_test).reshape(-1, 1)
        y_pred = np.asarray(self.model_to_eval.predict(self.feature_test)).reshape(-1, 1)

        mae = MeanAbsoluteError()
        return mae(y_true, y_pred).numpy()


### Carlini & Wagner L<sub>2</sub> Class:

In [None]:
class CarliniL2(Attack):
    """
    This is a modified version of the L_2 optimized attack of Carlini and Wagner (2016).
    It has been modified to fit time series regression problems.
    """
    def __init__(self, model: str, features: np.ndarray, parameters: dict) -> None:
        """
        Create a Carlini&Wagner L_2 attack instance.
        ### Parameters:
        :param model: System file path to trained regressor model.
        :param features: Test features to use for adversarial example generation.
        :param parameters: Parameter dictionary for the attack. The entries "change",
        "learning_rate", "max_iter", "binary_search_steps", "batch_size",
        "initial_const", "sequence_length" and "verbose" are read.
        ### Methods:
        - public
          - attack (abstract): Launch L_2 attack on the given time series data.
        - private
          - _generate: Internal method to perform the L_2 attack on the given time series data.
          - _generate_batch: Internal method to generate batched adversarial samples and return them in an array.
        """
        self.model = load_model(model)
        self.features = features
        self.min_change = parameters["change"]
        self.learning_rate = parameters["learning_rate"]
        self.max_iter = parameters["max_iter"]
        self.binary_search_steps = parameters["binary_search_steps"]
        self.batch_size = parameters["batch_size"]
        self.initial_const = parameters["initial_const"]
        self.sequence_length = parameters["sequence_length"]
        self.verbose = parameters["verbose"]

    def attack(self) -> np.ndarray:
        """
        Launch L_2 attack on the given time series data.
        ### Returns:
        :return: An array holding the adversarial examples.
        """
        return self._generate(self.features)

    def _generate(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Internal method to perform the L_2 attack on the given time series data.
        ### Parameters:
        :param x: An array with the original inputs to be attacked.
        ### Returns:
        :return: An array holding the adversarial examples.
        """
        # Mean benign prediction over the whole input; each batch compares its
        # own mean against it to pick the attack direction.
        self.mean = self.model.predict(x).mean()

        # Generate adversarial examples batch by batch
        x_adv = np.zeros(x.shape)
        nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
        for i in trange(nb_batches, desc="C&W L_2", disable=not self.verbose):
            start = i * self.batch_size
            stop = start + self.batch_size
            x_adv[start:stop] = self._generate_batch(x[start:stop])
        return x_adv

    def _generate_batch(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Internal method to generate batched adversarial samples and return them in an array.

        For every sample the objective ``||x - x_adv||_2^2 + c * max(f, 0)`` is
        minimised by gradient descent, where ``f`` is positive while the
        adversarial prediction is less than ``change`` away from the benign
        prediction in the chosen direction. ``c`` is tuned per sample by a
        modified binary search. Samples for which no successful adversarial
        example is found are returned unchanged.

        Relies on ``self.mean`` having been set by ``_generate``.
        ### Parameters:
        :param x: An array with the batched original inputs to be attacked.
        ### Returns:
        :return: An array holding the batched adversarial examples.
        """
        batch = x.shape[0]

        # Initialize constant for binary search:
        c_current = np.ones(batch) * self.initial_const
        c_best = np.zeros(batch)

        # Initialize placeholders for best l2 distance and attack found so far
        best_l2dist = np.inf * np.ones(batch)
        best_x_adv = x.copy()

        # Without optimisation steps there is nothing to search for.
        if self.max_iter <= 0:
            return best_x_adv

        # One benign prediction per sample (the model is a single-output regressor).
        pred = np.asarray(self.model.predict(x)).reshape(batch)

        # Decide if adversarial examples should predict above or below the original.
        # Adversarial examples are squashed into [0, 1], so targets approaching
        # 0 or 1 are difficult to reach; flip the direction in that case.
        mean = pred.mean()
        above = (mean <= self.mean)

        if above:
            if mean + self.min_change > 0.9:
                above = False
        else:
            if mean - self.min_change < 0.1:
                above = True

        x_t = tf.constant(np.asarray(x, dtype=np.float32))
        pred_t = tf.constant(pred.astype(np.float32))

        for _ in range(self.binary_search_steps):

            # Variable to optimize. tanh(0) = 0, so every search step starts from
            # x_adv = 0.5 everywhere.
            # NOTE(review): the original C&W attack starts from the benign input
            # (w = arctanh(2x - 1)); confirm the 0.5 start is intentional.
            w = tf.Variable(tf.zeros_like(x_t), trainable=True, dtype=tf.float32)

            # Per-sample trade-off constants for this search step.
            c_t = tf.constant(c_current.astype(np.float32))

            for _ in range(self.max_iter):

                with tf.GradientTape() as tape:
                    # Map w into the [0, 1] box the normalized data lives in.
                    x_adv = (tf.tanh(w) + 1.0) / 2.0
                    pred_adv = tf.reshape(self.model(x_adv, training=False), [-1])

                    # Squared l2 distance, one value per sample.
                    square_diff = tf.square(x_t - x_adv)
                    l2dist = tf.reduce_sum(tf.reshape(square_diff, [batch, -1]), axis=1)

                    # Hinge term: positive until the prediction has moved by at
                    # least min_change in the chosen direction.
                    if above:
                        f_sum = pred_t + self.min_change - pred_adv
                    else:
                        f_sum = pred_adv + self.min_change - pred_t

                    # Every term has shape (batch,), so each sample is weighted
                    # by its own constant and nothing broadcasts across samples.
                    loss = l2dist + c_t * tf.maximum(f_sum, 0.0)

                # Samples are independent, so the gradient of the summed loss is
                # the per-sample gradient.
                gradients = tape.gradient(loss, w)

                # Plain gradient descent step on w.
                w.assign_sub(self.learning_rate * gradients)

                # Score the candidates from this iteration's forward pass.
                x_adv_np = x_adv.numpy()
                pred_adv_np = pred_adv.numpy()
                l2_np = np.sum(np.square(x - x_adv_np).reshape(batch, -1), axis=1)

                if above:
                    success = pred_adv_np >= pred + self.min_change
                else:
                    success = pred_adv_np <= pred - self.min_change

                # Keep a candidate if it succeeds and is at least as close as the best so far.
                improved = success & (l2_np <= best_l2dist)
                best_x_adv[improved] = x_adv_np[improved]
                best_l2dist[improved] = l2_np[improved]

            # Update constant c using modified binary search, based on whether the
            # last candidate of this search step succeeded.
            failed = ~success
            never_succeeded = failed & (c_best == 0)
            refine = failed & (c_best != 0)

            # Success: remember this constant and try a smaller one.
            c_best[success] = c_current[success]
            c_current[success] /= 2

            # Failure without any earlier success: grow the constant aggressively.
            c_current[never_succeeded] *= 10

            # Failure after an earlier success: move towards the last working constant.
            c_current[refine] = (c_current[refine] + c_best[refine]) / 2

        return best_x_adv


### Plotting Class:

## Simulation:
***

### Training stage:

In [None]:
# Single root for every artifact of this model, so that the directories that are
# created are the same ones that are written to.
# NOTE(review): the original cell created "data/air-quality/models/vanilla1/..." but
# wrote to "data/air-quality/vanilla1/..."; the write location is kept here. Confirm
# which layout is intended.
model_dir = "data/air-quality/vanilla1"
os.makedirs(f"{model_dir}/data", exist_ok=True)
os.makedirs(f"{model_dir}/stat", exist_ok=True)

# Use vanilla manipulation on data
manip_params = {"dataset": "tmp/air-quality/air-quality.csv"}
vanillamanip = VanillaManip(manip_params)
dataframe = vanillamanip.manipulate()

# Scale every column (features and label) into [0, 1]
sc = MinMaxScaler(feature_range=(0, 1))
dataframe = pd.DataFrame(sc.fit_transform(dataframe))

# Read in model hyperparameters
with open(".config.json", "rt") as fin:
    config_file = json.load(fin)

# Fail here with a clear message rather than later inside the model classes.
model_params = config_file.get("air-quality")
if model_params is None:
    raise KeyError('".config.json" has no "air-quality" entry with model hyperparameters')

# Train model
params = {"dataset_name": "air-quality", "dataframe": dataframe, "model_params": model_params}

lstm_build = BuildLSTM(params)
# data = (feat_train, label_train, feat_test, label_test)
model, data = lstm_build.build_model()

lstm_fit = FitLSTM(model, data[0], data[1], params)
lstm_fit.model_fit()

# Persist hyperparameters, the held-out test split, the normalized data and the model
save.dictionary(model_dir, "model_parameters", model_params)
save.features(model_dir, data[2])
save.labels(model_dir, data[3])
save.compress_dataframe(f"{model_dir}/data", "baseline-data-normalized", dataframe)
with open(f"{model_dir}/model_summary.txt", "wt") as fout:
    lstm_fit.model.summary(print_fn=lambda line: fout.write(line + "\n"))
lstm_fit.model.save(f"{model_dir}/air-quality-vanilla1.h5", include_optimizer=True)

# Baseline prediction on the untouched test features
lstm_predict = PredictLSTM(lstm_fit.model, data[2])
pred = lstm_predict.model_predict()
save.compress_dataframe(f"{model_dir}/data", "baseline-prediction", pd.DataFrame(pred))

# Baseline metrics on the untouched test split
lstm_evaluate = EvaluateLSTM(data[2], data[3], lstm_fit.model)
mse, rmse, scatter_index, mae = lstm_evaluate.model_evaluate()

# Metrics are keyed by "0.0" for this unattacked baseline
log_dict = {"0.0": {"mse": mse, "rmse": rmse, "scatter_index": scatter_index, "mae": mae}}

### Attack stage:

In [None]:
# NOTE(review): unfinished stub. os.makedirs("") raises FileNotFoundError because the
# path is empty; the attack-stage output directory still has to be filled in.
os.makedirs("")

### Cleaning stage:

## Summary:
***