This notebook describes how to run anomaly detection on the F16 CSAF system.

Before anything, let's import the required libraries and functions:

In [6]:
import argparse
import json
import math
import os
import random
import typing as typ

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_curve
from sklearn.neighbors import LocalOutlierFactor
from sklearn.neighbors import NearestNeighbors
from sklearn.svm import OneClassSVM
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

# Switch matplotlib over to seaborn's default look for every figure drawn below.
sns.set()
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Folder that holds the predictor checkpoints and plots, and the sub-folder of this experiment.
iqn_base_path = "./models"
env_name = "CSAF_airspeed"
# makedirs (instead of mkdir) also creates "./models" itself when it is missing, and exist_ok makes
# re-running this cell harmless.
os.makedirs(os.path.join(iqn_base_path, env_name), exist_ok=True)

test_performances = []
# num_inputs is the width of a state vector; num_actions is the width used to pad action sequences.
num_inputs = 17
num_actions = 4
num_outputs = 1
# One predictor per state feature, each named after the index of the feature it tracks ('0' ... '16').
gvd_names = [str(feature_index) for feature_index in range(num_inputs)]


First, one should generate a set of initial conditions for the system to begin with:

In [14]:
! python ../src/generate_f16_ic.py --ic-file ic.json --n-samples 2

Now that the initial conditions are stored (note that the command above requests only 2 samples; pass `--n-samples 100` to generate 100), they can be used to generate trajectory data:

In [None]:
! python ../src/generate_dataset.py --ic-file=ic.json --config-file=../examples/f16/f16_simple_airspeed_config.toml --data-format openai --output-file data_airspeed.json


So far, we have generated one trajectory per initial condition (100 when `--n-samples 100` is used), each of a
different length, using the "airspeed" autopilot. These data can then be used to train the predictors.

Predictors, or recurrent general value distributions, are very close in principle to value functions in common
reinforcement learning literature. They estimate the expected distribution of returns given a state and horizon.

The following code cell defines the architecture of the predictors (rGVDs):

In [7]:
class RecurrentIQN(nn.Module):
    """Recurrent quantile network used as a predictor (rGVD).

    A GRU cell consumes the current state, a cosine embedding of the sampled quantile fractions ``tau``
    is multiplied element-wise into the post-GRU features, and a linear head emits ``num_outputs``
    values per sampled quantile.

    ``fc2_units`` and ``fc3_units`` are accepted for interface compatibility but no layer uses them;
    ``num_quantile_sample`` is stored but not read by the methods of this class.
    """

    def __init__(self, num_inputs, num_outputs, gru_size, quantile_embedding_dim, num_quantile_sample, device,
                 fc1_units=32, fc2_units=64, fc3_units=32):
        super().__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.gru_size = gru_size
        self.quantile_embedding_dim = quantile_embedding_dim
        self.num_quantile_sample = num_quantile_sample
        self.device = device

        self.gru = nn.GRUCell(num_inputs, gru_size)
        self.post_gru = nn.Linear(gru_size, fc1_units)
        self.fc = nn.Linear(fc1_units, num_outputs)

        # The embedding is multiplied element-wise with the output of post_gru, so both must have the
        # same width. (A fixed width of 32 only worked with the default fc1_units.)
        self.phi = nn.Linear(self.quantile_embedding_dim, fc1_units)

        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)

    def forward(self, state, hx, tau, num_quantiles):
        """Advance the recurrent state by one step and predict one value per quantile.

        Args:
            state: tensor that can be expanded to ``(batch, num_quantiles, num_inputs)``; the helpers of
                this class pass ``(batch, 1, num_inputs)``.
            hx: GRU hidden state with ``batch * num_quantiles`` rows and ``gru_size`` columns.
            tau: quantile fractions with ``batch * num_quantiles`` rows and one column.
            num_quantiles: number of quantile samples per batch element.

        Returns:
            ``(z, ghx)`` where ``z`` has shape ``(batch, num_outputs, num_quantiles)`` and ``ghx`` is
            the new hidden state.
        """
        input_size = state.size()[0]  # batch_size(train) or 1(get_action)
        num_rows = input_size * num_quantiles

        # cos(pi * k * tau) for k = 0 .. quantile_embedding_dim - 1, evaluated on the model's device so
        # that tau may arrive from either the host or the device.
        tau = tau.to(self.device).expand(num_rows, self.quantile_embedding_dim)
        pi_mtx = torch.Tensor(np.pi * np.arange(0, self.quantile_embedding_dim)).to(self.device)
        cos_tau = torch.cos(tau * pi_mtx)

        phi = F.relu(self.phi(cos_tau))

        # Repeat every state once per quantile so that each (state, tau) pair becomes one GRU row.
        state_tile = state.expand(input_size, num_quantiles, self.num_inputs)
        state_tile = state_tile.reshape(-1, self.num_inputs).to(self.device)

        ghx = self.gru(state_tile, hx)
        x = self.post_gru(ghx)
        x = self.fc(x * phi)

        z = x.view(-1, num_quantiles, self.num_outputs)
        z = z.transpose(1, 2)  # [input_size, num_output, num_quantile]
        return z, ghx

    @staticmethod
    def _predict(model, hx, states, batch_size, num_tau_sample):
        """Sample quantile fractions, run one model step and average over the output dimension."""
        tau = torch.Tensor(np.random.rand(batch_size * num_tau_sample, 1))
        states = states.reshape(states.shape[0], 1, -1)
        z_a, hx = model(states, hx, tau, num_tau_sample)
        return torch.mean(z_a, dim=1), hx

    @staticmethod
    def _quantile_huber_loss(z_a, target, num_tau_sample, device):
        """Quantile-weighted smooth-L1 loss between predicted quantiles and a scalar target per row.

        NOTE(review): the weights use an evenly spaced grid of fractions rather than the random
        fractions that were fed to the network - confirm that this is intended.
        """
        T_z = target.to(device).unsqueeze(1).expand(-1, num_tau_sample)
        error_loss = T_z - z_a
        huber_loss = F.smooth_l1_loss(z_a, T_z.detach(), reduction='none')
        # Integer arange divided by n always yields exactly n fractions; arange(0, 1, 1 / n) can yield
        # n + 1 of them because of floating point rounding, which would break the reshape below.
        tau = torch.arange(num_tau_sample, dtype=z_a.dtype, device=device) / num_tau_sample
        tau = tau.view(1, num_tau_sample)

        loss = (tau - (error_loss < 0).float()).abs() * huber_loss
        return loss.sum(dim=1).mean()

    @classmethod
    def train_model(cls, model, optimizer, hx, states, actions, target, batch_size, num_tau_sample, device):
        """Run one optimisation step on a single time step.

        ``actions`` is accepted for interface compatibility but is currently not fed to the model.

        Returns:
            ``(loss, hx)``: the loss tensor of this step and the new hidden state.
        """
        z_a, hx = cls._predict(model, hx, states, batch_size, num_tau_sample)
        loss = cls._quantile_huber_loss(z_a, target, num_tau_sample, device)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss, hx

    @classmethod
    def eval_model(cls, model, hx, states, actions, target, batch_size, num_tau_sample, device):
        """Compute the loss of a single time step without updating the model.

        Runs under ``torch.no_grad()``, so the returned tensors carry no autograd graph.

        Returns:
            ``(loss, hx)``.
        """
        with torch.no_grad():
            z_a, hx = cls._predict(model, hx, states, batch_size, num_tau_sample)
            loss = cls._quantile_huber_loss(z_a, target, num_tau_sample, device)
        return loss, hx

    @classmethod
    def test_model(cls, model, hx, states, actions, target, batch_size, num_tau_sample, device):
        """Like ``eval_model`` but also return the predicted quantile values.

        Returns:
            ``(z_a, loss, hx)`` where ``z_a`` has a leading batch dimension of size one squeezed away.
        """
        with torch.no_grad():
            z_a, hx = cls._predict(model, hx, states, batch_size, num_tau_sample)
            loss = cls._quantile_huber_loss(z_a, target, num_tau_sample, device)
        return z_a.squeeze(0), loss, hx

    @classmethod
    def feed_forward(cls, model, hx, states, batch_size, num_tau_sample):
        """Predict the quantile values for one time step (inference only, no autograd graph).

        Returns:
            ``(z_a, hx)`` where ``z_a`` has a leading batch dimension of size one squeezed away.
        """
        with torch.no_grad():
            z_a, hx = cls._predict(model, hx, states, batch_size, num_tau_sample)
        return z_a.squeeze(0), hx


Now, in order to make the data ready for training, the following function can be used. It basically calculates the return
of each predictor based on the current state and horizon, and then shuffles them for training:

In [8]:
def construct_gvd_data_undiscounted(input_len, dataset, batch_size, horizon, device, gvd_name, sum_type):
    """Build train/test loaders of padded episodes together with the return targets of one predictor.

    States are min-max normalised per feature over the whole dataset *only* to compute the targets;
    the state tensors handed to the loaders are the raw, un-normalised states.

    Args:
        input_len: width of a state vector (used for the padded state tensor).
        dataset: non-empty sequence of episodes, each a mapping with 'time', 'state' and 'actions'.
        batch_size: batch size of both loaders.
        horizon: number of steps to look ahead when computing a target.
        device: device the tensors are moved to.
        gvd_name: name of the predictor; ``int(gvd_name)`` is the index of the state feature it tracks.
        sum_type: "delta" (sum of differences), "abs_delta" (sum of absolute differences) or
            "time_avg" (sum of differences divided by the length of the look-ahead window).

    Returns:
        ``(train_dl, test_dl, max_len)``: loaders over a random 90/10 split of the episodes and the
        length every episode was zero-padded to. Each sample is ``(states, actions, returns, length)``.
    """
    assert len(dataset) != 0, "Memory is empty!"
    assert sum_type in ("delta", "abs_delta", "time_avg"), \
        "Undefined/unknown method given to calculate the target return for GVDs. Notice the" \
        " given arguments!"
    feature_index = int(gvd_name)

    episodes_states = []
    episodes_actions = []
    for data in dataset:
        n_steps = len(data['time'])
        episodes_states.append(np.array([data['state'][j] for j in range(n_steps)], dtype=float))
        episodes_actions.append(np.array([data['actions'][j] for j in range(n_steps)], dtype=float))

    # Normalisation statistics are computed once for the whole dataset rather than once per episode.
    tracked = np.concatenate([states[:, feature_index] for states in episodes_states])
    feature_min = tracked.min()
    feature_range = tracked.max() - feature_min
    if feature_range == 0:
        # A feature that never changes would give 0 / 0 = NaN targets; treat it as constant zero.
        feature_range = 1.0

    episodes_returns = []
    for states in episodes_states:
        feature = (states[:, feature_index] - feature_min) / feature_range
        returns = np.zeros(len(feature))
        for j in range(len(feature)):
            # The window is clipped at the end of the episode, so the last step has no differences.
            deltas = np.diff(feature[j: j + horizon + 1])
            if sum_type == "delta":
                returns[j] = deltas.sum()
            elif sum_type == "abs_delta":
                returns[j] = np.abs(deltas).sum()
            else:  # "time_avg"
                returns[j] = float(deltas.sum()) / len(feature[j: j + horizon])
        episodes_returns.append(returns)

    # Zero-pad every episode to the longest one so that they can be stacked into dense tensors.
    n_episodes = len(episodes_states)
    max_len = max(len(states) for states in episodes_states)
    # The action width comes from the data itself instead of a module-level constant.
    action_dim = episodes_actions[0].shape[1]
    padded_states = np.zeros((n_episodes, max_len, input_len))
    padded_actions = np.zeros((n_episodes, max_len, action_dim))
    padded_returns = np.zeros((n_episodes, max_len))
    for i, (states, actions, returns) in enumerate(zip(episodes_states, episodes_actions, episodes_returns)):
        padded_states[i, :len(states)] = states
        padded_actions[i, :len(actions)] = actions
        padded_returns[i, :len(returns)] = returns

    states_t = torch.Tensor(padded_states).to(device)
    actions_t = torch.Tensor(padded_actions).to(device)
    returns_t = torch.Tensor(padded_returns).to(device)
    lengths_t = torch.Tensor([len(states) for states in episodes_states]).to(device)[:, None, None]

    tensor_dataset = torch.utils.data.TensorDataset(states_t, actions_t, returns_t, lengths_t)
    all_indices = np.arange(n_episodes)
    np.random.shuffle(all_indices)
    split = int(n_episodes * 90 / 100)
    train_dl = DataLoader(tensor_dataset, batch_size, sampler=SubsetRandomSampler(all_indices[:split]))
    test_dl = DataLoader(tensor_dataset, batch_size, sampler=SubsetRandomSampler(all_indices[split:]))
    return train_dl, test_dl, max_len


The following functions implement the training step, the validation step, and the outer loop that switches from training
to validation at a fixed interval to monitor how the rGVD is performing. After each validation pass, a plot of the training and validation losses is saved:

In [11]:
def learn_undiscounted(model, optimizer, memory, max_len, gru_size, num_tau_sample, device):
    """Train the predictor for one pass over ``memory`` and return the mean per-step loss.

    Every batch is unrolled step by step over ``max_len`` time steps; the hidden state is detached
    between steps, so gradients never flow across more than one step. Padded steps are trained on like
    real ones (the episode length stored in each sample is ignored).

    Returns:
        A 0-dim tensor holding the average loss; it is detached from the autograd graph.
    """
    total_loss = 0
    count = 0
    model.train()

    for s_batch, a_batch, mc_returns, _ in memory:
        # Every batch starts from an all-zero recurrent state, one row per (episode, quantile) pair.
        h_gvfs = torch.zeros(len(s_batch) * num_tau_sample, gru_size)
        for i in range(max_len):
            s, a, mc_return = s_batch[:, i, :], a_batch[:, i, :], mc_returns[:, i]
            loss, h_gvfs = RecurrentIQN.train_model(model, optimizer, h_gvfs.detach().to(device), s, a,
                                                    mc_return, len(s_batch), num_tau_sample, device)
            # Accumulate the detached value: summing the live loss would keep every step's graph
            # referenced and hand a tensor that requires grad to the caller for logging and plotting.
            total_loss += loss.detach()
            count += 1

    return total_loss / count

def save_model(model, file_path):
    """Write the parameters (``state_dict``) of ``model`` to ``file_path``."""
    weights = model.state_dict()
    torch.save(weights, file_path)

def evaluation_undiscounted(model, memory, max_len, gru_size, num_tau_sample, device, best_gvf_total_loss, is_test=False):
    """Compute the mean per-step loss of the predictor over ``memory`` without updating it.

    Args:
        best_gvf_total_loss: best (lowest) average loss seen so far.
        is_test: when true, nothing is printed and the best loss is left untouched.

    Returns:
        ``(avg_loss, best_gvf_total_loss)``: the average loss rounded to three decimals and the possibly
        updated best loss.

    Raises:
        ValueError: if ``memory`` yields no time step at all.
    """
    total_loss = 0
    count = 0
    model.eval()
    # No gradients are needed here; without no_grad every step's graph would stay alive through the
    # running sum.
    with torch.no_grad():
        for s_batch, a_batch, mc_returns, _ in memory:
            h_gvfs = torch.zeros(len(s_batch) * num_tau_sample, gru_size)
            for i in range(max_len):
                s, a, mc_return = s_batch[:, i, :], a_batch[:, i, :], mc_returns[:, i]
                loss, h_gvfs = RecurrentIQN.eval_model(model, h_gvfs.detach().to(device), s, a, mc_return,
                                                       len(s_batch), num_tau_sample, device)
                total_loss += loss.detach()
                count += 1
    if count == 0:
        raise ValueError("Cannot evaluate: the evaluation memory contains no time steps.")

    avg_loss = total_loss.item() / count
    if not is_test:
        print("GVD avg loss is:", round(avg_loss, 3))
        if avg_loss <= best_gvf_total_loss:
            print("Saving the best model!")
            best_gvf_total_loss = avg_loss
            # NOTE(review): the checkpoint write below is disabled, so despite the message above
            # nothing is saved to disk.
#             save_model(model, "./models/" + env_name + "/" + gvd_name + "_gvd_" + target_return_type + "_h_" + str(horizon[0]) + ".pt")
    return round(avg_loss, 3), best_gvf_total_loss


def plot_losses(train_loss, test_loss, result_folder, horizon, gvd_name, info, bootstrapped):
    """Plot the training and test loss curves and save them as a PNG in ``result_folder``.

    The file is named ``losses_<gvd_name>_<info>_h<horizon>.png``, with ``_bootstrap`` inserted before
    the extension when ``bootstrapped`` is true. The current figure is cleared afterwards.
    """
    # Entries may be 0-dim tensors (possibly on the GPU or attached to a graph), which matplotlib
    # cannot convert by itself; float() handles tensors and plain numbers alike.
    plt.plot([float(value) for value in train_loss], label="training loss")
    plt.plot([float(value) for value in test_loss], label="test loss")
    plt.legend()
    suffix = "_bootstrap.png" if bootstrapped else ".png"
    file_name = "losses_" + gvd_name + "_" + info + "_h" + str(horizon) + suffix
    plt.savefig(os.path.join(result_folder, file_name))
    plt.clf()

def update_recurrent_gvds(train_memory, test_memory, r_gvd_model, optimizer, device, horizon, max_len):
    """Train a predictor for ``num_iterations`` passes, validating every ``test_interval`` passes.

    Reads the notebook-level settings ``num_iterations``, ``test_interval``, ``gru_units``,
    ``num_tau_sample``, ``gvd_name``, ``target_return_type``, ``iqn_base_path`` and ``env_name``.
    After every validation pass the loss curves are re-plotted into the experiment folder.
    """
    all_train_losses, all_test_losses = [], []
    best_gvf_total_loss = float("inf")
    result_folder = os.path.join(iqn_base_path, env_name)
    for i in range(num_iterations):
        total_loss = learn_undiscounted(r_gvd_model, optimizer, train_memory, max_len, gru_units, num_tau_sample, device)
        if i % test_interval != 0:
            continue
        # Keep plain floats in the history: a stored loss tensor would keep device memory (and possibly
        # an autograd graph) alive, and cannot be plotted directly.
        train_loss = float(total_loss)
        print("train loss : {}".format(train_loss))
        all_train_losses.append(train_loss)
        avg_eval_loss, best_gvf_total_loss = evaluation_undiscounted(r_gvd_model, test_memory, max_len, gru_units,
                                                                     num_tau_sample, device, best_gvf_total_loss)
        all_test_losses.append(avg_eval_loss)
        plot_losses(all_train_losses, all_test_losses, result_folder, horizon,
                    gvd_name, target_return_type, bootstrapped=False)

Now that all the necessary functions for training predictors have been defined, we can go ahead and start the training process:

In [12]:
# Data files: nominal training trajectories, nominal held-out trajectories and noisy trajectories.
data_path = "data_airspeed.json"
test_data_path = "data_airspeed_test.json"
noisy_data_path = "data_airspeed_noisy.json"

# Predictor architecture.
gru_units = 32
quantile_embedding_dim = 64
num_quantile_sample = 32
num_tau_sample = 16

# Which predictor to train and how its target return is defined.
gvd_name = "0"
target_return_type = "time_avg"
horizon = [1]

# Optimisation schedule.
lr = 0.001
batch_size = 64
num_iterations = 100000
test_interval = 10

# Anomaly scoring settings used by the detection cells further down.
score_calc_method = "knn"
merge_type = "avg"

print("Loading GVD training data!")
with open(data_path) as data_file:
    memory = json.load(data_file)
print("GVD data loaded!")

recurrent_gvd_model = RecurrentIQN(num_inputs, num_outputs, gru_units, quantile_embedding_dim,
                                   num_quantile_sample, device)
# Resume from an existing checkpoint of this predictor/horizon when there is one.
checkpoint_name = "{}_gvd_{}_h_{}.pt".format(gvd_name, target_return_type, horizon[0])
gvd_path = os.path.join(iqn_base_path, env_name, checkpoint_name)
if os.path.exists(gvd_path):
    print("Loading pre-trained model!")
    pretrained_weights = torch.load(gvd_path, map_location=device)
    recurrent_gvd_model.load_state_dict(pretrained_weights)
    print("Pre-trained model loaded!")
optimizer = optim.Adam(recurrent_gvd_model.parameters(), lr=lr)
recurrent_gvd_model.to(device)
recurrent_gvd_model.train()

train_rb, test_rb, max_len = construct_gvd_data_undiscounted(
    num_inputs, memory, batch_size, horizon[0], device, gvd_name, target_return_type)
update_recurrent_gvds(train_rb, test_rb, recurrent_gvd_model, optimizer, device, horizon[0], max_len)


Loading GVD training data!
GVD data loaded!
Loading pre-trained model!
Pre-trained model loaded!


KeyboardInterrupt: 

Once the predictors are trained, we can run them over a nominal *unseen* trajectory to see how well they predict
the features throughout the trajectory. First, we need to prepare the test data, define the test function,
and define the function that plots the test accuracy:

In [None]:
def construct_test_gvd_data_undiscounted(input_len, dataset, train_dataset, batch_size, horizon, device, sum_type):
    """Build a loader over a single test episode with return targets for *every* state feature.

    The targets are computed on states that are min-max normalised with the statistics of
    ``train_dataset``; the state tensor handed to the loader holds the raw states.

    Args:
        input_len: width of a state vector (kept for interface compatibility).
        dataset: one episode, a mapping with 'time', 'state' and 'actions'.
        train_dataset: sequence of training episodes that provide the normalisation statistics.
        batch_size: batch size of the loader.
        horizon: number of steps to look ahead when computing a target.
        device: device the tensors are moved to.
        sum_type: "delta", "abs_delta" or "time_avg".

    Returns:
        ``(test_dl, max_len)``: the loader and the number of steps of the episode. Each sample is
        ``(states, actions, returns, length)`` with ``returns`` holding one column per state feature.
    """
    assert len(dataset) != 0, "Memory is empty!"
    assert sum_type in ("delta", "abs_delta", "time_avg"), \
        "Undefined/unknown method given to calculate the target return for GVDs. Notice the" \
        " given arguments!"

    training_states = np.array([data['state'][j] for data in train_dataset for j in range(len(data['time']))],
                               dtype=float)
    feature_min = training_states.min(axis=0)
    feature_range = training_states.max(axis=0) - feature_min
    # A feature that is constant in the training data would give a division by zero (NaN/inf targets).
    feature_range[feature_range == 0] = 1.0

    n_steps = len(dataset['time'])
    states = np.array([dataset['state'][j] for j in range(n_steps)], dtype=float)
    actions = np.array([dataset['actions'][j] for j in range(n_steps)], dtype=float)
    normalized_states = (states - feature_min) / feature_range

    returns = np.zeros(normalized_states.shape)
    for j in range(n_steps):
        # Summing along axis 0 yields a zero *vector* when the window holds a single row (last step),
        # so every row of ``returns`` has the same width for all three target types.
        deltas = np.diff(normalized_states[j: j + horizon + 1], axis=0)
        if sum_type == "delta":
            returns[j] = deltas.sum(axis=0)
        elif sum_type == "abs_delta":
            returns[j] = np.abs(deltas).sum(axis=0)
        else:  # "time_avg"
            returns[j] = deltas.sum(axis=0) / len(normalized_states[j: j + horizon])

    max_len = n_steps
    # A leading dimension of size one turns the single episode into a one-element dataset.
    episodes_states = torch.Tensor(states).to(device).unsqueeze(0)
    episodes_actions = torch.Tensor(actions).to(device).unsqueeze(0)
    episodes_returns = torch.Tensor(returns).to(device).unsqueeze(0)
    episodes_len = torch.Tensor([n_steps]).to(device)[:, None, None]

    tensor_dataset = torch.utils.data.TensorDataset(episodes_states, episodes_actions, episodes_returns, episodes_len)
    test_sampler = SubsetRandomSampler(np.arange(len(episodes_states)))
    test_dl = DataLoader(tensor_dataset, batch_size, sampler=test_sampler)
    return test_dl, max_len

def test_undiscounted(model, memory, max_len, gru_size, num_tau_sample, device, gvd_name):
    """Run one predictor over the test episode and collect targets and predicted distributions.

    Args:
        gvd_name: integer index of the return column (state feature) this predictor is compared with.

    Returns:
        ``(mcs, dists)``: per step, the actual return and the array of predicted quantile values.
        ``mc_return.item()`` requires the loader to yield batches of exactly one episode.
    """
    model.eval()
    dists = []
    mcs = []
    # Inference only: no_grad avoids building an autograd graph at every step.
    with torch.no_grad():
        for s_batch, a_batch, mc_returns, _ in memory:
            h_gvfs = None
            for i in range(max_len):
                s, a, mc_return = s_batch[:, i, :], a_batch[:, i, :], mc_returns[:, i, gvd_name]
                if h_gvfs is None:
                    h_gvfs = torch.zeros(len(s) * num_tau_sample, gru_size)

                # The loss returned by test_model is not needed here, only the predicted distribution.
                distributional_return, _, h_gvfs = RecurrentIQN.test_model(
                    model, h_gvfs.detach().to(device), s, a, mc_return, len(s), num_tau_sample, device)
                dists.append(distributional_return.squeeze(0).detach().cpu().numpy())
                mcs.append(mc_return.item())
    return mcs, dists

def plot_rgvd_accuracy(results, result_folder, horizon, info, plot_dist=True):
    """Plot actual returns against predicted returns for every predictor and save the figure.

    Args:
        results: mapping from predictor name to ``(actual_returns, predicted_distributions)``.
        result_folder: folder the PNG is written to.
        horizon: horizon shown in the title and used in the file name.
        info: free-form tag used in the file name.
        plot_dist: when true, draw every predicted quantile; otherwise draw the mean, minimum and
            maximum of the predicted quantiles.
    """
    n_rows = max(1, math.ceil(len(results) / 3))
    # squeeze=False keeps ``axs`` two-dimensional even for a single row, so ``axs[r, c]`` also works
    # when three or fewer predictors are plotted.
    fig, axs = plt.subplots(n_rows, 3, figsize=(20, 20), squeeze=False)
    r, c = 0, 0
    for key in results.keys():
        if plot_dist:
            axs[r, c].plot(results[key][1], color='limegreen')
        else:
            predicted = np.array(results[key][1])
            axs[r, c].plot(predicted.mean(axis=1), color='limegreen')
            axs[r, c].plot(predicted.max(axis=1), color='palegreen')
            axs[r, c].plot(predicted.min(axis=1), color='palegreen')
        axs[r, c].plot(results[key][0], color='teal')
        axs[r, c].set(xlabel='step', ylabel='return')
        axs[r, c].set_title("GVD: " + key.split("_")[0])
        # Panels are filled column by column: move down until the last row, then start the next column.
        if r < n_rows - 1:
            r += 1
        else:
            c += 1
            r = 0
    if plot_dist:
        labels = ["actual MC returns", "rGVD returns"]
        fig.legend(labels=labels, labelcolor=['teal', 'limegreen'], handlelength=0)
    else:
        labels = ["actual MC returns", "rGVD returns mean", "rGVD returns min & max"]
        fig.legend(labels=labels, labelcolor=['teal', 'limegreen', 'palegreen'], handlelength=0)
    # fig.show()
    fig.suptitle("Recurrent GVD accuracy\nhorizon: " + str(horizon) + "\n")
    fig.tight_layout()
    fig.savefig(os.path.join(result_folder, "rGVD_accuracy_" + info + "_h" + str(horizon) + ".png"))

Once the defining step is completed, we shall start the test:

In [None]:
# The training trajectories are reloaded because the test targets are normalised with their statistics.
print("Loading GVD training data!")
with open(data_path) as train_file:
    train_memory = json.load(train_file)
print("GVD training data loaded!")
test_results = {}
print("Loading GVD test data!")
with open(test_data_path) as test_file:
    memory = json.load(test_file)
# Evaluate on one randomly chosen held-out episode (replace the index with a fixed number to pin it).
episode_index = random.randint(0, len(memory) - 1)
memory = memory[episode_index]
print("GVD test data loaded!")
test_rb, max_len = construct_test_gvd_data_undiscounted(
    num_inputs, memory, train_memory, batch_size, horizon[0], device, target_return_type)

experiment_folder = os.path.join(iqn_base_path, env_name)
for rgvd_name in gvd_names:
    # Restore the trained predictor of this feature and put it in inference mode.
    recurrent_gvd_model = RecurrentIQN(num_inputs, num_outputs, gru_units, quantile_embedding_dim,
                                       num_quantile_sample, device)
    gvd_path = os.path.join(experiment_folder,
                            "{}_gvd_{}_h_{}.pt".format(rgvd_name, target_return_type, horizon[0]))
    recurrent_gvd_model.load_state_dict(torch.load(gvd_path, map_location=device))
    recurrent_gvd_model.to(device)
    recurrent_gvd_model.eval()

    actual_returns, dist_returns = test_undiscounted(
        recurrent_gvd_model, test_rb, max_len, gru_units, num_tau_sample, device, int(rgvd_name))
    test_results[rgvd_name] = (actual_returns, dist_returns)

plot_rgvd_accuracy(test_results, experiment_folder, horizon[0], target_return_type, plot_dist=True)


In the last step, we use the predictors to detect anomalies in an anomalous environment. Before that, we need
trajectories that represent the anomalous system. These can be generated by repeating the first step with the noisy
system configuration file instead of the nominal one. Once the noisy data are generated and stored, anomaly detection
can be started.

In [None]:
def local_outlier_factor(distribution, actual_return):
    """Score ``actual_return`` against ``distribution`` with a local outlier factor (8 neighbours).

    The actual return is appended to the predicted values, the detector is fitted on the combined
    sample, and the magnitude of the factor assigned to the appended point is returned.
    """
    samples = np.append(distribution, actual_return).reshape(-1, 1)
    detector = LocalOutlierFactor(n_neighbors=8)
    detector.fit_predict(samples)
    # The appended actual return is the last row of ``samples``.
    return abs(detector.negative_outlier_factor_[-1])


def k_nearest_neighbors(distribution, actual_return):
    """Return the summed distance from ``actual_return`` to its 8 nearest values in ``distribution``."""
    reference = distribution.reshape(-1, 1)
    query = np.array(actual_return).reshape(-1, 1)
    knn = NearestNeighbors(n_neighbors=8).fit(reference)
    distances, _ = knn.kneighbors(query)
    return distances.sum()


def isolation_forest(distribution, actual_return):
    """Score ``actual_return`` with an isolation forest fitted on ``distribution``.

    Returns the magnitude of the forest's ``score_samples`` value for the actual return.
    """
    reference = distribution.reshape(-1, 1)
    query = np.array(actual_return).reshape(-1, 1)
    forest = IsolationForest(n_estimators=10, contamination=0.03)
    forest.fit(reference)
    sample_scores = forest.score_samples(query)
    return abs(sample_scores)[0]


def measure_as(as_method, value_dist, ac_return):
    """Compute the anomaly score of ``ac_return`` with respect to the predicted ``value_dist``.

    ``as_method`` selects the scorer: "lof", "knn" or "iforest". The actual return is rounded to five
    decimals before it is scored.
    """
    if as_method == "lof":
        scorer = local_outlier_factor
    elif as_method == "knn":
        scorer = k_nearest_neighbors
    elif as_method == "iforest":
        scorer = isolation_forest
    else:
        assert False, "Anomaly score measuring method is not given properly! Check '--score_calc_method'!"
    return scorer(value_dist, round(ac_return, 5))


def anomaly_detection(all_models, memory, max_len, gru_size, num_tau_sample, device, as_method, h, merge_type):
    """Score every step of an episode with every predictor and merge the per-predictor scores.

    Args:
        all_models: sequence of ``(model, name)`` pairs; ``int(name)`` is the index of the return column
            the model is compared with.
        memory: loader yielding ``(states, actions, returns, length)``; it must yield batches of exactly
            one episode (the hidden state and ``.item()`` calls below rely on it).
        max_len: number of steps to process.
        as_method: scoring method forwarded to ``measure_as``.
        h: horizon; accepted for interface compatibility and not used here.
        merge_type: "avg" (mean over predictors) or "max" (maximum over predictors).

    Returns:
        ``(ep_scores, scores_merged, mcs, ep_dists)``: per-predictor scores, the merged score per step,
        per-predictor actual returns and per-predictor predicted distributions.

    Raises:
        ValueError: if ``merge_type`` is neither "avg" nor "max".
    """
    if merge_type not in ("avg", "max"):
        raise ValueError("Unknown merge_type {!r}; expected 'avg' or 'max'.".format(merge_type))

    ep_scores = {rgvd_name: [] for _, rgvd_name in all_models}
    ep_dists = {rgvd_name: [] for _, rgvd_name in all_models}
    mcs = {rgvd_name: [] for _, rgvd_name in all_models}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for s_batch, a_batch, mc_returns, _ in memory:
            # Every episode starts from an all-zero recurrent state for every predictor.
            hs_dict = {rgvd_name: torch.zeros(num_tau_sample, gru_size) for _, rgvd_name in all_models}
            for i in range(max_len):
                s, mc_return = s_batch[:, i, :], mc_returns[:, i]

                for rgvd_model, rgvd_name in all_models:
                    distributional_return, h_gvfs = RecurrentIQN.feed_forward(
                        rgvd_model, hs_dict[rgvd_name].detach().to(device), s, len(s), num_tau_sample)
                    hs_dict[rgvd_name] = h_gvfs
                    predicted = distributional_return.squeeze(0).detach().cpu().numpy()
                    actual = mc_return.squeeze(0)[int(rgvd_name)].item()
                    ep_scores[rgvd_name].append(measure_as(as_method, predicted, actual))
                    ep_dists[rgvd_name].append(predicted)
                    mcs[rgvd_name].append(actual)

    stacked_scores = np.array(list(ep_scores.values()))
    if merge_type == "avg":
        # A true mean over predictors (previously a plain sum). The ranking of steps, and therefore any
        # ROC/AUC computed from the merged score, is unaffected by the constant factor.
        scores_merged = stacked_scores.mean(axis=0)
    else:
        scores_merged = stacked_scores.max(axis=0)
    return ep_scores, scores_merged, mcs, ep_dists

def merged_confusion_matrix(nominal_scores, anom_scores):
    """Compute the ROC curve and AUC that separate nominal from anomalous merged scores.

    Nominal scores are labelled 0 and anomalous scores 1.

    Returns:
        ``(fpr, tpr, thresholds, auc)``.
    """
    labels = np.append(np.zeros(len(nominal_scores)), np.ones(len(anom_scores)))
    scores = np.append(nominal_scores, anom_scores)

    fpr, tpr, thresholds = roc_curve(labels, scores)
    area = sklearn.metrics.auc(fpr, tpr)
    return (fpr, tpr, thresholds, area)

def separated_confusion_matrix(nominal_scores, anom_scores):
    """Compute one ROC curve and AUC per predictor.

    Both arguments map a predictor name to its list of scores; nominal scores are labelled 0 and
    anomalous scores 1.

    Returns:
        Mapping from predictor name to ``(fpr, tpr, thresholds, auc)``.
    """
    results = {}
    for name, nominal in nominal_scores.items():
        anomalous = anom_scores[name]
        labels = np.append(np.zeros(len(nominal)), np.ones(len(anomalous)))
        scores = np.append(nominal, anomalous)

        fpr, tpr, thresholds = roc_curve(labels, scores)
        results[name] = (fpr, tpr, thresholds, sklearn.metrics.auc(fpr, tpr))
    return results


# Detection processes one episode at a time (anomaly_detection relies on single-episode batches).
batch_size = 1
print("Loading noisy data!")
with open(noisy_data_path) as f:
    anomalous_memory = json.load(f)
print("Noisy data loaded!")
print("Loading GVD training data!")
with open(data_path) as f:
    train_memory = json.load(f)
print("GVD training data loaded!")
print("Loading nominal data!")
with open(test_data_path) as f:
    nominal_memory = json.load(f)
print("Nominal data loaded!")

# Every anomalous trajectory is paired with the nominal trajectory at the same index; fail early
# instead of hitting an IndexError after a long computation.
if len(nominal_memory) < len(anomalous_memory):
    raise ValueError("Need at least as many nominal trajectories ({}) as anomalous ones ({}).".format(
        len(nominal_memory), len(anomalous_memory)))

# The trained predictors do not depend on the trajectory, so they are loaded once per horizon instead
# of once per trajectory.
rgvd_models_by_horizon = {}
for h in horizon:
    all_rgvd_models = []
    for rgvd_name in gvd_names:
        recurrent_gvd_model = RecurrentIQN(num_inputs, num_outputs, gru_units, quantile_embedding_dim,
                                           num_quantile_sample, device)
        gvd_path = os.path.join(iqn_base_path, env_name,
                                rgvd_name + "_gvd_" + target_return_type + "_h_" + str(h) + ".pt")
        recurrent_gvd_model.load_state_dict(torch.load(gvd_path, map_location=device))
        recurrent_gvd_model.to(device)
        recurrent_gvd_model.eval()
        all_rgvd_models.append((recurrent_gvd_model, rgvd_name))
    rgvd_models_by_horizon[h] = all_rgvd_models

all_data_merged_results = []
all_data_separated_results = []
for data_index in range(len(anomalous_memory)):
    single_anomalous_memory = anomalous_memory[data_index]
    single_nominal_memory = nominal_memory[data_index]
    merged_results = {}
    separated_results = {}
    for h in horizon:
        all_rgvd_models = rgvd_models_by_horizon[h]

        anomalous_rb, anom_max_len = construct_test_gvd_data_undiscounted(num_inputs, single_anomalous_memory, train_memory,
                                                                          batch_size, h, device, target_return_type)

        rgvds_anomaly_scores_a, merged_anomaly_scores_a, mcs_anom, dists_anom = anomaly_detection(
            all_rgvd_models, anomalous_rb, anom_max_len, gru_units, num_tau_sample, device,
            score_calc_method, h, merge_type)

        nominal_rb, nom_max_len = construct_test_gvd_data_undiscounted(num_inputs, single_nominal_memory, train_memory,
                                                                       batch_size, h, device, target_return_type)

        rgvds_anomaly_scores_n, merged_anomaly_scores_n, mcs_nom, dists_nom = anomaly_detection(
            all_rgvd_models, nominal_rb, nom_max_len, gru_units, num_tau_sample, device,
            score_calc_method, h, merge_type)

        # ROC/AUC of nominal versus anomalous scores, for the merged score and for every predictor.
        merged_results[h] = merged_confusion_matrix(merged_anomaly_scores_n, merged_anomaly_scores_a)
        separated_results[h] = separated_confusion_matrix(rgvds_anomaly_scores_n, rgvds_anomaly_scores_a)

        # Actual returns of both trajectories over their common length, kept for side-by-side inspection.
        common_len = min(nom_max_len, anom_max_len)
        data_comparison = {}
        for rgvd_name in gvd_names:
            feature_index = int(rgvd_name)
            anom_mcs = [mc_returns[:, i, feature_index].item()
                        for _, _, mc_returns, _ in anomalous_rb for i in range(common_len)]
            nom_mcs = [mc_returns[:, i, feature_index].item()
                       for _, _, mc_returns, _ in nominal_rb for i in range(common_len)]
            data_comparison[rgvd_name] = (nom_mcs, anom_mcs)
        print("Processing for h =", str(h), "is done! Move on to the next step!")
    all_data_merged_results.append(merged_results)
    all_data_separated_results.append(separated_results)

# Collect, per horizon, the merged AUC of every run and the per-predictor AUCs of every run
# (one inner list per predictor, in the order of gvd_names).
each_horizon_merged_auc = {}
each_horizon_separated_auc = {}
for h in horizon:
    each_horizon_merged_auc[h] = [run[h][3] for run in all_data_merged_results]
    each_horizon_separated_auc[h] = [
        [run[h][name][3] for run in all_data_separated_results]
        for name in gvd_names
    ]

print("Number of runs:", len(anomalous_memory))
for h in horizon:
    merged_auc = np.array(each_horizon_merged_auc[h])
    # Mean AUC of every predictor over all runs.
    feature_mean_auc = np.array(each_horizon_separated_auc[h]).mean(axis=1)

    print("-------- horizon", h, "--------")
    print("Max combined AUC:", round(merged_auc.max(), 3))
    print("Min combined AUC:", round(merged_auc.min(), 3))
    print("Average combined AUC:", round(merged_auc.mean(), 3))
    print("Individual feature with max AUC (feature #, AUC):", (feature_mean_auc.argmax(),
                                                                round(feature_mean_auc.max(), 3)))
    print("Individual feature with min AUC (feature #, AUC):", (feature_mean_auc.argmin(),
                                                                round(feature_mean_auc.min(), 3)))
    print("Individual features' AUCs (feature #, AUC):", end=' ')
    for k, feature_aucs in enumerate(each_horizon_separated_auc[h]):
        print((k, round(np.array(feature_aucs).mean(), 3)), end=' ')
    print("\n")
