In [1]:
import abc
import numpy as np
import tensorflow as tf
from typing import Optional, Text, List, Dict, Tuple, Union
import pickle
import os
from sklearn.preprocessing import MinMaxScaler

from tf_agents.agents import tf_agent
from tf_agents.drivers import driver
from tf_agents.environments import py_environment
from tf_agents.policies import tf_policy
from tf_agents.specs import array_spec
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts
from tf_agents.trajectories import trajectory
from tf_agents.trajectories import policy_step
from tf_agents.typing import types

import pandas as pd

2024-01-29 11:54:07.656945: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-29 11:54:07.710237: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-29 11:54:07.710275: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-29 11:54:07.711677: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-29 11:54:07.719086: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-29 11:54:07.719828: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
# Add the parent and grandparent directories to the module search path before
# importing the project-local `Components` package (presumably located in one
# of them -- confirm against the repository layout). Both entries are relative
# paths, so they resolve against the working directory the kernel runs in.
import sys
sys.path.append('..')
sys.path.append('../..')
from Components.feature_extractor import FeatureExtractor
from Components.data_processing import data_process

In [3]:
class BanditPyEnvironment(py_environment.PyEnvironment):
  """Common parent class for bandit-style Python environments.

  Concrete bandit environments inherit from this class and supply two hooks:
  `_observe()` and `_apply_action()`.

  Typical interaction:

  Call the environment's `reset()` once to obtain the first observation, then
  call `step(action)` for every decision. The time step handed back by
  `step(action)` carries the reward for that action together with the
  following observation.
  """

  def __init__(
      self,
      observation_spec: types.NestedArray,
      action_spec: types.NestedArray,
      reward_spec: Optional[types.NestedArray] = None,
      name: Optional[Text] = None,
  ):
    # The specs and the name are stored first; the parent constructor is
    # invoked afterwards without arguments.
    self._name = name
    self._reward_spec = reward_spec
    self._action_spec = action_spec
    self._observation_spec = observation_spec
    super().__init__()

  def _reset(self) -> ts.TimeStep:
    """Builds the opening time step around a fresh observation.

    Bandit environment implementations are not expected to override this.

    Returns:
      A time step of type FIRST containing an observation.
    """
    first_observation = self._observe()
    return ts.restart(
        first_observation,
        batch_size=self.batch_size,
        reward_spec=self.reward_spec(),
    )

  def _step(self, action: types.NestedArray) -> ts.TimeStep:
    """Applies `action` and packs its reward with the next observation.

    Bandit environment implementations are not expected to override this.

    Args:
      action: The action taken by the Bandit policy.

    Returns:
      A time step of type LAST containing the reward for the action taken and
      the next observation.
    """
    # The action is applied first; only then is the next observation drawn.
    action_reward = self._apply_action(action)
    next_observation = self._observe()
    return ts.termination(next_observation, action_reward)

  def action_spec(self) -> types.NestedArraySpec:
    """Returns the action spec given at construction time."""
    return self._action_spec

  def observation_spec(self) -> types.NestedArraySpec:
    """Returns the observation spec given at construction time."""
    return self._observation_spec

  def reward_spec(self) -> types.NestedArraySpec:
    """Returns the reward spec given at construction time (may be None)."""
    return self._reward_spec

  def _empty_observation(self):
    """Returns an all-zero structure shaped like the observation spec."""

    def _zeros_for(spec):
      return np.zeros(spec.shape, spec.dtype)

    return tf.nest.map_structure(_zeros_for, self.observation_spec())

  @abc.abstractmethod
  def _apply_action(self, action: types.NestedArray) -> types.Float:
    """Performs `action` in the environment and reports its reward.

    Args:
      action: A value conforming action_spec that will be taken as action in the
        environment.

    Returns:
      A float value that is the reward received by the environment.
    """

  @abc.abstractmethod
  def _observe(self) -> types.NestedArray:
    """Produces the current observation."""

  @property
  def name(self) -> Optional[Text]:
    """The optional name given at construction time."""
    return self._name

### Multi-Armed Bandit Environment for Model Selection in Anomaly Detection

To define a multi-armed bandit (MAB) environment, we first need to define its observation space and its action space.

**Observation Space**: The observation space consists of the features selected from the time series. Each subsequence of the time series is passed to the feature extractor component, which returns the features that make up the observation.

**Action Space**: The Action Space consists of the models that will be selected based on the given observation by the agent.

For the agent to have all the necessary information to make a good policy, the following functions need to be implemented.

* **_observe**: This function will call the feature_extractor module and return all the relevant features.

* **_reset**: This function initializes our environment. All the parameters will be reset back to their initial value.

* **_apply_action**: Applies the 'action' given by the agent and returns the corresponding reward.

* **_step**: Applies the action and returns the resulting time step, which contains the reward for the current action as well as the next observation.

In [7]:
class InitialEnv(BanditPyEnvironment):
    """Bandit environment base that precomputes per-model anomaly predictions.

    For every model the constructor min-max scales the raw anomaly scores and
    the model's threshold with the same scaler, labels a sample as anomalous
    (1) when its scaled score exceeds the scaled threshold, and derives two
    confidence signals: the fraction of models that flag each sample and each
    model's signed distance to its threshold.

    The abstract hooks `_observe()` and `_apply_action()` are not implemented
    here; subclasses provide them.

    Attributes set by the constructor:
      len_data: number of samples, taken from the first model's scores.
      num_models: number of entries in `list_pred_sc`.
      gtruth: the ground-truth labels, stored as given.
      model_path / model_list / model_names: the model directory, the sorted
        `.sav` file names found in it, and the part of each file name before
        the first underscore.
      list_pred_sc / list_thresholds: the raw inputs, stored as given.
      scaler: one fitted `MinMaxScaler` per model.
      list_scaled_sc: per model, scaled scores of shape (n_samples, 1).
      list_scaled_thresholds: per model, scaled threshold of shape (1, 1).
      list_pred: per model, float array of shape (len_data,) holding 0/1.
      list_concensus_conf: per sample, fraction of models predicting 1.
      dist_conf: per sample, a list with one (1, 1) array per model holding
        `scaled score - scaled threshold`.
    """

    def __init__(
        self,
        list_pred_sc: List[np.ndarray],
        list_thresholds: List[Union[float, np.ndarray]],
        list_gtruth: List[float],
        model_path = '../../saved_models/',
        observation_spec: Optional[types.NestedArray] = None,
        action_spec: Optional[types.NestedArray] = None,
        reward_spec: Optional[types.NestedArray] = None,
        name: Optional[Text] = None,
    ):
        """Scales scores and thresholds and precomputes predictions.

        Args:
          list_pred_sc: one 1-D sequence of raw anomaly scores per model.
          list_thresholds: one scalar threshold per model, in the same order.
          list_gtruth: ground-truth labels, indexed like the scores.
          model_path: directory that is scanned for `.sav` model files.
          observation_spec: optional spec forwarded to `BanditPyEnvironment`.
          action_spec: optional spec forwarded to `BanditPyEnvironment`.
          reward_spec: optional spec forwarded to `BanditPyEnvironment`.
          name: optional environment name forwarded to `BanditPyEnvironment`.

        Raises:
          ValueError: if a model's threshold is not a single value, or a model
            provides fewer scores than the first model.
        """
        # Chain to the base constructor so the spec attributes and the
        # environment state set up by the parent classes exist; previously
        # they were never initialised.
        super().__init__(observation_spec, action_spec, reward_spec, name)

        # Length of the testing data, number of models
        self.len_data = len(list_pred_sc[0])
        self.num_models = len(list_pred_sc)

        # List of ground truth labels
        self.gtruth = list_gtruth

        # Get the list of pretrained models. os.listdir returns names in
        # arbitrary order, so sort them to keep model_list/model_names stable.
        self.model_path = model_path
        self.model_list = sorted(
            f for f in os.listdir(self.model_path) if f.endswith('.sav')
        )

        # Extract the model names
        self.model_names = [f.split('_')[0] for f in self.model_list]

        # Raw scores and thresholds of the testing data
        self.list_pred_sc = list_pred_sc
        self.list_thresholds = list_thresholds

        # Scale the raw scores/thresholds, save each scaler and derive the
        # per-model predictions in the same pass.
        self.scaler = []
        self.list_scaled_sc = []
        self.list_scaled_thresholds = []
        self.list_pred = []
        for i in range(self.num_models):
            # np.asarray lets plain Python floats/lists through; arrays are
            # passed on unchanged.
            scores_col = np.asarray(self.list_pred_sc[i]).reshape(-1, 1)
            threshold_col = np.asarray(self.list_thresholds[i]).reshape(-1, 1)
            if threshold_col.size != 1:
                raise ValueError(
                    f'Model {i}: expected a single threshold value, '
                    f'got {threshold_col.size}.'
                )
            if scores_col.shape[0] < self.len_data:
                raise ValueError(
                    f'Model {i}: expected at least {self.len_data} scores, '
                    f'got {scores_col.shape[0]}.'
                )

            scaler_tmp = MinMaxScaler()
            scaled_sc = scaler_tmp.fit_transform(scores_col)
            scaled_threshold = scaler_tmp.transform(threshold_col)
            self.list_scaled_sc.append(scaled_sc)
            self.scaler.append(scaler_tmp)
            self.list_scaled_thresholds.append(scaled_threshold)

            # 1.0 where the scaled score is strictly above the threshold.
            # Only the first len_data samples are labelled, as before.
            is_anomaly = (scaled_sc[:self.len_data] > scaled_threshold).ravel()
            self.list_pred.append(is_anomaly.astype(float))

        # Prediction-consensus confidence: share of models that predicted 1
        # (anomaly) for each sample, reusing the predictions computed above.
        votes = np.sum(self.list_pred, axis=0)
        self.list_concensus_conf = (votes / self.num_models).tolist()

        # Distance-to-threshold confidence: one entry per sample, each a list
        # with the signed distance of every model's scaled score.
        self.dist_conf = [
            [
                self.list_scaled_sc[i][t] - self.list_scaled_thresholds[i]
                for i in range(self.num_models)
            ]
            for t in range(self.len_data)
        ]

In [6]:
class MyModelSelectionEnv(InitialEnv):
    """Bandit environment in which each action selects a pre-trained model.

    Action `i` loads the pickled model named in `_MODEL_FILES[i]` from
    `self.model_path`, scores the current observation with it, and is rewarded
    by comparing that model's precomputed prediction with the ground-truth
    label at `self.pointer`.

    NOTE(review): nothing in this class advances `self.pointer`; it is set to
    0 on construction and on reset. Decide where the pointer should move
    before training on more than the first sample.
    """

    # Action index -> pickled model file name inside `self.model_path`.
    _MODEL_FILES = {
        0: 'iforest_dodgers_v1.sav',
        1: 'osvm_dodgers_v1.sav',
    }

    def __init__(self, list_pred_sc, list_thresholds, list_gtruth, time_series_dir):
        """Loads the time series and registers the action/observation specs.

        Args:
          list_pred_sc: per-model raw anomaly scores, forwarded to InitialEnv.
          list_thresholds: per-model thresholds, forwarded to InitialEnv.
          list_gtruth: ground-truth labels, forwarded to InitialEnv.
          time_series_dir: path passed to `pd.read_csv` and `data_process`.
        """
        # Was `super().__int__(...)`, which raised AttributeError.
        super().__init__(list_pred_sc, list_thresholds, list_gtruth)

        self.time_series = pd.read_csv(time_series_dir)
        self.subsequences = data_process(time_series_dir)

        # One scalar action per step: the index of the model to use. The
        # previous shape of (2,) could not be compared against 0 or 1.
        action_spec = array_spec.BoundedArraySpec(
            shape=(),
            dtype=np.int32,
            minimum=0,
            maximum=len(self._MODEL_FILES) - 1,
            name='Select Models',
        )
        # NOTE(review): shape and dtype are kept from the original code;
        # confirm they match what FeatureExtractor.feature_extractor returns.
        observation_spec = array_spec.BoundedArraySpec(
            shape=(30,), dtype=np.int32, name='observation'
        )

        # Initialise the bandit base class directly so the specs built above
        # are actually registered with the environment.
        BanditPyEnvironment.__init__(
            self, observation_spec=observation_spec, action_spec=action_spec
        )

        # Index of the sample whose ground truth is used for the reward.
        self.pointer = 0
        # Cache of unpickled models, keyed by action index.
        self._models = {}

    def _reset(self):
        """Rewinds the sample pointer and returns the first time step."""
        self.pointer = 0
        return super()._reset()

    def _observe(self):
        """Extracts the 'Dodgers' features and returns them as the observation."""
        dodgers_features = FeatureExtractor()
        self._observation = dodgers_features.feature_extractor('Dodgers')
        return self._observation

    def _load_model(self, model_idx):
        """Returns the model for `model_idx`, unpickling it at most once."""
        if model_idx not in self._models:
            model_file_path = os.path.join(
                self.model_path, self._MODEL_FILES[model_idx]
            )
            # SECURITY: pickle.load can execute arbitrary code contained in
            # the file; only point model_path at trusted model files.
            with open(model_file_path, 'rb') as model_file:
                self._models[model_idx] = pickle.load(model_file)
        return self._models[model_idx]

    def _apply_action(self, action):
        """Scores the observation with the selected model and returns the reward.

        Args:
          action: scalar index of the model to use (a key of `_MODEL_FILES`).

        Returns:
          The reward computed by `_reward_function`.

        Raises:
          ValueError: if `action` is not a single value or not a known index.
        """
        model_idx = int(np.asarray(action).item())
        if model_idx not in self._MODEL_FILES:
            raise ValueError(
                f'Unknown action {model_idx}; '
                f'expected one of {sorted(self._MODEL_FILES)}.'
            )

        model = self._load_model(model_idx)
        feats = self._observe()
        score = model.decision_function(feats)

        # The reward is computed for every valid action; previously it was
        # only assigned inside the `elif action == 1` branch.
        return self._reward_function(score, model_idx)

    def _reward_function(self, score, model_idx=0):
        """Rewards the selected model's prediction against the ground truth.

        Args:
          score: decision score(s) produced by the selected model. It is not
            used yet: the prediction is read from the precomputed
            `self.list_pred`.
          model_idx: index into `self.list_pred` of the selected model.
            Defaults to 0 so the one-argument call form keeps working.
            NOTE(review): assumes `list_pred_sc` was supplied in the same
            model order as the action indices -- confirm against the caller.

        Returns:
          1 for a true positive, -1.5 for a false negative, -0.5 for a false
          positive and 0.1 for a true negative.
        """
        # Replaces the undefined `observation[2]` lookup of the original code.
        predicted_anomaly = self.list_pred[model_idx][self.pointer] == 1

        if self.gtruth[self.pointer] == 1:  # Ground truth is 1 (anomaly)
            if predicted_anomaly:  # True Positive (TP)
                reward = 1
            else:  # False Negative (FN)
                reward = -1.5
        else:  # Ground truth is 0 (normal)
            if predicted_anomaly:  # False Positive (FP)
                reward = -0.5
            else:  # True Negative (TN)
                reward = 0.1

        return reward