In [1]:
import numpy as np
import pandas as pd

import sys
import tensorflow as tf

sys.path.append('..')
sys.path.append('../..')

from src.Environment.environment import MyModelSelectionEnv
from src.utils import train_test_anomaly
from src.Environment import trainer

2024-03-16 12:15:42.445251: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-16 12:15:42.491794: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-16 12:15:42.491836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-16 12:15:42.493081: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-16 12:15:42.500339: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-16 12:15:42.501226: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
import tf_agents.bandits.agents as bandit_agents
from tf_agents.specs import array_spec, tensor_spec
from tf_agents.trajectories import time_step as ts
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment
from tf_agents.trajectories import StepType, restart
from tf_agents.trajectories import policy_step
from tf_agents.networks import q_network
from tf_agents.agents import data_converter

### Importing Data and Setting Up the Environment

In [3]:
# The file is read as a headerless CSV, so the two column labels are supplied here.
columns = ['value', 'anomaly']
file_path = '../datasets/Dodgers/101-freeway-traffic.test.out'

df = pd.read_csv(
    file_path,
    header=None,    # treat the first line as data, not as column names
    names=columns,
)

In [4]:
# Only the second element returned by train_test_anomaly is kept; the first is discarded.
_, test_data = train_test_anomaly(df)

# Both values below are handed to MyModelSelectionEnv when the environment is built.
# NOTE(review): the meaning of the two threshold values is not visible here — confirm.
list_gtruth = test_data['anomaly']
list_threshold = [-0.03, 5]

In [5]:
# Column vector (n, 1) view of the 'value' series.
test_np = test_data['value'].values[:, np.newaxis]

In [6]:
# Build the Python-side environment first ...
env = MyModelSelectionEnv(
    test_data,
    list_thresholds=list_threshold,
    list_gtruth=list_gtruth,
)
# ... then wrap it so TF-Agents can drive it as a TF environment.
environment = tf_py_environment.TFPyEnvironment(env)


### Setting Up the Neural Epsilon Greedy Agent

In [7]:
# --- Hyperparameters for the neural epsilon-greedy agent ---
EPSILON = 0.1           # passed to the agent as `epsilon`
LR = 0.005              # learning rate given to the optimizer
LAYERS = (50, 50, 50)   # fully connected layer sizes of the Q-network

# --- Training schedule ---
TRAINING_LOOPS = 500
STEPS_PER_LOOP = 1

# Specs come from the wrapped TF environment.
observation_spec = environment.time_step_spec().observation
action_spec = environment.action_spec()

# Network that maps an observation to one value per action.
network = q_network.QNetwork(
    observation_spec,
    action_spec,
    fc_layer_params=LAYERS,
)

In [8]:
# Display the spec of the time steps produced by the wrapped environment
# (step_type, reward, discount and observation entries).
environment.time_step_spec()

TimeStep(
{'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type'),
 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
 'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
 'observation': TensorSpec(shape=(159,), dtype=tf.float64, name='observation')})

In [13]:

# Neural epsilon-greedy bandit agent built on the Q-network defined above.
# TRAINING_LOOPS / STEPS_PER_LOOP are already set in the hyperparameter cell,
# so they are not re-declared here.
eps_agent = bandit_agents.neural_epsilon_greedy_agent.NeuralEpsilonGreedyAgent(
    time_step_spec=environment.time_step_spec(),
    action_spec=action_spec,
    reward_network=network,
    optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=LR),
    epsilon=EPSILON,
    # These two arguments are sequences of info-field names. A bare string
    # only appears to work because `in` on a string does substring matching.
    emit_policy_info=('predicted_rewards_mean',),
    info_fields_to_inherit_from_greedy=('predicted_rewards_mean',),
)

eps_agent.initialize()

In [14]:
# Driver that steps the environment with the agent's collect policy.
# No observers are attached, so running it only advances the environment.
driver = dynamic_step_driver.DynamicStepDriver(
    environment,
    eps_agent.collect_policy,
    num_steps=environment.batch_size * STEPS_PER_LOOP,
)

In [15]:
# Run the driver once as a smoke test of the environment/policy wiring.
# NOTE(review): the returned tuple is echoed in full, including the whole
# observation tensor — consider displaying only the reward to keep output short.
driver.run()

*** Feature extraction started ***



*** Feature extraction finished ***


(TimeStep(
 {'step_type': <tf.Tensor: shape=(1,), dtype=int32, numpy=array([1], dtype=int32)>,
  'reward': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.17], dtype=float32)>,
  'discount': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>,
  'observation': <tf.Tensor: shape=(1, 159), dtype=float64, numpy=
 array([[ 2.92070000e+04,  9.16500000e+00,  2.92070000e+04,
          5.96061224e+04,  2.37800000e-01,  1.00000000e+01,
          4.00000000e+01, -1.00000000e+00,  3.20000000e+01,
          2.00000000e-02,  4.00000000e-02,  6.00000000e-02,
          8.00000000e-02,  1.00000000e-01,  1.20000000e-01,
          1.40000000e-01,  1.60000000e-01,  1.80000000e-01,
          2.00000000e-01,  7.04900000e-01,  1.78300000e-01,
          2.55940000e+00,  1.45690000e+00,  4.97220000e+00,
          1.88210000e+00,  5.43070000e+00,  2.27450000e+00,
          2.16000000e-01,  1.93740000e+00,  3.50660000e+00,
          1.43680000e+00,  1.47192000e+01,  1.00809000e+01,
 

### Training the Agent

In [None]:
# Directory passed to trainer.train as root_dir.
output_dir = '../agent_outputs'

# Train the epsilon-greedy agent with the schedule from the hyperparameter cell.
trainer.train(
    agent=eps_agent,
    environment=environment,
    root_dir=output_dir,
    steps_per_loop=STEPS_PER_LOOP,
    training_loops=TRAINING_LOOPS,
    save_policy=True,
)

### Setting Up LinUCB Agent

In [None]:
AGENT_ALPHA = 10.0  # forwarded as the agent's `alpha` argument

# Linear UCB bandit agent over the same environment specs.
# NOTE(review): unlike eps_agent, initialize() is not called on this agent —
# confirm whether trainer.train takes care of it.
linucb_agent = bandit_agents.lin_ucb_agent.LinearUCBAgent(
    action_spec=environment.action_spec(),
    time_step_spec=environment.time_step_spec(),
    alpha=AGENT_ALPHA,
    tikhonov_weight=0.001,
    dtype=tf.float32,
)

In [None]:
# Separate output directory for the LinUCB run.
output_dir = '../agent_outputs/lin_UCB'

# Rebinds the notebook-wide schedule constants: STEPS_PER_LOOP is 2 from here on.
TRAINING_LOOPS, STEPS_PER_LOOP = 500, 2

trainer.train(
    agent=linucb_agent,
    environment=environment,
    root_dir=output_dir,
    steps_per_loop=STEPS_PER_LOOP,
    training_loops=TRAINING_LOOPS,
    save_policy=True,
)

### Evaluating the Policy

In [None]:
# Path of the exported policy that the evaluation cells load.
output_dir = '../agent_outputs/policy_194/'

In [None]:
# Load the exported policy SavedModel from output_dir.
saved_policy = tf.saved_model.load(output_dir)

In [None]:
# Initial policy state for a batch of one; shown as the cell's output.
# (The former `time_step = 0` placeholder was dropped: it was never read before
# being overwritten and it prevented this expression from being displayed.)
policy_state = saved_policy.get_initial_state(batch_size=1)
policy_state

In [None]:
# Show the signatures exposed by the loaded SavedModel.
saved_policy.signatures

In [None]:
# Reset the underlying Python environment (`env`, not the TF wrapper).
# NOTE(review): the result is not used in the cells visible here, and the name
# suggests a feature tensor while reset() presumably returns a time step — confirm.
feat_tf = env.reset()

In [None]:
# Fetch the current observation via the environment's `_observe` method.
# NOTE(review): this reaches into a non-public method of MyModelSelectionEnv;
# presumably it returns the 159-value feature vector — confirm.
feat_np = env._observe()

In [None]:
# Components of a first-step TimeStep for a batch of one.
# All four tensors carry a leading batch dimension so they agree with each other
# and with the (1,) / (1, 159) layout the TF environment produces.
step_type = tf.constant(0, dtype=tf.int32, shape=(1,))       # 0 marks the first step
reward = tf.constant(0.0, dtype=tf.float32, shape=(1,))
discount = tf.constant(1.0, dtype=tf.float32, shape=(1,))
# assumes feat_np is a single unbatched feature vector — TODO confirm
observation = tf.expand_dims(tf.convert_to_tensor(feat_np, dtype=tf.float64), axis=0)

In [None]:
# Assemble the TimeStep from the components prepared in the previous cell.
time_step = ts.TimeStep(
    step_type=step_type,
    reward=reward,
    discount=discount,
    observation=observation,
)

In [1]:
# Synthetic first-step components for a batch of one (requires the import cell
# to have been run in the current kernel).
step_type_1 = tf.constant(0, dtype=tf.int32, shape=(1,))      # 0 marks the first step
reward_1 = tf.constant(0.0, dtype=tf.float32, shape=(1,))
# A first step carries discount 1.0, matching the non-synthetic `discount` above.
discount_1 = tf.constant(1.0, dtype=tf.float32, shape=(1,))
# Random stand-in observation, batched like the other three tensors.
observation_1 = tf.random.uniform((1, 159), minval=0.0, maxval=1.0, dtype=tf.float64)

NameError: name 'tf' is not defined

In [None]:
# Wrap the components in a TimeStep rather than a plain tuple, so the structure
# matches what the saved policy's `action` expects.
# `observation` (the batched real features) is used deliberately here;
# `observation_1` is not consumed by this cell.
time_step_1 = ts.TimeStep(
    step_type=step_type_1,
    reward=reward_1,
    discount=discount_1,
    observation=observation,
)

In [None]:
# Query the loaded policy for an action on the hand-built time step.
# NOTE(review): this rebinds `policy_step`, which the import cell also uses for
# the tf_agents.trajectories.policy_step module; after this line the module is
# no longer reachable under that name — consider a different variable name.
policy_step = saved_policy.action(time_step_1, policy_state)