# Increasing the Notebook Size

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Importing Native Python Libraries
import os
import math
import random
import argparse
import collections
import re
from collections import namedtuple, deque, defaultdict
from itertools import count

# Importing Numpy and Pandas
import numpy as np
import pandas as pd
# import modin.pandas as pd

# Importing Matplotlib
import matplotlib
import matplotlib.pyplot as plt

import seaborn as sns

# Importing TA-Lib
import talib

# Importing PyTorch Libraries
import torch
import torch.utils.data as utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.autograd import Variable

# Importing SciKit Learn
from sklearn.preprocessing import StandardScaler, MinMaxScaler, scale

# Importing Gymnasium Libraries
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box

# Importing TQDM to track cell progress
from tqdm import tqdm

# Ignoring Warnings
# import warnings

# warnings.filterwarnings('ignore')

In [3]:
# Importing Tensorflow Libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv1D
from tensorflow.keras.optimizers import Adam


# Importing Tensorflow Reinforcement Learning Libraries
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [4]:
# Importing Custom Code
from MainCode.env import Simulator, Window, CustomTensorDataReader

make sure to add .gitignore to temp_directory
file already exist


# Verifying Tensorflow Utilizes GPU

In [5]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


# Initializing Custom Classes Built from Pre-processing Files

In [6]:
# Initializing our class to read our numpy matrix files and merging them
dr = CustomTensorDataReader()

In [7]:
# Initializing our class to manage our environment Window
# This is our login window (1 second time stamp)
wd = Window()

# Showing the Shape of Data

In [8]:
# 62223 Time Stamps
# 1553 Logins for each time stamp (this includes padding entries with 0)
# 17 items per login entry
dr.data.shape

(62223, 1553, 17)

In [9]:
observation, info = wd.reset_window()

In [10]:
observation.shape

(1553, 17)

In [11]:
wd.target_window

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0.], dtype=float32)

In [12]:
wd.current_index

49468

In [13]:
# This is how to access our data
# This accesses the first login, represented by j, of our first time stamp, i
# wd.data[i][j]
wd.data.shape

(62223, 1553, 17)

In [14]:
wd.target_data.shape

(14863575,)

# Converting Numpy Arrays to Tensors

In [101]:
# Tensor Conversion
# wd.target_data = tf.convert_to_tensor(wd.target_data)
# wd.data = tf.convert_to_tensor(wd.data)


In [15]:
if (1 not in wd.target_data):
    print('Not in')
else:
    print("In")

# if (np.any(wd.target_data, 1) == True):
#     print('Got it')
# else:
#     print('damn')

In


# Creating Custom Environment

In [16]:
class NetworkEnv(gym.Env):
    def __init__(self):
        # Actions we can take: We can Flag an item, Continue to the next item, or Finish/Hold once we have analyzed the current log/activity
        self.action_space = Discrete(3)

        # Time Stamp Login Array
        self.observation_space = Box(low=0, high=1552, shape=(1553,17), dtype=int)
#         self.observation_space = Box(low= np.array([0]), high = np.array([1552]), shape=(1553,17), dtype=int)
        
        # Set Starting Login Entry
        self.state = random.randint(0,1552)
        
        # Set Network Log Length
        self.log_length = 1553
        
        # Set Network Health
        self.network_health = 100

        
    def step(self, action):
        # Apply Actions
        # -1 If we have run out of items in the list we want to declare that we are finished OR if we are in a live environment we want to hold until our next activity
        # 0 If we flag the item we want to perform an action before we continue OR if we are holding we want to flag when we start
        # 1 If we don't detect abnormal behavior we want to continue to process the next item in the list
        self.state = action
        
        # Decreamenting the length of our log by 1
        self.log_length -= 1 
        
        # Calculating Reward
        # Cases which our agent is correct
        if ((self.state == 1) and (1 in wd.target_window)) or ((self.state == 0) and (1 not in wd.target_window)):
            reward = 1 
        # Cases which our agent is incorrect    
        else:
            reward = -1
            self.network_health -= 25
            
            
        if self.log_length == 0 or self.network_health <= 0:
            done = True
        else:
            done = False
        
        # Placeholder for info
        info = {}
    
        return self.state, reward, done, info
    
    def render(self):
        pass
    
    def reset(self):
        # Reset our environment
        self.state = random.randint(0,1552)
        # Reset our log length
        self.log_length = 1553
        # Reset our network health
        self.network_health = 100
        
        return self.state


# Setting env Variable to Custom Env

In [17]:
env = NetworkEnv()

In [18]:
env.observation_space.sample()

array([[ 521,  134,  594, ...,  429, 1015,  895],
       [ 249,  655, 1189, ..., 1228, 1144, 1300],
       [1438, 1153, 1489, ...,  549,  582,  406],
       ...,
       [ 334, 1456,  395, ..., 1265,   70, 1545],
       [1069,  202,   64, ...,  947,  987,  373],
       [ 822,  834,  820, ...,  312,  731,  592]])

# Testing Custom Environment

In [19]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
#     network_health = 100
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
#         network_health += health
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-4
Episode:2 Score:0
Episode:3 Score:-4
Episode:4 Score:-2
Episode:5 Score:-1
Episode:6 Score:-3
Episode:7 Score:-4
Episode:8 Score:-1
Episode:9 Score:-3
Episode:10 Score:-2


# Building Replay Buffer

In [20]:
class ReplayBuffer():
    def __init__(self, max_size, input_dims):
        self.mem_size = max_size
        self.mem_cntr = 0

        self.state_memory = np.zeros((self.mem_size, *input_dims), 
                                    dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, *input_dims),
                                dtype=np.float32)
        self.action_memory = np.zeros(self.mem_size, dtype=np.int32)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.int32)

    def store_transition(self, state, action, reward, state_, done):
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.new_state_memory[index] = state_
        self.reward_memory[index] = reward
        self.action_memory[index] = action
        self.terminal_memory[index] = 1 - int(done)
        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)

        states = self.state_memory[batch]
        states_ = self.new_state_memory[batch]
        rewards = self.reward_memory[batch]
        actions = self.action_memory[batch]
        terminal = self.terminal_memory[batch]

        return states, actions, rewards, states_, terminal

# Building Neural Network

In [88]:
# Alternate implementation to build model which adds layers on top
def build_neuralnet(lr, states, actions, window):
    model = Sequential()
    model.add(Conv1D(24, activation='relu', kernel_size=window, input_shape=states))
    model.add(Conv1D(24, activation='relu', kernel_size=window))
    model.add(Dense(actions, activation=None))
    model.compile(optimizer=Adam(learning_rate=lr), loss='mae')
    return model

In [89]:
# def build_neuralnet(states, actions, lr, window):
#     model = Sequential([
#         Conv1D(24, activation='relu', kernel_size=window, input_shape=states),
#         Conv1D(24, activation='relu', kernel_size=window),
#         Linear(actions, activation=None)
#     ])
#     model.compile(optimizer=Adam(learning_rate=lr), loss='mae')
#     return model

In [90]:
states = env.observation_space.shape
actions = env.action_space.n
print(actions)
print(states)

3
(1553, 17)


In [92]:
# del model # This is here to prevent issues after initial build, uncomment to rebuilding model
model = build_neuralnet(3, states, actions, 17)

In [93]:
model.build()

## Summary of Deep Learning Model

In [94]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_11 (Conv1D)          (None, 1537, 24)          6960      
                                                                 
 conv1d_12 (Conv1D)          (None, 1521, 24)          9816      
                                                                 
 dense_15 (Dense)            (None, 1521, 3)           75        
                                                                 
Total params: 16,851
Trainable params: 16,851
Non-trainable params: 0
_________________________________________________________________


# Building Agent

In [95]:
class DQAgent():
    def __init__(self, lr, gamma, actions, epsilon, batch_size, input_dims, epsilon_dec=1e-3, epsilon_end=0.01, mem_size=50000, fname='dqn_model.h5', **kwargs):
        
        # Setting Hyperparameters
        window = 17
        self.action_space = [i for i in range(actions)] # Possible list of actions
        self.gamma = gamma 
        self.epsilon = epsilon
        self.eps_min = epsilon_end
        self.batch_size = batch_size
        self.model_file = fname
        self.memory = ReplayBuffer(mem_size, input_dims)
        self.q_eval = build_neuralnet(lr, actions, input_dims, window)
        
    # Storing our transitions
    def store_transition(self, state, action, reward, new_state, done):
        self.memory.store_transition(state, action, reward, new_state, done)
        
    # Dictating how the Agent chooses an action, random vs maximal reward
    def choose_action(self, observation):
        # If we are in the exploratory phase choose a random action (in our case randomly provide a label to learn)
        # else we make a predicition based off of what we have learned
        if np.random.random() < self.epsilon:
            action = np.random.choice(self.action_space)
        else:
            state = np.array([observation])
            actions = self.q_eval.predict(state)     
            
            # Choosing the "Greediest" action
            action = np.argmax(actions)
            return action
    
    # Controlling Agent's learning
    def learn(self):
        # If we have not completed learning equal to a batch size, we want to continue
        if self.memory.mem_cntr < self.batch_size:
            return
        
        states, actions, rewards, states_, dones = self.memory.sample_buffer(self.batch_size)
        
        q_eval = self.q_eval.predict(states)
        q_eval = self.q_eval.predict(states_)
        
        q_target = np.copy(q_eval)
        batch_index = np.arange(self.batch_size, dtype=np.int32)
        
        # Updating Q target value with actions the agent took
        q_target[batch_index, actions] = rewards + self.gamma * np.max(q_next, axis=1)*dones
        
        self.q_eval.train_on_batch(states, q_target)
        
        self.epsilon = self.epsilon - self.eps_dec if self.epsilon > self.eps_min else self.eps_min
      
    # Saving our model
    def save_model(self):
        self.q_eval.save(self.model_file)
       
    # Loading our model
    def load_model(self):
        self.q_eval = load_model(self.model_file)

In [96]:
episodes = 10
lr = 0.001
agent = DQAgent(lr=lr, gamma=0.99, actions=actions, epsilon=1.0, batch_size=64, input_dims=env.observation_space.shape)
scores = []
eps_history = []

for i in range(1, episodes+1):
    done = False
    score = 0
    observation = env.reset()
    
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.store_transition(observation, action, reward, observation_, done)
        observation = observation_
        agent.learn()
    eps_history.append(agent.epsilon)
    scores.append(score)
    
    avg_score = np.mean(scores[-100:])
    print('episode: ', i, 'score %.2f' % score,
         'average_score %.2f' % avg_score,
         'epsilon %.2f' % agent.epsilon)

TypeError: 'numpy.int64' object is not iterable