In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import tensorflow as tf
import pandas as pd
import joblib as jb

In [2]:
configuration = tf.compat.v1.ConfigProto()
configuration.gpu_options.allow_growth = True
configuration.gpu_options.per_process_gpu_memory_fraction = 1
session = tf.compat.v1.Session(config=configuration)


In [3]:
model=jb.load('rfDefense2021.sav')
print(model)

RandomForestClassifier(max_depth=50, n_estimators=250, random_state=45)


In [4]:
data=pd.read_csv('datasets/malicious_data_generated.csv')
npdata=data.to_numpy()
malData=np.copy(npdata)
print(type(malData[1,:]))
print(malData[:1].shape)

<class 'numpy.ndarray'>
(1, 10)


In [5]:
class MalwareEnv():
    def __init__(self):
        # Actions we can take, decrease, increse, none
        self.action_space = Discrete(10)
        # max-min array
        self.observation_space = Box(low=-100,high=100,shape=(10,))
        
        # Set start 
        self.state = np.asarray(malData[random.randint(0,499),:])
        
        # Set time 
        self.length = 60
        
    def step(self, action):
        # Apply action for each state
        if(action<10):
            if(action==0):
                self.state[0]=round(random.uniform(-0.290698,133.441860),6)
            elif(action==1):
                 self.state[1]=round(random.uniform(0,1184),6)
            elif(action==2):
                 self.state[2]=round(random.uniform(-0.666667,10.666667),6)
            elif(action==3):
                 self.state[3]=round(random.uniform(-0.312383,109.259173),6)
            elif(action==4):
                 self.state[4]=round(random.uniform(0,30),6)
            elif(action==5):
                 self.state[5]=round(random.uniform(-0.322,127.488889),6)
            elif(action==6):
                 self.state[6]=round(random.uniform(-0.282353,147.976471),6)
            elif(action==7):
                 self.state[7]=round(random.uniform(-0.164688,715.616633),6)
            elif(action==8):
                 self.state[8]=round(random.uniform(-0.324081,106.407677),6)
            else:
                 self.state[9]=round(random.uniform(-0.750000,227.5),6)                     
        
        
        self.length -= 1 
            
        
        # Calculate reward in ranges
        
        
        
        result=model.predict(self.state.reshape(1,-1))
        result=np.array2string(result)         
        if(result=="['benign']"):
           reward=1
        else:
            reward=-1
                 
        
        # Check if is done
        if self.length <= 0: 
            done = True
        else:
            done = False
        
        
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        pass
    
    def reset(self):
        # Reset 
        self.state = malData[random.randint(0,498),:]
        # Reset time
        self.length = 60 
        return self.state

In [6]:
env = MalwareEnv()

In [7]:
env.observation_space.sample()

array([-20.437513,  80.77766 ,  18.623774,  87.57744 , -83.37215 ,
       -69.76168 ,  63.0373  , -31.155664, -85.04034 ,  48.737904],
      dtype=float32)

In [8]:
with tf.device('/gpu:0'):
    episodes = 10
    for episode in range(1, episodes+1):
        state = env.reset()
        done = False
        score = 0 
    
        while not done:
            #env.render()
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action)
            score+=reward
        print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:52
Episode:2 Score:56
Episode:3 Score:60
Episode:4 Score:60
Episode:5 Score:54
Episode:6 Score:50
Episode:7 Score:60
Episode:8 Score:56
Episode:9 Score:58
Episode:10 Score:60


In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
import tensorflow

In [10]:
states = env.observation_space.shape
actions = env.action_space.n

In [11]:
actions

10

In [12]:
states

(10,)

In [13]:
def build_model(states, actions):
    with tf.device('/gpu:0'):
        model = tensorflow.keras.models.Sequential()   
        model.add(tensorflow.keras.layers.Dense(24, activation='relu', input_shape=(None,10,1)))
        model.add(tensorflow.keras.layers.Dense(24, activation='relu'))
        model.add(tensorflow.keras.layers.Dense(10, activation='linear'))
        #model.add(Flatten())

        return model

In [None]:
def build_modelVersion2(states, actions):
    with tf.device('/gpu:0'):
        model = tensorflow.keras.models.Sequential()   
        model.add(tensorflow.keras.layers.Dense(24, activation='relu', input_shape=(None,10,1)))
        model.add(tensorflow.keras.layers.Dense(24, activation='relu'))
        model.add(tensorflow.keras.layers.Dense(10, activation='linear'))
        #model.add(Flatten())

        return model

In [14]:

#with tf.device('/gpu:0'):
model = build_model(states, actions)

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, None, 10, 24)      48        
_________________________________________________________________
dense_1 (Dense)              (None, None, 10, 24)      600       
_________________________________________________________________
dense_2 (Dense)              (None, None, 10, 10)      250       
Total params: 898
Trainable params: 898
Non-trainable params: 0
_________________________________________________________________


In [16]:
from rl.agents import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory


In [17]:
def build_agent(model, actions):
    policy = EpsGreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [20]:
with tf.device('/gpu:0'):
    dqn = build_agent(model, actions)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)

ValueError: Model output "KerasTensor(type_spec=TensorSpec(shape=(None, None, 10, 10), dtype=tf.float32, name=None), name='dense_2/BiasAdd:0', description="created by layer 'dense_2'")" has invalid shape. DQN expects a model that has one dimension for each action, in this case 10.