# Libraries & random seeds

In [None]:
# read & manipulate data
import pandas as pd 
import numpy as np
import tensorflow as tf

# visualisations
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid', context='notebook')
%matplotlib notebook

# misc
import random as rn

# manual parameters
RANDOM_SEED = 42
TRAINING_SAMPLE = 200000
VALIDATE_SIZE = 0.2

# Seed every random number generator the notebook draws from so that runs are
# repeatable: NumPy (np.random.shuffle in the model), the stdlib `random`
# module (batch / row sampling) and TensorFlow.
np.random.seed(RANDOM_SEED)
rn.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# The RDP networks are PyTorch modules; their weight initialisation and
# dropout masks come from torch's own generator, which the seeds above do not
# touch. Imported here because the main torch import lives in a later cell.
import torch
torch.manual_seed(RANDOM_SEED)

In [None]:
#util
import pandas as pd
import random
from sklearn.preprocessing import Normalizer, MinMaxScaler
from sklearn.pipeline import Pipeline
import joblib

# from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import average_precision_score, roc_auc_score

import time
import datetime

#model
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from sklearn.utils.random import sample_without_replacement
from sklearn.neighbors import KDTree
import copy

# rdp_tree
# import numpy as np
# import random
import os

# train
# import datetime
import platform
import shutil
# import os
import sys
# import time
# import torch


#test
# import datetime
# import platform
# import time

# import numpy as np
# import sys

# import torch

from sklearn.metrics import classification_report

# RDP

In [None]:
# # Load the extension and start TensorBoard
# %load_ext tensorboard
# %tensorboard --logdir logs

## Util

In [None]:
def pure_dataLoading(path, logfile=None, pipeline=None):
    """Load the normal-traffic rows of a CSV and return them preprocessed.

    Only rows whose ``Label`` equals ``'NormalTraffic'`` are kept. Identifier
    columns are dropped, +/-inf is replaced by -1 and NaN by 0, and the
    remaining columns are passed through ``pipeline`` exactly once.

    Args:
        path: CSV file readable by ``pd.read_csv``. It must contain ``Label``
            and the identifier columns dropped below.
        logfile: optional object with a ``write`` method; receives the
            "Data shape" line that is also printed.
        pipeline: optional, already fitted transformer exposing
            ``transform``. When ``None``, a ``Normalizer`` -> ``MinMaxScaler``
            pipeline is fitted on the loaded rows and dumped to
            ``'Preprocess_pipeline_RDP.pkl'`` in the working directory.

    Returns:
        ``(x, labels, pipeline)``: the transformed feature array, the binary
        labels (all 0, since only normal rows are kept) and the pipeline used.
    """
    # loading data
    df_og = pd.read_csv(path)

    # reset_index returns a fresh frame, so nothing below mutates a slice of
    # df_og (the original in-place calls did).
    df = df_og[df_og['Label'] == 'NormalTraffic'].reset_index(drop=True)
    labels = (df['Label'] != 'NormalTraffic').astype(int)
    x_df = df.drop(['Src Port', 'Dst Port', 'Protocol', 'Label', 'Flow ID',
                    'Src IP', 'Dst IP', 'Timestamp'], axis=1)

    x_df = x_df.replace([np.inf, -np.inf], -1).replace([np.nan], 0)

    x = x_df.values

    print("Data shape: (%d, %d)" % x.shape)
    if logfile:
        logfile.write("Data shape: (%d, %d)\n" % x.shape)

    if pipeline is None:
        # configure our pipeline
        pipeline = Pipeline([('normalizer', Normalizer()),
                             ('scaler', MinMaxScaler())])

        # Fit only. The single transform below is shared by both branches.
        # Previously the freshly fitted branch transformed here *and* below,
        # so training features went through the pipeline twice while
        # dataLoading() applies it once.
        # NOTE(review): models trained on the double-transformed features
        # should be retrained after this fix.
        pipeline.fit(x)
        joblib.dump(pipeline, 'Preprocess_pipeline_RDP.pkl')

    # transform the data
    x = pipeline.transform(x)

    return x, labels, pipeline

In [None]:
def dataLoading(path, logfile=None, pipeline=None):
    """Yield ``(features, binary_labels)`` evaluation sets read from ``path``.

    This is a generator. The first item covers every row of the CSV; the next
    five each keep the normal traffic plus one attack class, in the order
    Pivoting, Reconnaissance, LateralMovement, DataExfiltration,
    InitialCompromise.

    Args:
        path: CSV file readable by ``pd.read_csv``.
        logfile: accepted for signature parity with the training loader; not
            used here.
        pipeline: fitted transformer exposing ``transform``; required.

    Yields:
        ``(X, y)`` where ``X`` is ``pipeline.transform`` applied to the
        feature columns and ``y`` is a Series holding 0 for
        ``'NormalTraffic'`` and 1 for anything else.

    Raises:
        Exception: if ``pipeline`` is ``None``. Because this is a generator,
            the error surfaces on the first iteration, not at call time.
    """
    if pipeline is None:
        raise Exception("Pipeline Required")

    # Read the file and discard the identifier columns; 'Label' stays for now
    # because the per-attack subsets are selected on it.
    identifier_cols = ['Src Port', 'Dst Port', 'Protocol', 'Flow ID',
                       'Src IP', 'Dst IP', 'Timestamp']
    test_df = pd.read_csv(path).drop(identifier_cols, axis=1)

    test_df.replace([np.inf, -np.inf], -1, inplace=True)
    test_df.replace([np.nan], 0, inplace=True)

    attack_names = ('Pivoting', 'Reconnaissance', 'LateralMovement',
                    'DataExfiltration', 'InitialCompromise')
    subsets = [test_df]
    for attack in attack_names:
        subsets.append(test_df[test_df['Label'].isin(['NormalTraffic', attack])])

    for subset in subsets:
        subset.reset_index(drop=True, inplace=True)
        raw_labels = subset['Label']
        features = subset.drop(columns=['Label'])

        print(raw_labels.unique())

        binary_labels = (raw_labels != 'NormalTraffic').astype(int)

        # transform the test set with the pipeline fitted to the training set
        transformed = pipeline.transform(features)

        yield transformed, binary_labels

# def dataLoading(path, logfile=None):

#     # loading data
#     df = pd.read_csv(path)
#     labels = df['class']
#     x_df = df.drop(['class'], axis=1)
#     x = x_df.values
#     print("Data shape: (%d, %d)" % x.shape)
#     if logfile:
#         logfile.write("Data shape: (%d, %d)\n" % x.shape)

#     return x, labels

# random sampling with replacement
def random_list(start, stop, length):
    """Draw ``length`` integers uniformly from ``[start, stop]``, with replacement.

    The two bounds may be passed in either order and both are inclusive.
    A non-negative ``length`` is truncated to ``int`` first; a negative one
    is passed to ``range`` unchanged.
    """
    if length >= 0:
        length = int(length)

    if start <= stop:
        low, high = int(start), int(stop)
    else:
        low, high = int(stop), int(start)

    # random.randint includes both end points
    return [random.randint(low, high) for _ in range(length)]


def aucPerformance(scores, labels, logfile=None):
    """Report the ROC-AUC and average precision of ``scores`` against ``labels``.

    The summary line is printed and, when ``logfile`` is truthy, also written
    to it followed by a newline.

    Returns:
        ``(roc_auc, ap)`` as computed by ``roc_auc_score`` and
        ``average_precision_score``.
    """
    roc_auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)

    summary = "AUC-ROC: %.4f, AUC-PR: %.4f" % (roc_auc, ap)
    print(summary)
    if logfile:
        logfile.write(summary + "\n")

    return roc_auc, ap


def tic_time():
    """Print a timestamp banner: current datetime, wall-clock time and CPU time.

    Nothing is returned; callers subtract values from two banners by eye to
    see how long a stage took.
    """
    print("=====================================================")
    tic_datetime = datetime.datetime.now()
    print("tic_datetime:", tic_datetime)
    print("tic_datetime.strftime:", tic_datetime.strftime('%Y-%m-%d %H:%M:%S.%f'))
    tic_walltime = time.time()
    print("tic_walltime:", tic_walltime)
    # process_time() is the CPU time consumed by this process. The previous
    # time.time() call simply repeated the wall-clock value under the
    # "tic_cpu" label.
    tic_cpu = time.process_time()
    print("tic_cpu:", tic_cpu)
    print("=====================================================\n")


## Model

In [None]:
# Module-level hyper-parameters read by RTargetNet and RDP_Model.
MAX_GRAD_NORM = 0.1  # max norm passed to clip_grad_norm_ in RDP_Model.train_model
LR_GAMMA = 0.1  # factor the learning rate is multiplied by in adjust_learning_rate
LR_DECAY_EPOCHS = 5000  # train_model decays the LR when epoch is a multiple of this
cos_activation = False  # when True, RTargetNet applies torch.cos and uses its own init

# the init method switch only controls RN
# (read by RTargetNet.__init__; any value not handled there raises ValueError)
init_method = 'kaiming'
# init_method = 'rn_orthogonal'
# init_method = 'rn_uniform'
# init_method = 'rn_normal'

# Largest int32 / float32 values. MAX_INT bounds the seeds drawn in
# RDP_Model.lesinn; MAX_FLOAT is not referenced in the code visible here.
MAX_INT = np.iinfo(np.int32).max
MAX_FLOAT = np.finfo(np.float32).max


class RTargetNet(nn.Module):
    """Target network: a single linear projection from ``in_c`` to ``out_c``.

    The module-level switches ``cos_activation`` and ``init_method`` decide
    which activation follows the projection and how its weights are drawn.
    RDP_Model never passes this network's parameters to an optimiser.
    """

    def __init__(self, in_c, out_c):
        super().__init__()

        # architecture: one Linear, followed by LeakyReLU unless the cosine
        # activation or the orthogonal init is selected
        stack = [nn.Linear(in_c, out_c)]
        if not (cos_activation or init_method == 'rn_orthogonal'):
            stack.append(nn.LeakyReLU(inplace=True, negative_slope=2.5e-1))
        self.layers = nn.Sequential(*stack)

        # weight initialisation of every Linear sub-module
        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue

            weight, bias = module.weight, module.bias
            # 1 / sqrt(fan_in), used by the cosine / uniform / normal schemes
            stdv = 1. / math.sqrt(weight.size(1))

            if cos_activation:
                weight.data.normal_(std=stdv)
                if bias is not None:
                    bias.data.uniform_(0, math.pi)
            elif init_method == 'kaiming':
                nn.init.kaiming_normal_(weight)
                nn.init.constant_(bias, 0.0)
            elif init_method == 'rn_orthogonal':
                nn.init.orthogonal_(weight, gain=np.sqrt(2))
                nn.init.constant_(bias, 0.0)
            elif init_method == 'rn_uniform':
                weight.data.uniform_(-stdv, stdv)
                if bias is not None:
                    bias.data.uniform_(-stdv, stdv)
            elif init_method == 'rn_normal':
                weight.data.normal_(std=stdv)
                if bias is not None:
                    bias.data.normal_(std=stdv)
            else:
                raise ValueError('could not find init_method %s' % init_method)

    def forward(self, x):
        """Project ``x``; apply ``torch.cos`` on top when ``cos_activation`` is set."""
        out = self.layers(x)
        return torch.cos(out) if cos_activation else out


class RNet(nn.Module):
    """Predictor network trained to reproduce the target network's output.

    Layout: ``Linear(in_c, out_c)`` -> ``LeakyReLU(0.2)`` ->
    ``Dropout(dropout_r)``. Every Linear gets Kaiming-normal weights and a
    zero bias.
    """

    def __init__(self, in_c, out_c, dropout_r):
        super().__init__()

        # architecture def
        self.layers = nn.Sequential(
            nn.Linear(in_c, out_c),
            nn.LeakyReLU(negative_slope=2e-1, inplace=True),
            nn.Dropout(dropout_r),
        )

        # one more layer than target network for enough capacity
        # NOTE(review): forward() never applies fc2, so it adds no capacity.
        # It is still created here, so it appears in the state dict -
        # confirm whether leaving it out of forward() is intentional.
        self.fc2 = nn.Linear(out_c, out_c)

        # init
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight)
                nn.init.constant_(module.bias, 0.0)

    def forward(self, x):
        """Return the output of the Linear -> LeakyReLU -> Dropout stack."""
        return self.layers(x)


class RDP_Model:
    """A target network, a predictor network and the predictor's optimiser.

    The predictor (``r_net``) is trained to match the output of the target
    (``r_target_net``), whose parameters are never handed to the optimiser.
    The remaining mismatch is used as an anomaly score.
    """

    def __init__(self, in_c, out_c, logfile=None, USE_GPU=False, LR=1e-4, dropout_r=0.2):
        """Build both networks and an SGD optimiser for the predictor.

        Args:
            in_c: number of input features.
            out_c: width of the projection produced by both networks.
            logfile: optional object with ``write``; receives the network
                reprs and learning-rate changes.
            USE_GPU: move networks and inputs to CUDA when True.
            LR: initial learning rate.
            dropout_r: dropout probability of the predictor.
        """
        self.r_target_net = RTargetNet(in_c, out_c)
        self.r_net = RNet(in_c, out_c, dropout_r)
        self.USE_GPU = USE_GPU
        self.LR = LR
        self.logfile = logfile

        print(self.r_target_net)
        if self.logfile:
            self.logfile.write(str(self.r_target_net))
        print(self.r_net)
        if self.logfile:
            self.logfile.write(str(self.r_net))

        if USE_GPU:
            self.r_target_net = self.r_target_net.cuda()
            self.r_net = self.r_net.cuda()

        # define optimizer for predict network
        self.r_net_optim = torch.optim.SGD(self.r_net.parameters(), lr=LR, momentum=0.9)

        # last epoch at which the learning rate was decayed
        self.epoch = 0

    def train_model(self, x, epoch):
        """Run one optimisation step on batch ``x``.

        The loss is the sum of
          * ``gap_loss``: MSE between predictor and target outputs, and
          * ``pair_wise_loss``: MSE between the L1-normalised inner products
            of each row with a randomly paired row, taken under the target
            and under the predictor.

        Args:
            x: 2-D array of samples; a shuffled copy provides the pairing.
            epoch: current epoch, used only for the learning-rate schedule.

        Returns:
            ``gap_loss`` (not the total loss) as a NumPy scalar array.
        """
        self.r_net.train()

        x_random = copy.deepcopy(x)
        np.random.shuffle(x_random)
        x_random = torch.FloatTensor(x_random)
        x = torch.FloatTensor(x)

        if self.USE_GPU:
            x_random = x_random.cuda()
            x = x.cuda()

        # Decay once per multiple of LR_DECAY_EPOCHS. self.epoch guards
        # against repeating the decay for every batch of the same epoch.
        if epoch % LR_DECAY_EPOCHS == 0 and self.epoch != epoch:
            self.adjust_learning_rate()
            self.epoch = epoch

        r_target = self.r_target_net(x).detach()
        r_pred = self.r_net(x)
        gap_loss = torch.mean(F.mse_loss(r_pred, r_target, reduction='none'), dim=1).mean()

        r_target_random = self.r_target_net(x_random).detach()
        r_pred_random = self.r_net(x_random)

        xy = (F.normalize(r_target, p=1, dim=1) * F.normalize(r_target_random, p=1, dim=1)).sum(dim=1)
        x_y_ = (F.normalize(r_pred, p=1, dim=1) * F.normalize(r_pred_random, p=1, dim=1)).sum(dim=1)
        pair_wise_loss = F.mse_loss(xy, x_y_)

        loss = gap_loss + pair_wise_loss

        self.r_net_optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(self.r_net.parameters(), MAX_GRAD_NORM)
        self.r_net_optim.step()
        return gap_loss.detach().cpu().numpy()

    def eval_model(self, x):
        """Score every row of ``x``; higher means more anomalous.

        The score is the per-row gap loss plus a per-row pairwise term
        computed against a shuffled copy of ``x``. Because of that shuffle
        (``np.random.shuffle``), scores depend on NumPy's RNG state.

        Returns:
            1-D NumPy array with one score per row.
        """
        self.r_net.eval()
        x_random = copy.deepcopy(x)
        np.random.shuffle(x_random)

        x = torch.FloatTensor(x)
        x_random = torch.FloatTensor(x_random)

        if self.USE_GPU:
            x = x.cuda()
            x_random = x_random.cuda()

        # Scoring needs no gradients. Without no_grad() an autograd graph was
        # built over the entire dataset on every evaluation.
        with torch.no_grad():
            r_target = self.r_target_net(x)
            r_pred = self.r_net(x)
            gap_loss = torch.mean(F.mse_loss(r_pred, r_target, reduction='none'), dim=1)

            r_target_random = self.r_target_net(x_random)
            r_pred_random = self.r_net(x_random)

            xy = F.normalize(r_target, p=1, dim=1) * F.normalize(r_target_random, p=1, dim=1)
            x_y_ = F.normalize(r_pred, p=1, dim=1) * F.normalize(r_pred_random, p=1, dim=1)
            pair_wise_loss = torch.mean(F.mse_loss(xy, x_y_, reduction='none'), dim=1)
            scores = gap_loss + pair_wise_loss
        return scores.cpu().numpy()

    def eval_model_lesinn(self, x):
        """Score ``x`` by running :meth:`lesinn` on the predictor's output."""
        self.r_net.eval()
        x = torch.FloatTensor(x)

        if self.USE_GPU:
            x = x.cuda()

        with torch.no_grad():
            r_pred = self.r_net(x)
        scores = self.lesinn(r_pred.cpu().numpy())
        return scores.squeeze()

    def lesinn(self, x_train):
        """Average nearest-neighbour distance to random subsamples.

        For each of 50 ensemble members, 8 rows are drawn without replacement
        and every row of ``x_train`` is scored by its Euclidean distance to
        the closest drawn row. The seeds are fixed, so the result is
        deterministic for a given input.

        Returns:
            Array of shape ``(n_rows, 1)`` holding the mean distance.
        """
        rng = np.random.RandomState(42)
        ensemble_size = 50
        subsample_size = 8
        scores = np.zeros([x_train.shape[0], 1])
        # for reproducibility purposes
        seeds = rng.randint(MAX_INT, size=ensemble_size)
        for i in range(0, ensemble_size):
            rs = np.random.RandomState(seeds[i])
            sid = sample_without_replacement(n_population=x_train.shape[0], n_samples=subsample_size, random_state=rs)
            subsample = x_train[sid]
            kdt = KDTree(subsample, metric='euclidean')
            dists, indices = kdt.query(x_train, k=1)
            scores += dists
        scores = scores / ensemble_size
        return scores

    def adjust_learning_rate(self):
        """Multiply the learning rate by ``LR_GAMMA`` and push it to the optimiser."""
        self.LR *= LR_GAMMA
        print(' * adjust C_LR == {}'.format(self.LR))
        if self.logfile:
            self.logfile.write(' * adjust C_LR == {}\n'.format(self.LR))

        for param_group in self.r_net_optim.param_groups:
            param_group['lr'] = self.LR

    def save_model(self, path):
        """Save the state dicts of both networks to ``path`` with ``torch.save``."""
        dict_to_save = {
            'r_net': self.r_net.state_dict(),
            'r_target_net': self.r_target_net.state_dict(),
            # 'r_net_optim': self.r_net_optim,
            # 'LR': self.LR,
        }
        torch.save(dict_to_save, path)

    def load_model(self, name):
        """Restore both networks from a checkpoint written by :meth:`save_model`.

        Optional ``'r_net_optim'`` and ``'LR'`` entries are restored when the
        checkpoint has them.
        """
        # A checkpoint written on a GPU machine stores CUDA tensors, and
        # torch.load raises on a CPU-only host unless they are remapped.
        map_location = None if self.USE_GPU else torch.device('cpu')
        states = torch.load(name, map_location=map_location)
        self.r_net.load_state_dict(states['r_net'])
        self.r_target_net.load_state_dict(states['r_target_net'])
        if 'r_net_optim' in states:
            self.r_net_optim = states['r_net_optim']
        if 'LR' in states:
            self.LR = states['LR']


## RDP tree

In [None]:
# Module-level switches read by RDPTree.
# True: each training batch is drawn by random sampling with replacement;
# False: training iterates over a shuffled, pre-split list of batches.
is_batch_replace = True
# True: training_process also reports AUC via aucPerformance at every evaluation step.
is_eval = False
# True: testing_process returns right after the first level, unless the
# testing method is 'level'.
test_1l_only = True


class RDPTree():
    """One tree of the forest: a chain of RDP_Model nodes, one per level.

    At every level a fresh model is trained on the samples that are still
    unfiltered. The highest-scoring ``filter_ratio`` share of them is then
    removed and the score at the cut is recorded as that level's threshold.
    """

    def __init__(self,
                 t_id,
                 tree_depth,
                 filter_ratio=0.1):
        """
        Args:
            t_id: tree identifier, embedded in checkpoint and threshold file names.
            tree_depth: number of levels to train / evaluate.
            filter_ratio: share of the remaining samples removed per level.
        """
        self.t_id = t_id
        self.tree_depth = tree_depth
        self.filter_ratio = filter_ratio
        self.thresh = []

    # include train and eval
    def training_process(self,
                         x,
                         labels,
                         batch_size,
                         node_batch,
                         node_epoch,
                         eval_interval,
                         out_c,
                         USE_GPU,
                         LR,
                         save_path,
                         logfile=None,
                         dropout_r=0.1,
                         svm_flag=False,
                         ):
        """Train one model per level and write checkpoints and thresholds.

        Every ``eval_interval`` epochs the model is saved to
        ``<save_path>t<t_id>_l<level>_latest.h5`` and the current samples are
        scored. The scores of the last such evaluation decide which samples
        are filtered out before the next level. After the last level the
        thresholds are written to ``<save_path>threshList_t<t_id>.txt``.

        Args:
            x: sample matrix (sparse when ``svm_flag`` is True).
            labels: per-sample labels, used only when ``is_eval`` is set.
            batch_size: rows per training batch.
            node_batch: number of batches per epoch.
            node_epoch: epochs per level; must be at least 1.
            eval_interval: evaluate and checkpoint every this many epochs.
            out_c, USE_GPU, LR, dropout_r, logfile: forwarded to RDP_Model.
            save_path: path prefix for checkpoints; it is concatenated
                directly, so it should end with a path separator.
            svm_flag: when True ``x`` is densified with ``toarray()``.

        Raises:
            ValueError: if no evaluation ran for a level (``node_epoch < 1``),
                which leaves no scores to filter on.
        """
        if svm_flag:
            x_ori = x.toarray()
        else:
            x_ori = x
        labels_ori = labels
        # 0 = still in play; otherwise the (inverted) level that filtered the sample
        x_level = np.zeros(x_ori.shape[0])
        for level in range(1, self.tree_depth+1):

            # form x and labels
            keep_pos = np.where(x_level == 0)
            x = x_ori[keep_pos]
            labels = labels_ori[keep_pos]
            group_num = int(x.shape[0] / batch_size) + 1
            batch_x = np.array_split(x, group_num)
            model = RDP_Model(in_c=x.shape[1], out_c=out_c, USE_GPU=USE_GPU,
                              LR=LR, logfile=logfile, dropout_r=dropout_r)
            best_auc = best_epoch = 0
            scores = None

            for epoch in range(0, node_epoch):
                if not is_batch_replace:
                    random.shuffle(batch_x)
                    batch_cnt = 0
                    for batch_i in batch_x:
                        gap_loss = model.train_model(batch_i, epoch)
                        batch_cnt += 1
                        if batch_cnt >= node_batch:
                            break

                else:
                    # random sampling with replacement
                    for batch_i in range(node_batch):
                        random_pos = random_list(0, x.shape[0] - 1, batch_size)
                        batch_data = x[random_pos]
                        gap_loss = model.train_model(batch_data, epoch)

                if epoch % eval_interval == 0:
                    print("tree_id:", self.t_id, "level:", level)
                    print("keep_pos.size ==", keep_pos[0].size)
                    if logfile:
                        # the leading space keeps "level: N" and "keep_pos.size" apart
                        logfile.write("tree_id: " + str(self.t_id) + " level: " + str(level)
                                      + " keep_pos.size == " + str(keep_pos[0].size) + '\n')
                    print("epoch ", epoch, "gap_loss:", gap_loss)
                    if logfile:
                        logfile.write("epoch " + str(epoch) + " gap_loss: " + str(gap_loss) + '\n')
                    model.save_model(save_path + 't' + str(self.t_id) + '_l' + str(level) + '_latest.h5')

                    scores = model.eval_model(x)

                    # eval
                    if is_eval:
                        try:
                            roc_auc, ap = aucPerformance(scores, labels, logfile)
                            if roc_auc > best_auc:
                                best_auc = roc_auc
                                best_epoch = epoch

                            print("Best AUC-ROC: %.4f" % best_auc)
                            if logfile:
                                logfile.write("Best AUC-ROC: %.4f\n" % best_auc)
                            print("Best Epoch %d\n" % best_epoch)
                            if logfile:
                                logfile.write("Best Epoch %d\n\n" % best_epoch)
                        except ValueError:
                            print("Only one class present in y_true. ROC AUC score is not defined in that case.")

                    if logfile:
                        logfile.flush()

            if scores is None:
                raise ValueError("node_epoch must be >= 1: no evaluation ran, "
                                 "so there are no scores to filter on")

            # filter anomaly elements. the higher the scores are, the more abnormal
            ranking_scores = scores
            score_ranking_idx = np.argsort(ranking_scores)
            filter_num = int(self.filter_ratio * score_ranking_idx.size)
            if filter_num == 0:
                # Too few samples are left for the ratio to select any. The
                # original indexing (idx[size - 0]) raised IndexError here.
                # Nothing is removed, and an infinite threshold keeps
                # `scores >= thresh` false for this level at test time.
                self.thresh.append(float('inf'))
                continue
            filter_idx = score_ranking_idx[score_ranking_idx.size-filter_num:]
            x_level[keep_pos[0][filter_idx]] = self.tree_depth+1 - level
            self.thresh.append(ranking_scores[score_ranking_idx[score_ranking_idx.size-filter_num]])

        # save self.thresh
        filename = save_path + 'threshList_t' + str(self.t_id) + '.txt'
        list_save(self.thresh, filename, 'w')

    def testing_process(self,
                        x,
                        out_c,
                        USE_GPU,
                        load_path,
                        dropout_r,
                        testing_method='last_layer',
                        svm_flag=False,
                        ):
        """Score ``x`` with this tree's saved per-level models.

        Args:
            x: sample matrix (sparse when ``svm_flag`` is True).
            out_c, USE_GPU, dropout_r: forwarded to RDP_Model.
            load_path: path prefix that checkpoints and the threshold list
                are read from (concatenated directly).
            testing_method: ``'last_layer'`` walks the checkpoints from the
                deepest level down; any other value walks them from level 1 up.
            svm_flag: when True ``x`` is densified with ``toarray()``.

        Returns:
            ``(x_level, first_level_scores)``: the per-sample level marker
            (0 = never filtered) and the scores from the first model
            evaluated. When ``test_1l_only`` is set and the method is not
            ``'level'``, the function returns after that first model with
            ``x_level`` still all zeros.
        """
        if svm_flag:
            x_ori = x.toarray()
        else:
            x_ori = x
        x_level = np.zeros(x_ori.shape[0])
        # list_read returns [] when the file is missing. That only matters if
        # the thresholds are actually indexed below.
        self.thresh = list_read(load_path + 'threshList_t' + str(self.t_id) + '.txt')
        for level in range(1, self.tree_depth + 1):
            # form x
            keep_pos = np.where(x_level == 0)
            x = x_ori[keep_pos]
            model = RDP_Model(in_c=x.shape[1], out_c=out_c, USE_GPU=USE_GPU,
                              dropout_r=dropout_r)

            if testing_method == 'last_layer':
                # high --> low load
                model.load_model(
                    load_path + 't' + str(self.t_id) + '_l' + str(self.tree_depth + 1 - level) + '_latest.h5')
            else:
                # low --> high load
                model.load_model(load_path + 't' + str(self.t_id) + '_l' + str(level) + '_latest.h5')

            # eval
            scores = model.eval_model(x)

            if level == 1:
                first_level_scores = scores
                if test_1l_only and testing_method != 'level':
                    return x_level, first_level_scores
            # filter elements
            if testing_method == 'last_layer':
                filter_idx = np.where(scores >= float(self.thresh[self.tree_depth + 1 - level - 1]))
            else:
                filter_idx = np.where(scores >= float(self.thresh[level-1]))
            x_level[keep_pos[0][filter_idx]] = self.tree_depth+1 - level

        return x_level, first_level_scores


def list_save(content, filename, mode='a'):
    """Write each item of ``content`` to ``filename``, one ``str(item)`` per line.

    Args:
        content: iterable of items to write.
        filename: path of the text file.
        mode: mode passed to ``open``; the default ``'a'`` appends.
    """
    # `with` closes the handle even if str() or write() raises
    with open(filename, mode) as file:
        file.writelines(str(item) + '\n' for item in content)


def list_read(filename):
    """Read a text file and return its lines without the trailing newline.

    Returns ``[]`` when the file cannot be opened.
    """
    try:
        file = open(filename, 'r')
    except IOError:
        return []

    with file:
        # Strip only the line terminator. The previous version always dropped
        # the final character, which truncated the last entry of a file that
        # does not end with a newline.
        return [line[:-1] if line.endswith('\n') else line for line in file]

## Train

In [None]:
# Training configuration (module-level names read by train()).
data_path = "SCVIC_APT/Training.csv"
# data_path = "bank-additional-full_normalised.csv"
save_path = "RDP_model/"
log_path = "logs/log.log"
# open(..., 'w') does not create missing parent directories, so make sure the
# log directory exists before opening the log file.
_log_dir = os.path.dirname(log_path)
if _log_dir:
    os.makedirs(_log_dir, exist_ok=True)
logfile = open(log_path, 'w')
node_batch = 30  # batches per epoch
node_epoch = 200  # epoch for a node training
eval_interval = 24  # evaluate / checkpoint every this many epochs
batch_size = 192
out_c = 50  # output width of both networks
USE_GPU = True
LR = 1e-1
tree_depth = 8
forest_Tnum = 30  # number of trees in the forest

# tree_depth = 3
# forest_Tnum = 10

filter_ratio = 0.05  # filter those with high anomaly scores
dropout_r = 0.1
random_size = 10000  # rows drawn (with replacement) to train each tree

if not torch.cuda.is_available():
    USE_GPU = False

# Set mode
dev_flag = True
if dev_flag:
    print("Running in DEV_MODE!")
# else:
#     # running on servers
#     print("Running in SERVER_MODE!")
#     data_path = sys.argv[1]
#     save_path = sys.argv[2]
#     if not os.path.exists(save_path):
#         os.makedirs(save_path)
#     logfile = None


def train():
    """Train the whole forest and write its checkpoints under ``save_path``.

    The model directory is emptied first. The training CSV is loaded once,
    which also fits and dumps the preprocessing pipeline. Each tree is then
    trained on ``random_size`` rows drawn with replacement. All settings are
    read from the module-level configuration.
    """
    # Start from an empty model directory. rmtree raises when the directory
    # is absent (e.g. on the very first run), hence the existence check.
    if os.path.isdir(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path, exist_ok=True)

    svm_flag = False

    x_ori, labels_ori, _ = pure_dataLoading(data_path, logfile)
    data_size = labels_ori.size

    # build forest (tree ids count from 1)
    forest = [
        RDPTree(t_id=i + 1,
                tree_depth=tree_depth,
                filter_ratio=filter_ratio,
                )
        for i in range(forest_Tnum)
    ]

    print("Init tic time.")
    tic_time()

    # training process
    for i in range(forest_Tnum):

        # random sampling with replacement
        random_pos = random_list(0, data_size-1, random_size)
        # random sampling without replacement
        # random_pos = random.sample(range(0, data_size), random_size)

        # to form x and labels
        x = x_ori[random_pos]
        if svm_flag:
            labels = labels_ori[random_pos]
        else:
            labels = labels_ori[random_pos].values

        print("tree id:", i, "tic time.")
        tic_time()

        forest[i].training_process(
            x=x,
            labels=labels,
            batch_size=batch_size,
            node_batch=node_batch,
            node_epoch=node_epoch,
            eval_interval=eval_interval,
            out_c=out_c,
            USE_GPU=USE_GPU,
            LR=LR,
            save_path=save_path,
            logfile=logfile,
            dropout_r=dropout_r,
            svm_flag=svm_flag,
        )

        print("tree id:", i, "tic time end.")
        tic_time()


# if __name__ == "__main__":
#     main()


Running in DEV_MODE!


In [None]:
# train()

## Test

In [None]:
# Inference configuration. Running this cell re-binds several names that the
# training cell also defines (data_path, out_c, USE_GPU, tree_depth,
# forest_Tnum, dropout_r, dev_flag).
data_path = "SCVIC_APT/Testing.csv"
load_path = "RDP_model/"  # prefix the checkpoints and threshold lists are read from
prep_path = "Preprocess_pipeline_RDP.pkl"  # pipeline file dumped by pure_dataLoading
out_c = 50
USE_GPU = True
tree_depth = 8
forest_Tnum = 30

# tree_depth = 3
# forest_Tnum = 10

dropout_r = 0.1

# count from 1
# (test() uses testing_methods_set[testing_method - 1], so 1 selects 'last_layer')
testing_methods_set = ['last_layer', 'first_layer', 'level']
testing_method = 1

# fall back to CPU when CUDA is not available
if not torch.cuda.is_available():
    USE_GPU = False

# Set mode
dev_flag = True
if dev_flag:
    print("Running in DEV_MODE!")
# else:
#     # running on servers
#     print("Running in SERVER_MODE!")
#     data_path = sys.argv[1]
#     load_path = sys.argv[2]
#     tree_depth = int(sys.argv[3])
#     testing_method = int(sys.argv[4])


def test():
    """Evaluate the saved forest on every test subset yielded by dataLoading.

    For each subset the scores of all trees are averaged. AUC-ROC / AUC-PR
    are printed, followed by a classification report in which the ``k``
    highest-scoring samples are predicted anomalous, ``k`` being the number
    of true anomalies in the subset. All settings come from the module-level
    configuration.
    """
    svm_flag = False

    # NOTE: joblib.load unpickles the file; only load pipelines produced by
    # this notebook.
    pipeline = joblib.load(prep_path)
    method_name = testing_methods_set[testing_method - 1]

    for x, labels in dataLoading(data_path, None, pipeline):
        data_size = labels.size

        # build forest (tree ids count from 1)
        forest = [RDPTree(t_id=i + 1, tree_depth=tree_depth)
                  for i in range(forest_Tnum)]

        sum_result = np.zeros(data_size, dtype=np.float64)

        # testing process
        for tree in forest:
            x_level, first_level_scores = tree.testing_process(
                x=x,
                out_c=out_c,
                USE_GPU=USE_GPU,
                load_path=load_path,
                dropout_r=dropout_r,
                testing_method=method_name,
                svm_flag=svm_flag,
            )

            if method_name == 'level':
                sum_result += x_level
            else:
                sum_result += first_level_scores

        scores = sum_result / forest_Tnum

        print(aucPerformance(scores, labels))

        # Predict the k highest scores as anomalies. The k == 0 case needs
        # its own branch: a slice of [-0:] selects *every* index, which used
        # to flag all samples when a subset held no anomalies. The stable
        # sort keeps the tie order of the previous sorted()-based ranking.
        n_anomalies = int(np.sum(labels))
        top_k = np.zeros(scores.shape)
        if n_anomalies > 0:
            ind = np.argsort(scores, kind='stable')[-n_anomalies:]
            top_k[ind] = 1

        print(classification_report(labels, top_k))


# if __name__ == "__main__":
#     main()


Running in DEV_MODE!


In [None]:
test()

['NormalTraffic' 'InitialCompromise' 'Reconnaissance' 'Pivoting'
 'LateralMovement' 'DataExfiltration']
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)




RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0



RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0



RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0



RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0



RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0



RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout(p=0.1, inplace=False)
  )
  (fc2): Linear(in_features=50, out_features=50, bias=True)
)
RTargetNet(
  (layers): Sequential(
    (0): Linear(in_features=76, out_features=50, bias=True)
    (1): LeakyReLU(negative_slope=0.25, inplace=True)
  )
)
RNet(
  (layers): Sequential(
    (0