In [1]:
from __future__ import print_function

import joblib
import gc

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import cklib
from cklib import ckconst
from cklib.ckstd import fprint
from cklib import ckstd
from cklib import DataFrame
from cklib.ckmachine import PacketMachine
from cklib.Threshold import HighThreshold

# Random state handed to both Flow_Dataset and PacketMachine in later cells.
seed = 22

# Every CSV location is assembled as root + <name> + ext.
root = './bin/'
ext = '.csv'

# Order matters: later cells index this list positionally.
path = [
    'iscx2017_pkt_train',   # [0] -> ptrain_path
    'iscx2017_pkt_test',    # [1] -> ptest_path
    'iscx2017_sess_train',  # [2] -> strain_path
    'iscx2017_sess_test',   # [3] -> stest_path
]

In [2]:
# Load the four CSV splits, then fit and predict. The recorded log for this
# cell reports dataset reading, label-encoder/scaler training, random-forest
# training, and session/packet prediction on the training and test sets.
dataframe = DataFrame.Flow_Dataset(random_state=seed)
dataframe.read_csv(
    ptrain_path=root + path[0] + ext,
    ptest_path=root + path[1] + ext,
    strain_path=root + path[2] + ext,
    stest_path=root + path[3] + ext,
)
dataframe.modelling()
dataframe.predict()

# Copy everything the later cells need out of the dataset wrapper.
# Two-element results unpack as (p*, s*) -- presumably packet-level and
# session-level values; confirm against cklib.DataFrame.
label_encoder = dataframe.getLabelEncoder()

train_ppred, train_spred = dataframe.getTrainPred()
train_pprob, train_sprob = dataframe.getTrainProb()
test_ppred, test_spred = dataframe.getTestPred()
test_pprob, test_sprob = dataframe.getTestProb()

# Ground-truth labels, passed through the fitted encoder's transform().
train_true = label_encoder.transform(dataframe.getTrainLabel())
test_true = label_encoder.transform(dataframe.getTestLabel())

# Per-split metadata; the train getter is still called before the test one.
train_flows, test_flows = dataframe.getTrainFlow(), dataframe.getTestFlow()
train_duration, test_duration = dataframe.getTrainDuration(), dataframe.getTestDuration()
train_fin_count, test_fin_count = dataframe.getTrainFin(), dataframe.getTestFin()
train_protocol, test_protocol = dataframe.getTrainProtocol(), dataframe.getTestProtocol()

train_pmean, train_smean = dataframe.getTrainMean()
test_pmean, test_smean = dataframe.getTestMean()

# Unbind the wrapper now that its results are copied out, then run a
# collection pass. gc.collect() stays the last expression, so its return
# value is still shown as the cell output.
del dataframe
gc.collect()

[2020-09-04 20:36:12] Reading dataset
[2020-09-04 20:37:16] ---> Done (63.7028 seconds)

[2020-09-04 20:37:16] Training label encoder and scaler
[2020-09-04 20:37:26] ---> Done (9.8476 seconds)

[2020-09-04 20:37:26] Training random forest model
[2020-09-04 20:44:30] ---> Done (424.6492 seconds)
[2020-09-04 20:44:30] Predict session training dataset
[2020-09-04 20:44:34] Session training dataset predict time: 3.3270472889998928 seconds
[2020-09-04 20:44:34] Predict session test dataset
[2020-09-04 20:44:37] Session test dataset predict time: 3.31007398100337 seconds
[2020-09-04 20:44:42] Predict packet training dataset
[2020-09-04 20:45:56] Packet training dataset predict time: 34.65549365300103 seconds
[2020-09-04 20:45:56] Predict packet test dataset
[2020-09-04 20:46:36] Packet test dataset predict time: 19.29983525899297 seconds
[2020-09-04 20:46:36] Processing of predict part is finished (126.0103766520042 seconds)


48

In [27]:
# NOTE(review): this reload looks like a development aid for picking up edits
# to cklib.Threshold.HighThreshold without restarting the kernel -- confirm
# whether it is still needed in the finished notebook.
ckstd.reload(HighThreshold)

# Feed the training-set predictions to the static threshold helper.
# `classes` is the encoder's own class list passed back through transform().
# start/end are presumably the bounds of the threshold range that gets
# scored -- confirm against StaticThreshold.initializing.
hth = HighThreshold.StaticThreshold()
hth.initializing(
    ppreds=train_ppred,
    pprobs=train_pprob,
    spreds=train_spred,
    flows=train_flows,
    classes=label_encoder.transform(
        label_encoder.classes_
    ),
    y_true=train_true,
    start=0.9,
    end=1.0,
)

# Left as the final expression so the return value is displayed by the cell.
hth.calculate()

[2020-09-04 21:31:49] Initializing compilte
[2020-09-04 21:31:49] Start getting f1-scores
[2020-09-04 21:33:24] ---> Done (94.7881 sec)


'<Function calculating f1-scores>'

In [32]:
# Keep element [0] of what getThreshold returns for omega = 0.0001.
# NOTE(review): the recorded log lists several candidate thresholds for this
# omega; confirm that index 0 is the intended pick.
threshold = hth.getThreshold(omega=0.0001)[0]

# The helper is not used after this point: unbind it and run a collection pass.
del hth
gc.collect()

[2020-09-04 21:42:40] Omega: 0.0001 --> [F1-Score: [0.98183267 0.98184394 0.98184394 0.98184566 0.98189453 0.98189508]] [Threshold: [0.995 0.996 0.997 0.998 0.999 1.   ]]


In [None]:
# Build the packet machine with the same seed used for the dataset.
pm = PacketMachine(random_state=seed)

# NOTE(review): the method is spelled `initalizing` here, whereas
# StaticThreshold uses `initializing`. This cell has no execution count, so
# confirm the spelling against cklib.ckmachine.PacketMachine before running.
pm.initalizing(
    ppreds=train_ppred,
    pprobs=train_pprob,
    spreds=train_spred,
    flows=train_flows,
    classes=label_encoder.transform(
        label_encoder.classes_
    ),
    y_true=train_true,
    duration=train_duration,
    protocol=train_protocol,
    fin_count=train_fin_count,
    ptime_mean=train_pmean,
    stime_mean=train_smean,
)

# Classify using the threshold chosen in the previous cell.
report = pm.classifying(h_threshold=threshold)