Skip to content

Commit

Permalink
full parametrized
Browse files Browse the repository at this point in the history
  • Loading branch information
albact7 committed Jan 21, 2021
1 parent 12a4e24 commit 665d5a4
Show file tree
Hide file tree
Showing 23 changed files with 59 additions and 41 deletions.
Binary file modified dangerousness_svg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions kass_nn/config/config.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
##################################### SETTINGS #############################################
n_threads: 16

##################################### LEVEL 1 ##############################################
################################# DANGER VALUES ############################################
# Values [0-1]
Expand Down
8 changes: 4 additions & 4 deletions kass_nn/integration/level_1_and_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ def run_level_1(train_filename, test_filename, config_file, logpar):
return level1.run_level_1(train_filename, test_filename, config_file, logpar)


def run_level_2(train_filename, test_filename, config_file, logpar, n_threads):
def run_level_2(train_filename, test_filename, config_file, logpar):
print("*" * 40 + " LEVEL 2 " + "*" * 40)
return level2.run_level_2(train_filename, test_filename, config_file, logpar, n_threads)
return level2.run_level_2(train_filename, test_filename, config_file, logpar)


def run_all_levels(train_filename, test_filename, config_file, n_threads):
def run_all_levels(train_filename, test_filename, config_file):
print("*" * 40 + " PARSING TRAINING DATA " + "*" * 40)
logpar = LogParser(train_filename)
should_run_level_2 = run_level_1(train_filename, test_filename, config_file, logpar)
if should_run_level_2:
run_level_2(train_filename, test_filename, config_file, logpar, n_threads)
run_level_2(train_filename, test_filename, config_file, logpar)
Binary file modified kass_nn/level_2/__pycache__/main.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 6 additions & 1 deletion kass_nn/level_2/characteristics/characteristic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from kass_nn.level_2.eif_module import eif
from kass_nn.util import load_parsed_logs as lp

import time


def get_eif(charac):
# Loading training data
Expand All @@ -9,7 +11,10 @@ def get_eif(charac):
charac.X_train = X_train
# Training model
print("\tTRAINING")
clf = eif.train_model(X_train, charac)
st = time.time()
clf = eif.train_model(X_train, charac, charac.n_threads)
end = time.time()
print("\tTime: ",end-st)
# Return model
return clf

Expand Down
8 changes: 5 additions & 3 deletions kass_nn/level_2/characteristics/foreach_ip_min_vs_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, logpar, config_file):
self.X_train = []
self.X_test = []
self.clfs_by_ip = {}
self.n_threads = 1
self.read_params(config_file)

def read_params(self, config_file):
Expand All @@ -30,6 +31,8 @@ def read_params(self, config_file):
self.ntrees = params["ntrees_min_long"]
self.sample_size = params["sample_size_min_long"]
self.mesh = params["mesh_min_long"]
self.n_threads = params["n_threads"]


def get_group_criteria(self, log):
"""
Expand Down Expand Up @@ -59,19 +62,18 @@ def main(test_file):
clf = eif.train_model(X_train)
# Predicting model
i = 0
n_threads = 10
for log in X_test:
ip = characteristic.get_group_criteria(log)
if ip in X_train:
anomaly_scores = eif.predict_wo_train([log], characteristic.clfs_by_ip[ip], n_threads)
anomaly_scores = eif.predict_wo_train([log], characteristic.clfs_by_ip[ip], characteristic.n_threads)
print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anomaly_scores[0],
get_dangerousness_int(
anomaly_scores[
0])))
# Plotting model
fig = plt.open_plot()
plt.plot_model(fig, X_train[ip], [log], anomaly_scores, characteristic.clfs_by_ip[ip],
characteristic.mesh, [1, 1, 1], "Min vs URL by IP", n_threads)
characteristic.mesh, [1, 1, 1], "Min vs URL by IP", characteristic.n_threads)
plt.close_plot()
i += 1

7 changes: 4 additions & 3 deletions kass_nn/level_2/characteristics/min_vs_file_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
self.X_train = []
self.X_test = []
self.clf = None
self.n_threads = 1
self.read_params(config_file)

def read_params(self, config_file):
Expand All @@ -31,6 +32,7 @@ def read_params(self, config_file):
self.ntrees = params["ntrees_min_file_ext"]
self.sample_size = params["sample_size_min_file_ext"]
self.mesh = params["mesh_min_file_ext"]
self.n_threads = params["n_threads"]


def main(test_file):
Expand All @@ -48,8 +50,7 @@ def main(test_file):
# Training model
clf = eif.train_model(X_train, characteristic)
# Predicting model
n_threads = 10
anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
i = 0
for anom in anomaly_scores:
print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
Expand All @@ -59,7 +60,7 @@ def main(test_file):
# Plotting model
fig = plt.open_plot()
plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
characteristic.mesh, [1, 1, 1], "Min vs File Extension", n_threads)
characteristic.mesh, [1, 1, 1], "Min vs File Extension", characteristic.n_threads)
plt.close_plot()


Expand Down
6 changes: 4 additions & 2 deletions kass_nn/level_2/characteristics/min_vs_long_req.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
self.X_train = []
self.X_test = []
self.clf = None
self.n_threads = 1
self.read_params(config_file)

def read_params(self, config_file):
Expand All @@ -31,6 +32,7 @@ def read_params(self, config_file):
self.ntrees = params["ntrees_min_long"]
self.sample_size = params["sample_size_min_long"]
self.mesh = params["mesh_min_long"]
self.n_threads = params["n_threads"]


def main(test_file):
Expand All @@ -49,7 +51,7 @@ def main(test_file):
clf = eif.train_model(X_train, characteristic)
# Predicting model
n_threads = 10
anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
i = 0
for anom in anomaly_scores:
print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
Expand All @@ -59,6 +61,6 @@ def main(test_file):
# Plotting model
fig = plt.open_plot()
plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
characteristic.mesh, [1, 1, 1], "Min vs Request length", n_threads)
characteristic.mesh, [1, 1, 1], "Min vs Request length", characteristic.n_threads)
plt.close_plot()

8 changes: 5 additions & 3 deletions kass_nn/level_2/characteristics/min_vs_meth.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
self.X_train = []
self.X_test = []
self.clf = None
self.n_threads = 1
self.read_params(config_file)

def read_params(self, config_file):
Expand All @@ -31,6 +32,7 @@ def read_params(self, config_file):
self.ntrees = params["ntrees_min_meth"]
self.sample_size = params["sample_size_min_meth"]
self.mesh = params["mesh_min_meth"]
self.n_threads = params["n_threads"]


def main(test_file):
Expand All @@ -45,9 +47,9 @@ def main(test_file):
# Loading testing data
X_test = lp.load_parsed_data(test_filename, False, characteristic)
# Training model
clf = eif.train_model(X_train, characteristic)
clf = eif.train_model(X_train, characteristic, characteristic.n_threads)
# Predicting model
anomaly_scores = eif.predict_wo_train(X_test, clf)
anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
i = 0
for anom in anomaly_scores:
print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
Expand All @@ -57,7 +59,7 @@ def main(test_file):
# Plotting model
fig = plt.open_plot()
plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
characteristic.mesh, [1, 1, 1], "Min vs Meth", n_threads)
characteristic.mesh, [1, 1, 1], "Min vs Meth", characteristic.n_threads)
plt.close_plot()


7 changes: 4 additions & 3 deletions kass_nn/level_2/characteristics/min_vs_url_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self,logpar,config_file):
self.X_train = []
self.X_test = []
self.clf = None
self.n_threads = 1
self.read_params(config_file)

def read_params(self,config_file):
Expand All @@ -33,6 +34,7 @@ def read_params(self,config_file):
self.ntrees = params["ntrees_min_dir"]
self.sample_size = params["sample_size_min_dir"]
self.mesh = params["mesh_min_dir"]
self.n_threads = params["n_threads"]


def main(test_file):
Expand All @@ -50,8 +52,7 @@ def main(test_file):
# Training model
clf = eif.train_model(X_train, characteristic)
# Predicting model
n_threads = 10
anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
i = 0
for anom in anomaly_scores:
print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
Expand All @@ -61,7 +62,7 @@ def main(test_file):
# Plotting model
fig = plt.open_plot()
plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
characteristic.mesh, [1, 1, 1], "Min vs Dir", n_threads)
characteristic.mesh, [1, 1, 1], "Min vs Dir", characteristic.n_threads)
plt.close_plot()
# Plotting with hours
# plt.plot_model_hours(X_train, X_test, anomaly_scores, clf, 4000)
Expand Down
Binary file modified kass_nn/level_2/eif_module/__pycache__/eif.cpython-37.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions kass_nn/level_2/eif_module/eif.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from kass_nn.util import load_parsed_logs as lp


def train_model(X_train, characteristic):
def train_model(X_train, characteristic, n_threads):
X_train = pd.DataFrame(X_train)
X_train = lp.load_data_float(X_train)
# Train block
train_len = len(X_train)
if train_len > 1000:
clf = iso.iForest(X_train, ntrees=characteristic.ntrees, sample_size=characteristic.sample_size, ExtensionLevel=1)
clf = iso.iForest(X_train, ntrees=characteristic.ntrees, sample_size=characteristic.sample_size, ExtensionLevel=1, n_threads=n_threads)
else:
clf = iso.iForest(X_train, ntrees=5000, sample_size=train_len, ExtensionLevel=1)
return clf
Expand Down
Binary file modified kass_nn/level_2/kass_main/__pycache__/train_predict.cpython-37.pyc
Binary file not shown.
29 changes: 16 additions & 13 deletions kass_nn/level_2/kass_main/train_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import kass_nn.level_2.danger_labeling.dangerousness as dang

import time
import yaml


class TrainPredict:

def __init__(self, train_filename, config_file, logpar, n_threads):
def __init__(self, train_filename, config_file, logpar):
"""Constructor"""
self.train_filename = train_filename
self.logpar = logpar
Expand All @@ -20,7 +21,9 @@ def __init__(self, train_filename, config_file, logpar, n_threads):
self.min_file_ext = None
self.min_long = None
self.config_file = config_file
self.n_threads
yaml_document = open(config_file)
params = yaml.safe_load(yaml_document)
self.n_threads = params["n_threads"]

def train_all(self):
print("Min vs Meth")
Expand All @@ -39,13 +42,13 @@ def train_all(self):
self.min_long = MinLong(self.logpar,self.config_file)
self.min_long.clf = charac.get_eif(self.min_long)

def predict_all(self, test_filename, n_threads):
def predict_all(self, test_filename):
print("\tPREDICTING")
st = time.time()
min_meth_pred = charac.get_prediction(test_filename, self.min_meth, self.min_meth.clf, n_threads)[0]
min_dir_pred = charac.get_prediction(test_filename, self.min_dir, self.min_dir.clf, n_threads)[0]
min_file_ext_pred = charac.get_prediction(test_filename, self.min_file_ext, self.min_file_ext.clf, n_threads)[0]
min_long_pred = charac.get_prediction(test_filename, self.min_long, self.min_long.clf, n_threads)[0]
min_meth_pred = charac.get_prediction(test_filename, self.min_meth, self.min_meth.clf, self.n_threads)[0]
min_dir_pred = charac.get_prediction(test_filename, self.min_dir, self.min_dir.clf, self.n_threads)[0]
min_file_ext_pred = charac.get_prediction(test_filename, self.min_file_ext, self.min_file_ext.clf, self.n_threads)[0]
min_long_pred = charac.get_prediction(test_filename, self.min_long, self.min_long.clf, self.n_threads)[0]

anomaly_scores = [min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred]
calculated_dang = dang.get_dangerousness_label(anomaly_scores,self.config_file)
Expand All @@ -63,20 +66,20 @@ def predict_all(self, test_filename, n_threads):
print("=" * 80)
print(calculated_dang)

self.plot_dangerousness(min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred, n_threads)
self.plot_dangerousness(min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred)

def plot_dangerousness(self, min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred, n_threads):
def plot_dangerousness(self, min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred):
fig = plt.open_plot()
plt.plot_model(fig, self.min_meth.X_train, self.min_meth.X_test, min_meth_pred, self.min_meth.clf,
self.min_meth.mesh, [2, 2, 1], "Min vs Meth", n_threads)
self.min_meth.mesh, [2, 2, 1], "Min vs Meth", self.n_threads)
plt.plot_model(fig, self.min_dir.X_train, self.min_dir.X_test, min_dir_pred, self.min_dir.clf,
self.min_dir.mesh, [2, 2, 2], "Min vs Dir", n_threads)
self.min_dir.mesh, [2, 2, 2], "Min vs Dir", self.n_threads)
if min_file_ext_pred is not None:
plt.plot_model(fig, self.min_file_ext.X_train, self.min_file_ext.X_test, min_file_ext_pred,
self.min_file_ext.clf,
self.min_file_ext.mesh, [2, 2, 3], "Min vs FileExt", n_threads)
self.min_file_ext.mesh, [2, 2, 3], "Min vs FileExt", self.n_threads)
plt.plot_model(fig, self.min_long.X_train, self.min_long.X_test, min_long_pred, self.min_long.clf,
self.min_long.mesh, [2, 2, 4], "Min vs Long", n_threads)
self.min_long.mesh, [2, 2, 4], "Min vs Long", self.n_threads)

plt.close_plot()

Expand Down
9 changes: 4 additions & 5 deletions kass_nn/level_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import sys
from kass_nn.util.parse_logs import LogParser

def run_level_2(train_filename, test_filename, config_file, logpar, n_threads):
def run_level_2(train_filename, test_filename, config_file, logpar):
trainpredict = TrainPredict(train_filename, config_file, logpar)
trainpredict.train_all()
trainpredict.predict_all(test_filename, n_threads)
trainpredict.predict_all(test_filename)


def main(test_file):
Expand All @@ -15,9 +15,8 @@ def main(test_file):
test_filename = kassnn_f / str("level_2/test_logs/main/" + test_file)
config_file = kassnn_f / "config/config.yml"
logpar = LogParser(train_filename)
trainpredict = TrainPredict(train_filename, config_file, logpar, n_threads)
trainpredict = TrainPredict(train_filename, config_file, logpar)
trainpredict.train_all()
n_threads = 1
trainpredict.predict_all(test_filename, n_threads)
trainpredict.predict_all(test_filename)


Binary file modified kass_nn/util/__pycache__/kass_plotter.cpython-37.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion kass_nn/util/kass_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def close_plot():
plt.savefig("dangerousness_svg.png", dpi=100)
plt.show()

def plot_data_hours(data_train, data_test, col_X, colY, anomaly_scores, clf, mesh, extra_points, n_threads=10):
def plot_data_hours(data_train, data_test, col_X, colY, anomaly_scores, clf, mesh, extra_points, n_threads):
"""
Plots 2D data set training and testing
:param data_train: pandas data frame train
Expand Down
2 changes: 1 addition & 1 deletion kassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
test_filename = integration_f/"test_logs/main/test_main_0.log"
config_f = Path("kass_nn/config")
config_file = config_f/"config.yml"
run_all_levels(train_filename, test_filename, config_file, 10)
run_all_levels(train_filename, test_filename, config_file)

0 comments on commit 665d5a4

Please sign in to comment.