full parametrized

Egida-Kassandra · Jan 21, 2021 · 665d5a4 · 665d5a4
1 parent 12a4e24
commit 665d5a4
Show file tree

Hide file tree

Showing 23 changed files with 59 additions and 41 deletions.
diff --git a/dangerousness_svg.png b/dangerousness_svg.png
diff --git a/kass_nn/config/config.yml b/kass_nn/config/config.yml
@@ -1,3 +1,6 @@
+##################################### SETTINGS #############################################
+n_threads: 16
+
 ##################################### LEVEL 1 ##############################################
 ################################# DANGER VALUES ############################################
 # Values [0-1]

diff --git a/kass_nn/integration/level_1_and_2.py b/kass_nn/integration/level_1_and_2.py
@@ -8,14 +8,14 @@ def run_level_1(train_filename, test_filename, config_file, logpar):
     return level1.run_level_1(train_filename, test_filename, config_file, logpar)
 
 
-def run_level_2(train_filename, test_filename, config_file, logpar, n_threads):
+def run_level_2(train_filename, test_filename, config_file, logpar):
     print("*" * 40 + " LEVEL 2 " + "*" * 40)
-    return level2.run_level_2(train_filename, test_filename, config_file, logpar, n_threads)
+    return level2.run_level_2(train_filename, test_filename, config_file, logpar)
 
 
-def run_all_levels(train_filename, test_filename, config_file, n_threads):
+def run_all_levels(train_filename, test_filename, config_file):
     print("*" * 40 + " PARSING TRAINING DATA " + "*" * 40)
     logpar = LogParser(train_filename)
     should_run_level_2 = run_level_1(train_filename, test_filename, config_file, logpar)
     if should_run_level_2:
-        run_level_2(train_filename, test_filename, config_file, logpar, n_threads)
+        run_level_2(train_filename, test_filename, config_file, logpar)
diff --git a/kass_nn/level_2/__pycache__/main.cpython-37.pyc b/kass_nn/level_2/__pycache__/main.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/__pycache__/characteristic.cpython-37.pyc b/kass_nn/level_2/characteristics/__pycache__/characteristic.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/__pycache__/min_vs_file_ext.cpython-37.pyc b/kass_nn/level_2/characteristics/__pycache__/min_vs_file_ext.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/__pycache__/min_vs_long_req.cpython-37.pyc b/kass_nn/level_2/characteristics/__pycache__/min_vs_long_req.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/__pycache__/min_vs_meth.cpython-37.pyc b/kass_nn/level_2/characteristics/__pycache__/min_vs_meth.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/__pycache__/min_vs_url_directory.cpython-37.pyc b/kass_nn/level_2/characteristics/__pycache__/min_vs_url_directory.cpython-37.pyc
diff --git a/kass_nn/level_2/characteristics/characteristic.py b/kass_nn/level_2/characteristics/characteristic.py
@@ -1,6 +1,8 @@
 from kass_nn.level_2.eif_module import eif
 from kass_nn.util import load_parsed_logs as lp
 
+import time
+
 
 def get_eif(charac):
     # Loading training data
@@ -9,7 +11,10 @@ def get_eif(charac):
     charac.X_train = X_train
     # Training model
     print("\tTRAINING")
-    clf = eif.train_model(X_train, charac)
+    st = time.time()
+    clf = eif.train_model(X_train, charac, charac.n_threads)
+    end = time.time()
+    print("\tTime: ",end-st)
     # Return model
     return clf
 

diff --git a/kass_nn/level_2/characteristics/foreach_ip_min_vs_url.py b/kass_nn/level_2/characteristics/foreach_ip_min_vs_url.py
@@ -22,6 +22,7 @@ def __init__(self, logpar, config_file):
         self.X_train = []
         self.X_test = []
         self.clfs_by_ip = {}
+        self.n_threads = 1
         self.read_params(config_file)
 
     def read_params(self, config_file):
@@ -30,6 +31,8 @@ def read_params(self, config_file):
         self.ntrees = params["ntrees_min_long"]
         self.sample_size = params["sample_size_min_long"]
         self.mesh = params["mesh_min_long"]
+        self.n_threads = params["n_threads"]
+
 
     def get_group_criteria(self, log):
         """
@@ -59,19 +62,18 @@ def main(test_file):
         clf = eif.train_model(X_train)
     # Predicting model
     i = 0
-    n_threads = 10
     for log in X_test:
         ip = characteristic.get_group_criteria(log)
         if ip in X_train:
-            anomaly_scores = eif.predict_wo_train([log], characteristic.clfs_by_ip[ip], n_threads)
+            anomaly_scores = eif.predict_wo_train([log], characteristic.clfs_by_ip[ip], characteristic.n_threads)
             print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anomaly_scores[0],
                                                                                                  get_dangerousness_int(
                                                                                                      anomaly_scores[
                                                                                                          0])))
         # Plotting model
         fig = plt.open_plot()
         plt.plot_model(fig, X_train[ip], [log], anomaly_scores, characteristic.clfs_by_ip[ip],
-                       characteristic.mesh, [1, 1, 1], "Min vs URL by IP", n_threads)
+                       characteristic.mesh, [1, 1, 1], "Min vs URL by IP", characteristic.n_threads)
         plt.close_plot()
         i += 1
 
diff --git a/kass_nn/level_2/characteristics/min_vs_file_ext.py b/kass_nn/level_2/characteristics/min_vs_file_ext.py
@@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
         self.X_train = []
         self.X_test = []
         self.clf = None
+        self.n_threads = 1
         self.read_params(config_file)
 
     def read_params(self, config_file):
@@ -31,6 +32,7 @@ def read_params(self, config_file):
         self.ntrees = params["ntrees_min_file_ext"]
         self.sample_size = params["sample_size_min_file_ext"]
         self.mesh = params["mesh_min_file_ext"]
+        self.n_threads = params["n_threads"]
 
 
 def main(test_file):
@@ -48,8 +50,7 @@ def main(test_file):
     # Training model
     clf = eif.train_model(X_train, characteristic)
     # Predicting model
-    n_threads = 10
-    anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
+    anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
     i = 0
     for anom in anomaly_scores:
         print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
@@ -59,7 +60,7 @@ def main(test_file):
     # Plotting model
     fig = plt.open_plot()
     plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
-                   characteristic.mesh, [1, 1, 1], "Min vs File Extension", n_threads)
+                   characteristic.mesh, [1, 1, 1], "Min vs File Extension", characteristic.n_threads)
     plt.close_plot()
 
 

diff --git a/kass_nn/level_2/characteristics/min_vs_long_req.py b/kass_nn/level_2/characteristics/min_vs_long_req.py
@@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
         self.X_train = []
         self.X_test = []
         self.clf = None
+        self.n_threads = 1
         self.read_params(config_file)
 
     def read_params(self, config_file):
@@ -31,6 +32,7 @@ def read_params(self, config_file):
         self.ntrees = params["ntrees_min_long"]
         self.sample_size = params["sample_size_min_long"]
         self.mesh = params["mesh_min_long"]
+        self.n_threads = params["n_threads"]
 
 
 def main(test_file):
@@ -49,7 +51,6 @@ def main(test_file):
     clf = eif.train_model(X_train, characteristic)
     # Predicting model
-    n_threads = 10
-    anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
+    anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
     i = 0
     for anom in anomaly_scores:
         print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
@@ -59,6 +60,6 @@ def main(test_file):
     # Plotting model
     fig = plt.open_plot()
     plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
-                   characteristic.mesh, [1, 1, 1], "Min vs Request length", n_threads)
+                   characteristic.mesh, [1, 1, 1], "Min vs Request length", characteristic.n_threads)
     plt.close_plot()
 
diff --git a/kass_nn/level_2/characteristics/min_vs_meth.py b/kass_nn/level_2/characteristics/min_vs_meth.py
@@ -23,6 +23,7 @@ def __init__(self, logpar, config_file):
         self.X_train = []
         self.X_test = []
         self.clf = None
+        self.n_threads = 1
         self.read_params(config_file)
 
     def read_params(self, config_file):
@@ -31,6 +32,7 @@ def read_params(self, config_file):
         self.ntrees = params["ntrees_min_meth"]
         self.sample_size = params["sample_size_min_meth"]
         self.mesh = params["mesh_min_meth"]
+        self.n_threads = params["n_threads"]
 
 
 def main(test_file):
@@ -45,9 +47,9 @@ def main(test_file):
     # Loading testing data
     X_test = lp.load_parsed_data(test_filename, False, characteristic)
     # Training model
-    clf = eif.train_model(X_train, characteristic)
+    clf = eif.train_model(X_train, characteristic, characteristic.n_threads)
     # Predicting model
-    anomaly_scores = eif.predict_wo_train(X_test, clf)
+    anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
     i = 0
     for anom in anomaly_scores:
         print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
@@ -57,7 +59,7 @@ def main(test_file):
     # Plotting model
     fig = plt.open_plot()
     plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
-                   characteristic.mesh, [1, 1, 1], "Min vs Meth", n_threads)
+                   characteristic.mesh, [1, 1, 1], "Min vs Meth", characteristic.n_threads)
     plt.close_plot()
 
 
diff --git a/kass_nn/level_2/characteristics/min_vs_url_directory.py b/kass_nn/level_2/characteristics/min_vs_url_directory.py
@@ -25,6 +25,7 @@ def __init__(self,logpar,config_file):
         self.X_train = []
         self.X_test = []
         self.clf = None
+        self.n_threads = 1
         self.read_params(config_file)
 
     def read_params(self,config_file):
@@ -33,6 +34,7 @@ def read_params(self,config_file):
         self.ntrees = params["ntrees_min_dir"]
         self.sample_size = params["sample_size_min_dir"]
         self.mesh = params["mesh_min_dir"]
+        self.n_threads = params["n_threads"]
 
 
 def main(test_file):
@@ -50,8 +52,7 @@ def main(test_file):
     # Training model
     clf = eif.train_model(X_train, characteristic)
     # Predicting model
-    n_threads = 10
-    anomaly_scores = eif.predict_wo_train(X_test, clf, n_threads)
+    anomaly_scores = eif.predict_wo_train(X_test, clf, characteristic.n_threads)
     i = 0
     for anom in anomaly_scores:
         print("TEST {}\n\tFull anomaly value: {}\n\tDangerousness in range [0-5]: {}".format(i, anom,
@@ -61,7 +62,7 @@ def main(test_file):
     # Plotting model
     fig = plt.open_plot()
     plt.plot_model(fig, X_train, X_test, anomaly_scores, clf,
-                   characteristic.mesh, [1, 1, 1], "Min vs Dir", n_threads)
+                   characteristic.mesh, [1, 1, 1], "Min vs Dir", characteristic.n_threads)
     plt.close_plot()
     # Plotting with hours
     # plt.plot_model_hours(X_train, X_test, anomaly_scores, clf, 4000)

diff --git a/kass_nn/level_2/eif_module/__pycache__/eif.cpython-37.pyc b/kass_nn/level_2/eif_module/__pycache__/eif.cpython-37.pyc
diff --git a/kass_nn/level_2/eif_module/eif.py b/kass_nn/level_2/eif_module/eif.py
@@ -4,13 +4,21 @@
 from kass_nn.util import load_parsed_logs as lp
 
 
-def train_model(X_train, characteristic):
+def train_model(X_train, characteristic, n_threads=1):
+    """
+    Trains the isolation forest model for one characteristic
+    :param X_train: training data (wrapped in a pandas data frame here)
+    :param characteristic: object providing ntrees and sample_size
+    :param n_threads: thread count forwarded to iso.iForest; defaults to 1 so
+        callers that still pass only two arguments keep working
+    :return: the trained iForest model
+    """
     X_train = pd.DataFrame(X_train)
     X_train = lp.load_data_float(X_train)
     # Train block
     train_len = len(X_train)
     if train_len > 1000:
-        clf = iso.iForest(X_train, ntrees=characteristic.ntrees, sample_size=characteristic.sample_size, ExtensionLevel=1)
+        clf = iso.iForest(X_train, ntrees=characteristic.ntrees, sample_size=characteristic.sample_size, ExtensionLevel=1, n_threads=n_threads)
     else:
-        clf = iso.iForest(X_train, ntrees=5000, sample_size=train_len, ExtensionLevel=1)
+        clf = iso.iForest(X_train, ntrees=5000, sample_size=train_len, ExtensionLevel=1, n_threads=n_threads)
     return clf

diff --git a/kass_nn/level_2/kass_main/__pycache__/train_predict.cpython-37.pyc b/kass_nn/level_2/kass_main/__pycache__/train_predict.cpython-37.pyc
diff --git a/kass_nn/level_2/kass_main/train_predict.py b/kass_nn/level_2/kass_main/train_predict.py
@@ -7,11 +7,12 @@
 import kass_nn.level_2.danger_labeling.dangerousness as dang
 
 import time
+import yaml
 
 
 class TrainPredict:
 
-    def __init__(self, train_filename, config_file, logpar, n_threads):
+    def __init__(self, train_filename, config_file, logpar):
         """Constructor"""
         self.train_filename = train_filename
         self.logpar = logpar
@@ -20,7 +21,9 @@ def __init__(self, train_filename, config_file, logpar, n_threads):
         self.min_file_ext = None
         self.min_long = None
         self.config_file = config_file
-        self.n_threads
+        with open(config_file) as yaml_document:
+            params = yaml.safe_load(yaml_document)
+        self.n_threads = params["n_threads"]
 
     def train_all(self):
         print("Min vs Meth")
@@ -39,13 +42,13 @@ def train_all(self):
         self.min_long = MinLong(self.logpar,self.config_file)
         self.min_long.clf = charac.get_eif(self.min_long)
 
-    def predict_all(self, test_filename, n_threads):
+    def predict_all(self, test_filename):
         print("\tPREDICTING")
         st = time.time()
-        min_meth_pred = charac.get_prediction(test_filename, self.min_meth, self.min_meth.clf, n_threads)[0]
-        min_dir_pred = charac.get_prediction(test_filename, self.min_dir, self.min_dir.clf, n_threads)[0]
-        min_file_ext_pred = charac.get_prediction(test_filename, self.min_file_ext, self.min_file_ext.clf, n_threads)[0]
-        min_long_pred = charac.get_prediction(test_filename, self.min_long, self.min_long.clf, n_threads)[0]
+        min_meth_pred = charac.get_prediction(test_filename, self.min_meth, self.min_meth.clf, self.n_threads)[0]
+        min_dir_pred = charac.get_prediction(test_filename, self.min_dir, self.min_dir.clf, self.n_threads)[0]
+        min_file_ext_pred = charac.get_prediction(test_filename, self.min_file_ext, self.min_file_ext.clf, self.n_threads)[0]
+        min_long_pred = charac.get_prediction(test_filename, self.min_long, self.min_long.clf, self.n_threads)[0]
 
         anomaly_scores = [min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred]
         calculated_dang = dang.get_dangerousness_label(anomaly_scores,self.config_file)
@@ -63,20 +66,20 @@ def predict_all(self, test_filename, n_threads):
         print("=" * 80)
         print(calculated_dang)
 
-        self.plot_dangerousness(min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred, n_threads)
+        self.plot_dangerousness(min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred)
 
-    def plot_dangerousness(self, min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred, n_threads):
+    def plot_dangerousness(self, min_meth_pred, min_dir_pred, min_file_ext_pred, min_long_pred):
         fig = plt.open_plot()
         plt.plot_model(fig, self.min_meth.X_train, self.min_meth.X_test, min_meth_pred, self.min_meth.clf,
-                       self.min_meth.mesh, [2, 2, 1], "Min vs Meth", n_threads)
+                       self.min_meth.mesh, [2, 2, 1], "Min vs Meth", self.n_threads)
         plt.plot_model(fig, self.min_dir.X_train, self.min_dir.X_test, min_dir_pred, self.min_dir.clf,
-                       self.min_dir.mesh, [2, 2, 2], "Min vs Dir", n_threads)
+                       self.min_dir.mesh, [2, 2, 2], "Min vs Dir", self.n_threads)
         if min_file_ext_pred is not None:
             plt.plot_model(fig, self.min_file_ext.X_train, self.min_file_ext.X_test, min_file_ext_pred,
                            self.min_file_ext.clf,
-                           self.min_file_ext.mesh, [2, 2, 3], "Min vs FileExt", n_threads)
+                           self.min_file_ext.mesh, [2, 2, 3], "Min vs FileExt", self.n_threads)
         plt.plot_model(fig, self.min_long.X_train, self.min_long.X_test, min_long_pred, self.min_long.clf,
-                       self.min_long.mesh, [2, 2, 4], "Min vs Long", n_threads)
+                       self.min_long.mesh, [2, 2, 4], "Min vs Long", self.n_threads)
 
         plt.close_plot()
 

diff --git a/kass_nn/level_2/main.py b/kass_nn/level_2/main.py
@@ -3,10 +3,10 @@
 import sys
 from kass_nn.util.parse_logs import LogParser
 
-def run_level_2(train_filename, test_filename, config_file, logpar, n_threads):
+def run_level_2(train_filename, test_filename, config_file, logpar):
     trainpredict = TrainPredict(train_filename, config_file, logpar)
     trainpredict.train_all()
-    trainpredict.predict_all(test_filename, n_threads)
+    trainpredict.predict_all(test_filename)
 
 
 def main(test_file):
@@ -15,9 +15,8 @@ def main(test_file):
     test_filename = kassnn_f / str("level_2/test_logs/main/" + test_file)
     config_file = kassnn_f / "config/config.yml"
     logpar = LogParser(train_filename)
-    trainpredict = TrainPredict(train_filename, config_file, logpar, n_threads)
+    trainpredict = TrainPredict(train_filename, config_file, logpar)
     trainpredict.train_all()
-    n_threads = 1
-    trainpredict.predict_all(test_filename, n_threads)
+    trainpredict.predict_all(test_filename)
 
 
diff --git a/kass_nn/util/__pycache__/kass_plotter.cpython-37.pyc b/kass_nn/util/__pycache__/kass_plotter.cpython-37.pyc
diff --git a/kass_nn/util/kass_plotter.py b/kass_nn/util/kass_plotter.py
@@ -65,7 +65,7 @@ def close_plot():
     plt.savefig("dangerousness_svg.png", dpi=100)
     plt.show()
 
-def plot_data_hours(data_train, data_test, col_X, colY, anomaly_scores, clf, mesh, extra_points, n_threads=10):
+def plot_data_hours(data_train, data_test, col_X, colY, anomaly_scores, clf, mesh, extra_points, n_threads):
     """
     Plots 2D data set training and testing
     :param data_train: pandas data frame train

diff --git a/kassandra.py b/kassandra.py
@@ -7,4 +7,4 @@
     test_filename = integration_f/"test_logs/main/test_main_0.log"
     config_f = Path("kass_nn/config")
     config_file = config_f/"config.yml"
-    run_all_levels(train_filename, test_filename, config_file, 10)
+    run_all_levels(train_filename, test_filename, config_file)