In [1]:

###############################################################################
# Random Forest Classification Model (TensorFlow)                             #
# For Slowloris Dataset                                                       #
# Based on the Implementation of:                                             #
# https://www.tensorflow.org/decision_forests/tutorials/beginner_colab        #
###############################################################################


In [2]:
# Installieren aller benötigten Pakete
!pip install numpy==1.19.2
!pip install six==1.15.0
!pip install wheel==0.35
!pip install tensorflow_decision_forests
!pip install pandas
!pip install wurlitzer
!pip install matplotlib
!pip install onnxruntime
!pip install keras2onnx



In [3]:
# Laden der benötigten Python Pakete
import os
# os.environ["TF_KERAS"]='1'
import pandas as pd
import numpy as np
import tensorflow_decision_forests as tfdf
import tensorflow as tf
from wurlitzer import sys_pipes
import matplotlib.pyplot as plt
import onnx
import keras2onnx as k2o

In [4]:
# Report the installed TensorFlow Decision Forests version
print(f"Found TensorFlow Decision Forests v{tfdf.__version__}")


Found TensorFlow Decision Forests v0.1.7


In [5]:
# Load the network traffic data for the Slowloris attack and rename the
# columns used below for better compatibility with TensorFlow.
# The rename is chained onto the read so a new frame is produced instead of
# mutating one in place.
data_Slowloris = pd.read_csv(
    '../Data/Thursday-15-02-2018_Slowloris-Attack.csv'
).rename(columns={
    'Flow Duration': 'flow_duration',
    'Bwd IAT Mean': 'bwd_iat_mean',
    'Fwd IAT Min': 'fwd_iat_min',
    'Fwd IAT Mean': 'fwd_iat_mean',
    'Label': 'label',
})


In [6]:
# Name of the column that holds the value to predict; it is passed as the
# `label` argument when the DataFrames are converted to TensorFlow datasets
label = 'label'

In [7]:
# Split the dataset into training and test data
def split_dataset(dataset, test_ratio=0.30, seed=None):
    """Randomly split a pandas DataFrame into a training and a testing part.

    Every row is assigned to the test part independently with probability
    ``test_ratio``, so the resulting sizes only approximate the requested
    ratio.

    Args:
        dataset: The DataFrame to split.
        test_ratio: Probability in [0, 1] that a row ends up in the test part.
        seed: Optional seed for a private random generator, which makes the
            split reproducible. With the default ``None`` the random numbers
            are drawn from NumPy's global random state.

    Returns:
        A ``(training, testing)`` tuple of DataFrames; together they contain
        every row of ``dataset`` exactly once.

    Raises:
        ValueError: If ``test_ratio`` is not within [0, 1].
    """
    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(f"test_ratio must be in [0, 1], got {test_ratio!r}")

    if seed is None:
        # Keep drawing from the global state so np.random.seed() is honoured.
        draws = np.random.rand(len(dataset))
    else:
        draws = np.random.default_rng(seed).random(len(dataset))

    # Boolean mask: True marks the rows that go into the test part.
    test_indices = draws < test_ratio
    return dataset[~test_indices], dataset[test_indices]

# Seed NumPy's global random generator first: split_dataset draws its random
# mask from it, so without a fixed seed every run produces a different split
# (and therefore different training and evaluation results).
np.random.seed(42)
training_data_Slowloris, testing_data_Slowloris = split_dataset(data_Slowloris)

# Sanity check: the two parts must cover the whole dataset.
assert len(training_data_Slowloris) + len(testing_data_Slowloris) == len(data_Slowloris)

print("{} examples in training, {} examples for testing.".format(
    len(training_data_Slowloris), len(testing_data_Slowloris)))


705517 examples in training, 301550 examples for testing.


In [8]:
# Convert the pandas DataFrames into TensorFlow datasets.
# Both frames go through the same conversion (training first, then testing),
# each with the column named by `label` as the prediction target.
print("Converting Panda Dataframe into TensorFlow Dataset...")
training_dataset_Slowloris, testing_dataset_Slowloris = (
    tfdf.keras.pd_dataframe_to_tf_dataset(frame, label=label)
    for frame in (training_data_Slowloris, testing_data_Slowloris)
)


Converting Panda Dataframe into TensorFlow Dataset...


In [9]:
# Create the Random Forest model (constructed without arguments) and register
# accuracy as the metric to report when the model is evaluated
model = tfdf.keras.RandomForestModel()
model.compile(metrics=["accuracy"])


In [10]:
# Train the model.
# The fit runs inside wurlitzer's sys_pipes() context so that the log lines
# written by the native training code show up in the cell output.
print("Training the Model: ")
with sys_pipes():
    model.fit(x=training_dataset_Slowloris)

Training the Model: 
2021-06-29 17:07:18.429984: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-06-29 17:07:18.448871: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2199995000 Hz
[INFO kernel.cc:746] Start Yggdrasil model training
[INFO kernel.cc:747] Collect training examples
[INFO kernel.cc:392] Number of batches: 11024
[INFO kernel.cc:393] Number of examples: 705517
[INFO kernel.cc:769] Dataset:
Number of records: 705517
Number of columns: 5

Number of columns by type:
	NUMERICAL: 4 (80%)
	CATEGORICAL: 1 (20%)

Columns:

NUMERICAL: 4 (80%)
	0: "bwd_iat_mean" NUMERICAL mean:1.41201e+06 min:0 max:1.19913e+08 sd:7.43499e+06
	1: "flow_duration" NUMERICAL mean:1.44362e+07 min:0 max:1.2e+08 sd:3.3857e+07
	2: "fwd_iat_mean" NUMERICAL mean:3.47284e+06 min:0 max:1.19992e+08 sd:1.23694e+07
	3: "fwd_iat_min" NUMERICAL mean:2.64068e+06 min:0 max:1.19992e+08 sd:1.22671e+07

CATEGORIC

In [11]:
# Evaluate the trained model on the test data
print("Evaluating the Model...")
# return_dict=True yields a mapping from metric name to value
evaluation = model.evaluate(testing_dataset_Slowloris, return_dict=True)

print()

# Print every metric with four decimal places
for name, value in evaluation.items():
    print(f"{name}: {value:.4f}")

Evaluating the Model...

loss: 0.0000
accuracy: 0.9994


In [12]:
# Path components used to build the output locations below
data_path = "../Data"
model_path = "Models"
onnx_path = "ONNX_Models"
model_name = "slowloris_model"

# Save the trained model for later use (an existing model at that path is overwritten)
model.save(os.path.join(data_path,model_path,model_name),overwrite=True)

# Convert to an ONNX model (currently disabled).
# NOTE(review): the disabled call references `df_model_name`, which is not
# defined in the cells shown here -- confirm the intended name before re-enabling.
# onnx_model = k2o.convert_keras(model,df_model_name)
# onnx.save_model(onnx_model,os.path.join(data_path,onnx_path,model_name + ".onnx"))



INFO:tensorflow:Assets written to: ../Data/Models/slowloris_model/assets
INFO:tensorflow:Assets written to: ../Data/Models/slowloris_model/assets


In [13]:
# Plot the first tree of the decision forest.
# The HTML is rendered once and reused for both the file and the cell output
# (previously plot_model was invoked twice with identical arguments).
tree_html = tfdf.model_plotter.plot_model(model, tree_idx=0, max_depth=3)

# Explicit UTF-8 so the written file does not depend on the platform's
# default text encoding.
with open('../Data/Models/Slowloris_Model_Tree.html', 'w', encoding='utf-8') as f:
    f.write(tree_html)

# Last expression of the cell: shows the generated HTML source as cell output.
tree_html


'\n<script src="https://d3js.org/d3.v6.min.js"></script>\n<div id="tree_plot_bc16f2b0ee554b4ba184df72c01229b0"></div>\n<script>\n/*\n * Copyright 2021 Google LLC.\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     https://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n/**\n *  Plotting of decision trees generated by TF-DF.\n *\n *  A tree is a recursive structure of node objects.\n *  A node contains one or more of the following components:\n *\n *    - A value: Representing the output of the node. If the node is not a leaf,\

In [14]:
# Print a summary of the trained model (input features, variable importances, ...)
model.summary()

Model: "random_forest_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Total params: 1
Trainable params: 0
Non-trainable params: 1
_________________________________________________________________
Type: "RANDOM_FOREST"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (4):
	bwd_iat_mean
	flow_duration
	fwd_iat_mean
	fwd_iat_min

No weights

Variable Importance: NUM_NODES:
    1. "flow_duration" 24569.000000 ################
    2.   "fwd_iat_min" 16023.000000 #######
    3.  "fwd_iat_mean" 14166.000000 ######
    4.  "bwd_iat_mean" 7501.000000 

Variable Importance: NUM_AS_ROOT:
    1. "bwd_iat_mean" 223.000000 ################
    2. "fwd_iat_mean" 77.000000 

Variable Importance: SUM_SCORE:
    1.  "bwd_iat_mean" 6081381.699483 ################
    2. "flow_duration" 2403939.750272 #
    3.  "fwd_iat_mean" 2038630.496041 
    4.   "fwd_iat_min" 1933959.961162 

Variable Importance: MEAN_MIN_

In [15]:
# Plot how the out-of-bag accuracy and loss evolve with the number of trees
logs = model.make_inspector().training_logs()
num_trees = [log.num_trees for log in logs]

# Explicit figure/axes objects instead of the implicit pyplot state machine;
# one row with two panels, same overall size as before.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_accuracy.plot(num_trees, [log.evaluation.accuracy for log in logs])
ax_accuracy.set_xlabel("Number of trees")
ax_accuracy.set_ylabel("Accuracy (out-of-bag)")

ax_loss.plot(num_trees, [log.evaluation.loss for log in logs])
ax_loss.set_xlabel("Number of trees")
ax_loss.set_ylabel("Logloss (out-of-bag)")

# savefig does not create missing directories, so make sure the target exists.
plot_file = '../Data/Visualized/Slowloris_Model.png'
os.makedirs(os.path.dirname(plot_file), exist_ok=True)
fig.savefig(plot_file)

# Close the figure instead of only clearing it: a cleared figure stays open
# and is rendered as an empty "<Figure ... with 0 Axes>" cell output.
plt.close(fig)

<Figure size 864x288 with 0 Axes>