In [1]:

###############################################################################
# Random Forest Classification Model (TensorFlow)                             #
# For LOIC Dataset                                                            #
# Based on the Implementation of:                                             #
# https://www.tensorflow.org/decision_forests/tutorials/beginner_colab        #
###############################################################################


In [2]:
# Installieren der benötigten Python Pakete
!pip install tensorflow_decision_forests
!pip install numpy==1.19.5
!pip install six==1.15.0
!pip install wheel==0.35
!pip install pandas
!pip install wurlitzer
!pip install matplotlib

Collecting tensorflow_decision_forests
  Downloading tensorflow_decision_forests-0.1.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.2 MB)
[K     |████████████████████████████████| 6.2 MB 2.6 MB/s eta 0:00:01
Collecting tensorflow~=2.5
  Downloading tensorflow-2.5.0-cp39-cp39-manylinux2010_x86_64.whl (454.4 MB)
[K     |████████████████████████████████| 454.4 MB 67 kB/s s eta 0:00:01   |▍                               | 5.7 MB 12.6 MB/s eta 0:00:36     |██████████████▊                 | 208.4 MB 24.9 MB/s eta 0:00:10
Collecting tensorflow-estimator<2.6.0,>=2.5.0rc0
  Downloading tensorflow_estimator-2.5.0-py2.py3-none-any.whl (462 kB)
[K     |████████████████████████████████| 462 kB 23.7 MB/s eta 0:00:01
[?25hCollecting typing-extensions~=3.7.4
  Downloading typing_extensions-3.7.4.3-py3-none-any.whl (22 kB)
Collecting h5py~=3.1.0
  Downloading h5py-3.1.0-cp39-cp39-manylinux1_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 21.1 MB/s eta 0:00:01
Co

      Successfully uninstalled typing-extensions-3.10.0.0
  Attempting uninstall: tensorflow-estimator
    Found existing installation: tensorflow-estimator 2.4.0
    Uninstalling tensorflow-estimator-2.4.0:
      Successfully uninstalled tensorflow-estimator-2.4.0
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.4.1
    Uninstalling tensorboard-2.4.1:
      Successfully uninstalled tensorboard-2.4.1
  Attempting uninstall: h5py
    Found existing installation: h5py 2.10.0
    Uninstalling h5py-2.10.0:
      Successfully uninstalled h5py-2.10.0
  Attempting uninstall: gast
    Found existing installation: gast 0.3.3
    Uninstalling gast-0.3.3:
      Successfully uninstalled gast-0.3.3
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.4.1
    Uninstalling tensorflow-2.4.1:
      Successfully uninstalled tensorflow-2.4.1
Successfully installed gast-0.4.0 grpcio-1.34.1 h5py-3.1.0 keras-nightly-2.5.0.dev2021032900 numpy-1.19

In [3]:
# Importieren der benötigten Python Pakete
import pandas as pd
import numpy as np
import tensorflow_decision_forests as tfdf
from wurlitzer import sys_pipes
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
# Load the network traffic data for the LOIC attack
loic_csv_path = '../Data/Optimized/Tuesday-20-02-2018_LOIC-Attack.csv'
data_LOIC = pd.read_csv(loic_csv_path)

In [None]:
# Count the NaN values in the dataset (this cell only reports the count;
# it does not replace anything)
nan_mask = data_LOIC.isna()
nan_count = nan_mask.sum().sum()
print(f"Initial Count of NaN in Dataset: {nan_count}")

In [None]:
# Name of the column in the dataset that holds the label
label = "label"

In [None]:
# Split the dataset into training and test data
def split_dataset(dataset, test_ratio=0.30, seed=None):
    """Randomly split a pandas DataFrame into a training and a testing part.

    Every row is assigned to the test part independently with probability
    ``test_ratio``; the resulting sizes are therefore only approximately
    ``(1 - test_ratio)`` and ``test_ratio`` of the input length.

    Args:
        dataset: The pandas DataFrame to split.
        test_ratio: Probability for a row to be placed in the test part.
        seed: Optional seed for a reproducible split. When ``None`` (the
            default) the global NumPy random state is used, exactly as
            before this parameter existed.

    Returns:
        A tuple ``(training, testing)`` of two DataFrames that together
        contain every row of ``dataset`` exactly once.
    """
    if seed is None:
        # Keep the historic behaviour: draw from the global NumPy RNG.
        draws = np.random.rand(len(dataset))
    else:
        # A private generator makes the split repeatable without touching
        # the global random state.
        draws = np.random.RandomState(seed).rand(len(dataset))
    test_indices = draws < test_ratio
    return dataset[~test_indices], dataset[test_indices]

# Seed the global NumPy random state so that the random train/test split is
# identical on every "Restart Kernel -> Run All" execution.
np.random.seed(42)
training_data_LOIC, testing_data_LOIC = split_dataset(data_LOIC)

# Sanity check: the two parts must together cover the whole dataset.
assert len(training_data_LOIC) + len(testing_data_LOIC) == len(data_LOIC)

print("{} examples in training, {} examples for testing.".format(
    len(training_data_LOIC), len(testing_data_LOIC)))

In [None]:
# Convert the pandas DataFrames into TensorFlow datasets
print("Converting Panda Dataframe into TensorFlow Dataset...")
to_tf_dataset = tfdf.keras.pd_dataframe_to_tf_dataset
training_dataset_LOIC = to_tf_dataset(training_data_LOIC, label=label)
testing_dataset_LOIC = to_tf_dataset(testing_data_LOIC, label=label)

In [None]:
# Create the Random Forest model and register the metrics to report
# during evaluation
evaluation_metrics = ["accuracy"]
model = tfdf.keras.RandomForestModel()
model.compile(metrics=evaluation_metrics)

In [None]:
# Train the model on the training dataset; the fit call runs inside the
# wurlitzer sys_pipes() context
print("Training the Model...")
training_log_pipes = sys_pipes()
with training_log_pipes:
    model.fit(x=training_dataset_LOIC)

In [None]:
# Evaluate the trained model on the test dataset and print every
# returned metric with four decimal places
print("Evaluating the Model...")
evaluation = model.evaluate(testing_dataset_LOIC, return_dict=True)
print()
for name in evaluation:
    value = evaluation[name]
    print(f"{name}: {value:.4f}")


In [None]:
# Show the summary of the trained model
model.summary()

In [None]:
# Retrieve the feature (variable) importances as reported by the
# TensorFlow model's inspector; the last expression is displayed as the
# cell output
model_inspector = model.make_inspector()
model_inspector.variable_importances()