# **DDoS Proof of Concept**

---
## **Configuration**

In [None]:
# Configure Kaggle API credentials and download compressed dataset
!pip install kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d rodrigorosasilva/cic-ddos2019-30gb-full-dataset-csv-files

Dataset URL: https://www.kaggle.com/datasets/rodrigorosasilva/cic-ddos2019-30gb-full-dataset-csv-files
License(s): other
Downloading cic-ddos2019-30gb-full-dataset-csv-files.zip to /content
100% 3.10G/3.10G [02:35<00:00, 17.6MB/s]
100% 3.10G/3.10G [02:35<00:00, 21.3MB/s]


---
## **Imports**

In [None]:
import numpy as np
import os
import pandas as pd
import pickle
import tensorflow as tf
import warnings
import zipfile

from google.colab import drive
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, Normalizer, OneHotEncoder, QuantileTransformer, StandardScaler

In [None]:
# Ignore warnings
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive content
drive.mount('/content/drive')

Mounted at /content/drive


---
## **Load Data**

In [None]:
# Extract csv files from compressed dataset file
with zipfile.ZipFile('cic-ddos2019-30gb-full-dataset-csv-files.zip', 'r') as zip_ref:
    zip_ref.extractall()

In [None]:
# Create empty final dataframe
df = pd.DataFrame()

# Load several samples (100000) from every individual csv dataset and concatenate to final dataframe
for root in ['01-12', '03-11']:
    for filename in os.listdir(root):
        df = pd.concat([df, pd.read_csv(os.path.join(root, filename), nrows=100000)])
        print(f'Load {filename} slice. Total loaded rows = {len(df)}')

# Show dataframe head
df.head()

Load UDPLag.csv slice. Total loaded rows = 100000
Load DrDoS_LDAP.csv slice. Total loaded rows = 200000
Load DrDoS_NetBIOS.csv slice. Total loaded rows = 300000
Load TFTP.csv slice. Total loaded rows = 400000
Load DrDoS_NTP.csv slice. Total loaded rows = 500000
Load DrDoS_SSDP.csv slice. Total loaded rows = 600000
Load DrDoS_DNS.csv slice. Total loaded rows = 700000
Load Syn.csv slice. Total loaded rows = 800000
Load DrDoS_MSSQL.csv slice. Total loaded rows = 900000
Load DrDoS_UDP.csv slice. Total loaded rows = 1000000
Load DrDoS_SNMP.csv slice. Total loaded rows = 1100000
Load Portmap.csv slice. Total loaded rows = 1200000
Load UDPLag.csv slice. Total loaded rows = 1300000
Load LDAP.csv slice. Total loaded rows = 1400000
Load MSSQL.csv slice. Total loaded rows = 1500000
Load Syn.csv slice. Total loaded rows = 1600000
Load UDP.csv slice. Total loaded rows = 1700000
Load NetBIOS.csv slice. Total loaded rows = 1800000


Unnamed: 0.1,Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,SimillarHTTP,Inbound,Label
0,186059,172.16.0.5-192.168.50.1-58445-4463-17,172.16.0.5,58445,192.168.50.1,4463,17,2018-12-01 13:04:45.928673,1,2,0,766.0,0.0,383.0,383.0,383.0,0.0,0.0,0.0,0.0,0.0,766000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,383.0,383.0,383.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,574.5,383.0,0.0,-2,0,0,0,0,0,0,2,766,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,UDP-lag
1,135692,172.16.0.5-192.168.50.1-36908-9914-17,172.16.0.5,36908,192.168.50.1,9914,17,2018-12-01 13:04:45.928913,1,2,0,778.0,0.0,389.0,389.0,389.0,0.0,0.0,0.0,0.0,0.0,778000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,389.0,389.0,389.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,583.5,389.0,0.0,-2,0,0,0,0,0,0,2,778,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,UDP-lag
2,33822,172.16.0.5-192.168.50.1-41727-32361-17,172.16.0.5,41727,192.168.50.1,32361,17,2018-12-01 13:04:45.928915,2,2,0,750.0,0.0,375.0,375.0,375.0,0.0,0.0,0.0,0.0,0.0,375000000.0,1000000.0,2.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,1000000.0,0.0,375.0,375.0,375.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,562.5,375.0,0.0,-2,0,0,0,0,0,0,2,750,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,UDP-lag
3,24498,172.16.0.5-192.168.50.1-55447-5691-17,172.16.0.5,55447,192.168.50.1,5691,17,2018-12-01 13:04:45.929024,2,2,0,738.0,0.0,369.0,369.0,369.0,0.0,0.0,0.0,0.0,0.0,369000000.0,1000000.0,2.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,1000000.0,0.0,369.0,369.0,369.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,553.5,369.0,0.0,-2,0,0,0,0,0,0,2,738,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,UDP-lag
4,117372,172.16.0.5-192.168.50.1-58794-56335-17,172.16.0.5,58794,192.168.50.1,56335,17,2018-12-01 13:04:45.929096,1,2,0,750.0,0.0,375.0,375.0,375.0,0.0,0.0,0.0,0.0,0.0,750000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,375.0,375.0,375.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,562.5,375.0,0.0,-2,0,0,0,0,0,0,2,750,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,UDP-lag


In [None]:
# Show dataframe info
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1800000 entries, 0 to 99999
Data columns (total 88 columns):
 #   Column                        Dtype  
---  ------                        -----  
 0   Unnamed: 0                    int64  
 1   Flow ID                       object 
 2    Source IP                    object 
 3    Source Port                  int64  
 4    Destination IP               object 
 5    Destination Port             int64  
 6    Protocol                     int64  
 7    Timestamp                    object 
 8    Flow Duration                int64  
 9    Total Fwd Packets            int64  
 10   Total Backward Packets       int64  
 11  Total Length of Fwd Packets   float64
 12   Total Length of Bwd Packets  float64
 13   Fwd Packet Length Max        float64
 14   Fwd Packet Length Min        float64
 15   Fwd Packet Length Mean       float64
 16   Fwd Packet Length Std        float64
 17  Bwd Packet Length Max         float64
 18   Bwd Packet Length Min       

In [None]:
# Columns to drop (based on MSc. thesis)
drop_columns = [
    'Unnamed: 0',
    'Flow ID',
    'Source IP',
    'Source Port',
    'Destination IP',
    'Destination Port',
    'Timestamp',
    'SimillarHTTP',
    'Inbound'
]

# Strip column names to remove blank spaces at start and end
df.columns = map(str.strip, df.columns)

# Drop columns
df.drop(columns=drop_columns, inplace=True)

# Show dataframe head
df.head()

Unnamed: 0,Protocol,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,17,1,2,0,766.0,0.0,383.0,383.0,383.0,0.0,0.0,0.0,0.0,0.0,766000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,383.0,383.0,383.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,574.5,383.0,0.0,-2,0,0,0,0,0,0,2,766,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,UDP-lag
1,17,1,2,0,778.0,0.0,389.0,389.0,389.0,0.0,0.0,0.0,0.0,0.0,778000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,389.0,389.0,389.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,583.5,389.0,0.0,-2,0,0,0,0,0,0,2,778,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,UDP-lag
2,17,2,2,0,750.0,0.0,375.0,375.0,375.0,0.0,0.0,0.0,0.0,0.0,375000000.0,1000000.0,2.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,1000000.0,0.0,375.0,375.0,375.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,562.5,375.0,0.0,-2,0,0,0,0,0,0,2,750,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,UDP-lag
3,17,2,2,0,738.0,0.0,369.0,369.0,369.0,0.0,0.0,0.0,0.0,0.0,369000000.0,1000000.0,2.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,1000000.0,0.0,369.0,369.0,369.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,553.5,369.0,0.0,-2,0,0,0,0,0,0,2,738,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,UDP-lag
4,17,1,2,0,750.0,0.0,375.0,375.0,375.0,0.0,0.0,0.0,0.0,0.0,750000000.0,2000000.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,-2,0,2000000.0,0.0,375.0,375.0,375.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,562.5,375.0,0.0,-2,0,0,0,0,0,0,2,750,0,0,-1,-1,1,-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,UDP-lag


In [None]:
# Show label distribution
df['Label'].value_counts()

Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
Syn,199964
NetBIOS,199899
UDP,173865
MSSQL,114054
TFTP,99989
DrDoS_LDAP,99970
DrDoS_SNMP,99956
DrDoS_SSDP,99886
DrDoS_NetBIOS,99425
DrDoS_MSSQL,99349


---
## **Data Preprocessing**

In [None]:
# Drop NaN and Inf values
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

# Replace labels
df.replace({
    'NetBIOS': 'NetBIOS/Portmap',
    'UDP': 'SSDP/UDP',
    'DrDoS_LDAP': 'DNS/LDAP',
    'DrDoS_SNMP': 'SNMP',
    'DrDoS_SSDP': 'SSDP/UDP',
    'DrDoS_NetBIOS': 'NetBIOS/Portmap',
    'DrDoS_MSSQL': 'MSSQL',
    'DrDoS_UDP': 'SSDP/UDP',
    'DrDoS_DNS': 'DNS/LDAP',
    'UDP-lag': 'UDPLag',
    'Portmap': 'NetBIOS/Portmap',
    'DrDoS_NTP': 'NTP',
    'LDAP': 'DNS/LDAP'
}, inplace=True)

# Drop WebDDoS attacks
df.drop(df[df['Label'] == 'WebDDoS'].index, inplace=True)

# Show label distribution
df['Label'].value_counts()

Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
NetBIOS/Portmap,374169
SSDP/UDP,367926
MSSQL,205981
DNS/LDAP,200749
Syn,184829
SNMP,98008
UDPLag,93523
TFTP,91896
NTP,86370
BENIGN,27017


In [None]:
# Shuffle data
df = df.sample(frac=1)

# Get (X, y) from dataframe
X = df.drop(columns=['Label'])
y = df['Label']

# Get unique labels
unique_labels = y.unique()
print(f'Unique labels = {unique_labels}')

# Encode target labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# One-hot encoding of y labels
onehot_encoder = OneHotEncoder()
onehot_encoder.fit(y.reshape(-1, 1))

# Normalize inputs
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Show (X, y) shapes
print(f'X shape = {X.shape}')
print(f'y shape = {y.shape}')

Unique labels = ['Syn' 'SSDP/UDP' 'DNS/LDAP' 'MSSQL' 'NetBIOS/Portmap' 'UDPLag' 'TFTP'
 'NTP' 'SNMP' 'BENIGN']
X shape = (1730468, 78)
y shape = (1730468,)


In [None]:
# Split data in train and dev-test data
X_train, X_devtest, y_train, y_devtest = train_test_split(X, y, test_size=0.2, random_state=42)

# Split dev-test in dev and test data
X_dev, X_test, y_dev, y_test = train_test_split(X_devtest, y_devtest, test_size=0.5, random_state=42)

# Show train, dev and test shapes
print(f'X_train shape = {X_train.shape}')
print(f'y_train shape = {y_train.shape}')
print(f'X_dev shape = {X_dev.shape}')
print(f'y_dev shape = {y_dev.shape}')
print(f'X_test shape = {X_test.shape}')
print(f'y_test shape = {y_test.shape}')

X_train shape = (1384374, 78)
y_train shape = (1384374,)
X_dev shape = (173047, 78)
y_dev shape = (173047,)
X_test shape = (173047, 78)
y_test shape = (173047,)


In [None]:
# SMOTE over train set
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Show train, dev and test shapes
print(f'X_train shape = {X_train.shape}')
print(f'y_train shape = {y_train.shape}')
print(f'X_dev shape = {X_dev.shape}')
print(f'y_dev shape = {y_dev.shape}')
print(f'X_test shape = {X_test.shape}')
print(f'y_test shape = {y_test.shape}')

---
## **Model Settings**

### **Deep Neural Network**

In [None]:
# NN hyperparameter definition
input_dim = X.shape[1]
n_classes = len(unique_labels)
learning_rate = 0.001 # 0.0001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
dropout_rate = 0.3
batch_size = 256
epochs = 20

In [None]:
# NN architecture
model_nn = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(input_dim,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(dropout_rate),
    tf.keras.layers.Dense(n_classes, activation='softmax')
])

# Show architecture summary
model_nn.summary()

### **Random Forest Classifier**

In [None]:
# Random forest hyperparameter definition
n_estimators = 100

In [None]:
model_rf = RandomForestClassifier(n_estimators=n_estimators, random_state=42)

---
## **Model Training**

### **Deep Neural Network**

In [None]:
y_train_onehot = onehot_encoder.transform(y_train.reshape(-1, 1)).toarray()
y_dev_onehot = onehot_encoder.transform(y_dev.reshape(-1, 1)).toarray()
y_test_onehot = onehot_encoder.transform(y_test.reshape(-1, 1)).toarray()

print(f'y_train_onehot shape = {y_train_onehot.shape}')
print(f'y_dev_onehot shape = {y_dev_onehot.shape}')
print(f'y_test_onehot shape = {y_test_onehot.shape}')

y_train_onehot shape = (1384374, 10)
y_dev_onehot shape = (173047, 10)
y_test_onehot shape = (173047, 10)


In [None]:
model_nn.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history_nn = model_nn.fit(
    X_train,
    y_train_onehot,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_dev, y_dev_onehot)
)

Epoch 1/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 20ms/step - accuracy: 0.7493 - loss: 0.6531 - val_accuracy: 0.7888 - val_loss: 0.5335
Epoch 2/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 20ms/step - accuracy: 0.7816 - loss: 0.5478 - val_accuracy: 0.7717 - val_loss: 0.5459
Epoch 3/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 23ms/step - accuracy: 0.7875 - loss: 0.5368 - val_accuracy: 0.7950 - val_loss: 0.5190
Epoch 4/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 20ms/step - accuracy: 0.7889 - loss: 0.5315 - val_accuracy: 0.7748 - val_loss: 0.5553
Epoch 5/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 20ms/step - accuracy: 0.7905 - loss: 0.5280 - val_accuracy: 0.7963 - val_loss: 0.5138
Epoch 6/20
[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 21ms/step - accuracy: 0.7928 - loss: 0.5239 - val_accuracy: 0.7964 - val_loss: 0.514

### **Random Forest Classifier**

In [None]:
history_rf = model_rf.fit(X_train, y_train)

---
## **Model Persistence**

In [None]:
path = '/content/drive/MyDrive/Universidad/Proyectos/FRIDA DDoS/models'

### **Deep Neural Network**

In [None]:
with open(os.path.join(path, 'model_nn.architecture.json'), 'w') as file:
    file.write(model_nn.to_json())

model_nn.save_weights(os.path.join(path, 'model_nn.weights.h5'))

### **Random Forest Classifier**

In [None]:
with open(os.path.join(path, 'model_rf.pkl'), 'wb') as file:
    pickle.dump(model_rf, file)

---
## **Model Evaluation**

### **Deep Neural Network**

In [None]:
y_pred_nn = model_nn.predict(X_test)
y_pred_nn = np.argmax(y_pred_nn, axis=1)

print(f'Accuracy = {accuracy_score(y_test, y_pred_nn)}')
print(classification_report(y_test, y_pred_nn, target_names=unique_labels))

[1m5408/5408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step
Accuracy = 0.7984651568648979
                 precision    recall  f1-score   support

            Syn       0.98      0.98      0.98      2698
       SSDP/UDP       0.77      0.94      0.84     19940
       DNS/LDAP       0.74      0.83      0.79     20712
          MSSQL       0.95      0.91      0.93      8600
NetBIOS/Portmap       0.76      0.86      0.81     37484
         UDPLag       0.00      0.00      0.00      9877
           TFTP       0.85      0.92      0.89     37006
            NTP       1.00      0.70      0.83     18445
           SNMP       0.58      1.00      0.73      9259
         BENIGN       1.00      0.33      0.50      9026

       accuracy                           0.80    173047
      macro avg       0.76      0.75      0.73    173047
   weighted avg       0.78      0.80      0.77    173047



### **Random Forest Classifier**

In [None]:
y_pred_rf = model_rf.predict(X_test)

print(f'Accuracy = {accuracy_score(y_test, y_pred_rf)}')
print(classification_report(y_test, y_pred_rf, target_names=unique_labels))

Accuracy = 0.8325946130242073
                 precision    recall  f1-score   support

            Syn       1.00      1.00      1.00      2698
       SSDP/UDP       0.81      0.96      0.88     19940
       DNS/LDAP       0.80      0.83      0.81     20712
          MSSQL       0.97      0.99      0.98      8600
NetBIOS/Portmap       0.89      0.80      0.84     37484
         UDPLag       0.66      0.71      0.68      9877
           TFTP       0.87      0.91      0.89     37006
            NTP       0.98      0.72      0.83     18445
           SNMP       0.58      0.96      0.73      9259
         BENIGN       0.77      0.40      0.53      9026

       accuracy                           0.83    173047
      macro avg       0.83      0.83      0.82    173047
   weighted avg       0.85      0.83      0.83    173047

