# Evaluation Code for KSQL and ML
Before evaluating, run the KSQL infrastructure with the `test.pcap` generated by `../datasets/preparation2.py`, then run `../rule-detection/evaluation/consumer/script.py` to consume the results from Kafka, and finally run `../rule-detection/evaluation/ksql_evaluation_dos.py` and `../rule-detection/evaluation/ksql_evaluation_ps.py` to obtain the list of packets in the pcap (identified by index) that are malicious/detected.

**Metrics**
- Accuracy, Precision, Recall, F1-Score
- Inference Time for ML

In [1]:
import pandas as pd
import numpy as np
import joblib
import time

In [2]:
# Directories holding the tuned, joblib-serialized models; the DoS and Port Scan
# sections load their `<name>_tuned.pkl` files from these paths.
# NOTE(review): "modelsdoscrossval" has no separator between "models" and
# "doscrossval", unlike the Port Scan path - confirm the directory name is intended.
DOS_MODEL_LOCATION = "../ai-detection/notebooks/modelsdoscrossval/postfs"
PS_MODEL_LOCATION = "../ai-detection/notebooks/models/postfs"

In [4]:
# Dataset every model is timed on: the per-section feature frames are column
# subsets of it, and its row count is what turns a total run time into a
# per-sample time.
# NOTE(review): the frames derived from this are named `X_test_fs`, but the file
# loaded here is `train_final.csv` - confirm this is the intended split.
df = pd.read_csv('../datasets/output/train_final.csv', parse_dates=['Timestamp'])

## Inference Time

In [5]:
# Number of timed repetitions per model; the reported figure is the average
# over these runs.
TRIES = 100

In [8]:
# this is in milliseconds, not seconds
def calculate_inference_time_in_ms(foo, tries: int, n_samples=None):
    """Return the average per-sample run time of ``foo()`` in milliseconds.

    ``foo`` is called ``tries`` times. Each call's elapsed time is divided by
    the number of samples it processed and converted from seconds to
    milliseconds; the mean of those per-sample figures is returned.

    Args:
        foo: Zero-argument callable performing one full prediction pass.
        tries: Number of timed repetitions; must be at least 1.
        n_samples: Number of rows ``foo`` processes per call. When omitted,
            the row count of the module-level ``df`` is used, which matches
            the original behavior.

    Returns:
        The mean per-sample time in milliseconds (a NumPy float).

    Raises:
        ValueError: If ``tries`` is smaller than 1 (the average of zero runs
            would otherwise silently be NaN).
    """
    if tries < 1:
        raise ValueError("tries must be a positive integer")
    if n_samples is None:
        # Fall back to the notebook-wide dataset so existing calls keep working.
        n_samples = len(df)

    times = []
    for _ in range(tries):
        # perf_counter is monotonic and high-resolution; time.time() can jump
        # with system clock adjustments and is too coarse for short runs.
        start = time.perf_counter()
        foo()
        elapsed = time.perf_counter() - start
        times.append((elapsed / n_samples) * 1000)
    return np.average(times)

In [9]:
def print_time(name: str, in_ms):
    """Print the average inference time for ``name`` in both units.

    Writes a header line, the value in milliseconds, the same value converted
    to seconds, and a trailing blank line to standard output.
    """
    header = f"Average Inference time for {name}"
    print(header)
    print(in_ms, 'milliseconds')
    in_seconds = in_ms / 1000
    # The doubled newline stands in for the separate blank-line print.
    print(in_seconds, 'seconds', end='\n\n')

## DoS

In [11]:
# Load the tuned DoS models from DOS_MODEL_LOCATION. These module-level names
# are rebound to the Port Scan models further down, so the DoS timing cell must
# run before that section.
dt = joblib.load(f"{DOS_MODEL_LOCATION}/dt_tuned.pkl")
rf = joblib.load(f"{DOS_MODEL_LOCATION}/rf_tuned.pkl")
et = joblib.load(f"{DOS_MODEL_LOCATION}/et_tuned.pkl")
xg = joblib.load(f"{DOS_MODEL_LOCATION}/xg_tuned.pkl")
lgbm = joblib.load(f"{DOS_MODEL_LOCATION}/lgbm_tuned.pkl")
cat = joblib.load(f"{DOS_MODEL_LOCATION}/cat_tuned.pkl")
# NOTE(review): stk1 and stk2 are loaded but only stk3 is used by the timing
# code visible in this notebook - confirm whether they are still needed.
stk1 = joblib.load(f"{DOS_MODEL_LOCATION}/stk1_tuned.pkl")
stk2 = joblib.load(f"{DOS_MODEL_LOCATION}/stk2_tuned.pkl")
stk3 = joblib.load(f"{DOS_MODEL_LOCATION}/stk3_tuned.pkl")

# Columns of `df` that are fed to the DoS models.
fs = ['Fwd Seg Size Min', 'Bwd Packet Length Mean', 'FWD Init Win Bytes', 'Fwd IAT Max', 'Bwd RST Flags', 'Bwd Packet Length Max', 'FIN Flag Count', 'Fwd IAT Min', 'Protocol', 'Bwd Segment Size Avg', 'Flow IAT Mean', 'Fwd Packet Length Max', 'Down/Up Ratio', 'Flow IAT Min', 'RST Flag Count', 'Flow Duration', 'Subflow Fwd Packets', 'Flow IAT Max', 'Flow Packets/s', 'Active Min', 'Bwd IAT Min', 'Bwd Act Data Pkts', 'Bwd Init Win Bytes', 'Bwd IAT Max', 'SYN Flag Count', 'Idle Max', 'Fwd IAT Total', 'Total Length of Bwd Packet', 'Fwd IAT Mean', 'Fwd Packets/s', 'Average Packet Size', 'Bwd IAT Mean', 'Total Bwd packets', 'Bwd Packets/s', 'Bwd IAT Total', 'Packet Length Mean', 'Packet Length Max', 'Bwd PSH Flags', 'Flow IAT Std', 'Packet Length Std', 'Fwd Bulk Rate Avg', 'Fwd Segment Size Avg', 'Fwd Packet Length Min', 'Fwd IAT Std', 'Fwd Packet Length Std', 'Flow Bytes/s', 'Fwd Bytes/Bulk Avg', 'ACK Flag Count', 'Packet Length Variance']

# Names of the three base models combined in the DoS OCSE timing. The visible
# timing code does not read this list; it hard-codes the same three models, so
# the two must be kept in sync by hand.
top_3_models = ['xg', 'et', 'lgbm']

# Feature frame every DoS model is timed on.
X_test_fs = df[fs]

In [12]:
# Per-sample inference time of each tuned DoS model, stored as (label, milliseconds).
dos_inference_times: list[tuple] = []

# Time the six standalone classifiers in a fixed order; each one predicts on the
# same feature frame for TRIES repetitions.
dos_inference_times.extend(
    (tag, calculate_inference_time_in_ms(lambda: clf.predict(X_test_fs), TRIES))
    for tag, clf in (
        ("DT", dt),      # Decision Tree
        ("RF", rf),      # Random Forest
        ("ET", et),      # Extra Trees
        ("XG", xg),      # XGBoost
        ("LGBM", lgbm),  # LightGBM
        ("Cat", cat),    # CatBoost
    )
)


# OCSE (Change based on training's top 3 models results)
def ocse_timer():
    """Run one full OCSE prediction pass over ``X_test_fs``.

    Collects the predicted labels (as single columns) and the class
    probabilities of ``xg``, ``et`` and ``lgbm``, joins them column-wise with
    all label columns first, and feeds the result to ``stk3.predict``. The
    prediction itself is discarded; only the elapsed time matters.
    """
    base_models = (xg, et, lgbm)
    label_columns = [clf.predict(X_test_fs).reshape(-1, 1) for clf in base_models]
    probability_blocks = [clf.predict_proba(X_test_fs) for clf in base_models]

    meta_features = np.concatenate(label_columns + probability_blocks, axis=1)

    stk3.predict(meta_features)


dos_inference_times.append(("OCSE", calculate_inference_time_in_ms(ocse_timer, TRIES)))

In [13]:
# Order the DoS timings from fastest to slowest (ascending milliseconds).
dos_times = list(dos_inference_times)
dos_times.sort(key=lambda entry: entry[1])
dos_times

[('XG', 8.44578148144689e-05),
 ('DT', 0.0001490853285177208),
 ('Cat', 0.00016464099596621282),
 ('LGBM', 0.0005456495241421308),
 ('RF', 0.001790227723203146),
 ('ET', 0.0019901761265639756),
 ('OCSE', 0.005219332419182201)]

In [14]:
# Show each DoS timing as fixed-point text with five decimals.
[[label, f"{millis:.5f}"] for label, millis in dos_times]

[['XG', '0.00008'],
 ['DT', '0.00015'],
 ['Cat', '0.00016'],
 ['LGBM', '0.00055'],
 ['RF', '0.00179'],
 ['ET', '0.00199'],
 ['OCSE', '0.00522']]

## Port Scan

In [15]:
# Load the tuned Port Scan models from PS_MODEL_LOCATION. This rebinds the same
# module-level names used by the DoS section, so cells run after this point
# operate on the Port Scan models.
dt = joblib.load(f"{PS_MODEL_LOCATION}/dt_tuned.pkl")
rf = joblib.load(f"{PS_MODEL_LOCATION}/rf_tuned.pkl")
et = joblib.load(f"{PS_MODEL_LOCATION}/et_tuned.pkl")
xg = joblib.load(f"{PS_MODEL_LOCATION}/xg_tuned.pkl")
lgbm = joblib.load(f"{PS_MODEL_LOCATION}/lgbm_tuned.pkl")
cat = joblib.load(f"{PS_MODEL_LOCATION}/cat_tuned.pkl")
# NOTE(review): stk1 and stk2 are loaded but only stk3 is used by the timing
# code visible in this notebook - confirm whether they are still needed.
stk1 = joblib.load(f"{PS_MODEL_LOCATION}/stk1_tuned.pkl")
stk2 = joblib.load(f"{PS_MODEL_LOCATION}/stk2_tuned.pkl")
stk3 = joblib.load(f"{PS_MODEL_LOCATION}/stk3_tuned.pkl")

# Columns of `df` that are fed to the Port Scan models.
fs = ['Flow Duration', 'Bwd RST Flags', 'RST Flag Count', 'Packet Length Mean', 'Protocol', 'Fwd Seg Size Min', 'Average Packet Size', 'Flow IAT Max', 'Fwd Packet Length Max', 'Packet Length Max', 'Bwd Packet Length Max', 'Fwd Segment Size Avg', 'Bwd Segment Size Avg', 'Fwd Act Data Pkts', 'Total Length of Fwd Packet', 'Bwd Packet Length Mean', 'FWD Init Win Bytes', 'Total Length of Bwd Packet', 'Bwd Act Data Pkts', 'Packet Length Std']

# Names of the three base models combined in the Port Scan OCSE timing. The
# visible timing code does not read this list; it hard-codes the same three
# models, so the two must be kept in sync by hand.
top_3_models = ['et', 'lgbm', 'rf']

# Feature frame every Port Scan model is timed on.
X_test_fs = df[fs]

In [16]:
# Per-sample inference time of each tuned Port Scan model, stored as (label, milliseconds).
ps_inference_times: list[tuple] = []

# Time the six standalone classifiers in a fixed order; each one predicts on the
# same feature frame for TRIES repetitions.
ps_inference_times.extend(
    (tag, calculate_inference_time_in_ms(lambda: clf.predict(X_test_fs), TRIES))
    for tag, clf in (
        ("DT", dt),      # Decision Tree
        ("RF", rf),      # Random Forest
        ("ET", et),      # Extra Trees
        ("XG", xg),      # XGBoost
        ("LGBM", lgbm),  # LightGBM
        ("Cat", cat),    # CatBoost
    )
)


# OCSE (Change based on training's top 3 models results)
def ocse_timer():
    """Run one full OCSE prediction pass over ``X_test_fs``.

    Collects the predicted labels (as single columns) and the class
    probabilities of ``et``, ``lgbm`` and ``rf``, joins them column-wise with
    all label columns first, and feeds the result to ``stk3.predict``. The
    prediction itself is discarded; only the elapsed time matters.
    """
    base_models = (et, lgbm, rf)
    label_columns = [clf.predict(X_test_fs).reshape(-1, 1) for clf in base_models]
    probability_blocks = [clf.predict_proba(X_test_fs) for clf in base_models]

    meta_features = np.concatenate(label_columns + probability_blocks, axis=1)

    stk3.predict(meta_features)


ps_inference_times.append(("OCSE", calculate_inference_time_in_ms(ocse_timer, TRIES)))

In [17]:
# Order the Port Scan timings from fastest to slowest (ascending milliseconds).
ps_times = list(ps_inference_times)
ps_times.sort(key=lambda entry: entry[1])
ps_times

[('DT', 4.824936858142845e-05),
 ('XG', 6.137050984963807e-05),
 ('Cat', 0.00023446180517324697),
 ('LGBM', 0.0003129888957770155),
 ('RF', 0.0012181823822037224),
 ('ET', 0.001346262603341478),
 ('OCSE', 0.00555523008552967)]

In [18]:
# Show each Port Scan timing as fixed-point text with five decimals.
[[label, f"{millis:.5f}"] for label, millis in ps_times]

[['DT', '0.00005'],
 ['XG', '0.00006'],
 ['Cat', '0.00023'],
 ['LGBM', '0.00031'],
 ['RF', '0.00122'],
 ['ET', '0.00135'],
 ['OCSE', '0.00556']]