# Baselines

In [1]:
import pandas as pd

In [2]:
# Score columns reported for every model; also used for highlighting below.
metrics = ["accuracy", "precision", "recall", "f1_score", "roc_auc"]

# Columns read from the baseline result CSVs: the identifying columns
# followed by the score columns above.
usecols = ["dataset_name", "benign_labels", "model_type", *metrics]

def highlight_if_max(row, max_values=None):
    """Return one CSS string per cell of ``row``, marking the best scores.

    Meant to be passed to ``Styler.apply(..., axis=1)``: a cell is highlighted
    when its value equals the reference maximum stored under the same index
    label and column in ``max_values``; every other cell gets an empty style.

    Parameters
    ----------
    row : pandas.Series
        One row of the styled frame. ``row.name`` is the row's index label
        and ``row.index`` holds the column names being styled.
    max_values : pandas.DataFrame, optional
        Frame indexed like the styled data that holds, for each cell, the
        maximum to compare against. When omitted, the notebook-level
        ``max_per_dataset`` is looked up at call time, which is what the
        existing ``.apply(highlight_if_max, ...)`` calls rely on.

    Returns
    -------
    list of str
        CSS declarations, one per entry of ``row`` and in the same order.
    """
    if max_values is None:
        # Backward-compatible fallback: use whichever frame the calling cell
        # last bound to the global name. Pass ``max_values`` explicitly to
        # avoid depending on cell execution order.
        max_values = max_per_dataset
    return [
        "font-weight: bold; color:#80DEEA; background-color:#263238"
        if row[metric] == max_values.loc[row.name, metric]
        else ''
        # Iterate over the row's own columns so the result always has the
        # length Styler expects, whatever subset is being styled.
        for metric in row.index
    ]

## Only "benign" labels count as benign

In [3]:
# Keep only the "Overall" summary row of the CNN-BiLSTM report for CIC-IDS-2017.
cnn_bilstm_cic_ids_2017 = (
    pd.read_csv("results/cic_ids_2017_report.csv")
    .loc[lambda report: report["Label"] == "Overall"]
)
cnn_bilstm_cic_ids_2017

Unnamed: 0,Label,TP,FN,FP,TN,Recall,Precision,Accuracy,F1 Score,Specificity,FPR,FNR
15,Overall,107934,3951,4977,448714,0.964687,0.955921,0.984214,0.960284,0.98903,0.01097,0.035313


In [4]:
# Keep only the "Overall" summary row of the CNN-BiLSTM report for MAWILab 2011-01.
cnn_bilstm_mawi_201101 = (
    pd.read_csv("results/mawi_2011_01_report_labels.csv")
    .loc[lambda report: report["Label"] == "Overall"]
)


In [5]:
# Keep only the "Overall" summary row of the CNN-BiLSTM report for MAWILab 2016-01.
cnn_bilstm_mawi_201601 = (
    pd.read_csv("results/mawi_2016_01_report_labels.csv")
    .loc[lambda report: report["Label"] == "Overall"]
)
cnn_bilstm_mawi_201601

Unnamed: 0,Label,TP,FN,FP,TN,Recall,Precision,Accuracy,F1 Score,Specificity,FPR,FNR
4,Overall,261983,75454,54001,208562,0.776391,0.829102,0.784242,0.801881,0.794331,0.205669,0.223609


In [6]:
# Keep only the "Overall" summary row of the CNN-BiLSTM report for MAWILab 2021-01.
cnn_bilstm_mawi_202101 = (
    pd.read_csv("results/mawi_2021_01_report_labels.csv")
    .loc[lambda report: report["Label"] == "Overall"]
)
cnn_bilstm_mawi_202101

Unnamed: 0,Label,TP,FN,FP,TN,Recall,Precision,Accuracy,F1 Score,Specificity,FPR,FNR
4,Overall,395183,48859,51423,104505,0.889968,0.884858,0.832855,0.887406,0.670213,0.329787,0.110032


In [7]:
# One entry per dataset: (dataset name, "Overall" report row, ROC AUC).
# The ROC AUC values are entered by hand because the report files loaded
# above have no such column.
cnn_bilstm_runs = [
    ("CIC-IDS-2017", cnn_bilstm_cic_ids_2017, 0.9985),
    ("MAWILab-2011-01-n3_000_000", cnn_bilstm_mawi_201101, 0.9014),
    ("MAWILab-2016-01-n3_000_000", cnn_bilstm_mawi_201601, 0.8458),
    ("MAWILab-2021-01-n3_000_000", cnn_bilstm_mawi_202101, 0.8915),
]

# Report columns, listed in the order of the first four entries of `metrics`.
report_columns = ["Accuracy", "Precision", "Recall", "F1 Score"]

cnn_bilstm_metrics = pd.DataFrame(
    [
        [
            dataset_name,
            "CNN-BiLSTM",
            *(report[column].values[0] for column in report_columns),
            roc_auc,
        ]
        for dataset_name, report, roc_auc in cnn_bilstm_runs
    ],
    columns=["dataset_name", "model_type"] + metrics,
)
cnn_bilstm_metrics

Unnamed: 0,dataset_name,model_type,accuracy,precision,recall,f1_score,roc_auc
0,CIC-IDS-2017,CNN-BiLSTM,0.984214,0.955921,0.964687,0.960284,0.9985
1,MAWILab-2011-01-n3_000_000,CNN-BiLSTM,0.845972,0.876162,0.91471,0.895021,0.9014
2,MAWILab-2016-01-n3_000_000,CNN-BiLSTM,0.784242,0.829102,0.776391,0.801881,0.8458
3,MAWILab-2021-01-n3_000_000,CNN-BiLSTM,0.832855,0.884858,0.889968,0.887406,0.8915


In [8]:
# Stack the baseline runs and the CNN-BiLSTM rows, index them by
# (dataset, model), and give the MAWILab datasets their display names.
# NOTE(review): cnn_bilstm_metrics has no "benign_labels" column, so its rows
# have no value there when the CSV below is written - confirm that is intended.
results_benign = (
    pd.concat(
        [
            pd.read_csv("results/baseline_runs.csv", usecols=usecols),
            cnn_bilstm_metrics,
        ],
        sort=False,
        ignore_index=True,
    )
    .set_index(["dataset_name", "model_type"])
    .sort_index()
    .rename(
        index={
            "MAWILab-2011-01-n3_000_000": "MAWILab v1.1 2011-01",
            "MAWILab-2016-01-n3_000_000": "MAWILab v1.1 2016-01",
            "MAWILab-2021-01-n3_000_000": "MAWILab v1.1 2021-01",
        },
        level="dataset_name",
    )
)
results_benign.to_csv("results/baseline_metrics_benign.csv", index=True)

# Best score per dataset and metric, aligned row-by-row with results_benign;
# highlight_if_max reads it through the global name.
max_per_dataset = (
    results_benign
    .groupby("dataset_name")[metrics]
    .transform("max")
)
(
    results_benign[metrics]
    .style.format("{:.4f}")
    .apply(highlight_if_max, axis=1, subset=metrics)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,precision,recall,f1_score,roc_auc
dataset_name,model_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CIC-IDS-2017,CNN-BiLSTM,0.9842,0.9559,0.9647,0.9603,0.9985
CIC-IDS-2017,DecisionTreeClassifier,0.999,0.9965,0.9983,0.9974,0.999
CIC-IDS-2017,IsolationForest,0.7372,0.3705,0.47,0.4144,0.7403
CIC-IDS-2017,LogisticRegression,0.9257,0.8424,0.7679,0.8034,0.9718
CIC-IDS-2017,RandomForestClassifier,0.9989,0.9966,0.9978,0.9972,0.9999
CIC-IDS-2017,XGBClassifier,0.9986,0.9961,0.9971,0.9966,0.9999
MAWILab v1.1 2011-01,CNN-BiLSTM,0.846,0.8762,0.9147,0.895,0.9014
MAWILab v1.1 2011-01,DecisionTreeClassifier,0.8334,0.8853,0.8822,0.8837,0.7955
MAWILab v1.1 2011-01,IsolationForest,0.5569,0.7463,0.5798,0.6526,0.5332
MAWILab v1.1 2011-01,LogisticRegression,0.7899,0.8036,0.9361,0.8648,0.8178


## "benign" and "notice" labels count as benign

In [9]:
# Load the baseline runs from the "notice" results file, index them by
# (dataset, model), and save the sorted table.
results_benign_notice = (
    pd.read_csv("results/baseline_runs_notice.csv", usecols=usecols)
    .set_index(["dataset_name", "model_type"])
    .sort_index()
)
results_benign_notice.to_csv("results/baseline_metrics_benign_notice.csv", index=True)

# Best score per dataset and metric, aligned row-by-row with the table above;
# highlight_if_max reads it through the global name.
max_per_dataset = (
    results_benign_notice
    .groupby("dataset_name")[metrics]
    .transform("max")
)
(
    results_benign_notice[metrics]
    .style.format("{:.4f}")
    .apply(highlight_if_max, axis=1, subset=metrics)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,precision,recall,f1_score,roc_auc
dataset_name,model_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAWILab-2011-01-n3_000_000,DecisionTreeClassifier,0.7954,0.8504,0.8513,0.8509,0.7609
MAWILab-2011-01-n3_000_000,IsolationForest,0.5185,0.6964,0.5277,0.6004,0.5046
MAWILab-2011-01-n3_000_000,LogisticRegression,0.7594,0.7682,0.9297,0.8412,0.7766
MAWILab-2011-01-n3_000_000,RandomForestClassifier,0.8301,0.8572,0.9025,0.8793,0.8674
MAWILab-2011-01-n3_000_000,XGBClassifier,0.8219,0.8351,0.9223,0.8765,0.859
MAWILab-2021-01-n3_000_000,DecisionTreeClassifier,0.796,0.8586,0.858,0.8583,0.7522
MAWILab-2021-01-n3_000_000,IsolationForest,0.506,0.7231,0.509,0.5974,0.4951
MAWILab-2021-01-n3_000_000,LogisticRegression,0.7601,0.7894,0.9094,0.8452,0.7847
MAWILab-2021-01-n3_000_000,RandomForestClassifier,0.8227,0.8661,0.8917,0.8787,0.8833
MAWILab-2021-01-n3_000_000,XGBClassifier,0.8172,0.8609,0.8899,0.8752,0.8691


## "benign", "notice" and "suspicious" labels count as benign

In [10]:
# Load the baseline runs from the "notice_suspicious" results file, index
# them by (dataset, model), and save the sorted table.
results_benign_notice_suspicious = (
    pd.read_csv("results/baseline_runs_notice_suspicious.csv", usecols=usecols)
    .set_index(["dataset_name", "model_type"])
    .sort_index()
)
results_benign_notice_suspicious.to_csv(
    "results/baseline_metrics_benign_notice_suspicious.csv",
    index=True,
)

# Best score per dataset and metric, aligned row-by-row with the table above;
# highlight_if_max reads it through the global name.
max_per_dataset = (
    results_benign_notice_suspicious
    .groupby("dataset_name")[metrics]
    .transform("max")
)
(
    results_benign_notice_suspicious[metrics]
    .style.format("{:.4f}")
    .apply(highlight_if_max, axis=1, subset=metrics)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,precision,recall,f1_score,roc_auc
dataset_name,model_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAWILab-2011-01-n3_000_000,DecisionTreeClassifier,0.7274,0.7312,0.7305,0.7309,0.7265
MAWILab-2011-01-n3_000_000,IsolationForest,0.4951,0.5017,0.4912,0.4964,0.4765
MAWILab-2011-01-n3_000_000,LogisticRegression,0.6029,0.587,0.7286,0.6502,0.6467
MAWILab-2011-01-n3_000_000,RandomForestClassifier,0.7534,0.7437,0.7833,0.763,0.8285
MAWILab-2011-01-n3_000_000,XGBClassifier,0.6741,0.6396,0.8173,0.7176,0.7394
MAWILab-2021-01-n3_000_000,DecisionTreeClassifier,0.751,0.7963,0.7993,0.7978,0.7402
MAWILab-2021-01-n3_000_000,IsolationForest,0.4061,0.5252,0.3477,0.4184,0.4503
MAWILab-2021-01-n3_000_000,LogisticRegression,0.6629,0.6939,0.8077,0.7465,0.7122
MAWILab-2021-01-n3_000_000,RandomForestClassifier,0.7696,0.8091,0.8181,0.8135,0.855
MAWILab-2021-01-n3_000_000,XGBClassifier,0.7255,0.7443,0.8428,0.7905,0.7976


## Overview

In [11]:
# Tag every row of the first table with the label set it was evaluated under
# (this also fills the CNN-BiLSTM rows, which have no value in this column).
results_benign['benign_labels'] = 'benign'

# results_benign already uses the MAWILab display names, while the other two
# tables still carry the raw dataset identifiers. Apply the same mapping to
# the combined frame so each dataset forms a single "dataset_name" group;
# otherwise the per-dataset maxima and the sort order are split in two.
overview = pd.concat([
    results_benign,
    results_benign_notice,
    results_benign_notice_suspicious
]).rename(index={
    "MAWILab-2011-01-n3_000_000": "MAWILab v1.1 2011-01",
    "MAWILab-2016-01-n3_000_000": "MAWILab v1.1 2016-01",
    "MAWILab-2021-01-n3_000_000": "MAWILab v1.1 2021-01"
}, level="dataset_name").reset_index().set_index(
    ['dataset_name', 'benign_labels', 'model_type']
).sort_index()

# Best score per dataset and metric across all label settings and models,
# aligned row-by-row with `overview`; highlight_if_max reads it through the
# global name.
max_per_dataset = overview.groupby("dataset_name")[metrics].transform("max")
overview[metrics].style.format("{:.4f}").apply(
    highlight_if_max,
    subset=metrics,
    axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,precision,recall,f1_score,roc_auc
dataset_name,benign_labels,model_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CIC-IDS-2017,benign,CNN-BiLSTM,0.9842,0.9559,0.9647,0.9603,0.9985
CIC-IDS-2017,benign,DecisionTreeClassifier,0.999,0.9965,0.9983,0.9974,0.999
CIC-IDS-2017,benign,IsolationForest,0.7372,0.3705,0.47,0.4144,0.7403
CIC-IDS-2017,benign,LogisticRegression,0.9257,0.8424,0.7679,0.8034,0.9718
CIC-IDS-2017,benign,RandomForestClassifier,0.9989,0.9966,0.9978,0.9972,0.9999
CIC-IDS-2017,benign,XGBClassifier,0.9986,0.9961,0.9971,0.9966,0.9999
MAWILab v1.1 2011-01,benign,CNN-BiLSTM,0.846,0.8762,0.9147,0.895,0.9014
MAWILab v1.1 2011-01,benign,DecisionTreeClassifier,0.8334,0.8853,0.8822,0.8837,0.7955
MAWILab v1.1 2011-01,benign,IsolationForest,0.5569,0.7463,0.5798,0.6526,0.5332
MAWILab v1.1 2011-01,benign,LogisticRegression,0.7899,0.8036,0.9361,0.8648,0.8178
