## CICIDS-2017 dataset ML exploitation

**Credit**: the processing is inspired by M. Lanvin's work, [_Errors in the CIC-IDS2017 dataset and the significant differences in detection performances it makes_](https://link.springer.com/chapter/10.1007/978-3-031-31108-6_2), whose code is hosted on [Gitlab](https://gitlab.inria.fr/mlanvin/crisis2022/-/tree/main/Performance_Evaluation).

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import time

## ML 
## Common tools
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

## Models
from sklearn.naive_bayes import BernoulliNB 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

### 1. Data preprocessing

In [2]:
# Load the dataset; the cells below rely on its "label" column, where "benign" marks normal traffic
df = pd.read_csv("friday_dataset.csv")

In [3]:
def fine_process(df):
    """
    Return cleaned x, y datasets.

    Feature columns holding fewer than two distinct values are removed and
    reported on stdout. The input dataframe itself is left untouched.

    input: df (dataframe), must contain a "label" column
    outputs: x (dataframe of features), y (series of labels)
    """
    # remove single value columns
    col_to_drop = []
    for c in df.columns:
        # The target is never a candidate: if only one class is present the
        # "label" column must survive so it can still be returned as y.
        if c == "label":
            continue
        if len(df[c].unique()) < 2:
            print(f"column {c} will be dropped")
            col_to_drop.append(c)
    # Build a new frame instead of dropping in place, so the caller's
    # dataframe keeps all of its columns.
    return df.drop(col_to_drop + ["label"], axis=1), df["label"]

Extract train set

**Warning**: the proportions below are taken with respect to the number of samples of each attack type, not the full dataset size.

In [4]:
# Fraction of each attack class that is drawn for the train set
train_size = 0.7
# Complement of train_size
test_size = 1 - train_size

In [5]:
# Seed NumPy's global random state so the np.random.choice draws below are repeatable
np.random.seed(seed=123)

In [6]:
train_samples = []
n_train = 0

# Draw train_size of every attack class; benign traffic is handled afterwards
for attack in df["label"].unique():
    if attack == "benign":
        continue
    attack_rows = df.loc[df["label"] == attack]
    n_rows = attack_rows.shape[0]
    n_attack = round(train_size * n_rows)
    # Positions (not index labels) of the rows kept for training
    picked = np.random.choice(np.arange(n_rows), size=n_attack, replace=False)
    train_samples.append(attack_rows.iloc[picked])
    n_train += n_attack
    print(f"{n_attack} samples randomly selected for {attack}")

# Balance the classes: as many benign rows as attack rows drawn above
benign_rows = df.loc[df["label"] == "benign"]
picked = np.random.choice(np.arange(benign_rows.shape[0]), size=n_train, replace=False)
train_samples.append(benign_rows.iloc[picked])
print(f"{n_train} benign samples randomly selected")

# Assemble the train frame; rows keep the index labels they have in df
train_df = pd.concat(train_samples)


517 samples randomly selected for Bot
112094 samples randomly selected for portscan
66978 samples randomly selected for ddos
179589 benign samples randomly selected


Extract test set

**Note**: given the size of the initial dataset, only a portion of the benign samples is actually retained.

In [7]:
# Every row that was not drawn for training is a test candidate.
# idx_test holds index *labels* (train_df keeps the labels of df), so the rows
# are selected with .loc; positional .iloc would only be right for a default
# 0..n-1 index. Sorting makes the row order deterministic.
idx_test = set(df.index).difference(train_df.index)
test_df = df.loc[sorted(idx_test)]

In [8]:
test_samples = []

# Keep every remaining attack row, grouped by attack type
for attack in test_df["label"].unique():
    if attack == "benign":
        continue
    attack_rows = test_df.loc[test_df["label"] == attack]
    test_samples.append(attack_rows)
    print(f"{attack_rows.shape[0]} samples selected for {attack}")

# Benign rows: the draw size equals the number of benign rows, so this is a
# random reordering of all of them rather than a sub-sample.
benign_rows = test_df.loc[test_df["label"] == "benign"]
n_benign = benign_rows.shape[0]
picked = np.random.choice(np.arange(n_benign), size=n_benign, replace=False)
test_samples.append(benign_rows.iloc[picked])
print(f"{n_benign} benign samples randomly selected")

# Replace test_df with the assembled frame (attack rows first, benign rows last)
test_df = pd.concat(test_samples)

221 samples selected for Bot
48040 samples selected for portscan
28705 samples selected for ddos
111190 benign samples randomly selected


### 2. Model training

In [9]:
# Split the train frame into features and labels; fine_process also removes single-valued columns
x_train, y_train = fine_process(train_df)

column conn_state_S2 will be dropped


Shuffle datasets

In [10]:
# frac=1. returns all rows in random order
x_train = x_train.sample(frac=1.)
# Realign the labels on the shuffled index so rows and labels still match
y_train = y_train.loc[x_train.index]

Normalize

In [11]:
# Fit the scaler on the train features; the same fitted scaler is applied to the test features later
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(x_train)

Train

In [12]:
# --- Parameter for Tree based models --- #
# None leaves the depth of the trees unbounded; set an integer to cap it
max_depth = None


def _timed_fit(classifier, features, targets, name):
    """Fit `classifier` in place, print the elapsed time in minutes and return it."""
    # perf_counter is the clock intended for measuring durations; time.time
    # follows the wall clock, which can be adjusted while the fit is running.
    start = time.perf_counter()
    classifier.fit(features, targets)
    print(f"Training time {name}:", (time.perf_counter() - start) / 60, "minutes")
    return classifier


## SVM Classifier, fitted on binary targets (0 = benign, 1 = any attack)
SVM_Classifier = _timed_fit(
    SVC(), X_train_normalized, (y_train != "benign").astype(int), "SVM classifier"
)

## Random Forest Classifier, fitted on the original labels
RF_Classifier = _timed_fit(
    RandomForestClassifier(max_depth=max_depth), X_train_normalized, y_train, "Random Forest classifier"
)

## NAÏVE BAYESIAN Classifier, fitted on the original labels
NB_Classifier = _timed_fit(
    BernoulliNB(), X_train_normalized, y_train, "Naïve Baysian classifier"
)

## DECISION TREE Classifier (gini), fitted on the original labels.
## Its depth is only limited when max_depth above is not None.
DTC_Classifier_limited_depth_gini = _timed_fit(
    DecisionTreeClassifier(criterion='gini', max_depth=max_depth, random_state=0),
    X_train_normalized,
    y_train,
    "Decision Tree with limited depth classifier (gini)",
)

Training time SVM classifier: 0.9013434171676635 minutes
Training time Random Forest classifier: 0.26973368326822916 minutes
Training time Naïve Baysian classifier: 0.021526265144348144 minutes
Training time Decision Tree with limited depth classifier (gini): 0.020609883467356364 minutes


In [13]:
# Save models in a list
models = []
models.append(('SVM Classifier', SVM_Classifier))
models.append(('Naive Bayes Classifier', NB_Classifier))
models.append(('Decision Tree Classifier', DTC_Classifier_limited_depth_gini))
models.append(('Random Forest', RF_Classifier))

Evaluate

In [14]:
# Rows of [FPR, precision, recall, f1-score, support, model name, experiment]
perfs = []
for name, model in models:
    print(name, "...")
    true, pred = y_train, model.predict(X_train_normalized)

    # The SVM was fitted on 0/1 targets: map its predictions back to strings
    # so that every model goes through the same binarisation below.
    if name in ["SVM Classifier"]:
        pred = pred.astype(str)
        pred[pred == '0'] = "benign"
        pred[pred == '1'] = "attack"

    # Binary view of the problem: 0 = benign, 1 = any attack
    true, pred = (true != "benign").astype(int), (pred != "benign").astype(int)

    accuracy = metrics.accuracy_score(true, pred)
    # Stored as conf_mat so the `confusion_matrix` function imported from
    # sklearn.metrics is not overwritten; labels=[0, 1] guarantees a 2x2 matrix.
    conf_mat = metrics.confusion_matrix(true, pred, labels=[0, 1])
    tn, fp = conf_mat[0]
    fpr = fp / (fp + tn)
    classification_dict = metrics.classification_report(true, pred, output_dict=True)
    classification = metrics.classification_report(true, pred)

    ## Save
    perfs.append([fpr] + list(classification_dict['1'].values()) + [name] + ["Train"])
    print()
    print('============================== {} Model Evaluation =============================='.format(name))
    print()
    print("Model Accuracy:" "\n", accuracy)
    print()
    print("Confusion matrix:" "\n", conf_mat)
    print()
    print("Classification report:" "\n", classification)
    print()

SVM Classifier ...


Model Accuracy:
 0.9982877570452533

Confusion matrix:
 [[179566     23]
 [   592 178997]]

Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    179589
           1       1.00      1.00      1.00    179589

    accuracy                           1.00    359178
   macro avg       1.00      1.00      1.00    359178
weighted avg       1.00      1.00      1.00    359178


Naive Bayes Classifier ...


Model Accuracy:
 0.998195880594023

Confusion matrix:
 [[179573     16]
 [   632 178957]]

Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    179589
           1       1.00      1.00      1.00    179589

    accuracy                           1.00    359178
   macro avg       1.00      1.00      1.00    359178
weighted avg       1.00      1.00      1.00    359178


Decision Tree Classifier ...


Model Accuracy:
 1.0

Confusion 

### 3. Model testing

In [15]:
# The test features must be exactly the columns the scaler and the models were
# fitted on. Single-valued columns are therefore not re-pruned on the test set:
# a column that is constant here but was kept for training has to stay.
y_test = test_df["label"]
x_test = test_df.drop("label", axis=1)
features_to_drop = [f for f in x_test.columns if f not in x_train.columns]
print("drop the following features: \n--> %s" % '\n--> '.join(features_to_drop))
# Select (and order) the columns as in x_train
x_test = x_test[x_train.columns]

drop the following features: 
--> conn_state_S2


Shuffle datasets

In [16]:
# frac=1. returns all rows in random order
x_test = x_test.sample(frac=1.)
# Realign the labels on the shuffled index so rows and labels still match
y_test = y_test.loc[x_test.index]

Normalize

In [17]:
# transform only: reuse the statistics already fitted on the train features
X_test_normalized = scaler.transform(x_test)

Evaluate

In [18]:
for name, model in models:
    true, pred = y_test, model.predict(X_test_normalized)

    # The SVM was fitted on 0/1 targets: map its predictions back to strings
    # so that every model goes through the same binarisation below.
    if name in ["SVM Classifier"]:
        pred = pred.astype(str)
        pred[pred == '0'] = "benign"
        pred[pred == '1'] = "attack"

    # Binary view of the problem: 0 = benign, 1 = any attack
    true, pred = (true != "benign").astype(int), (pred != "benign").astype(int)

    accuracy = metrics.accuracy_score(true, pred)
    # Stored as conf_mat so the `confusion_matrix` function imported from
    # sklearn.metrics is not overwritten; labels=[0, 1] guarantees a 2x2 matrix.
    conf_mat = metrics.confusion_matrix(true, pred, labels=[0, 1])
    [[tn, fp], [fn, tp]] = conf_mat
    fpr = fp / (fp + tn)
    classification_dict = metrics.classification_report(true, pred, output_dict=True)
    classification = metrics.classification_report(true, pred)

    # Save, then display exactly the row that was stored
    row = [fpr] + list(classification_dict['1'].values()) + [name] + ["Test"]
    perfs.append(row)

    print(row)

[0.00011691698893785412, 0.9998305813665568, 0.9968037834888133, 0.9983148881905543, 76966.0, 'SVM Classifier', 'Test']
[5.3961687202086516e-05, 0.9999217772215269, 0.9965179429878128, 0.9982169584173878, 76966.0, 'Naive Bayes Classifier', 'Test']
[5.3961687202086516e-05, 0.9999220465382167, 0.9999610217498636, 0.9999415337642512, 76966.0, 'Decision Tree Classifier', 'Test']
[8.993614533681087e-06, 0.9999870067435002, 0.999948028999818, 0.9999675174918305, 76966.0, 'Random Forest', 'Test']


In [19]:
# One row per (model, experiment). drop_duplicates discards exact repeats,
# e.g. rows appended again if an evaluation cell was run more than once.
perf_columns = ["FPR", "Prec", "TPR", "F1-score", "Support", "Model", "Exp"]
df_perfs = pd.DataFrame(perfs, columns=perf_columns).drop_duplicates()
df_perfs.sort_values(by="Model")

Unnamed: 0,FPR,Prec,TPR,F1-score,Support,Model,Exp
2,0.0,1.0,1.0,1.0,179589.0,Decision Tree Classifier,Train
6,5.4e-05,0.999922,0.999961,0.999942,76966.0,Decision Tree Classifier,Test
1,8.9e-05,0.999911,0.996481,0.998193,179589.0,Naive Bayes Classifier,Train
5,5.4e-05,0.999922,0.996518,0.998217,76966.0,Naive Bayes Classifier,Test
3,0.0,1.0,1.0,1.0,179589.0,Random Forest,Train
7,9e-06,0.999987,0.999948,0.999968,76966.0,Random Forest,Test
0,0.000128,0.999872,0.996704,0.998285,179589.0,SVM Classifier,Train
4,0.000117,0.999831,0.996804,0.998315,76966.0,SVM Classifier,Test


### 4. Results viz

In [20]:
# One grouped bar chart per metric, comparing the models on Train vs Test.
# The title is passed to px.bar so that each figure is labelled.
for metric in ['TPR', 'FPR', 'F1-score']:
    title = f'Evolution of {metric} of supervised approaches given labelling methods'

    fig = px.bar(df_perfs, x='Exp', y=metric, color='Model', barmode='group',
                 title=title, width=1000, height=600)
    fig.show()