## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


## Import Dataset

In [2]:
df = pd.read_csv('CICIOT2023.csv')

In [3]:
df.columns

Index(['flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate',
       'Srate', 'Drate', 'fin_flag_number', 'syn_flag_number',
       'rst_flag_number', 'psh_flag_number', 'ack_flag_number',
       'ece_flag_number', 'cwr_flag_number', 'ack_count', 'syn_count',
       'fin_count', 'urg_count', 'rst_count', 'HTTP', 'HTTPS', 'DNS', 'Telnet',
       'SMTP', 'SSH', 'IRC', 'TCP', 'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
       'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number',
       'Magnitude', 'Radius', 'Covariance', 'Variance', 'Weight', 'label',
       'Binary Class', 'Multiclass'],
      dtype='object')

In [4]:
df = df.drop(columns=['Binary Class', 'label'])

In [5]:
df[['Multiclass']].value_counts()

Multiclass
DDoS          5338243
DoS           1269264
Mirai          413754
Benign         172642
Spoofing        76807
Recon           55531
Web              3836
BruteForce       1988
Name: count, dtype: int64

## Encoding

In [6]:
encoder = LabelEncoder()
df['Encoded Label'] = encoder.fit_transform(df['Multiclass'])

In [7]:
df[['Encoded Label', 'Multiclass']].value_counts()

Encoded Label  Multiclass
2              DDoS          5338243
3              DoS           1269264
4              Mirai          413754
0              Benign         172642
6              Spoofing        76807
5              Recon           55531
7              Web              3836
1              BruteForce       1988
Name: count, dtype: int64

In [8]:
encoder = LabelEncoder()
df['Multiclass'] = encoder.fit_transform(df['Multiclass'])

## Feature Scaling

In [9]:
X = df.drop(columns=['Multiclass', 'Encoded Label'])
y = df['Multiclass']

In [10]:
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
# Fit on training data
scaler.fit(X)
X = scaler.transform(X)

## Data Splitting

In [11]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Further split the testing set into testing and validation sets
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# Print the sizes of the resulting datasets
print("Training set size:", X_train.shape[0])
print("Validation set size:", X_val.shape[0])
print("Testing set size:", X_test.shape[0])

Training set size: 5132445
Validation set size: 1099810
Testing set size: 1099810


### Computing sample_weight

In [12]:
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
weights = dict(zip(np.unique(y_train), class_weights))

In [13]:
weights

{0: 5.311638434216736,
 1: 460.5568018664752,
 2: 0.17170614405793022,
 3: 0.7222090419711726,
 4: 2.212703316525374,
 5: 16.537922434459826,
 6: 11.849718789826564,
 7: 236.64906860936924}

In [14]:
# Step 2: Map each y_train label to its corresponding weight from the dictionary
sample_weight = np.array([weights[label] for label in y_train])
sample_weight

array([0.72220904, 0.72220904, 0.17170614, ..., 0.17170614, 0.72220904,
       0.17170614])

### Model Training

In [15]:
import xgboost as xgb
# Create an XGBoost model with custom class weights
model = xgb.XGBClassifier(random_state=42)
model.fit(X_train, y_train, sample_weight=sample_weight)

### Model Evaluation (with default hyperparameter)

In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Predict on the test set
y_pred = model.predict(X_test)
# Evaluate the model
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Precision: ",precision)
print("Recall: ",recall)
print("F1 Score: ",f1)

Precision:  0.9962280272259669
Recall:  0.9953291932242841
F1 Score:  0.9956701145193945


In [17]:
y_pred_train =  model.predict(X_train)
y_pred_test = model.predict(X_test)
# Calculate F1 score on the training set
f1_train = f1_score(y_train, y_pred_train, average='weighted')
# Calculate F1 score on the test set
f1_test = f1_score(y_test, y_pred_test, average='weighted')
print("F1 score on the training set: ", f1_train)
print("F1 score on the test set: ", f1_test)

F1 score on the training set:  0.996524456356845
F1 score on the test set:  0.9956701145193945


In [18]:
#class_weights_val = compute_class_weight(class_weight='balanced', classes=np.unique(y_val), y=y_val)
#weights_val = dict(zip(np.unique(y_val), class_weights_val))
#sample_weight_val = np.array([weights[label] for label in y_val])
#sample_weight_val

### Tuning reg_alpha (L1 Regularization)

In [19]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, f1_score

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'reg_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
 }

# Create a GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5, verbose=2)

# Fit the grid search to the validation data
grid_search.fit(X_val, y_val)

# Print the best parameters and the best average F1 score
print("Best parameters found: ", grid_search.best_params_)
print("Best average F1 score found: ", grid_search.best_score_)

Fitting 5 folds for each of 11 candidates, totalling 55 fits
[CV] END ........................................reg_alpha=0; total time= 1.1min
[CV] END ........................................reg_alpha=0; total time= 1.1min
[CV] END ........................................reg_alpha=0; total time= 1.0min
[CV] END ........................................reg_alpha=0; total time= 1.0min
[CV] END ........................................reg_alpha=0; total time= 1.0min
[CV] END ......................................reg_alpha=0.1; total time= 1.0min
[CV] END ......................................reg_alpha=0.1; total time= 1.2min
[CV] END ......................................reg_alpha=0.1; total time= 1.2min
[CV] END ......................................reg_alpha=0.1; total time= 1.2min
[CV] END ......................................reg_alpha=0.1; total time= 1.2min
[CV] END ......................................reg_alpha=0.2; total time= 1.2min
[CV] END ......................................r

### Tuning reg_lambda (L2 Regularization)


In [20]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, f1_score

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'reg_lambda': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
 }

# Create a GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5, verbose=2)

# Fit the grid search to the validation data
grid_search.fit(X_val, y_val)

# Print the best parameters and the best average F1 score
print("Best parameters found: ", grid_search.best_params_)
print("Best average F1 score found: ", grid_search.best_score_)

Fitting 5 folds for each of 11 candidates, totalling 55 fits
[CV] END .......................................reg_lambda=0; total time= 1.2min
[CV] END .......................................reg_lambda=0; total time= 1.0min
[CV] END .......................................reg_lambda=0; total time= 1.2min
[CV] END .......................................reg_lambda=0; total time= 1.2min
[CV] END .......................................reg_lambda=0; total time= 1.1min
[CV] END .....................................reg_lambda=0.1; total time= 1.2min
[CV] END .....................................reg_lambda=0.1; total time= 1.2min
[CV] END .....................................reg_lambda=0.1; total time= 1.0min
[CV] END .....................................reg_lambda=0.1; total time= 1.1min
[CV] END .....................................reg_lambda=0.1; total time= 1.1min
[CV] END .....................................reg_lambda=0.2; total time= 1.1min
[CV] END .....................................re

### Model Training & Evaluation (after hyperparameter tuning)

In [21]:
import xgboost as xgb
# Create an XGBoost model with custom class weights
model = xgb.XGBClassifier(reg_alpha=0.5 , reg_lambda=0.7, random_state=42)
model.fit(X_train, y_train, sample_weight=sample_weight)

In [22]:
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy: {:.4f}".format(accuracy))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 Score: {:.4f}".format(f1))

Accuracy: 0.9953
Precision: 0.9963
Recall: 0.9953
F1 Score: 0.9957


In [23]:
y_pred_train =  model.predict(X_train)
y_pred_test = model.predict(X_test)
# Calculate F1 score on the training set
f1_train = f1_score(y_train, y_pred_train, average='weighted')
# Calculate F1 score on the test set
f1_test = f1_score(y_test, y_pred_test, average='weighted')
print("F1 score on the training set: ", f1_train)
print("F1 score on the test set: ", f1_test)

F1 score on the training set:  0.9964967485420452
F1 score on the test set:  0.9956961437351703


## Classification Prediction

In [24]:
test = pd.read_csv('test_data.csv')
X_pred = test.drop(columns=['label',
       'Binary Class', 'Multiclass'])

In [25]:
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
# Fit on training data
scaler.fit(X_pred)
X_pred = scaler.transform(X_pred)

In [26]:
prediction = model.predict(X_pred)


In [27]:
labels = ['benign', 'BruteForce', 'DDoS', 'DoS', 'Mirai', 'Recon', 'Spoofing', 'Web']
predicted_labels = [labels[pred] for pred in prediction]

In [28]:
# Assuming predicted_labels is your list of predicted labels
if 'benign' in predicted_labels:
    print("There are entries with the value 'benign' in the list.")
else:
    print("There are no entries with the value 'benign' in the list.")

There are entries with the value 'benign' in the list.


In [29]:
# Assuming X_test is your features numpy array and y_pred contains your model predictions
# Convert X_test to a DataFrame for easier manipulation
df1 = pd.DataFrame(X_pred, columns=[
    'flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate',
       'Srate', 'Drate', 'fin_flag_number', 'syn_flag_number',
       'rst_flag_number', 'psh_flag_number', 'ack_flag_number',
       'ece_flag_number', 'cwr_flag_number', 'ack_count', 'syn_count',
       'fin_count', 'urg_count', 'rst_count', 'HTTP', 'HTTPS', 'DNS', 'Telnet',
       'SMTP', 'SSH', 'IRC', 'TCP', 'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
       'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number',
       'Magnitude', 'Radius', 'Covariance', 'Variance', 'Weight'])

# Add the predicted labels to the DataFrame
df1['Predicted Label'] = predicted_labels

# Now X_pred is the final DataFrame with features and their predicted labels
df1

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,Std,Tot size,IAT,Number,Magnitude,Radius,Covariance,Variance,Weight,Predicted Label
0,2.019146,113.788825,1.375,0.00,45.132922,45.132922,0.0,0.0,0.0,0.0,...,0.000000,-0.985222,-0.066789,0.0,-0.989393,0.000000,0.000000,0.000,0.0,DDoS
1,26.079252,0.223334,0.000,0.00,-0.131608,-0.131608,0.0,0.0,1.0,0.0,...,0.000000,0.000000,0.871297,0.0,0.000000,0.000000,0.000000,0.000,0.0,DDoS
2,13.239858,1711.503412,1.250,1.08,1.575604,1.575604,0.0,0.0,0.0,0.0,...,1480.146796,209.354680,0.903580,0.0,81.858470,1538.526634,237463.840359,11.875,0.0,DDoS
3,0.000000,-0.002233,-0.125,-0.64,-0.113741,-0.113741,0.0,0.0,0.0,0.0,...,0.185151,0.014778,-0.178550,0.0,0.004102,0.193382,0.070505,0.625,0.0,DDoS
4,0.000000,-0.223334,-0.625,0.00,-0.130680,-0.130680,0.0,0.0,0.0,0.0,...,0.000000,-2.955665,0.015734,0.0,-3.094882,0.000000,0.000000,0.000,0.0,DDoS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446790,0.000000,0.000000,0.000,0.00,-0.048189,-0.048189,0.0,0.0,0.0,0.0,...,0.000000,0.000000,-0.209351,0.0,0.000000,0.000000,0.000000,0.000,0.0,DDoS
446791,44.645372,0.223334,0.000,0.00,-0.134241,-0.134241,0.0,0.0,0.0,0.0,...,0.000000,0.000000,-0.649953,0.0,0.000000,0.000000,0.000000,0.000,0.0,DoS
446792,0.000000,0.000000,0.000,0.00,0.009437,0.009437,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.762220,0.0,0.000000,0.000000,0.000000,0.000,0.0,DDoS
446793,0.000000,-0.223334,-0.625,0.00,4.101880,4.101880,0.0,0.0,0.0,0.0,...,0.000000,-2.955665,0.094545,0.0,-3.094882,0.000000,0.000000,0.000,0.0,DDoS


In [30]:
test['Multiclass'].value_counts()

Multiclass
DDoS          325623
DoS            77317
Mirai          25166
Benign         10420
Spoofing        4659
Recon           3287
Web              219
BruteForce       104
Name: count, dtype: int64

In [31]:
df1['Predicted Label'].value_counts()

Predicted Label
DDoS          325548
DoS            77377
Mirai          25171
benign         10273
Spoofing        4359
Recon           3149
Web              632
BruteForce       286
Name: count, dtype: int64

In [32]:
test

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,IAT,Number,Magnitude,Radius,Covariance,Variance,Weight,label,Binary Class,Multiclass
0,0.212275,27567.00,17,64.00,5216.034882,5216.034882,0.0,0,0,0,...,8.310633e+07,9.5,10.000000,0.000000,0.000000,0.00,141.55,DDoS-UDP_Flood,Attack,DDoS
1,2.741741,108.00,6,64.00,0.729464,0.729464,0.0,0,1,0,...,8.336180e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55,DDoS-SynonymousIP_Flood,Attack,DDoS
2,1.391921,413878.41,16,65.08,197.431620,197.431620,0.0,0,0,0,...,8.337060e+07,9.5,42.850051,778.373358,322704.925017,0.95,141.55,DDoS-UDP_Fragmentation,Attack,DDoS
3,0.000000,53.46,5,63.36,2.787987,2.787987,0.0,0,0,0,...,8.307589e+07,9.5,10.393931,0.097836,0.095814,0.05,141.55,DDoS-TCP_Flood,Attack,DDoS
4,0.000000,0.00,1,64.00,0.836344,0.836344,0.0,0,0,0,...,8.312881e+07,9.5,9.165151,0.000000,0.000000,0.00,141.55,DDoS-ICMP_Flood,Attack,DDoS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446790,0.000000,54.00,6,64.00,10.340833,10.340833,0.0,0,0,0,...,8.306751e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55,DDoS-TCP_Flood,Attack,DDoS
446791,4.693618,108.00,6,64.00,0.426111,0.426111,0.0,0,0,0,...,8.294752e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55,DoS-TCP_Flood,Attack,DoS
446792,0.000000,54.00,6,64.00,16.980438,16.980438,0.0,0,0,0,...,8.333210e+07,9.5,10.392305,0.000000,0.000000,0.00,141.55,DDoS-PSHACK_Flood,Attack,DDoS
446793,0.000000,0.00,1,64.00,488.505008,488.505008,0.0,0,0,0,...,8.315027e+07,9.5,9.165151,0.000000,0.000000,0.00,141.55,DDoS-ICMP_Flood,Attack,DDoS


## Dashboard Development

In [33]:
import pickle

# For the baseline scenario or another model (make sure to define or adjust the scaler/model)
scaler_baseline = scaler
with open('robust_scaler_multi_all.pkl', 'wb') as f:
    pickle.dump(scaler_baseline, f)

model_baseline = model
model_baseline.save_model('multi_all.json')