In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder


In [2]:
# Base directory for the processed data; the dataset CSV and the models/
# folder are both resolved relative to it.
path = '../../data_processed/'

Mounted at /content/drive


In [3]:
# Read the combined dataset from the processed-data directory.
combinedDf = pd.read_csv(f"{path}updated_Combined.csv")

# Summarise columns, non-null counts and dtypes as a quick sanity check.
combinedDf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1215890 entries, 0 to 1215889
Data columns (total 24 columns):
 #   Column       Non-Null Count    Dtype  
---  ------       --------------    -----  
 0   Unnamed: 0   1215890 non-null  float64
 1   Seq          1215890 non-null  float64
 2   Dur          1215890 non-null  float64
 3   RunTime      1215890 non-null  float64
 4   Mean         1215890 non-null  float64
 5   Sum          1215890 non-null  float64
 6   Min          1215890 non-null  float64
 7   Max          1215890 non-null  float64
 8   TotPkts      1215890 non-null  float64
 9   SrcPkts      1215890 non-null  float64
 10  DstPkts      1215890 non-null  float64
 11  TotBytes     1215890 non-null  float64
 12  SrcBytes     1215890 non-null  float64
 13  DstBytes     1215890 non-null  float64
 14  Offset       1215890 non-null  float64
 15  sMeanPktSz   1215890 non-null  float64
 16  dMeanPktSz   1215890 non-null  float64
 17  Load         1215890 non-null  float64
 18  Sr

In [4]:
# Input columns passed to the classifiers (order is preserved when the
# DataFrame is sliced with this list).
features = [
    'Seq', 'Dur', 'RunTime',
    'Mean', 'Sum', 'Min', 'Max',
    'TotPkts', 'SrcPkts', 'DstPkts',
    'TotBytes', 'SrcBytes', 'DstBytes',
    'Offset', 'sMeanPktSz', 'dMeanPktSz',
    'Load', 'SrcLoad', 'Rate', 'SrcRate',
]

# Label column to predict.
target = 'Attack Type'

In [8]:
# Load the previously trained multi-class models from disk.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files must
# come from a trusted source.

models_path = f"{path}models/"

dt_model_multi = joblib.load(f"{models_path}dt_model_multi.pkl")
logistic_model_multi = joblib.load(f"{models_path}logistic_model_multi.pkl")
mlp_model_multi = joblib.load(f"{models_path}mlp_model_multi.pkl")
rf_model_multi = joblib.load(f"{models_path}rf_model_multi.pkl")
knn_model_multi = joblib.load(f"{models_path}knn_model_multi.pkl")
gbm_model_multi = joblib.load(f"{models_path}gbm_multi.pkl")


In [9]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    combinedDf[features],
    combinedDf[target],
    test_size=0.2,
    random_state=42,
)

In [10]:
# Define the soft-voting ensemble over the six loaded models.
# With voting='soft' the predicted class is the one with the highest combined
# predicted probability across the base estimators (as opposed to
# majority/"max" voting, which would be voting='hard').
ensemble_model_multi = VotingClassifier(
    voting='soft',
    estimators=[
        ('Decision_tree', dt_model_multi),
        ('logistic_regression', logistic_model_multi),
        ('MultiLayerPerceptron', mlp_model_multi),
        ('RandomForestClassifier', rf_model_multi),
        ('KNN classifier', knn_model_multi),
        ('GBM model', gbm_model_multi),
    ],
)

In [11]:
# Fit the ensemble on the training split, then predict labels for the
# held-out test rows (fit() returns the fitted estimator, so the calls chain).
# NOTE(review): the recorded output shows a solver hitting its iteration limit
# (the warning points at logistic regression); consider scaling the features
# or raising max_iter on that model.
attack_type_pred = ensemble_model_multi.fit(X_train, y_train).predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
# Per-class precision / recall / F1 on the test split, reported with
# 8 decimal places.
classification_rep = classification_report(
    y_test,
    attack_type_pred,
    digits=8,
)
print("Classification Report:\n", classification_rep)


Classification Report:
                 precision    recall  f1-score   support

        Benign  0.99926904 0.99896653 0.99911777     95794
     HTTPFlood  0.99875869 0.99992898 0.99934349     28163
     ICMPFlood  0.99581590 1.00000000 0.99790356       238
      SYNFlood  1.00000000 1.00000000 1.00000000      1919
       SYNScan  0.99922958 0.99922958 0.99922958      3894
   SlowrateDoS  0.99882888 0.99972419 0.99927634     14503
TCPConnectScan  0.99312377 0.99777942 0.99544615      4053
      UDPFlood  0.99970458 0.99940933 0.99955693     91421
       UDPScan  0.99842915 0.99530222 0.99686324      3193

      accuracy                      0.99923513    243178
     macro avg  0.99812884 0.99892670 0.99852634    243178
  weighted avg  0.99923573 0.99923513 0.99923523    243178



In [13]:
# Persist the fitted soft-voting ensemble so it can be reloaded later without
# refitting. The object saved must be the ensemble itself: `gbm_model` is not
# defined anywhere in this notebook, and the target file is the soft-voting
# model.
joblib.dump(ensemble_model_multi, path + 'models/softvoting_multi.pkl')

['/content/drive/MyDrive/models/softvoting_multi.pkl']