In [2]:
pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/fe/df/e3a1f3f008db8d2b199ded2168014c7784b8027714b74d802c892815fd72/xgboost-2.1.2-py3-none-win_amd64.whl.metadata
  Downloading xgboost-2.1.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.2-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
    --------------------------------------- 1.8/124.9 MB 39.0 MB/s eta 0:00:04
   - -------------------------------------- 4.5/124.9 MB 48.6 MB/s eta 0:00:03
   -- ------------------------------------- 7.3/124.9 MB 58.1 MB/s eta 0:00:03
   -- ------------------------------------- 8.4/124.9 MB 53.7 MB/s eta 0:00:03
   -- ------------------------------------- 8.4/124.9 MB 53.7 MB/s eta 0:00:03
   -- ------------------------------------- 8.4/124.9 MB 53.7 MB/s eta 0:00:03
   -- ------------------------------------- 8.4/124.9 MB 53.7 MB/s eta 0:00:03
   --- ----------------

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import pickle
import xgboost as xgb

def hex_to_log(x):
    """Map a hexadecimal string to ``log1p`` of its integer value.

    Parameters
    ----------
    x : object
        Value to convert. Only ``str`` inputs are parsed, as base-16
        integers via ``int(x, 16)``.

    Returns
    -------
    float
        ``np.log1p(float(int(x, 16)))`` when ``x`` is a parseable hex string.
        ``0.0`` when ``x`` is not a string, is not valid hexadecimal, or
        encodes an integer too large to be converted to a float.
    """
    if not isinstance(x, str):
        # Non-string cells (for example NaN or already-numeric values) are
        # treated as "no value" rather than raising.
        return 0.0
    try:
        return np.log1p(float(int(x, 16)))
    except (ValueError, OverflowError):
        # ValueError: the text is not valid base-16.
        # OverflowError: the integer does not fit in a float; without this
        # a single oversized value would abort a whole Series.apply call.
        return 0.0

# Path to the labeled dataset CSV, relative to the notebook's working directory.
file_path = './datasets/1029_labeled_re.csv'

# Read the entire CSV into a DataFrame; column dtypes are inferred by pandas.
data = pd.read_csv(filepath_or_buffer=file_path)

# Print the inferred dtype of every column so that non-numeric columns,
# which need conversion before modelling, are visible up front.
print(data.dtypes)

Time                 float64
Source                 int64
Destination            int64
Protocol              object
Length                 int64
ID                    object
Data                  object
Same Data              int64
Strange Data           int64
Entropy              float64
Dos Attack            object
Fuzzing Attack        object
Replaying Attack      object
label                  int64
IAT                  float64
IAT_Anomaly            int64
Message_Frequency      int64
Frequency_Anomaly      int64
dtype: object


In [4]:
# --- Feature preparation ---------------------------------------------------
# Convert the hex-string columns to one numeric feature each via hex_to_log.
# The dtype guard keeps this cell safe to re-run: without it, a second run
# would pass the already-converted floats back through hex_to_log, which maps
# every non-string value to 0 and would silently zero out both features.
for hex_column in ('ID', 'Data'):
    if not pd.api.types.is_numeric_dtype(data[hex_column]):
        data[hex_column] = data[hex_column].apply(hex_to_log)

# Columns fed to the model; the target is the integer 'label' column.
features = ['Time', 'Length', 'ID', 'Data', 'Same Data', 'Entropy','IAT_Anomaly','Frequency_Anomaly']  
X = data[features]
y = data['label']

# --- Train / test split ----------------------------------------------------
# 70/30 random split with a fixed seed so the split is reproducible.
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# consider passing stratify=y so rare classes are represented in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# XGBoost's native training API consumes DMatrix objects.
train_dmatrix = xgb.DMatrix(data=X_train, label=y_train)
test_dmatrix = xgb.DMatrix(data=X_test, label=y_test)

# --- Model -----------------------------------------------------------------
params = {
    # multi:softmax makes predict() return the predicted class per row.
    'objective': 'multi:softmax',
    # One output class per distinct label value found in the data.
    'num_class': len(y.unique()),
    'max_depth': 6,
    'eta': 0.1,  # learning rate
    'seed': 42
}

xgb_model = xgb.train(
    params=params,
    dtrain=train_dmatrix,
    num_boost_round=100
)

# --- Evaluation on the held-out split --------------------------------------
y_pred = xgb_model.predict(test_dmatrix)

print(classification_report(y_test, y_pred))

# --- Persistence round-trip ------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code, so only load files
# produced by this notebook. XGBoost's documentation recommends
# Booster.save_model / load_model for storage that must survive library
# upgrades; the pickle format is kept here so existing consumers of
# 'xgb_model.pkl' keep working.
model_filename = 'xgb_model.pkl'

with open(model_filename, 'wb') as file:
    pickle.dump(xgb_model, file)
print(f"Model saved to {model_filename}")

with open(model_filename, 'rb') as file:
    loaded_model = pickle.load(file)
print("Model loaded successfully")

# Score the same test matrix with the reloaded model; its report should match
# the one printed above if the save/load round-trip preserved the model.
loaded_y_pred = loaded_model.predict(test_dmatrix)
print("Loaded model prediction complete")

print(classification_report(y_test, loaded_y_pred))


              precision    recall  f1-score   support

           0       0.98      0.98      0.98     39906
           1       1.00      1.00      1.00      7059
           2       0.55      0.54      0.55      2065
           3       0.71      0.80      0.75       159

    accuracy                           0.96     49189
   macro avg       0.81      0.83      0.82     49189
weighted avg       0.96      0.96      0.96     49189

Model saved to xgb_model.pkl
Model loaded successfully
Loaded model prediction complete
              precision    recall  f1-score   support

           0       0.98      0.98      0.98     39906
           1       1.00      1.00      1.00      7059
           2       0.55      0.54      0.55      2065
           3       0.71      0.80      0.75       159

    accuracy                           0.96     49189
   macro avg       0.81      0.83      0.82     49189
weighted avg       0.96      0.96      0.96     49189

