### 1. Combine `mon_features.pkl` & `unmon_features.pkl` into `features_df`

In [None]:
import pandas as pd
import numpy as np
import pickle

# Locations of the pickled feature tables (monitored / unmonitored traces).
MON_FILE_PATH = '/content/mon_features.pkl'
UNMON_FILE_PATH = '/content/unmon_features.pkl'

# Names of the label columns; index 0 (the per-website label) is the one
# selected as the prediction target.
LABEL_COLUMN = ['website_label', 'monitored_label']

# Version-3 feature subset used as the model input columns.
FEATURES_VER3 = [
    'total_transmission_time', 'std_inter_packet_time',
    'avg_outgoing_burst_size', 'avg_incoming_burst_size',
    'num_outgoing_packets', 'incoming_packet_ratio',
    'outgoing_packet_ratio', 'cumul_packets_10pct',
    'cumul_packets_30pct', 'outgoing_order_skew',
    'incoming_order_skew', 'cumul_max', 'bigram_OO',
    'num_incoming_first_30', 'outgoing_first_30',
    'avg_incoming_order_first_30', 'avg_outgoing_order_first_30'
]

# NOTE: pd.read_pickle unpickles arbitrary Python objects, so these files must
# come from a trusted source.
mon_features_df = pd.read_pickle(MON_FILE_PATH)
unmon_features_df = pd.read_pickle(UNMON_FILE_PATH)

# Stack both tables row-wise; ignore_index=True gives the result one fresh
# 0..N-1 index instead of two overlapping ones.
features_df = pd.concat([mon_features_df, unmon_features_df], ignore_index=True)

X = features_df[FEATURES_VER3]

# Unmonitored rows carry the raw label -1; they are remapped to a dedicated
# non-negative class id.
UNMONITORED_RAW_LABEL = -1
UNMONITORED_CLASS_ID = 95

y_raw = features_df[LABEL_COLUMN[0]]

# Guard: if the target id were already used by another class, the remap below
# would silently merge two classes into one.
assert not (y_raw == UNMONITORED_CLASS_ID).any(), (
    f"label {UNMONITORED_CLASS_ID} is already in use; "
    "choose a different class id for the unmonitored class"
)

y = y_raw.replace({UNMONITORED_RAW_LABEL: UNMONITORED_CLASS_ID})

print(X)
print(y)

       total_transmission_time  std_inter_packet_time  \
0                        10.14               0.041168   
1                        10.16               0.163930   
2                        11.11               0.066661   
3                        13.36               0.047809   
4                        10.64               0.038760   
...                        ...                    ...   
28995                    32.09               0.163669   
28996                    38.62               0.114350   
28997                    34.93               1.331199   
28998                    11.84               0.083521   
28999                     9.62               0.026874   

       avg_outgoing_burst_size  avg_incoming_burst_size  num_outgoing_packets  \
0                     1.551282                16.666667                 121.0   
1                     1.702128                 9.319149                  80.0   
2                     1.552632                16.315789                 

### 2. XGBoost

#### (1) Install

In [None]:
pip install xgboost



In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

#### (2) Split dataset

In [None]:
# Hold out 20% of the rows for testing; stratify on y and fix the seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

#### (3) XGBoost 모델 선언

In [None]:
# Show the distinct class labels present in y.
unique_labels = np.unique(y)
print(unique_labels)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95]


In [None]:
# Hyperparameters for the multi-class XGBoost classifier, collected in one
# place so they are easy to read and tune.
xgb_params = dict(
    objective='multi:softmax',
    num_class=len(np.unique(y)),  # one class per distinct label in y
    eval_metric='mlogloss',
    n_estimators=300,
    learning_rate=0.1,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)

model = xgb.XGBClassifier(**xgb_params)

#### (4) 모델 학습

In [None]:
# Fit the classifier on the training split.
model.fit(X=X_train, y=y_train)

#### (5) 예측

In [None]:
# Predict a class label for every row of the held-out test split.
y_pred = model.predict(X_test)

#### (6) 평가

In [None]:
# Overall accuracy first, then the per-class precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", per_class_report)

Accuracy: 0.9025862068965518

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.72      0.76        40
           1       0.88      0.88      0.88        40
           2       0.94      0.85      0.89        40
           3       0.89      0.82      0.86        40
           4       0.94      0.82      0.88        40
           5       0.90      0.90      0.90        40
           6       0.90      0.88      0.89        40
           7       0.80      0.82      0.81        40
           8       0.92      0.82      0.87        40
           9       0.72      0.78      0.75        40
          10       0.91      0.78      0.84        40
          11       0.94      0.80      0.86        40
          12       0.93      0.93      0.93        40
          13       0.85      0.70      0.77        40
          14       0.92      0.82      0.87        40
          15       0.89      0.80      0.84        40
          16       0.90    