### 1. mon_features.pkl upload

In [None]:
import pandas as pd
import numpy as np
import pickle

# Path of the pickled feature table and the label columns it carries.
FILE_PATH = '/content/mon_features.pkl'
LABEL_COLUMN = ['website_label', 'monitored_label']

# Feature columns selected as model input.
FEATURES_VER2 = [
    'total_transmission_time',
    'std_inter_packet_time',
    'num_outgoing_packets',
    'avg_incoming_burst_size',
    'cumul_packets_30pct',
    'cumul_packets_10pct',
    'incoming_order_skew',
    'outgoing_order_skew',
    'cumul_max',
    'bigram_OO',
    'avg_outgoing_order_first_30',
    'num_incoming_first_30',
    'incoming_packet_ratio',
]

# NOTE: read_pickle unpickles the file, so only load pickles from a trusted source.
features_df = pd.read_pickle(FILE_PATH)

# X holds the selected feature columns; y is the first label column ('website_label').
X = features_df.loc[:, FEATURES_VER2]
y = features_df.loc[:, LABEL_COLUMN[0]]

# Show the feature matrix followed by the target, one after the other.
print(X, y, sep="\n")

       total_transmission_time  std_inter_packet_time  num_outgoing_packets  \
0                        10.14               0.041168                 121.0   
1                        10.16               0.163930                  80.0   
2                        11.11               0.066661                 118.0   
3                        13.36               0.047809                 122.0   
4                        10.64               0.038760                 115.0   
...                        ...                    ...                   ...   
18995                    43.91               0.143962                 619.0   
18996                    15.60               0.019465                 552.0   
18997                    14.93               0.016411                 579.0   
18998                    19.91               0.033281                 690.0   
18999                    13.76               0.011074                 757.0   

       avg_incoming_burst_size  cumul_packets_30pct

### 2. LightGBM

#### (1) install

In [None]:
pip install lightgbm



In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

#### (2) Split dataset

In [None]:
# Stratified 80/20 hold-out split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

#### (3) LightGBM 모델 정의

In [None]:
# Multiclass LightGBM classifier with one class per distinct label value in y.
model = lgb.LGBMClassifier(
    objective="multiclass",
    num_class=len(np.unique(y)),
    n_estimators=500,
    learning_rate=0.03,
    num_leaves=40,
    subsample=0.8,
    # LightGBM only applies row subsampling (bagging) when the bagging frequency
    # is non-zero; with the default of 0, subsample=0.8 would be silently ignored.
    subsample_freq=1,
    colsample_bytree=0.8,
    random_state=42,
    # Suppress LightGBM's info/warning log lines, which otherwise flood the
    # cell output during fit.
    verbose=-1,
)

#### (4) 모델 학습

In [None]:
# Train the classifier on the training partition.
model.fit(X=X_train, y=y_train)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m


#### (5) 예측

In [None]:
# Predict class labels for the held-out test features.
y_pred = model.predict(X=X_test)

#### (6) 평가

In [None]:
# Overall accuracy on the held-out test set.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Print the report on its own line: when it shares a line with the label,
# the header row is shifted and no longer lines up with the per-class rows.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.8757894736842106

Classification Report:               precision    recall  f1-score   support

           0       0.74      0.78      0.76        40
           1       0.93      0.93      0.93        40
           2       0.97      0.93      0.95        40
           3       0.97      0.90      0.94        40
           4       0.90      0.88      0.89        40
           5       0.86      0.90      0.88        40
           6       0.84      0.90      0.87        40
           7       0.82      0.78      0.79        40
           8       1.00      0.82      0.90        40
           9       0.79      0.85      0.82        40
          10       0.89      0.82      0.86        40
          11       0.97      0.90      0.94        40
          12       0.92      0.90      0.91        40
          13       0.82      0.68      0.74        40
          14       0.89      0.78      0.83        40
          15       0.82      0.93      0.87        40
          16       0.87     