In [1]:
# 라이브러리 임포트
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import drive
import matplotlib.pyplot as plt
import seaborn as sns

# Mount Google Drive in the Colab runtime and read the CSV into a DataFrame.
drive.mount('/content/drive')
file_path = '/content/drive/MyDrive/monunmon.csv'
data = pd.read_csv(file_path)

# Preview the data: a header line followed by the text repr of the first rows.
print("데이터 샘플:", data.head(), sep="\n")

# Positional split: every column except the last is a feature (X) and the last
# column is the label (y). The original note gives the label range as 0~94.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

Mounted at /content/drive
데이터 샘플:
   Total Packets  Incoming Packets  Outgoing Packets  Incoming Ratio  \
0         1421.0             121.0            1300.0        0.085151   
1          518.0              80.0             438.0        0.154440   
2         1358.0             118.0            1240.0        0.086892   
3         1446.0             122.0            1324.0        0.084371   
4         1406.0             115.0            1291.0        0.081792   

   Outgoing Ratio  Outgoing Std  Outgoing Mean  Packets per Second  \
0        0.914849    515.483953     773.322314          140.138067   
1        0.845560    139.231951     226.162500           50.984252   
2        0.913108    472.735508     786.110169          122.232223   
3        0.915629    513.916038     820.139344          108.233533   
4        0.918208    503.993490     789.608696          132.142857   

   First 30 Incoming  First 30 Outgoing  Inter-arrival Mean  \
0                9.0               21.0          

In [2]:
!pip install catboost

import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score


# Collapse the labels into a binary target: -1 (unmonitored) stays -1 and every
# other value (monitored) becomes 1. np.where does this in a single vectorized
# pass instead of invoking a Python lambda once per row; the resulting integer
# column is the same.
# NOTE: this overwrites data['Label'] in place, so the original label values
# are only recoverable by re-reading the CSV.
data['Label'] = np.where(data['Label'] != -1, 1, -1)

# Separate features and target: every column except 'Label' is a feature.
X = data.drop(columns='Label')
y = data['Label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


**튜닝을 하지 않은 기본 CatBoost model**

In [5]:
# Baseline CatBoost classifier with hand-picked settings (no tuning).
model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    cat_features=[],   # empty list: no categorical feature columns are declared
    verbose=200,       # training log is printed every 200 iterations
)

# Fit on the training split.
model.fit(X_train, y_train)

# Predict labels for the held-out split.
y_pred = model.predict(X_test)

# Accuracy on the held-out split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.4f}')

# F1 score on the held-out split (scikit-learn's default binary averaging).
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f'F1 Score (unTuned Model): {f1:.4f}')

0:	learn: 0.6008554	total: 8.61ms	remaining: 8.6s
200:	learn: 0.2158412	total: 1.54s	remaining: 6.12s
400:	learn: 0.1690867	total: 4.45s	remaining: 6.64s
600:	learn: 0.1393608	total: 7.04s	remaining: 4.67s
800:	learn: 0.1152722	total: 8.56s	remaining: 2.13s
999:	learn: 0.0985620	total: 10.1s	remaining: 0us
Accuracy: 0.9161
F1 Score (unTuned Model): 0.9532


**튜닝을 진행한 CatBoost model**

---



In [4]:
# CatBoost classifier with manually adjusted hyperparameters.
# NOTE(review): l2_leaf_reg=3 and subsample=0.8 look like they may coincide
# with CatBoost's own defaults, in which case they do not change the model -
# confirm against the CatBoost parameter reference.
model = CatBoostClassifier(
    # Number of trees (adjustable).
    iterations=1500,
    # Learning rate (a small value is used).
    learning_rate=0.05,
    # Tree depth (adjustable).
    depth=8,
    # L2 regularization strength (to limit overfitting).
    l2_leaf_reg=3,
    # Row sampling rate (original note: use 80% of the data).
    subsample=0.8,
    # Random seed (for reproducibility).
    random_state=42,
    # Interval, in iterations, between training log lines (adjustable).
    verbose=200,
)

# Fit on the training split.
model.fit(X_train, y_train)

# Predict labels for the held-out split.
y_pred = model.predict(X_test)

# Accuracy on the held-out split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.4f}')

# F1 score on the held-out split (scikit-learn's default binary averaging).
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f'F1 Score (Tuned Model): {f1:.4f}')

0:	learn: 0.6508651	total: 43.6ms	remaining: 1m 5s
200:	learn: 0.2283729	total: 4.02s	remaining: 26s
400:	learn: 0.1801749	total: 7.09s	remaining: 19.4s
600:	learn: 0.1494321	total: 10.2s	remaining: 15.2s
800:	learn: 0.1257416	total: 14.9s	remaining: 13s
1000:	learn: 0.1067319	total: 18.6s	remaining: 9.26s
1200:	learn: 0.0924108	total: 21.7s	remaining: 5.4s
1400:	learn: 0.0799239	total: 24.8s	remaining: 1.75s
1499:	learn: 0.0754903	total: 26.7s	remaining: 0us
Accuracy: 0.9168
F1 Score (Tuned Model): 0.9535
