In [1]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
from catboost import CatBoostClassifier

# Load the dataset
mon = pd.read_csv('mon.csv')

# Split into features (X) and target (y)
X = mon.drop(columns=['Label'])
y = mon['Label']

# Encode the target labels as integers; the original label values stay
# available in le.classes_ (index i <-> encoded id i).
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the rows as a test set.
# stratify=y keeps the class proportions equal in the train and test splits,
# matching the stratified folds GridSearchCV uses for classifiers.
# NOTE: stratification requires every class to have at least 2 samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# CatBoost hyperparameter grid (2 x 1 x 2 = 4 candidates)
param_grid = {
    'iterations': [200, 300],  # number of trees
    'learning_rate': [0.1],  # learning rate
    'depth': [6,8]  # tree depth
}

# Base model; the grid search clones it once per candidate/fold
model = CatBoostClassifier(
    random_state=42,
    eval_metric='MultiClass',  # multi-class classification metric
    verbose=0
)

# Grid search with 3-fold cross-validation, selecting by weighted F1.
# NOTE(review): n_jobs=-1 runs the fits in parallel while each CatBoost fit
# presumably also uses multiple threads by default -- if this cell is slow or
# memory-hungry, consider lowering n_jobs or setting thread_count on the model.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='f1_weighted',  # weighted F1 score
    cv=3,
    verbose=1,
    n_jobs=-1
)

# Fit every candidate and refit the best one on the full training split
grid_search.fit(X_train, y_train)

# Report the best hyperparameters
print("Best parameters:", grid_search.best_params_)

# Predict on the held-out test set with the refitted best model.
# np.ravel guarantees a flat 1-D label vector whether the model returns
# shape (n,) or a single-column (n, 1) array.
best_model = grid_search.best_estimator_
cat_pred = np.ravel(best_model.predict(X_test))

# Evaluate on the test set
f1 = f1_score(y_test, cat_pred, average='weighted')
print("Tuned CatBoost Accuracy:", accuracy_score(y_test, cat_pred))
print("Tuned CatBoost F1 Score (weighted):", f1)

# Show the original label names instead of the encoded integer ids, and list
# every known class explicitly so rows always line up with le.classes_.
class_ids = np.arange(len(le.classes_))
class_names = [str(label) for label in le.classes_]
print(
    "\nClassification Report (Tuned CatBoost):\n",
    classification_report(
        y_test, cat_pred, labels=class_ids, target_names=class_names
    ),
)


Fitting 3 folds for each of 4 candidates, totalling 12 fits


