In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
from catboost import CatBoostClassifier

# Tune a multiclass CatBoost classifier with a randomized hyperparameter
# search, then evaluate the best model on a held-out test split.

# Load the dataset (path is relative to the notebook's working directory).
mon = pd.read_csv('../../../mon.csv')

# Separate the features (X) from the target column (y).
X = mon.drop(columns=['Label'])
y = mon['Label']

# Encode the target labels as consecutive integers; `le` keeps the mapping
# so the original label names can be restored in the final report.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the rows for testing. Stratifying keeps the per-class
# proportions identical in both splits, which makes the per-class metrics
# comparable and matches the stratified folds the search uses for classifiers.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Candidate values sampled by the randomized search.
param_dist = {
    'iterations': [300, 500, 700],      # number of trees
    'learning_rate': [0.01, 0.05, 0.1], # learning rate
    'depth': [6, 8, 10, 12],            # tree depth
    'l2_leaf_reg': [1, 3, 5, 7],        # L2 regularization coefficient
    'border_count': [32, 64, 128],      # number of splits for numeric features
    'bagging_temperature': [0.1, 1, 5]  # Bayesian bootstrap intensity
}

# Base CatBoost model. `bagging_temperature` is only meaningful for the
# Bayesian bootstrap, so the objective and bootstrap type are pinned
# explicitly instead of relying on CatBoost's automatic defaults; this keeps
# every sampled candidate a valid configuration.
# NOTE(review): a previous run reported 105 of 150 fits failing with a
# truncated traceback; the root cause is unconfirmed. If failures persist,
# check the report printed after the search (memory pressure from deep trees
# combined with n_jobs=-1 is another candidate).
model = CatBoostClassifier(
    random_state=42,
    loss_function='MultiClass',        # explicit multiclass objective
    eval_metric='MultiClass',          # multiclass evaluation metric
    bootstrap_type='Bayesian',         # required for bagging_temperature
    verbose=0                          # silence per-iteration training logs
)

# Randomized search over `param_dist`, scored by weighted F1.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,                         # number of sampled candidates
    scoring='f1_weighted',             # selection criterion
    cv=3,                              # number of cross-validation folds
    verbose=2,                         # print search progress
    n_jobs=-1,                         # use all CPU cores
    random_state=42
)

# Run the search.
print("Starting RandomizedSearchCV...")
random_search.fit(X_train, y_train)

# Failed fits are scored NaN and silently dropped from model selection, so
# surface them here; otherwise the "best" model may have been chosen from
# only a fraction of the intended search space.
search_results = pd.DataFrame(random_search.cv_results_)
failed_candidates = search_results[search_results['mean_test_score'].isna()]
if not failed_candidates.empty:
    print(
        f"WARNING: {len(failed_candidates)} of {len(search_results)} "
        "candidates had at least one failed fit and were scored NaN:"
    )
    for failed_params in failed_candidates['params']:
        print("  ", failed_params)

# Report the best hyperparameters found.
print("Best parameters:", random_search.best_params_)

# Predict the test split with the refitted best model.
best_model = random_search.best_estimator_
cat_pred = best_model.predict(X_test)

# Evaluate. The report is labelled with the original class names rather than
# the encoded integers; `labels` is passed explicitly so the names stay
# aligned with the encoder even if a class is absent from the test split.
class_ids = list(range(len(le.classes_)))
class_names = [str(name) for name in le.classes_]
f1 = f1_score(y_test, cat_pred, average='weighted')
print("Tuned CatBoost Accuracy:", accuracy_score(y_test, cat_pred))
print("Tuned CatBoost F1 Score (weighted):", f1)
print(
    "\nClassification Report (Tuned CatBoost):\n",
    classification_report(
        y_test, cat_pred, labels=class_ids, target_names=class_names
    )
)


Starting RandomizedSearchCV...
Fitting 3 folds for each of 50 candidates, totalling 150 fits


105 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
105 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\yzooz\pythonenv\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\yzooz\pythonenv\lib\site-packages\catboost\core.py", line 5245, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline, use_best_model,
  File "c:\Users\yzooz\pythonenv\lib\site-packages\catboost\core.py", line 2410, in _fit
    self._train(
  File "c:\Users\yzooz\pythonenv\lib\site-packages\catboost\core.py", li

Best parameters: {'learning_rate': 0.05, 'l2_leaf_reg': 1, 'iterations': 700, 'depth': 6, 'border_count': 128, 'bagging_temperature': 1}
Tuned CatBoost Accuracy: 0.75
Tuned CatBoost F1 Score (weighted): 0.7482224254243945

Classification Report (Tuned CatBoost):
               precision    recall  f1-score   support

           0       0.78      0.50      0.61        42
           1       0.83      0.69      0.75        42
           2       0.82      0.91      0.86        35
           3       0.65      0.83      0.73        29
           4       0.82      0.85      0.84        39
           5       0.92      0.78      0.84        45
           6       0.80      0.89      0.84        44
           7       0.70      0.72      0.71        36
           8       0.88      0.62      0.72        34
           9       0.63      0.77      0.70        31
          10       0.90      0.77      0.83        47
          11       0.74      0.74      0.74        35
          12       0.88      0.83