In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install numpy==1.26.4 pandas==2.2.2 scikit-learn==1.3.2 catboost==1.2.7 xgboost==2.1.4 lightgbm imbalanced-learn

Collecting scikit-learn==1.3.2
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting catboost==1.2.7
  Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn, catboost
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
    Uninstalling scikit-learn-1.6.1:
      Successfully uninstalled scikit-learn-1.6.1
Successfully installed catboost-1.2.7 scikit-learn-1.3.2


In [3]:

import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import RandomizedSearchCV
import scipy.stats as stats
from sklearn.preprocessing import RobustScaler

#  문자열 → 숫자 int 로 변환하는 함수
def convert_count_str(val):
    if pd.isna(val):
        return 0
    val = str(val).strip()
    if "회 이상" in val:
        return 6
    m = re.search(r'(\d+)회?', val)
    if m:
        return int(m.group(1))
    return 0

# 정자 난자 기증자 나이 맵핑
donor_age_mapping = {
    '만20세 이하': 3, '만21-25세': 5, '만26-30세': 4, '만31-35세': 2,
    '만36-40세': 1, '만41-45세': 0, '알 수 없음': 0
}

def convert_donor_age(val):
    if pd.isna(val):
        return np.nan
    return donor_age_mapping.get(str(val).strip(), np.nan)

# 카테고리형 변수들의 NaN을 문자열 'NaN'으로 변환하는 함수
def convert_nan_to_string(df, category_columns):
    df_copy = df.copy()
    for col in category_columns:
        df_copy[col] = df_copy[col].fillna('NaN')
    return df_copy

#  1. 데이터 로드 및 전처리
train = pd.read_csv('train.csv').drop(columns=['ID'])
test = pd.read_csv('test.csv').drop(columns=['ID'])

# 가중치 데이터 로드 (인코딩 문제 해결)
weight_data = pd.read_csv('weight_v2.csv', encoding='euc-kr')
weight_dict = weight_data.set_index("데이터 항목").to_dict()

# '시술 당시 나이' 결측치 여부 추가
train['시술 당시 나이_missing'] = train['시술 당시 나이'].apply(lambda x: 1.0 if str(x).strip() == '알 수 없음' else 0.0)
test['시술 당시 나이_missing'] = test['시술 당시 나이'].apply(lambda x: 1.0 if str(x).strip() == '알 수 없음' else 0.0)

# '시술 당시 나이' 변환 (나이가 낮을수록 높은 숫자 부여)
age_mapping = {
    '만18-34세': 5, '만35-37세': 4, '만38-39세': 3, '만40-42세': 2, '만43-44세': 1, '만45-50세': 0, '알 수 없음': np.nan
}
train['시술 당시 나이'] = train['시술 당시 나이'].apply(lambda x: float(age_mapping.get(str(x).strip(), 0)))
test['시술 당시 나이'] = test['시술 당시 나이'].apply(lambda x: float(age_mapping.get(str(x).strip(), 0)))

# 횟수 관련 컬럼 변환
count_columns = ["총 시술 횟수", "클리닉 내 총 시술 횟수", "IVF 시술 횟수", "DI 시술 횟수",
                 "총 임신 횟수", "IVF 임신 횟수", "DI 임신 횟수", "총 출산 횟수", "IVF 출산 횟수", "DI 출산 횟수"]
for col in count_columns:
    train[col] = train[col].astype(str).apply(convert_count_str).astype(int)
    test[col] = test[col].astype(str).apply(convert_count_str).astype(int)

# 난자/정자 기증자 나이 변환
train['난자 기증자 나이'] = train['난자 기증자 나이'].astype(str).apply(convert_donor_age)
test['난자 기증자 나이'] = test['난자 기증자 나이'].astype(str).apply(convert_donor_age)
train['정자 기증자 나이'] = train['정자 기증자 나이'].astype(str).apply(convert_donor_age)
test['정자 기증자 나이'] = test['정자 기증자 나이'].astype(str).apply(convert_donor_age)

#  2. 가중치 적용 함수
def apply_feature_weights(X, weight_dict):
    X_weighted = X.copy()
    for column in X.columns:
        if column in weight_dict["IVF"]:
            X_weighted[column] *= weight_dict["IVF"][column]  # IVF 가중치 적용
    return X_weighted

# 카테고리형 변수들의 인덱스 찾기
def get_categorical_feature_indices(df):
    cat_features = []
    for idx, (column, dtype) in enumerate(df.dtypes.items()):
        if dtype == 'category':
            cat_features.append(idx)
    return cat_features

# Feature 가중치 적용
X = train.drop('임신 성공 여부', axis=1)
y = train['임신 성공 여부']

X_weighted = apply_feature_weights(X, weight_dict)
X_test_weighted = apply_feature_weights(test, weight_dict)

#  3. 데이터 불균형 처리 (임신 성공 여부 기준)
undersample = RandomUnderSampler(sampling_strategy=0.5, random_state=42)
X_resampled, y_resampled = undersample.fit_resample(X_weighted, y)

#  4. 데이터 타입 변환 (카테고리형 변수 처리)
category_columns = [
    "시술 시기 코드", "시술 유형", "특정 시술 유형", "배란 유도 유형",
    "배아 생성 주요 이유", "난자 출처", "정자 출처"
]
X_resampled = convert_nan_to_string(X_resampled, category_columns)
X_test_weighted = convert_nan_to_string(X_test_weighted, category_columns)
for col in category_columns:
    X_resampled[col] = X_resampled[col].astype("category")
    X_test_weighted[col] = X_test_weighted[col].astype("category")

#  5. 모델 학습용 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled,
                                                  test_size=0.2, random_state=42, stratify=y_resampled)

# 모델 파라미터 수정
stack_clf = StackingClassifier(
    estimators=[
        ('xgb', XGBClassifier(
            tree_method='gpu_hist',
            enable_categorical=True,
            random_state=42
        )),
        ('lgbm', LGBMClassifier(
            n_jobs=-2,
            random_state=42,
            verbose=-1
        )),
        ('cat', CatBoostClassifier(
            task_type='CPU',
            verbose=0,
            cat_features=get_categorical_feature_indices(X_resampled)
        ))
    ],
    final_estimator=Pipeline([
        ('scaler', RobustScaler()),
        ('lr', LogisticRegression(
            max_iter=1000,
            class_weight='balanced',
            solver='liblinear'
        ))
    ]),
    cv=3,  # CV 폴드 축소
    n_jobs=-2
)

# 🔥 추가 파라미터 반영
param_dist = {
    # XGBoost 파라미터
    'xgb__n_estimators': [300, 400, 500],
    'xgb__max_depth': [4, 5, 6],
    'xgb__learning_rate': [0.025, 0.05, 0.1],

    # LightGBM 파라미터 (learning_rate 추가)
    'lgbm__n_estimators': [300, 400, 500],
    'lgbm__num_leaves': [31, 63],
    'lgbm__min_child_samples': [20, 50],
    'lgbm__learning_rate': [0.025, 0.05, 0.1],  # 추가됨

    # CatBoost 파라미터 추가
    'cat__iterations': [100, 300, 500],
    'cat__depth': [4, 5, 6],
    'cat__learning_rate': [0.025, 0.05, 0.1],

    # 최종 분류기 (LogisticRegression) 파라미터
    'final_estimator__lr__C': [0.1, 1.0, 5.0, 10.0]
}

random_search = RandomizedSearchCV(
    stack_clf,
    param_distributions=param_dist,
    n_iter=30,  # 후보 수 30개로 제한
    scoring='roc_auc',
    cv=3,  # CV 폴드 축소
    n_jobs=1,
    random_state=42,
    verbose=2
)
random_search.fit(X_train, y_train)

# 최적 모델로 검증 데이터 평가
y_val_pred = random_search.best_estimator_.predict_proba(X_val)[:, 1]
roc_auc = roc_auc_score(y_val, y_val_pred)
print(f"Validation ROC AUC: {roc_auc:.5f}")

# 최적 모델로 테스트 데이터 예측 및 제출 파일 생성
pred_proba = random_search.best_estimator_.predict_proba(X_test_weighted)[:, 1]
submission = pd.DataFrame({'ID': [f"TEST_{i:05d}" for i in range(len(test))],
                           'probability': pred_proba})
submission.to_csv('baseline_v7_submit.csv', index=False)
print("제출 파일 생성 완료")

# Validation ROC AUC: 0.74275


Fitting 3 folds for each of 30 candidates, totalling 90 fits



    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=300; total time=  57.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=300; total time=  52.7s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=300; total time= 1.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  24.6s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  27.0s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  28.4s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=500; total time= 2.6min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=500; total time= 2.7min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=500; total time= 2.8min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=500; total time=  42.9s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=500; total time=  43.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=500; total time=  38.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=400; total time= 1.7min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=400; total time= 1.5min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=400; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time=  33.2s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time=  32.9s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time=  36.6s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=300; total time=  38.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=300; total time=  39.5s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=300; total time=  45.4s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  40.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  39.7s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  44.4s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 3.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.8min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.2min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.2min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  31.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  27.8s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=5, xgb__n_estimators=500; total time=  31.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 1.8min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.2min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=500; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=500; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=500; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  28.0s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  25.5s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=400; total time=  31.3s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=500; total time= 2.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=500; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=500; total time= 1.9min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=0.1, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=5.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.05, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=400; total time= 2.0min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.9min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=1.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=4, xgb__n_estimators=400; total time= 2.1min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time=  24.1s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time=  21.6s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=100, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time=  21.8s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=500; total time= 2.6min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.5min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=500, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=300; total time= 2.2min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=400; total time= 1.6min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=400; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=500, lgbm__num_leaves=63, xgb__learning_rate=0.1, xgb__max_depth=6, xgb__n_estimators=400; total time= 1.5min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time= 1.2min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time= 1.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=5, cat__iterations=300, cat__learning_rate=0.05, final_estimator__lr__C=10.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=50, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.5min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.025, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=63, xgb__learning_rate=0.05, xgb__max_depth=5, xgb__n_estimators=300; total time= 1.3min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.8min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.6min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.05, final_estimator__lr__C=1.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=50, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.1, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.7min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time=  27.5s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time=  26.4s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=100, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.025, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=5, xgb__n_estimators=400; total time=  25.4s



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=6, cat__iterations=300, cat__learning_rate=0.1, final_estimator__lr__C=10.0, lgbm__learning_rate=0.1, lgbm__min_child_samples=20, lgbm__n_estimators=300, lgbm__num_leaves=63, xgb__learning_rate=0.025, xgb__max_depth=4, xgb__n_estimators=400; total time= 1.4min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=6, xgb__n_estimators=500; total time= 1.8min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=6, xgb__n_estimators=500; total time= 1.6min



    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=4, cat__iterations=500, cat__learning_rate=0.025, final_estimator__lr__C=5.0, lgbm__learning_rate=0.05, lgbm__min_child_samples=20, lgbm__n_estimators=400, lgbm__num_leaves=31, xgb__learning_rate=0.025, xgb__max_depth=6, xgb__n_estimators=500; total time= 1.6min



    E.g. tree_method = "hist", device = "cuda"



Validation ROC AUC: 0.74246
제출 파일 생성 완료


In [None]:
# pandas >= 1.3.0
# numpy >= 1.20.0
# matplotlib >= 3.4.0
# seaborn >= 0.11.0
# scikit-learn >= 1.0.0
# imbalanced-learn >= 0.8.0
# catboost >= 1.0.0
# lightgbm >= 3.3.0
# scipy >= 1.7.0
# xgboost >= 1.7.0

import pandas as pd
import numpy as np
import gc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, PowerTransformer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from imblearn.under_sampling import RandomUnderSampler
from catboost import CatBoostClassifier, Pool
import lightgbm as lgb
from lightgbm import LGBMClassifier
import xgboost as xgb
from xgboost import XGBClassifier
from scipy.optimize import minimize
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler

# 데이터 불러오기 및 전처리
def preprocess_data(train_path, test_path):
    print("Loading data...")
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    sample_submission = pd.read_csv(test_path.replace('test.csv', 'sample_submission.csv'))
    
    # ID 컬럼 제거
    if 'ID' in train.columns:
        train.drop(columns=['ID'], inplace=True)
    if 'ID' in test.columns:
        test.drop(columns=['ID'], inplace=True)

    y = train['임신 성공 여부']
    X = train.drop(columns=['임신 성공 여부'])
    X_test = test.copy()

    # 결측치 처리
    for col in X.columns:
        if X[col].dtype == 'object':
            X[col] = X[col].fillna('Unknown')
            X_test[col] = X_test[col].fillna('Unknown')
        else:
            X[col] = X[col].fillna(X[col].median())  # mean 대신 median 사용
            X_test[col] = X_test[col].fillna(X_test[col].median())

    # 범주형/수치형 변수 구분
    categorical_features = X.select_dtypes(include=['object']).columns.tolist()
    numerical_features = X.select_dtypes(exclude=['object']).columns.tolist()

    # 범주형 변수 인코딩
    ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    X[categorical_features] = ordinal_encoder.fit_transform(X[categorical_features])
    X_test[categorical_features] = ordinal_encoder.transform(X_test[categorical_features])

    # 수치형 변수 스케일링
    scaler = MinMaxScaler()
    X[numerical_features] = scaler.fit_transform(X[numerical_features])
    X_test[numerical_features] = scaler.transform(X_test[numerical_features])

    # 이상치 처리 (IQR 방법)
    for col in numerical_features:
        Q1 = X[col].quantile(0.25)
        Q3 = X[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        X[col] = np.clip(X[col], lower_bound, upper_bound)
        X_test[col] = np.clip(X_test[col], lower_bound, upper_bound)

    # 파워 변환으로 분포 개선
    power = PowerTransformer(method='yeo-johnson')
    X[numerical_features] = power.fit_transform(X[numerical_features])
    X_test[numerical_features] = power.transform(X_test[numerical_features])
    
    # 상호작용 특성 생성
    interact_features = []
    for i in range(min(10, len(numerical_features))):
        for j in range(i+1, min(11, len(numerical_features))):
            feat1, feat2 = numerical_features[i], numerical_features[j]
            feat_name = f'{feat1}_{feat2}_interact'
            interact_features.append(feat_name)
            X[feat_name] = X[feat1] * X[feat2]
            X_test[feat_name] = X_test[feat1] * X_test[feat2]
    
    # 데이터 타입 변환
    X[numerical_features + interact_features] = X[numerical_features + interact_features].astype(float)
    X_test[numerical_features + interact_features] = X_test[numerical_features + interact_features].astype(float)
    X[categorical_features] = X[categorical_features].astype(int)
    X_test[categorical_features] = X_test[categorical_features].astype(int)
    
    return X, y, X_test, sample_submission, categorical_features

# CatBoost 최적 파라미터
def get_cat_params():
    return {
        "iterations": 2000,      # 반복 횟수 증가
        "learning_rate": 0.01,   # 학습률 조정
        "depth": 10,             # 트리 깊이 증가
        "l2_leaf_reg": 7,        # L2 정규화 강화
        "border_count": 255,     # 경계 카운트 증가
        "subsample": 0.85,       # 서브샘플링 비율 조정
        "random_strength": 0.7,  # 랜덤 강도 증가
        "bagging_temperature": 1.2,  # 배깅 온도 조정
        "od_type": "Iter",
        "od_wait": 75,           # 조기 종료 대기 라운드 증가
        "loss_function": "Logloss",
        "eval_metric": "AUC",
        "verbose": 100,
        "random_seed": 42,
        "class_weights": [1, 4]  # 클래스 불균형 가중치 조정
    }

# LightGBM 최적 파라미터
def get_lgb_params():
    return {
        "n_estimators": 2000,    # 트리 수 증가
        "learning_rate": 0.01,   # 학습률 조정
        "num_leaves": 127,       # 리프 노드 수 증가
        "max_depth": 14,         # 트리 깊이 증가
        "min_data_in_leaf": 15,  # 리프 노드 최소 데이터 수 조정
        "max_bin": 511,          # 최대 빈 수 증가
        "subsample": 0.85,       # 서브샘플링 비율 조정
        "subsample_freq": 1,
        "colsample_bytree": 0.85,# 열 샘플링 비율 조정
        "min_child_weight": 0.0001,  # 자식 노드 가중치 조정
        "reg_alpha": 7,          # L1 정규화 강화
        "reg_lambda": 15,        # L2 정규화 강화
        "objective": "binary",
        "metric": "auc",
        "boosting_type": "gbdt",
        "verbose": -1,
        "random_state": 42,
        "scale_pos_weight": 4.0  # 클래스 불균형 가중치 조정
    }

# XGBoost 최적 파라미터
def get_xgb_params():
    return {
        "n_estimators": 2000,    # 트리 수 증가
        "learning_rate": 0.01,   # 학습률 조정
        "max_depth": 12,         # 트리 깊이 증가
        "min_child_weight": 0.5, # 자식 노드 가중치 조정
        "subsample": 0.85,       # 서브샘플링 비율 조정
        "colsample_bytree": 0.85,# 열 샘플링 비율 조정
        "gamma": 0.2,            # 분할 페널티 증가
        "reg_alpha": 7,          # L1 정규화 강화
        "reg_lambda": 15,        # L2 정규화 강화
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "tree_method": "gpu_hist",
        "random_state": 42,
        "scale_pos_weight": 4.0  # 클래스 불균형 가중치 조정
    }

# 가중치 최적화 함수
def optimize_weights(predictions, y_true):
    def objective(weights):
        weights = np.array(weights)
        weights = weights / np.sum(weights)  # 정규화
        weighted_pred = np.sum([w * p for w, p in zip(weights, predictions)], axis=0)
        return -roc_auc_score(y_true, weighted_pred)  # 최대화를 위해 음수 사용
    
    n_models = len(predictions)
    # 기본 가중치를 모델 성능 기반으로 초기화
    initial_aucs = [roc_auc_score(y_true, pred) for pred in predictions]
    initial_weights = np.array(initial_aucs) / sum(initial_aucs)
    
    bounds = [(0, 1) for _ in range(n_models)]
    constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1})
    
    try:
        result = minimize(objective, initial_weights, method='SLSQP', bounds=bounds, constraints=constraints)
        return result.x
    except Exception as e:
        print(f"최적화 실패, 기본 가중치 사용: {e}")
        return initial_weights

# 배치 예측 함수
def batch_predict(model, X, batch_size=10000):
    predictions = []
    for i in range(0, len(X), batch_size):
        batch_pred = model.predict_proba(X.iloc[i:i+batch_size])[:, 1]
        predictions.append(batch_pred)
    return np.concatenate(predictions)

# 모델 훈련 및 예측 함수
def train_and_predict():
    # 데이터 로드 및 전처리
    X, y, X_test, sample_submission, categorical_features = preprocess_data(
        'train.csv',
        'test.csv'
    )
    
    # 스태킹 분류기 파라미터 그리드
    param_dist = {
        # XGBoost 파라미터
        'xgb__n_estimators': [1500, 2000, 2500],
        'xgb__max_depth': [8, 10, 12, 14],
        'xgb__learning_rate': [0.005, 0.01, 0.02],
        'xgb__min_child_weight': [0.1, 0.5, 1],

        # LightGBM 파라미터
        'lgbm__n_estimators': [1500, 2000, 2500],
        'lgbm__num_leaves': [63, 127, 255],
        'lgbm__max_depth': [10, 12, 14, 16],
        'lgbm__learning_rate': [0.005, 0.01, 0.02],
        'lgbm__min_child_samples': [10, 15, 20],
        'lgbm__reg_alpha': [1, 5, 7],
        'lgbm__reg_lambda': [10, 15, 20],

        # CatBoost 파라미터
        'cat__iterations': [1500, 2000, 2500],
        'cat__depth': [8, 10, 12],
        'cat__learning_rate': [0.005, 0.01, 0.02],
        'cat__l2_leaf_reg': [3, 5, 7],
        'cat__random_strength': [0.5, 0.7, 1.0],

        # 최종 분류기 (LogisticRegression) 파라미터
        'final_estimator__lr__C': [0.01, 0.1, 1.0, 5.0, 10.0]
    }
    
    # 스태킹 분류기 설정
    stack_clf = StackingClassifier(
        estimators=[
            ('xgb', XGBClassifier(**get_xgb_params())),
            ('lgbm', LGBMClassifier(**get_lgb_params())),
            ('cat', CatBoostClassifier(
                **get_cat_params(), 
                cat_features=categorical_features
            ))
        ],
        final_estimator=Pipeline([
            ('scaler', RobustScaler()),
            ('lr', LogisticRegression(
                max_iter=1000,
                class_weight='balanced',
                solver='liblinear'
            ))
        ]),
        cv=5,  # 폴드 수 증가
        n_jobs=1
    )
    
    # 교차 검증 설정
    from sklearn.model_selection import RandomizedSearchCV
    random_search = RandomizedSearchCV(
        stack_clf,
        param_distributions=param_dist,
        n_iter=50,  # 탐색 후보 수 증가
        scoring='roc_auc',
        cv=5,  # 폴드 수 증가
        n_jobs=1,
        random_state=42,
        verbose=2
    )
    
    # 데이터 분할
    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # 예측 결과 저장 (3개 모델)
    oof_preds_cat = np.zeros(len(X))
    oof_preds_lgb = np.zeros(len(X))
    oof_preds_xgb = np.zeros(len(X))
    test_preds_cat = np.zeros(len(X_test))
    test_preds_lgb = np.zeros(len(X_test))
    test_preds_xgb = np.zeros(len(X_test))


    # 각 폴드별 최고 성능 모델 저장
    best_models = {'cat': None, 'lgb': None, 'xgb': None}
    best_score = 0
    
    # K-Fold 훈련
    for fold_idx, (tr_idx, val_idx) in enumerate(skf.split(X, y), 1):
        print(f"\n==== Fold {fold_idx}/{n_splits} ====")
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]
        
        try:
            # RandomUnderSampler 사용
            rus = RandomUnderSampler(sampling_strategy=0.5, random_state=42)  
            X_tr_res, y_tr_res = rus.fit_resample(X_tr, y_tr)
            
            # 모델 학습
            random_search.fit(X_tr_res, y_tr_res)
            
            # 최적 모델 선택
            best_stacked_model = random_search.best_estimator_
            
            # 개별 모델 추출
            cat_model = best_stacked_model.estimators_[2]
            lgb_model = best_stacked_model.estimators_[1]
            xgb_model = best_stacked_model.estimators_[0]
            
            # 개별 모델 검증 세트 예측
            cat_val_pred = cat_model.predict_proba(X_val)[:, 1]
            lgb_val_pred = lgb_model.predict_proba(X_val)[:, 1]
            xgb_val_pred = xgb_model.predict_proba(X_val)[:, 1]
            
            # OOF 예측 저장
            oof_preds_cat[val_idx] = cat_val_pred
            oof_preds_lgb[val_idx] = lgb_val_pred
            oof_preds_xgb[val_idx] = xgb_val_pred
            
            # 테스트 세트 예측
            test_preds_cat += batch_predict(cat_model, X_test) / n_splits
            test_preds_lgb += batch_predict(lgb_model, X_test) / n_splits
            test_preds_xgb += batch_predict(xgb_model, X_test) / n_splits
            
            # 개별 모델 성능 확인
            cat_auc = roc_auc_score(y_val, cat_val_pred)
            lgb_auc = roc_auc_score(y_val, lgb_val_pred)
            xgb_auc = roc_auc_score(y_val, xgb_val_pred)
            print(f"CatBoost Fold {fold_idx} AUC: {cat_auc:.6f}")
            print(f"LightGBM Fold {fold_idx} AUC: {lgb_auc:.6f}")
            print(f"XGBoost Fold {fold_idx} AUC: {xgb_auc:.6f}")
            
            # 가중치 최적화로 앙상블
            weights = optimize_weights(
                [cat_val_pred, lgb_val_pred, xgb_val_pred], 
                y_val
            )
            weighted_val_pred = (
                weights[0] * cat_val_pred + 
                weights[1] * lgb_val_pred + 
                weights[2] * xgb_val_pred
            )
            ensemble_auc = roc_auc_score(y_val, weighted_val_pred)
            print(f"Ensemble Fold {fold_idx} AUC: {ensemble_auc:.6f} (weights: {weights})")
            
            # 최고 성능 모델 업데이트
            if ensemble_auc > best_score:
                best_score = ensemble_auc
                best_models['cat'] = cat_model
                best_models['lgb'] = lgb_model
                best_models['xgb'] = xgb_model
                print(f"New best model found! Score: {best_score:.6f}")
                
                # 모델 저장
                import joblib
                best_models['cat'].save_model(f'best_cat_model_fold{fold_idx}.cbm')
                joblib.dump(best_models['lgb'], f'best_lgb_model_fold{fold_idx}.bin')
                joblib.dump(best_models['xgb'], f'best_xgb_model_fold{fold_idx}.bin')
            
            # 모델 훈련 진행 확인을 위한 로깅 추가
            print(f"훈련 데이터 크기: {len(X_tr)} -> 리샘플링 후: {len(X_tr_res)}")
            print(f"훈련 데이터 클래스 분포: {np.bincount(y_tr)}")
            print(f"리샘플링 후 클래스 분포: {np.bincount(y_tr_res)}")
            
        except Exception as e:
            print(f"Error in fold {fold_idx}: {e}")
            continue
            
        # 메모리 정리
        gc.collect()
    
    # 전체 OOF 성능 평가
    cat_oof_auc = roc_auc_score(y, oof_preds_cat)
    lgb_oof_auc = roc_auc_score(y, oof_preds_lgb)
    xgb_oof_auc = roc_auc_score(y, oof_preds_xgb)
    print(f"\nCatBoost OOF AUC: {cat_oof_auc:.6f}")
    print(f"LightGBM OOF AUC: {lgb_oof_auc:.6f}")
    print(f"XGBoost OOF AUC: {xgb_oof_auc:.6f}")
    
    # 전체 데이터에 대한 최적 가중치 계산
    final_weights = optimize_weights(
        [oof_preds_cat, oof_preds_lgb, oof_preds_xgb], 
        y
    )
    oof_ensemble = (
        final_weights[0] * oof_preds_cat + 
        final_weights[1] * oof_preds_lgb + 
        final_weights[2] * oof_preds_xgb
    )
    ensemble_oof_auc = roc_auc_score(y, oof_ensemble)
    print(f"Final Ensemble OOF AUC: {ensemble_oof_auc:.6f}")
    print(f"Final weights: CatBoost={final_weights[0]:.4f}, LightGBM={final_weights[1]:.4f}, XGBoost={final_weights[2]:.4f}")
    
    # 테스트 데이터 최종 예측
    final_prediction = (
        final_weights[0] * test_preds_cat + 
        final_weights[1] * test_preds_lgb + 
        final_weights[2] * test_preds_xgb
    )
    
    # 특성 중요도 시각화 (LightGBM 기준)
    if best_models['lgb'] is not None:
        feature_importance = pd.DataFrame({
            'feature': X.columns,
            'importance': best_models['lgb'].feature_importances_
        }).sort_values('importance', ascending=False)
        
        plt.figure(figsize=(12, 8))
        sns.barplot(x='importance', y='feature', data=feature_importance.head(20))
        plt.title('LightGBM Feature Importance')
        plt.tight_layout()
        plt.savefig('feature_importance.png')
        print("Feature importance plot saved.")
    
    # 제출 파일 생성
    sample_submission['probability'] = final_prediction
    submission_path = "cat_lgb_xgb_ensemble.csv"
    sample_submission.to_csv(submission_path, index=False)
    print(f"\nSubmission saved: {submission_path}")
    print(f"Final Ensemble OOF AUC: {ensemble_oof_auc:.6f}")

# 메인 실행
if __name__ == "__main__":
    train_and_predict()

Loading data...


  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[feat1] * X[feat2]
  X_test[feat_name] = X_test[feat1] * X_test[feat2]
  X[feat_name] = X[fe


==== Fold 1/5 ====
Fitting 5 folds for each of 50 candidates, totalling 250 fits



    E.g. tree_method = "hist", device = "cuda"



0:	total: 485ms	remaining: 16m 9s
100:	total: 34.1s	remaining: 10m 41s
200:	total: 1m 9s	remaining: 10m 20s
300:	total: 1m 44s	remaining: 9m 49s
400:	total: 2m 22s	remaining: 9m 29s
500:	total: 2m 59s	remaining: 8m 57s
600:	total: 3m 36s	remaining: 8m 24s
700:	total: 4m 16s	remaining: 7m 54s
800:	total: 4m 54s	remaining: 7m 21s
900:	total: 5m 35s	remaining: 6m 49s
1000:	total: 6m 15s	remaining: 6m 15s
1100:	total: 6m 55s	remaining: 5m 39s
1200:	total: 7m 35s	remaining: 5m 2s
1300:	total: 8m 14s	remaining: 4m 25s
1400:	total: 8m 53s	remaining: 3m 47s
1500:	total: 9m 31s	remaining: 3m 9s
1600:	total: 10m 5s	remaining: 2m 30s
1700:	total: 10m 43s	remaining: 1m 53s
1800:	total: 11m 19s	remaining: 1m 15s
1900:	total: 11m 56s	remaining: 37.3s
1999:	total: 12m 34s	remaining: 0us



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



0:	total: 312ms	remaining: 10m 24s
100:	total: 38.5s	remaining: 12m 3s
200:	total: 1m 20s	remaining: 12m
300:	total: 2m 3s	remaining: 11m 35s
400:	total: 2m 45s	remaining: 11m 1s
500:	total: 3m 29s	remaining: 10m 28s
600:	total: 4m 17s	remaining: 9m 58s
700:	total: 5m 3s	remaining: 9m 23s
800:	total: 5m 53s	remaining: 8m 49s
900:	total: 6m 44s	remaining: 8m 12s
1000:	total: 7m 31s	remaining: 7m 30s
1100:	total: 8m 18s	remaining: 6m 47s
1200:	total: 9m 5s	remaining: 6m 2s
1300:	total: 9m 49s	remaining: 5m 16s
1400:	total: 10m 35s	remaining: 4m 31s
1500:	total: 11m 24s	remaining: 3m 47s
1600:	total: 12m 10s	remaining: 3m 2s
1700:	total: 12m 59s	remaining: 2m 17s
1800:	total: 13m 47s	remaining: 1m 31s
1900:	total: 14m 34s	remaining: 45.6s
1999:	total: 15m 22s	remaining: 0us
0:	total: 387ms	remaining: 12m 53s
100:	total: 39.9s	remaining: 12m 30s
200:	total: 1m 20s	remaining: 12m 4s
300:	total: 2m 4s	remaining: 11m 42s
400:	total: 2m 48s	remaining: 11m 13s
500:	total: 3m 35s	remaining: 10m 


    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=10, cat__iterations=2000, cat__l2_leaf_reg=7, cat__learning_rate=0.005, cat__random_strength=0.5, final_estimator__lr__C=10.0, lgbm__learning_rate=0.02, lgbm__max_depth=12, lgbm__min_child_samples=10, lgbm__n_estimators=1500, lgbm__num_leaves=63, lgbm__reg_alpha=1, lgbm__reg_lambda=10, xgb__learning_rate=0.01, xgb__max_depth=10, xgb__min_child_weight=0.1, xgb__n_estimators=2500; total time=94.7min



    E.g. tree_method = "hist", device = "cuda"



0:	total: 439ms	remaining: 14m 37s
100:	total: 41s	remaining: 12m 50s
200:	total: 1m 22s	remaining: 12m 17s
300:	total: 2m 6s	remaining: 11m 54s
400:	total: 2m 53s	remaining: 11m 32s
500:	total: 3m 41s	remaining: 11m 1s
600:	total: 4m 30s	remaining: 10m 30s
700:	total: 5m 23s	remaining: 9m 58s
800:	total: 6m 13s	remaining: 9m 19s
900:	total: 7m 5s	remaining: 8m 39s
1000:	total: 7m 58s	remaining: 7m 57s
1100:	total: 8m 50s	remaining: 7m 13s
1200:	total: 9m 42s	remaining: 6m 27s
1300:	total: 10m 34s	remaining: 5m 40s
1400:	total: 11m 24s	remaining: 4m 52s
1500:	total: 12m 16s	remaining: 4m 4s
1600:	total: 13m 5s	remaining: 3m 15s
1700:	total: 13m 54s	remaining: 2m 26s
1800:	total: 14m 43s	remaining: 1m 37s
1900:	total: 15m 35s	remaining: 48.7s
1999:	total: 16m 26s	remaining: 0us



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



0:	total: 447ms	remaining: 14m 54s
100:	total: 47.3s	remaining: 14m 48s
200:	total: 1m 34s	remaining: 14m 2s
300:	total: 2m 23s	remaining: 13m 27s
400:	total: 3m 16s	remaining: 13m 3s
500:	total: 4m 9s	remaining: 12m 27s
600:	total: 5m 5s	remaining: 11m 51s
700:	total: 6m 4s	remaining: 11m 15s
800:	total: 7m 2s	remaining: 10m 32s
900:	total: 8m 1s	remaining: 9m 46s
1000:	total: 9m	remaining: 8m 59s
1100:	total: 10m	remaining: 8m 10s
1200:	total: 10m 59s	remaining: 7m 18s
1300:	total: 11m 55s	remaining: 6m 24s
1400:	total: 12m 50s	remaining: 5m 29s
1500:	total: 13m 48s	remaining: 4m 35s
1600:	total: 14m 48s	remaining: 3m 41s
1700:	total: 15m 50s	remaining: 2m 47s
1800:	total: 16m 49s	remaining: 1m 51s
1900:	total: 17m 48s	remaining: 55.7s
1999:	total: 18m 46s	remaining: 0us
0:	total: 490ms	remaining: 16m 19s
100:	total: 49s	remaining: 15m 21s
200:	total: 1m 41s	remaining: 15m 5s
300:	total: 2m 35s	remaining: 14m 35s
400:	total: 3m 31s	remaining: 14m 2s
500:	total: 4m 29s	remaining: 13m 


    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=10, cat__iterations=2000, cat__l2_leaf_reg=7, cat__learning_rate=0.005, cat__random_strength=0.5, final_estimator__lr__C=10.0, lgbm__learning_rate=0.02, lgbm__max_depth=12, lgbm__min_child_samples=10, lgbm__n_estimators=1500, lgbm__num_leaves=63, lgbm__reg_alpha=1, lgbm__reg_lambda=10, xgb__learning_rate=0.01, xgb__max_depth=10, xgb__min_child_weight=0.1, xgb__n_estimators=2500; total time=119.1min



    E.g. tree_method = "hist", device = "cuda"



0:	total: 411ms	remaining: 13m 41s
100:	total: 41.9s	remaining: 13m 8s
200:	total: 1m 24s	remaining: 12m 35s
300:	total: 2m 8s	remaining: 12m 3s
400:	total: 2m 57s	remaining: 11m 45s
500:	total: 3m 46s	remaining: 11m 16s
600:	total: 4m 36s	remaining: 10m 42s
700:	total: 5m 27s	remaining: 10m 7s
800:	total: 6m 20s	remaining: 9m 29s
900:	total: 7m 15s	remaining: 8m 50s
1000:	total: 8m 7s	remaining: 8m 6s
1100:	total: 8m 58s	remaining: 7m 20s
1200:	total: 9m 51s	remaining: 6m 33s
1300:	total: 10m 43s	remaining: 5m 45s
1400:	total: 11m 34s	remaining: 4m 57s
1500:	total: 12m 27s	remaining: 4m 8s
1600:	total: 13m 18s	remaining: 3m 18s
1700:	total: 14m 9s	remaining: 2m 29s
1800:	total: 14m 59s	remaining: 1m 39s
1900:	total: 15m 50s	remaining: 49.5s
1999:	total: 16m 43s	remaining: 0us



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



0:	total: 502ms	remaining: 16m 44s
100:	total: 46.6s	remaining: 14m 35s
200:	total: 1m 36s	remaining: 14m 26s
300:	total: 2m 27s	remaining: 13m 51s
400:	total: 3m 21s	remaining: 13m 24s
500:	total: 4m 20s	remaining: 12m 59s
600:	total: 5m 22s	remaining: 12m 31s
700:	total: 6m 23s	remaining: 11m 50s
800:	total: 7m 24s	remaining: 11m 5s
900:	total: 8m 27s	remaining: 10m 19s
1000:	total: 9m 31s	remaining: 9m 30s
1100:	total: 10m 34s	remaining: 8m 38s
1200:	total: 11m 38s	remaining: 7m 44s
1300:	total: 12m 38s	remaining: 6m 47s
1400:	total: 13m 40s	remaining: 5m 50s
1500:	total: 14m 40s	remaining: 4m 52s
1600:	total: 15m 39s	remaining: 3m 54s
1700:	total: 16m 42s	remaining: 2m 56s
1800:	total: 17m 44s	remaining: 1m 57s
1900:	total: 18m 47s	remaining: 58.7s
1999:	total: 19m 53s	remaining: 0us
0:	total: 538ms	remaining: 17m 55s
100:	total: 53.4s	remaining: 16m 44s
200:	total: 1m 48s	remaining: 16m 9s
300:	total: 2m 45s	remaining: 15m 33s
400:	total: 3m 43s	remaining: 14m 51s
500:	total: 4m 4


    E.g. tree_method = "hist", device = "cuda"



[CV] END cat__depth=10, cat__iterations=2000, cat__l2_leaf_reg=7, cat__learning_rate=0.005, cat__random_strength=0.5, final_estimator__lr__C=10.0, lgbm__learning_rate=0.02, lgbm__max_depth=12, lgbm__min_child_samples=10, lgbm__n_estimators=1500, lgbm__num_leaves=63, lgbm__reg_alpha=1, lgbm__reg_lambda=10, xgb__learning_rate=0.01, xgb__max_depth=10, xgb__min_child_weight=0.1, xgb__n_estimators=2500; total time=336.0min



    E.g. tree_method = "hist", device = "cuda"



0:	total: 431ms	remaining: 14m 21s
100:	total: 45.7s	remaining: 14m 19s
200:	total: 1m 34s	remaining: 14m 7s
300:	total: 2m 24s	remaining: 13m 34s
400:	total: 3m 13s	remaining: 12m 52s
500:	total: 4m 3s	remaining: 12m 8s
600:	total: 4m 56s	remaining: 11m 30s
700:	total: 5m 49s	remaining: 10m 47s
800:	total: 6m 44s	remaining: 10m 4s
900:	total: 7m 38s	remaining: 9m 19s
1000:	total: 8m 34s	remaining: 8m 33s
1100:	total: 9m 27s	remaining: 7m 43s
1200:	total: 10m 22s	remaining: 6m 54s
1300:	total: 11m 14s	remaining: 6m 2s
1400:	total: 12m 4s	remaining: 5m 9s
1500:	total: 12m 58s	remaining: 4m 18s
1600:	total: 13m 52s	remaining: 3m 27s
1700:	total: 14m 44s	remaining: 2m 35s
1800:	total: 15m 36s	remaining: 1m 43s
1900:	total: 16m 29s	remaining: 51.5s
1999:	total: 17m 20s	remaining: 0us



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



0:	total: 548ms	remaining: 18m 15s
100:	total: 49.2s	remaining: 15m 25s
200:	total: 1m 38s	remaining: 14m 45s
300:	total: 2m 32s	remaining: 14m 20s
400:	total: 3m 31s	remaining: 14m 4s
500:	total: 4m 30s	remaining: 13m 30s
600:	total: 5m 31s	remaining: 12m 51s
700:	total: 6m 34s	remaining: 12m 10s
800:	total: 7m 38s	remaining: 11m 25s
900:	total: 8m 41s	remaining: 10m 36s
1000:	total: 9m 43s	remaining: 9m 42s
1100:	total: 10m 45s	remaining: 8m 47s
1200:	total: 11m 46s	remaining: 7m 50s
1300:	total: 12m 48s	remaining: 6m 52s
1400:	total: 13m 51s	remaining: 5m 55s
1500:	total: 14m 52s	remaining: 4m 56s
1600:	total: 15m 54s	remaining: 3m 57s
1700:	total: 17m	remaining: 2m 59s
1800:	total: 18m 2s	remaining: 1m 59s
1900:	total: 19m 3s	remaining: 59.5s
1999:	total: 20m 5s	remaining: 0us
0:	total: 512ms	remaining: 17m 4s
100:	total: 50.3s	remaining: 15m 45s
200:	total: 1m 42s	remaining: 15m 17s
300:	total: 2m 37s	remaining: 14m 51s
400:	total: 3m 35s	remaining: 14m 20s
500:	total: 4m 37s	rema