## 난임 환자 대상 임신 성공 여부 예측

### LGAimers 6th 온라인 해커톤

Import

In [1]:
import pandas as pd
import optuna
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import lightgbm as lgb

### Data Load

In [2]:
# 데이터 로드
Total_train = pd.read_csv('../data/Total_train_dataset_32.csv')
Total_test = pd.read_csv('../data/Total_test_dataset_32.csv')

In [3]:
# ID 열을 제외한 특성과 타겟 변수 분리
Total_X = Total_train.drop(['임신_성공_여부', 'ID'], axis=1)
Total_y = Total_train['임신_성공_여부']

### 인코딩 

In [4]:
Total_categorical_columns = [
    "시술_당시_나이",
    "배란_유도_유형",
    "난자_출처",
    "정자_출처",
    "난자_기증자_나이",
    "정자_기증자_나이"
]

In [5]:
# 모든 범주형 변수를 문자열로 변환
Total_X[Total_categorical_columns] = Total_X[Total_categorical_columns].astype(str)
Total_test[Total_categorical_columns] = Total_test[Total_categorical_columns].astype(str)

# OrdinalEncoder를 사용하여 범주형 변수 인코딩
Total_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)

Total_X[Total_categorical_columns] = Total_encoder.fit_transform(Total_X[Total_categorical_columns])
Total_test[Total_categorical_columns] = Total_encoder.transform(Total_test[Total_categorical_columns])

## Modeling

In [6]:
# 데이터 분할
Total_X_train, Total_X_test, Total_y_train, Total_y_test = train_test_split(Total_X, Total_y, test_size=0.2, random_state=42)

### Total 데이터

optuna

In [None]:
# 목적 함수 정의
def objective(trial):
    param = {
        'iterations': trial.suggest_int('iterations', 800, 5000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.2),
        'depth': trial.suggest_int('depth', 4, 13),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 5),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10, log=True),
        'random_strength': trial.suggest_float('random_strength', 0, 10),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.1, 10),
        'border_count': trial.suggest_int('border_count', 128, 300),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1, 10),
        'grow_policy': trial.suggest_categorical('grow_policy', ['SymmetricTree', 'Depthwise', 'Lossguide']),

        'random_state': 42,
        'n_jobs': -1,
        'metric': 'auc',
        'objective': 'binary',
        'logging_level': 'Silent',
        'boosting_type': 'Plain',
        'verbose': -1
    }

    model = lgb.LGBMClassifier(**param)
    model.fit(Total_X_train, Total_y_train)
    
    y_pred_proba = model.predict_proba(Total_X_test)[:, 1]
    
    auc = roc_auc_score(Total_y_test, y_pred_proba)
    return auc

# Optuna 스터디 생성 및 최적화 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)

# 최적의 하이퍼파라미터 출력
print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-02-16 13:47:54,423] A new study created in memory with name: no-name-af7f63da-6b14-4a7d-bf59-2f94dfb64e08
[W 2025-02-16 13:47:54,618] Trial 0 failed with parameters: {'iterations': 4162, 'learning_rate': 0.06816080591114411, 'depth': 7, 'min_data_in_leaf': 1, 'l2_leaf_reg': 3.8940613181237187, 'random_strength': 3.4378172408592755, 'bagging_temperature': 1.375186932064219, 'border_count': 165, 'scale_pos_weight': 2.0641045308465267, 'grow_policy': 'Lossguide'} because of the following error: LightGBMError('Unknown boosting type plain').
Traceback (most recent call last):
  File "c:\Users\juneh\AppData\Local\Programs\Python\Python37\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\juneh\AppData\Local\Temp\ipykernel_7560\3209558354.py", line 25, in objective
    model.fit(Total_X_train, Total_y_train)
  File "c:\Users\juneh\AppData\Local\Programs\Python\Python37\lib\site-packages\lightgbm\sklearn.py", line 12

LightGBMError: Unknown boosting type plain

.