In [1]:
# Basic Libraries
import pandas as pd
import numpy as np
from scipy import stats

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.utils import resample

# For data manipulation
import pandas as pd
import numpy as np

# For splitting the data
from sklearn.model_selection import train_test_split

# For resampling (handling class imbalance)
from imblearn.over_sampling import SMOTE, ADASYN, BorderlineSMOTE
from imblearn.combine import SMOTEENN, SMOTETomek

# For scaling numeric features
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

from sklearn.model_selection import StratifiedKFold, GridSearchCV
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier

# # Configurations (optional)
pd.options.display.max_columns = None

import warnings
warnings.filterwarnings('ignore')



In [24]:
def preprocess_data(train, test):
    """
    Preprocesses train and test datasets with streamlined transformations and appropriate resampling.

    Parameters:
        train (DataFrame): Training dataset.
        test (DataFrame): Testing dataset.

    Returns:
        train (DataFrame): Preprocessed training dataset.
        test (DataFrame): Preprocessed testing dataset.
    """
    # Step 1: Drop 'ID' and 'Loan_ID' as they are unique identifiers
    train.drop(['ID', 'Loan_ID'], axis=1, inplace=True)
    test.drop(['ID', 'Loan_ID'], axis=1, inplace=True)

    # Step 2: Replace '3+' with 3 in 'Dependents' and convert to float
    train['Dependents'] = train['Dependents'].replace('3+', 3).astype(float)
    test['Dependents'] = test['Dependents'].replace('3+', 3).astype(float)

    # Step 3: Handle missing values (if any)
    # Although your dataset does not have missing values, this is good practice
    numeric_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Total_Income']
    for col in numeric_cols:
        train[col].fillna(train[col].median(), inplace=True)
        test[col].fillna(test[col].median(), inplace=True)

    categorical_cols = ['Credit_History', 'Self_Employed', 'Education', 'Gender', 'Married']
    for col in categorical_cols:
        train[col].fillna(train[col].mode()[0], inplace=True)
        test[col].fillna(test[col].mode()[0], inplace=True)

    # Ensure no NaN values remain in the dataset
    train.fillna(0, inplace=True)
    test.fillna(0, inplace=True)

    # Step 4: One-Hot Encode 'Property_Area'
    train = pd.get_dummies(train, columns=['Property_Area'], drop_first=True)
    test = pd.get_dummies(test, columns=['Property_Area'], drop_first=True)

    # Step 5: Add feature interactions with descriptive headers
    for df in [train, test]:
        # Basic interactions
        df['Loan_to_Income_Ratio'] = df['LoanAmount'] / (df['ApplicantIncome'] + df['CoapplicantIncome'] + 1e-6)
        df['Income_per_Dependent'] = df['Total_Income'] / (df['Dependents'] + 1)
        df['LoanAmount_per_Term'] = df['LoanAmount'] / (df['Loan_Amount_Term'] + 1e-6)
        df['EMI'] = df['LoanAmount'] / (df['Loan_Amount_Term'] + 1e-6)
        df['EMI_to_Income_Ratio'] = df['EMI'] / df['Total_Income']
        df['Debt_to_Income_Ratio'] = df['LoanAmount'] / (df['Total_Income'] + 1e-6)
        df['all_income'] = df['ApplicantIncome'] + df['CoapplicantIncome'] + df['Total_Income']
        df['loan_to_all_income_ratio'] = df['LoanAmount'] / df['all_income']
        df['all_Income_per_Dependent'] = df['all_income'] / (df['Dependents'] + 1)

        # Additional interactions
        df['Dependents_Credit_History_Interaction'] = df['Dependents'] * df['Credit_History']
        df['Income_LoanAmount_Interaction'] = df['Total_Income'] * df['LoanAmount']
        df['all_Income_LoanAmount_Interaction'] = df['all_income'] * df['LoanAmount']
        df['Credit_History_LoanAmount'] = df['Credit_History'] * df['LoanAmount']
        df['Credit_History_Income_Interaction'] = df['Credit_History'] * df['Total_Income']
        df['Credit_History_all_Income_Interaction'] = df['Credit_History'] * df['all_income']
        df['loan_to_income_ratio'] = df['LoanAmount'] / (df['Total_Income'] + 1e-6)
        df['income_per_year_emp'] = df['ApplicantIncome'] / (df['Loan_Amount_Term'] + 1e-6)
        df['coapplicant_income_per_year_emp'] = df['CoapplicantIncome'] / (df['Loan_Amount_Term'] + 1e-6)
        df['all_income_per_year_emp'] = df['all_income'] / (df['Loan_Amount_Term'] + 1e-6)
        df['Coapplicant_Income_Ratio'] = df['CoapplicantIncome'] / df['Total_Income']
        df['Applicant_Income_Ratio'] = df['ApplicantIncome'] / df['Total_Income']
        df['TotalIncome_LoanAmount_Ratio'] = df['Total_Income'] / (df['LoanAmount'] + 1e-6)
        df['all_Income_LoanAmount_Ratio'] = df['all_income'] / (df['LoanAmount'] + 1e-6)
        df['CreditHistory_LoanTerm_Interaction'] = df['Credit_History'] * df['Loan_Amount_Term']
        df['Education_LoanAmount_Interaction'] = df['Education'] * df['LoanAmount']
        df['SelfEmployed_Income_Interaction'] = df['Self_Employed'] * df['Total_Income']

        # Squared and log-transformed features
        df['ApplicantIncome_Squared'] = df['ApplicantIncome'] ** 2
        df['all_income_Squared'] = df['all_income'] ** 2
        df['LoanAmount_Squared'] = df['LoanAmount'] ** 2
        df['Log_LoanAmount_per_Term'] = np.log1p(df['LoanAmount_per_Term'])
        df['Log_ApplicantIncome'] = np.log1p(df['ApplicantIncome'])
        df['Log_CoapplicantIncome'] = np.log1p(df['CoapplicantIncome'])
        df['Log_LoanAmount'] = np.log1p(df['LoanAmount'])
        df['Log_Total_Income'] = np.log1p(df['Total_Income'])

        # Dependents and loan-based interactions
        df['Dependents_TotalIncome_Interaction'] = df['Dependents'] * df['Total_Income']
        df['Dependents_LoanAmount_Interaction'] = df['Dependents'] * df['LoanAmount']
        df['LoanAmount_to_Term_Ratio'] = df['LoanAmount'] / (df['Loan_Amount_Term'] + 1e-6)

        # Feature interactions with Gender and Married
        df['Gender_ApplicantIncome'] = df['Gender'] * df['ApplicantIncome']
        df['Married_CoapplicantIncome'] = df['Married'] * df['CoapplicantIncome']
        df['Married_Total_Income'] = df['Married'] * df['Total_Income']
        df['Gender_Married_Interaction'] = df['Gender'] * df['Married']

    return train, test

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from imblearn.over_sampling import SMOTE, ADASYN, BorderlineSMOTE
from imblearn.combine import SMOTEENN, SMOTETomek

# Helper function for oversampling
def resample_minority_class(X, y):
    # Concatenate X and y for resampling
    data = pd.concat([X, y], axis=1)
    minority_class = data[y.name].value_counts().idxmin()  # Find the minority class
    minority_data = data[data[y.name] == minority_class]
    majority_data = data[data[y.name] != minority_class]

    # Oversample the minority class
    minority_upsampled = resample(minority_data,
                                  replace=True,  # Sample with replacement
                                  n_samples=len(majority_data),  # Match majority class size
                                  random_state=42)

    # Combine majority data with upsampled minority data
    upsampled_data = pd.concat([majority_data, minority_upsampled])

    return upsampled_data.drop(columns=[y.name]), upsampled_data[y.name]

# Helper function for undersampling
def resample_majority_class(X, y):
    # Concatenate X and y for resampling
    data = pd.concat([X, y], axis=1)
    minority_class = data[y.name].value_counts().idxmin()  # Find the minority class
    minority_data = data[data[y.name] == minority_class]
    majority_data = data[data[y.name] != minority_class]

    # Undersample the majority class
    majority_downsampled = resample(majority_data,
                                    replace=False,  # Sample without replacement
                                    n_samples=len(minority_data),  # Match minority class size
                                    random_state=42)

    # Combine minority data with downsampled majority data
    downsampled_data = pd.concat([minority_data, majority_downsampled])

    return downsampled_data.drop(columns=[y.name]), downsampled_data[y.name]

# Main function for resampling and scaling
def resample_split(train, test, resampling_method='SMOTE', scaling_option=None):
    """
    Splits the data into training and testing sets, handles class imbalance, and scales features appropriately.

    Parameters:
        train (DataFrame): Preprocessed training dataset.
        test (DataFrame): Preprocessed testing dataset.
        resampling_method (str): Resampling technique to handle class imbalance.
                                 Options: 'SMOTE', 'ADASYN', 'Borderline-SMOTE', 'SMOTEENN', 'SMOTETomek', 'oversample', 'undersample'.
        scaling_option (str): Scaling technique for continuous features.
                              Options: 'standard', 'minmax', 'robust', or None.

    Returns:
        X_train_res (array): Resampled training features.
        y_train_res (array): Resampled training labels.
        X_test (array): Scaled testing features.
        y_test (array): Original testing labels.
        test_scaled (array): Scaled test dataset features.
    """
    # Step 1: Create X and y
    X = train.drop('Loan_Status', axis=1)
    y = train['Loan_Status']
    test = test.copy()

    # Step 2: Align columns between train and test
    X, test = X.align(test, join='left', axis=1, fill_value=0)

    # Step 3: Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1, stratify=y)

    # Step 4: Handle class imbalance
    if resampling_method == 'SMOTE':
        resampler = SMOTE(random_state=42)
        X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)
    elif resampling_method == 'ADASYN':
        resampler = ADASYN(random_state=42)
        X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)
    elif resampling_method == 'Borderline-SMOTE':
        resampler = BorderlineSMOTE(random_state=42)
        X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)
    elif resampling_method == 'SMOTEENN':
        resampler = SMOTEENN(random_state=42)
        X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)
    elif resampling_method == 'SMOTETomek':
        resampler = SMOTETomek(random_state=42)
        X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)
    elif resampling_method == 'oversample':
        # Oversample the minority class
        X_train_res, y_train_res = resample_minority_class(X_train, y_train)
    elif resampling_method == 'undersample':
        # Undersample the majority class
        X_train_res, y_train_res = resample_majority_class(X_train, y_train)
    else:
        raise ValueError(f"Invalid resampling method: {resampling_method}")

    # Step 5: Scale numeric features (optional, based on model requirements)
    if scaling_option:
        # List of binary columns (do not scale these)
        binary_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Credit_History']
        # Include 'Property_Area' dummies
        binary_cols += [col for col in X_train_res.columns if 'Property_Area_' in col]

        # Columns to scale
        cols_to_scale = [col for col in X_train_res.columns if col not in binary_cols]

        # Apply scaler
        scaler = None
        if scaling_option == 'standard':
            scaler = StandardScaler()
        elif scaling_option == 'minmax':
            scaler = MinMaxScaler()
        elif scaling_option == 'robust':
            scaler = RobustScaler()
        else:
            raise ValueError(f"Invalid scaling option: {scaling_option}")

        if scaler:
            # Ensure that X_train_res, X_test, test are DataFrames
            if not isinstance(X_train_res, pd.DataFrame):
                X_train_res = pd.DataFrame(X_train_res, columns=X.columns)
            if not isinstance(X_test, pd.DataFrame):
                X_test = pd.DataFrame(X_test, columns=X.columns)
            if not isinstance(test, pd.DataFrame):
                test = pd.DataFrame(test, columns=X.columns)

            # Apply scaling only to continuous features
            X_train_res[cols_to_scale] = scaler.fit_transform(X_train_res[cols_to_scale])
            X_test[cols_to_scale] = scaler.transform(X_test[cols_to_scale])
            test[cols_to_scale] = scaler.transform(test[cols_to_scale])

    # Convert back to arrays for modeling
    X_train_res = X_train_res.values
    X_test = X_test.values
    test_scaled = test.values

    return X_train_res, y_train_res, X_test, y_test, test_scaled


In [25]:
train = pd.read_csv('data/Train.csv')
test = pd.read_csv('data/Test.csv')

# Preprocess the data
# train, test = preprocess_data(train, test)


In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
import pandas as pd
import numpy as np

def preprocess_data_with_feature_selection(train, test, target_col='Loan_Status', method='model', k=20):
    """
    Preprocesses train and test datasets with streamlined transformations and feature selection.

    Parameters:
        train (DataFrame): Training dataset.
        test (DataFrame): Testing dataset.
        target_col (str): Name of the target column.
        method (str): Feature selection method - 'model', 'filter', or 'embedded'.
        k (int): Number of top features to select.

    Returns:
        train_selected (DataFrame): Preprocessed and selected training dataset.
        test_selected (DataFrame): Preprocessed and selected testing dataset.
    """
    # Separate target variable
    y_train = train[target_col]
    train = train.drop(target_col, axis=1)

    # Preprocess train and test (original preprocessing steps)
    train, test = preprocess_data(train, test)

    # Feature selection
    if method == 'filter':
        # Univariate feature selection using ANOVA F-test or mutual information
        selector = SelectKBest(score_func=f_classif, k=k)
        train_selected = selector.fit_transform(train, y_train)
        test_selected = selector.transform(test)
        
        # Get selected feature names
        selected_features = train.columns[selector.get_support()]
        train_selected = pd.DataFrame(train_selected, columns=selected_features)
        test_selected = pd.DataFrame(test_selected, columns=selected_features)

    elif method == 'model':
        # Use a RandomForest or LightGBM model to rank features
        model = RandomForestClassifier(random_state=42)
        model.fit(train, y_train)
        
        # Get feature importance
        feature_importances = pd.DataFrame({
            'feature': train.columns,
            'importance': model.feature_importances_
        }).sort_values(by='importance', ascending=False)

        # Select top k features
        selected_features = feature_importances.head(k)['feature']
        train_selected = train[selected_features]
        test_selected = test[selected_features]

    elif method == 'embedded':
        # Use L1-regularized model (e.g., Lasso or Logistic Regression)
        from sklearn.linear_model import LogisticRegression
        model = LogisticRegression(penalty='l1', solver='liblinear', random_state=42)
        model.fit(train, y_train)
        
        # Get non-zero coefficients
        selected_features = train.columns[model.coef_[0] != 0]
        train_selected = train[selected_features]
        test_selected = test[selected_features]

    else:
        raise ValueError(f"Invalid feature selection method: {method}")

    # Add target back to train
    train_selected[target_col] = y_train

    return train_selected, test_selected


In [27]:
# Example Usage
train_selected, test_selected = preprocess_data_with_feature_selection(
    train, test, target_col='Loan_Status', method='model', k=30
)

print("Selected Features (Train):", train_selected.columns)
print("Selected Features (Test):", test_selected.columns)


Selected Features (Train): Index(['all_Income_per_Dependent', 'all_income_per_year_emp',
       'Loan_to_Income_Ratio', 'income_per_year_emp',
       'all_Income_LoanAmount_Interaction', 'all_Income_LoanAmount_Ratio',
       'loan_to_all_income_ratio', 'all_income',
       'Credit_History_all_Income_Interaction', 'Applicant_Income_Ratio',
       'all_income_Squared', 'Gender_ApplicantIncome', 'ApplicantIncome',
       'ApplicantIncome_Squared', 'Loan_Amount_Term', 'Log_ApplicantIncome',
       'Log_LoanAmount_per_Term', 'LoanAmount_to_Term_Ratio',
       'LoanAmount_per_Term', 'EMI', 'CreditHistory_LoanTerm_Interaction',
       'EMI_to_Income_Ratio', 'Coapplicant_Income_Ratio',
       'coapplicant_income_per_year_emp', 'Log_CoapplicantIncome',
       'CoapplicantIncome', 'Income_LoanAmount_Interaction',
       'Married_CoapplicantIncome', 'loan_to_income_ratio',
       'Debt_to_Income_Ratio', 'Loan_Status'],
      dtype='object')
Selected Features (Test): Index(['all_Income_per_Depende

In [28]:
train_selected.head()

Unnamed: 0,all_Income_per_Dependent,all_income_per_year_emp,Loan_to_Income_Ratio,income_per_year_emp,all_Income_LoanAmount_Interaction,all_Income_LoanAmount_Ratio,loan_to_all_income_ratio,all_income,Credit_History_all_Income_Interaction,Applicant_Income_Ratio,all_income_Squared,Gender_ApplicantIncome,ApplicantIncome,ApplicantIncome_Squared,Loan_Amount_Term,Log_ApplicantIncome,Log_LoanAmount_per_Term,LoanAmount_to_Term_Ratio,LoanAmount_per_Term,EMI,CreditHistory_LoanTerm_Interaction,EMI_to_Income_Ratio,Coapplicant_Income_Ratio,coapplicant_income_per_year_emp,Log_CoapplicantIncome,CoapplicantIncome,Income_LoanAmount_Interaction,Married_CoapplicantIncome,loan_to_income_ratio,Debt_to_Income_Ratio,Loan_Status
0,14328.0,39.471074,0.002041,22.942149,243576.0,842.82348,0.001186,14328.0,14328.0,1.388,205291600.0,8328,8328,69355584,363,9.027499,0.045768,0.046832,0.046832,0.046832,363,8e-06,0.0,0.0,0.0,0.0,102000,0.0,0.002833,0.002833,1
1,10007.458782,27.047186,0.046913,0.405405,1881402.0,53.231163,0.018786,10007.458782,10007.458782,0.025,100149200.0,150,150,22500,370,5.01728,0.410856,0.508108,0.508108,0.508108,370,8.5e-05,0.64291,10.425564,8.258023,3857.458782,1128000,3857.458782,0.031333,0.031333,0
2,11303.472511,32.481243,0.003205,14.336207,192159.0,664.910109,0.001504,11303.472511,11303.472511,0.8315,127768500.0,0,4989,24890121,348,8.515191,0.047695,0.048851,0.048851,0.048851,348,8e-06,0.052412,0.903657,5.754072,314.472511,102000,0.0,0.002833,0.002833,0
3,3900.0,10.86351,1.546667,0.417827,904800.0,16.810345,0.059487,3900.0,3900.0,0.04,15210000.0,150,150,22500,359,5.01728,0.498494,0.64624,0.64624,0.64624,359,0.000172,0.0,0.0,0.0,0.0,870000,0.0,0.061867,0.061867,1
4,5904.5,31.744624,0.002109,21.663978,200753.0,694.647018,0.00144,11809.0,11809.0,2.149067,139452500.0,8059,8059,64947481,372,8.994669,0.044685,0.045699,0.045699,0.045699,372,1.2e-05,0.0,0.0,0.0,0.0,63750,0.0,0.004533,0.004533,1


In [29]:
test_selected.head()

Unnamed: 0,all_Income_per_Dependent,all_income_per_year_emp,Loan_to_Income_Ratio,income_per_year_emp,all_Income_LoanAmount_Interaction,all_Income_LoanAmount_Ratio,loan_to_all_income_ratio,all_income,Credit_History_all_Income_Interaction,Applicant_Income_Ratio,all_income_Squared,Gender_ApplicantIncome,ApplicantIncome,ApplicantIncome_Squared,Loan_Amount_Term,Log_ApplicantIncome,Log_LoanAmount_per_Term,LoanAmount_to_Term_Ratio,LoanAmount_per_Term,EMI,CreditHistory_LoanTerm_Interaction,EMI_to_Income_Ratio,Coapplicant_Income_Ratio,coapplicant_income_per_year_emp,Log_CoapplicantIncome,CoapplicantIncome,Income_LoanAmount_Interaction,Married_CoapplicantIncome,loan_to_income_ratio,Debt_to_Income_Ratio
0,22761.075952,61.350609,0.011216,42.830189,4279082.0,121.069552,0.00826,22761.075952,22761.075952,2.648333,518066600.0,15890,15890,252492100,371,9.673508,0.409947,0.506739,0.506739,0.506739,371,8.4e-05,0.145179,2.347914,6.770877,871.075952,1128000,871.075952,0.031333,0.031333
1,13478.718887,36.135975,0.002273,17.646113,229138.2,792.86577,0.001261,13478.718887,0.0,1.097,181675900.0,6582,6582,43322724,373,8.792246,0.044568,0.045576,0.045576,0.045576,0,8e-06,0.149453,2.404072,6.799857,896.718887,102000,896.718887,0.002833,0.002833
2,14441.900354,38.718231,0.002014,21.096515,245512.3,849.5235,0.001177,14441.900354,14441.900354,1.3115,208568500.0,0,7869,61921161,373,8.970813,0.044568,0.045576,0.045576,0.045576,373,8e-06,0.095483,1.535926,6.352456,572.900354,102000,0.0,0.002833,0.002833
3,6150.0,17.621776,1.646667,0.429799,1519050.0,24.898785,0.040163,6150.0,6150.0,0.025,37822500.0,150,150,22500,349,5.01728,0.535169,0.707736,0.707736,0.707736,349,0.000118,0.0,0.0,0.0,0.0,1482000,0.0,0.041167,0.041167
4,12112.0,1009.333249,0.002033,696.833275,205904.0,712.470546,0.001404,12112.0,12112.0,2.229867,146700500.0,8362,8362,69923044,12,9.031572,0.882389,1.416667,1.416667,1.416667,12,0.000378,0.0,0.0,0.0,0.0,63750,0.0,0.004533,0.004533


In [36]:
# Resample, split, and scale the data
X_train_res, y_train_res, X_test, y_test, test_scaled = resample_split(
    train_selected,
    test_selected,
    resampling_method='SMOTE',
    scaling_option='minmax'
)

In [37]:

from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

def grid_search_lightgbm_with_smote(X_train, y_train, X_test, y_test):
    # Define a narrowed parameter grid
    param_grid = {
        'lgbm__learning_rate': [0.02, 0.03229, 0.05],
        'lgbm__num_leaves': [20, 24, 28],
        'lgbm__max_depth': [12, 15, 18],
        'lgbm__min_data_in_leaf': [20, 25, 30],
        'lgbm__feature_fraction': [0.6, 0.6236, 0.65],
        'lgbm__bagging_fraction': [0.95, 0.9597, 0.97],
        'lgbm__bagging_freq': [2, 3, 4]
    }

    # Create a pipeline with SMOTE and LGBMClassifier
    pipeline = Pipeline([
        ('smote', SMOTE(random_state=42)),
        ('lgbm', LGBMClassifier(
            objective='binary',
            n_estimators=3000,
            metric='binary_logloss',
            boosting_type='gbdt',
            random_state=42,
            verbose=-1
        ))
    ])

    # Stratified K-Fold
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Grid Search
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        scoring='roc_auc',
        cv=skf,
        n_jobs=-1,
        verbose=1
    )

    # Fit the model using resampled training data
    grid_search.fit(X_train, y_train)

    print("Best Parameters:", grid_search.best_params_)
    print("Best ROC-AUC (Cross-Validation):", grid_search.best_score_)

    # Use the best model to predict on the test set
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict_proba(X_test)[:, 1]

    from sklearn.metrics import roc_auc_score
    roc_auc = roc_auc_score(y_test, y_pred)
    print("Test ROC-AUC:", roc_auc)

    return best_model


In [38]:
from xgboost import XGBClassifier

def grid_search_xgboost_with_smote(X_train, y_train, X_test, y_test):
    # Define a narrowed parameter grid
    param_grid = {
        'xgb__learning_rate': [0.02, 0.05, 0.1],
        'xgb__max_depth': [3, 5, 7],
        'xgb__n_estimators': [100, 200, 300],
        'xgb__subsample': [0.8, 0.9],
        'xgb__colsample_bytree': [0.8, 0.9]
    }

    # Create a pipeline with SMOTE and XGBClassifier
    pipeline = Pipeline([
        ('smote', SMOTE(random_state=42)),
        ('xgb', XGBClassifier(
            objective='binary:logistic',
            random_state=42,
            use_label_encoder=False,
            eval_metric='logloss'
        ))
    ])

    # Stratified K-Fold
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Grid Search
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        scoring='roc_auc',
        cv=skf,
        n_jobs=-1,
        verbose=1
    )

    # Fit the model using resampled training data
    grid_search.fit(X_train, y_train)

    print("Best Parameters:", grid_search.best_params_)
    print("Best ROC-AUC (Cross-Validation):", grid_search.best_score_)

    # Use the best model to predict on the test set
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict_proba(X_test)[:, 1]

    from sklearn.metrics import roc_auc_score
    roc_auc = roc_auc_score(y_test, y_pred)
    print("Test ROC-AUC:", roc_auc)

    return best_model


In [None]:
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score

def objective_lightgbm(trial):
    # Suggest hyperparameters
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.5, 0.9),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
    }

    # Stratified K-Fold Cross-Validation
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []

    for train_idx, val_idx in skf.split(X_train_res, y_train_res):
        X_train_fold, X_val_fold = X_train_res[train_idx], X_train_res[val_idx]
        y_train_fold, y_val_fold = y_train_res[train_idx], y_train_res[val_idx]

        model = LGBMClassifier(**params, random_state=42)
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)], early_stopping_rounds=50, verbose=False)
        preds = model.predict_proba(X_val_fold)[:, 1]
        cv_scores.append(roc_auc_score(y_val_fold, preds))

    # Return the average ROC-AUC across folds
    return np.mean(cv_scores)

# Create Optuna study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective_lightgbm, n_trials=50)

# Best parameters and score
print("Best Parameters (LightGBM):", study.best_params)
print("Best AUC (LightGBM):", study.best_value)

# Train final model with best parameters
best_params = study.best_params
final_model_lgb = LGBMClassifier(**best_params, random_state=42)
final_model_lgb.fit(X_train_res, y_train_res)


In [41]:
import xgboost as xgb

def objective_xgboost(trial):
    # Suggest hyperparameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
    }

    # Stratified K-Fold Cross-Validation
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []

    for train_idx, val_idx in skf.split(X_train_res, y_train_res):
        X_train_fold, X_val_fold = X_train_res[train_idx], X_train_res[val_idx]
        y_train_fold, y_val_fold = y_train_res[train_idx], y_train_res[val_idx]

        model = xgb.XGBClassifier(**params, random_state=42, use_label_encoder=False)
        model.fit(X_train_fold, y_train_fold, eval_set=[(X_val_fold, y_val_fold)])
        preds = model.predict_proba(X_val_fold)[:, 1]
        cv_scores.append(roc_auc_score(y_val_fold, preds))

    # Return the average ROC-AUC across folds
    return np.mean(cv_scores)

# Create Optuna study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective_xgboost, n_trials=50)

# Best parameters and score
print("Best Parameters (XGBoost):", study.best_params)
print("Best AUC (XGBoost):", study.best_value)

# Train final model with best parameters
best_params = study.best_params
final_model_xgb = xgb.XGBClassifier(**best_params, random_state=42, use_label_encoder=False)
final_model_xgb.fit(X_train_res, y_train_res)


[I 2024-11-23 21:07:23,434] A new study created in memory with name: no-name-de6427c1-8950-46af-b6d8-312a62046deb


[0]	validation_0-auc:0.69165
[1]	validation_0-auc:0.71084
[2]	validation_0-auc:0.75995
[3]	validation_0-auc:0.77126
[4]	validation_0-auc:0.77228
[5]	validation_0-auc:0.79120
[6]	validation_0-auc:0.80173
[7]	validation_0-auc:0.79525
[8]	validation_0-auc:0.80402
[9]	validation_0-auc:0.80751
[10]	validation_0-auc:0.81150
[11]	validation_0-auc:0.81316
[12]	validation_0-auc:0.81306
[13]	validation_0-auc:0.81795
[14]	validation_0-auc:0.81937
[15]	validation_0-auc:0.82310
[16]	validation_0-auc:0.82083
[17]	validation_0-auc:0.82232
[18]	validation_0-auc:0.82582
[19]	validation_0-auc:0.82679
[20]	validation_0-auc:0.82852
[21]	validation_0-auc:0.82948
[22]	validation_0-auc:0.82941
[23]	validation_0-auc:0.82879
[24]	validation_0-auc:0.82991
[25]	validation_0-auc:0.83318
[26]	validation_0-auc:0.83495
[27]	validation_0-auc:0.83694
[28]	validation_0-auc:0.83699
[29]	validation_0-auc:0.83841
[30]	validation_0-auc:0.83807
[31]	validation_0-auc:0.83790
[32]	validation_0-auc:0.84037
[33]	validation_0-au

[I 2024-11-23 21:07:28,023] Trial 0 finished with value: 0.9050182811609677 and parameters: {'n_estimators': 537, 'learning_rate': 0.08685763987080015, 'max_depth': 4, 'subsample': 0.6836808538335686, 'colsample_bytree': 0.6690301285794469, 'min_child_weight': 8}. Best is trial 0 with value: 0.9050182811609677.


[0]	validation_0-auc:0.73806
[1]	validation_0-auc:0.75497
[2]	validation_0-auc:0.77547
[3]	validation_0-auc:0.78352
[4]	validation_0-auc:0.80101
[5]	validation_0-auc:0.80726
[6]	validation_0-auc:0.81203
[7]	validation_0-auc:0.81502
[8]	validation_0-auc:0.81985
[9]	validation_0-auc:0.82296
[10]	validation_0-auc:0.82383
[11]	validation_0-auc:0.83271
[12]	validation_0-auc:0.83426
[13]	validation_0-auc:0.83552
[14]	validation_0-auc:0.83268
[15]	validation_0-auc:0.83666
[16]	validation_0-auc:0.84061
[17]	validation_0-auc:0.84306
[18]	validation_0-auc:0.84643
[19]	validation_0-auc:0.84813
[20]	validation_0-auc:0.84907
[21]	validation_0-auc:0.84897
[22]	validation_0-auc:0.85083
[23]	validation_0-auc:0.85112
[24]	validation_0-auc:0.85109
[25]	validation_0-auc:0.85400
[26]	validation_0-auc:0.85539
[27]	validation_0-auc:0.85672
[28]	validation_0-auc:0.85792
[29]	validation_0-auc:0.85796
[30]	validation_0-auc:0.85847
[31]	validation_0-auc:0.85912
[32]	validation_0-auc:0.85933
[33]	validation_0-au

[I 2024-11-23 21:07:31,525] Trial 1 finished with value: 0.9035921361278998 and parameters: {'n_estimators': 245, 'learning_rate': 0.048783940584981984, 'max_depth': 9, 'subsample': 0.5317308857240053, 'colsample_bytree': 0.6104669784000554, 'min_child_weight': 8}. Best is trial 0 with value: 0.9050182811609677.


[0]	validation_0-auc:0.73326
[1]	validation_0-auc:0.74744
[2]	validation_0-auc:0.76969
[3]	validation_0-auc:0.79142
[4]	validation_0-auc:0.81425
[5]	validation_0-auc:0.82522
[6]	validation_0-auc:0.82858
[7]	validation_0-auc:0.83290
[8]	validation_0-auc:0.83395
[9]	validation_0-auc:0.83561
[10]	validation_0-auc:0.83895
[11]	validation_0-auc:0.84230
[12]	validation_0-auc:0.84442
[13]	validation_0-auc:0.84358
[14]	validation_0-auc:0.84233
[15]	validation_0-auc:0.84632
[16]	validation_0-auc:0.84793
[17]	validation_0-auc:0.84697
[18]	validation_0-auc:0.84965
[19]	validation_0-auc:0.85132
[20]	validation_0-auc:0.85527
[21]	validation_0-auc:0.85777
[22]	validation_0-auc:0.86000
[23]	validation_0-auc:0.85998
[24]	validation_0-auc:0.86032
[25]	validation_0-auc:0.86152
[26]	validation_0-auc:0.86284
[27]	validation_0-auc:0.86273
[28]	validation_0-auc:0.86256
[29]	validation_0-auc:0.86171
[30]	validation_0-auc:0.86261
[31]	validation_0-auc:0.86306
[32]	validation_0-auc:0.86386
[33]	validation_0-au

[I 2024-11-23 21:07:38,947] Trial 2 finished with value: 0.9116765801074527 and parameters: {'n_estimators': 384, 'learning_rate': 0.01963309836929149, 'max_depth': 12, 'subsample': 0.6412226540042782, 'colsample_bytree': 0.5065234248436881, 'min_child_weight': 5}. Best is trial 2 with value: 0.9116765801074527.


[0]	validation_0-auc:0.77410
[1]	validation_0-auc:0.80340
[2]	validation_0-auc:0.81406
[3]	validation_0-auc:0.81589
[4]	validation_0-auc:0.82245
[5]	validation_0-auc:0.82097
[6]	validation_0-auc:0.82515
[7]	validation_0-auc:0.82701
[8]	validation_0-auc:0.82879
[9]	validation_0-auc:0.83381
[10]	validation_0-auc:0.83880
[11]	validation_0-auc:0.83906
[12]	validation_0-auc:0.83826
[13]	validation_0-auc:0.84030
[14]	validation_0-auc:0.84149
[15]	validation_0-auc:0.84256
[16]	validation_0-auc:0.84218
[17]	validation_0-auc:0.84328
[18]	validation_0-auc:0.84258
[19]	validation_0-auc:0.84144
[20]	validation_0-auc:0.84052
[21]	validation_0-auc:0.84001
[22]	validation_0-auc:0.84327
[23]	validation_0-auc:0.84364
[24]	validation_0-auc:0.84364
[25]	validation_0-auc:0.84291
[26]	validation_0-auc:0.84297
[27]	validation_0-auc:0.84158
[28]	validation_0-auc:0.84184
[29]	validation_0-auc:0.84139
[30]	validation_0-auc:0.84071
[31]	validation_0-auc:0.84141
[32]	validation_0-auc:0.84146
[33]	validation_0-au

[I 2024-11-23 21:07:52,507] Trial 3 finished with value: 0.8719774242550897 and parameters: {'n_estimators': 715, 'learning_rate': 0.002384653131071203, 'max_depth': 10, 'subsample': 0.83189271194212, 'colsample_bytree': 0.7065519432666105, 'min_child_weight': 8}. Best is trial 2 with value: 0.9116765801074527.


[0]	validation_0-auc:0.69432
[1]	validation_0-auc:0.70920
[2]	validation_0-auc:0.73720
[3]	validation_0-auc:0.75625
[4]	validation_0-auc:0.77017
[5]	validation_0-auc:0.77940
[6]	validation_0-auc:0.78472
[7]	validation_0-auc:0.78673
[8]	validation_0-auc:0.79629
[9]	validation_0-auc:0.80161
[10]	validation_0-auc:0.79979
[11]	validation_0-auc:0.80628
[12]	validation_0-auc:0.80623
[13]	validation_0-auc:0.80711
[14]	validation_0-auc:0.80445
[15]	validation_0-auc:0.80983
[16]	validation_0-auc:0.81031
[17]	validation_0-auc:0.81254
[18]	validation_0-auc:0.81559
[19]	validation_0-auc:0.81619
[20]	validation_0-auc:0.81739
[21]	validation_0-auc:0.81769
[22]	validation_0-auc:0.81882
[23]	validation_0-auc:0.81716
[24]	validation_0-auc:0.81787
[25]	validation_0-auc:0.81814
[26]	validation_0-auc:0.81723
[27]	validation_0-auc:0.81478
[28]	validation_0-auc:0.81493
[29]	validation_0-auc:0.81439
[30]	validation_0-auc:0.81520
[31]	validation_0-auc:0.81515
[32]	validation_0-auc:0.81738
[33]	validation_0-au

[I 2024-11-23 21:08:04,162] Trial 4 finished with value: 0.871279609938061 and parameters: {'n_estimators': 931, 'learning_rate': 0.0037086560225893634, 'max_depth': 6, 'subsample': 0.5949916384325777, 'colsample_bytree': 0.5921879005886703, 'min_child_weight': 4}. Best is trial 2 with value: 0.9116765801074527.


[0]	validation_0-auc:0.67630
[1]	validation_0-auc:0.68371
[2]	validation_0-auc:0.71916
[3]	validation_0-auc:0.72943
[4]	validation_0-auc:0.73463
[5]	validation_0-auc:0.73500
[6]	validation_0-auc:0.73942
[7]	validation_0-auc:0.73794
[8]	validation_0-auc:0.75073
[9]	validation_0-auc:0.75125
[10]	validation_0-auc:0.75231
[11]	validation_0-auc:0.75143
[12]	validation_0-auc:0.75286
[13]	validation_0-auc:0.75122
[14]	validation_0-auc:0.74947
[15]	validation_0-auc:0.75591
[16]	validation_0-auc:0.75687
[17]	validation_0-auc:0.75989
[18]	validation_0-auc:0.76131
[19]	validation_0-auc:0.76007
[20]	validation_0-auc:0.76730
[21]	validation_0-auc:0.76652
[22]	validation_0-auc:0.77011
[23]	validation_0-auc:0.76901
[24]	validation_0-auc:0.76710
[25]	validation_0-auc:0.76654
[26]	validation_0-auc:0.76897
[27]	validation_0-auc:0.76831
[28]	validation_0-auc:0.77076
[29]	validation_0-auc:0.76981
[30]	validation_0-auc:0.77140
[31]	validation_0-auc:0.77106
[32]	validation_0-auc:0.77375
[33]	validation_0-au

[I 2024-11-23 21:08:10,505] Trial 5 finished with value: 0.8355313921662134 and parameters: {'n_estimators': 703, 'learning_rate': 0.004220815929127002, 'max_depth': 4, 'subsample': 0.6593462283726139, 'colsample_bytree': 0.6637291829117675, 'min_child_weight': 3}. Best is trial 2 with value: 0.9116765801074527.


[0]	validation_0-auc:0.75494
[1]	validation_0-auc:0.77651
[2]	validation_0-auc:0.78131
[3]	validation_0-auc:0.78329
[4]	validation_0-auc:0.80197
[5]	validation_0-auc:0.79950
[6]	validation_0-auc:0.80034
[7]	validation_0-auc:0.80208
[8]	validation_0-auc:0.80739
[9]	validation_0-auc:0.81713
[10]	validation_0-auc:0.81957
[11]	validation_0-auc:0.82025
[12]	validation_0-auc:0.82056
[13]	validation_0-auc:0.82221
[14]	validation_0-auc:0.82146
[15]	validation_0-auc:0.81868
[16]	validation_0-auc:0.81721
[17]	validation_0-auc:0.81790
[18]	validation_0-auc:0.81741
[19]	validation_0-auc:0.81594
[20]	validation_0-auc:0.81643
[21]	validation_0-auc:0.81725
[22]	validation_0-auc:0.82135
[23]	validation_0-auc:0.82128
[24]	validation_0-auc:0.82247
[25]	validation_0-auc:0.82148
[26]	validation_0-auc:0.82035
[27]	validation_0-auc:0.82034
[28]	validation_0-auc:0.81921
[29]	validation_0-auc:0.82170
[30]	validation_0-auc:0.82167
[31]	validation_0-auc:0.82251
[32]	validation_0-auc:0.82217
[33]	validation_0-au

[I 2024-11-23 21:08:24,192] Trial 6 finished with value: 0.8467152898923553 and parameters: {'n_estimators': 819, 'learning_rate': 0.001001071154864689, 'max_depth': 8, 'subsample': 0.8711635646190207, 'colsample_bytree': 0.7776936031594488, 'min_child_weight': 7}. Best is trial 2 with value: 0.9116765801074527.


[0]	validation_0-auc:0.77518
[1]	validation_0-auc:0.79223
[2]	validation_0-auc:0.80941
[3]	validation_0-auc:0.81537
[4]	validation_0-auc:0.82968
[5]	validation_0-auc:0.83091
[6]	validation_0-auc:0.83422
[7]	validation_0-auc:0.83723
[8]	validation_0-auc:0.84306
[9]	validation_0-auc:0.84918
[10]	validation_0-auc:0.85208
[11]	validation_0-auc:0.85300
[12]	validation_0-auc:0.85620
[13]	validation_0-auc:0.85833
[14]	validation_0-auc:0.86039
[15]	validation_0-auc:0.86324
[16]	validation_0-auc:0.86565
[17]	validation_0-auc:0.86654
[18]	validation_0-auc:0.86923
[19]	validation_0-auc:0.86993
[20]	validation_0-auc:0.87314
[21]	validation_0-auc:0.87371
[22]	validation_0-auc:0.87474
[23]	validation_0-auc:0.87536
[24]	validation_0-auc:0.87630
[25]	validation_0-auc:0.87896
[26]	validation_0-auc:0.88056
[27]	validation_0-auc:0.88168
[28]	validation_0-auc:0.88301
[29]	validation_0-auc:0.88479
[30]	validation_0-auc:0.88549
[31]	validation_0-auc:0.88662
[32]	validation_0-auc:0.88836
[33]	validation_0-au

[I 2024-11-23 21:08:29,689] Trial 7 finished with value: 0.9198560703924242 and parameters: {'n_estimators': 256, 'learning_rate': 0.07366949616368347, 'max_depth': 15, 'subsample': 0.8633192288487099, 'colsample_bytree': 0.8149350581826964, 'min_child_weight': 8}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.72301
[1]	validation_0-auc:0.73412
[2]	validation_0-auc:0.74283
[3]	validation_0-auc:0.74494
[4]	validation_0-auc:0.74668
[5]	validation_0-auc:0.74474
[6]	validation_0-auc:0.74756
[7]	validation_0-auc:0.75837
[8]	validation_0-auc:0.76925
[9]	validation_0-auc:0.76818
[10]	validation_0-auc:0.76954
[11]	validation_0-auc:0.76990
[12]	validation_0-auc:0.77196
[13]	validation_0-auc:0.77152
[14]	validation_0-auc:0.76933
[15]	validation_0-auc:0.76770
[16]	validation_0-auc:0.76925
[17]	validation_0-auc:0.77006
[18]	validation_0-auc:0.77258
[19]	validation_0-auc:0.77581
[20]	validation_0-auc:0.77575
[21]	validation_0-auc:0.77610
[22]	validation_0-auc:0.77515
[23]	validation_0-auc:0.77692
[24]	validation_0-auc:0.77696
[25]	validation_0-auc:0.77661
[26]	validation_0-auc:0.77655
[27]	validation_0-auc:0.77569
[28]	validation_0-auc:0.77598
[29]	validation_0-auc:0.77822
[30]	validation_0-auc:0.77948
[31]	validation_0-auc:0.77940
[32]	validation_0-auc:0.77958
[33]	validation_0-au

[I 2024-11-23 21:08:36,663] Trial 8 finished with value: 0.8332735890293785 and parameters: {'n_estimators': 625, 'learning_rate': 0.0027793905628844336, 'max_depth': 5, 'subsample': 0.6327738378955612, 'colsample_bytree': 0.9799305376259065, 'min_child_weight': 7}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.74283
[1]	validation_0-auc:0.79124
[2]	validation_0-auc:0.79897
[3]	validation_0-auc:0.80570
[4]	validation_0-auc:0.81665
[5]	validation_0-auc:0.81350
[6]	validation_0-auc:0.81231
[7]	validation_0-auc:0.81185
[8]	validation_0-auc:0.82011
[9]	validation_0-auc:0.82415
[10]	validation_0-auc:0.82459
[11]	validation_0-auc:0.82320
[12]	validation_0-auc:0.82702
[13]	validation_0-auc:0.82780
[14]	validation_0-auc:0.82649
[15]	validation_0-auc:0.82386
[16]	validation_0-auc:0.82649
[17]	validation_0-auc:0.82467
[18]	validation_0-auc:0.82528
[19]	validation_0-auc:0.82707
[20]	validation_0-auc:0.82482
[21]	validation_0-auc:0.82583
[22]	validation_0-auc:0.82484
[23]	validation_0-auc:0.82484
[24]	validation_0-auc:0.82409
[25]	validation_0-auc:0.82333
[26]	validation_0-auc:0.82230
[27]	validation_0-auc:0.82098
[28]	validation_0-auc:0.82141
[29]	validation_0-auc:0.82127
[30]	validation_0-auc:0.82262
[31]	validation_0-auc:0.82441
[32]	validation_0-auc:0.82619
[33]	validation_0-au

[I 2024-11-23 21:08:45,616] Trial 9 finished with value: 0.8542056700350722 and parameters: {'n_estimators': 556, 'learning_rate': 0.0014617234415575321, 'max_depth': 7, 'subsample': 0.6796634570712836, 'colsample_bytree': 0.8450193567394881, 'min_child_weight': 1}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.76932
[1]	validation_0-auc:0.78183
[2]	validation_0-auc:0.81164
[3]	validation_0-auc:0.81593
[4]	validation_0-auc:0.83034
[5]	validation_0-auc:0.82581
[6]	validation_0-auc:0.82818
[7]	validation_0-auc:0.83144
[8]	validation_0-auc:0.83576
[9]	validation_0-auc:0.84009
[10]	validation_0-auc:0.84337
[11]	validation_0-auc:0.84355
[12]	validation_0-auc:0.84273
[13]	validation_0-auc:0.84258
[14]	validation_0-auc:0.84245
[15]	validation_0-auc:0.84252
[16]	validation_0-auc:0.84143
[17]	validation_0-auc:0.84206
[18]	validation_0-auc:0.84256
[19]	validation_0-auc:0.84250
[20]	validation_0-auc:0.84309
[21]	validation_0-auc:0.84337
[22]	validation_0-auc:0.84539
[23]	validation_0-auc:0.84563
[24]	validation_0-auc:0.84535
[25]	validation_0-auc:0.84675
[26]	validation_0-auc:0.84772
[27]	validation_0-auc:0.84784
[28]	validation_0-auc:0.84815
[29]	validation_0-auc:0.84864
[30]	validation_0-auc:0.84891
[31]	validation_0-auc:0.84945
[32]	validation_0-auc:0.84853
[33]	validation_0-au

[I 2024-11-23 21:08:48,655] Trial 10 finished with value: 0.8803055304096731 and parameters: {'n_estimators': 124, 'learning_rate': 0.018918915987816618, 'max_depth': 15, 'subsample': 0.9637209610656716, 'colsample_bytree': 0.8868224102725641, 'min_child_weight': 10}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.78788
[1]	validation_0-auc:0.79222
[2]	validation_0-auc:0.80524
[3]	validation_0-auc:0.82208
[4]	validation_0-auc:0.83950
[5]	validation_0-auc:0.84346
[6]	validation_0-auc:0.84521
[7]	validation_0-auc:0.85058
[8]	validation_0-auc:0.85274
[9]	validation_0-auc:0.85461
[10]	validation_0-auc:0.85249
[11]	validation_0-auc:0.85537
[12]	validation_0-auc:0.85688
[13]	validation_0-auc:0.85901
[14]	validation_0-auc:0.85947
[15]	validation_0-auc:0.86183
[16]	validation_0-auc:0.86345
[17]	validation_0-auc:0.86522
[18]	validation_0-auc:0.86686
[19]	validation_0-auc:0.86746
[20]	validation_0-auc:0.86956
[21]	validation_0-auc:0.87108
[22]	validation_0-auc:0.87240
[23]	validation_0-auc:0.87188
[24]	validation_0-auc:0.87292
[25]	validation_0-auc:0.87333
[26]	validation_0-auc:0.87369
[27]	validation_0-auc:0.87324
[28]	validation_0-auc:0.87368
[29]	validation_0-auc:0.87362
[30]	validation_0-auc:0.87475
[31]	validation_0-auc:0.87623
[32]	validation_0-auc:0.87703
[33]	validation_0-au

[I 2024-11-23 21:08:56,521] Trial 11 finished with value: 0.9155677468503918 and parameters: {'n_estimators': 351, 'learning_rate': 0.01957128241732568, 'max_depth': 14, 'subsample': 0.7941506344178287, 'colsample_bytree': 0.5346844621679946, 'min_child_weight': 5}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.79137
[1]	validation_0-auc:0.82225
[2]	validation_0-auc:0.84046
[3]	validation_0-auc:0.84700
[4]	validation_0-auc:0.85396
[5]	validation_0-auc:0.84993
[6]	validation_0-auc:0.85092
[7]	validation_0-auc:0.85203
[8]	validation_0-auc:0.85502
[9]	validation_0-auc:0.85871
[10]	validation_0-auc:0.86175
[11]	validation_0-auc:0.86356
[12]	validation_0-auc:0.86412
[13]	validation_0-auc:0.86576
[14]	validation_0-auc:0.86512
[15]	validation_0-auc:0.86578
[16]	validation_0-auc:0.86817
[17]	validation_0-auc:0.86913
[18]	validation_0-auc:0.87013
[19]	validation_0-auc:0.86988
[20]	validation_0-auc:0.87123
[21]	validation_0-auc:0.87160
[22]	validation_0-auc:0.87270
[23]	validation_0-auc:0.87265
[24]	validation_0-auc:0.87252
[25]	validation_0-auc:0.87327
[26]	validation_0-auc:0.87452
[27]	validation_0-auc:0.87471
[28]	validation_0-auc:0.87560
[29]	validation_0-auc:0.87648
[30]	validation_0-auc:0.87815
[31]	validation_0-auc:0.87963
[32]	validation_0-auc:0.88090
[33]	validation_0-au

[I 2024-11-23 21:09:04,413] Trial 12 finished with value: 0.919649457633079 and parameters: {'n_estimators': 360, 'learning_rate': 0.032625021671207066, 'max_depth': 14, 'subsample': 0.7842973567147814, 'colsample_bytree': 0.8053198497300683, 'min_child_weight': 6}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.76666
[1]	validation_0-auc:0.77451
[2]	validation_0-auc:0.80067
[3]	validation_0-auc:0.80204
[4]	validation_0-auc:0.81797
[5]	validation_0-auc:0.82057
[6]	validation_0-auc:0.82320
[7]	validation_0-auc:0.82595
[8]	validation_0-auc:0.83501
[9]	validation_0-auc:0.83982
[10]	validation_0-auc:0.84533
[11]	validation_0-auc:0.84740
[12]	validation_0-auc:0.84944
[13]	validation_0-auc:0.85169
[14]	validation_0-auc:0.85228
[15]	validation_0-auc:0.85397
[16]	validation_0-auc:0.85544
[17]	validation_0-auc:0.85667
[18]	validation_0-auc:0.85835
[19]	validation_0-auc:0.85759
[20]	validation_0-auc:0.86179
[21]	validation_0-auc:0.86272
[22]	validation_0-auc:0.86323
[23]	validation_0-auc:0.86506
[24]	validation_0-auc:0.86630
[25]	validation_0-auc:0.86698
[26]	validation_0-auc:0.86833
[27]	validation_0-auc:0.86921
[28]	validation_0-auc:0.87096
[29]	validation_0-auc:0.87177
[30]	validation_0-auc:0.87264
[31]	validation_0-auc:0.87416
[32]	validation_0-auc:0.87524
[33]	validation_0-au

[I 2024-11-23 21:09:12,088] Trial 13 finished with value: 0.9166030296840404 and parameters: {'n_estimators': 386, 'learning_rate': 0.048027819987105515, 'max_depth': 13, 'subsample': 0.919958036330907, 'colsample_bytree': 0.8069355211314311, 'min_child_weight': 10}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.77919
[1]	validation_0-auc:0.81284
[2]	validation_0-auc:0.82763
[3]	validation_0-auc:0.83877
[4]	validation_0-auc:0.84825
[5]	validation_0-auc:0.85253
[6]	validation_0-auc:0.85573
[7]	validation_0-auc:0.85584
[8]	validation_0-auc:0.85968
[9]	validation_0-auc:0.86339
[10]	validation_0-auc:0.86395
[11]	validation_0-auc:0.86421
[12]	validation_0-auc:0.86451
[13]	validation_0-auc:0.86579
[14]	validation_0-auc:0.86781
[15]	validation_0-auc:0.87160
[16]	validation_0-auc:0.87502
[17]	validation_0-auc:0.87492
[18]	validation_0-auc:0.87533
[19]	validation_0-auc:0.87765
[20]	validation_0-auc:0.87961
[21]	validation_0-auc:0.88030
[22]	validation_0-auc:0.88114
[23]	validation_0-auc:0.88128
[24]	validation_0-auc:0.88108
[25]	validation_0-auc:0.88317
[26]	validation_0-auc:0.88479
[27]	validation_0-auc:0.88512
[28]	validation_0-auc:0.88663
[29]	validation_0-auc:0.88899
[30]	validation_0-auc:0.88886
[31]	validation_0-auc:0.89051
[32]	validation_0-auc:0.89129
[33]	validation_0-au

[I 2024-11-23 21:09:15,088] Trial 14 finished with value: 0.9166720406575248 and parameters: {'n_estimators': 156, 'learning_rate': 0.08005473481952677, 'max_depth': 11, 'subsample': 0.7661975630204956, 'colsample_bytree': 0.9189248558569405, 'min_child_weight': 6}. Best is trial 7 with value: 0.9198560703924242.


[0]	validation_0-auc:0.78862
[1]	validation_0-auc:0.81800
[2]	validation_0-auc:0.83244
[3]	validation_0-auc:0.83882
[4]	validation_0-auc:0.85439
[5]	validation_0-auc:0.85411
[6]	validation_0-auc:0.85447
[7]	validation_0-auc:0.85882
[8]	validation_0-auc:0.86785
[9]	validation_0-auc:0.87221
[10]	validation_0-auc:0.87389
[11]	validation_0-auc:0.87658
[12]	validation_0-auc:0.87828
[13]	validation_0-auc:0.87925
[14]	validation_0-auc:0.87948
[15]	validation_0-auc:0.88107
[16]	validation_0-auc:0.88330
[17]	validation_0-auc:0.88497
[18]	validation_0-auc:0.88560
[19]	validation_0-auc:0.88616
[20]	validation_0-auc:0.88582
[21]	validation_0-auc:0.88665
[22]	validation_0-auc:0.88880
[23]	validation_0-auc:0.89040
[24]	validation_0-auc:0.89053
[25]	validation_0-auc:0.89124
[26]	validation_0-auc:0.89191
[27]	validation_0-auc:0.89310
[28]	validation_0-auc:0.89408
[29]	validation_0-auc:0.89545
[30]	validation_0-auc:0.89607
[31]	validation_0-auc:0.89646
[32]	validation_0-auc:0.89789
[33]	validation_0-au

[I 2024-11-23 21:09:21,903] Trial 15 finished with value: 0.9259204139622664 and parameters: {'n_estimators': 264, 'learning_rate': 0.03568106366805702, 'max_depth': 15, 'subsample': 0.8895908852162108, 'colsample_bytree': 0.7383692609754049, 'min_child_weight': 3}. Best is trial 15 with value: 0.9259204139622664.


[0]	validation_0-auc:0.80254
[1]	validation_0-auc:0.83879
[2]	validation_0-auc:0.84503
[3]	validation_0-auc:0.84533
[4]	validation_0-auc:0.86096
[5]	validation_0-auc:0.86138
[6]	validation_0-auc:0.86363
[7]	validation_0-auc:0.86525
[8]	validation_0-auc:0.86748
[9]	validation_0-auc:0.87347
[10]	validation_0-auc:0.87397
[11]	validation_0-auc:0.87079
[12]	validation_0-auc:0.87362
[13]	validation_0-auc:0.87485
[14]	validation_0-auc:0.87292
[15]	validation_0-auc:0.87360
[16]	validation_0-auc:0.87233
[17]	validation_0-auc:0.87411
[18]	validation_0-auc:0.87373
[19]	validation_0-auc:0.87293
[20]	validation_0-auc:0.87406
[21]	validation_0-auc:0.87345
[22]	validation_0-auc:0.87643
[23]	validation_0-auc:0.87572
[24]	validation_0-auc:0.87689
[25]	validation_0-auc:0.87718
[26]	validation_0-auc:0.87729
[27]	validation_0-auc:0.87790
[28]	validation_0-auc:0.87870
[29]	validation_0-auc:0.88044
[30]	validation_0-auc:0.88023
[31]	validation_0-auc:0.87941
[32]	validation_0-auc:0.87993
[33]	validation_0-au

[I 2024-11-23 21:09:29,064] Trial 16 finished with value: 0.9005271761426268 and parameters: {'n_estimators': 215, 'learning_rate': 0.0087348084308888, 'max_depth': 15, 'subsample': 0.9968463815553197, 'colsample_bytree': 0.736461906686805, 'min_child_weight': 2}. Best is trial 15 with value: 0.9259204139622664.


[0]	validation_0-auc:0.79131
[1]	validation_0-auc:0.82612
[2]	validation_0-auc:0.84032
[3]	validation_0-auc:0.84079
[4]	validation_0-auc:0.85074
[5]	validation_0-auc:0.85082
[6]	validation_0-auc:0.84995
[7]	validation_0-auc:0.85890
[8]	validation_0-auc:0.86537
[9]	validation_0-auc:0.86867
[10]	validation_0-auc:0.87268
[11]	validation_0-auc:0.87210
[12]	validation_0-auc:0.87338
[13]	validation_0-auc:0.87322
[14]	validation_0-auc:0.87204
[15]	validation_0-auc:0.86982
[16]	validation_0-auc:0.86844
[17]	validation_0-auc:0.86785
[18]	validation_0-auc:0.86790
[19]	validation_0-auc:0.86933
[20]	validation_0-auc:0.86929
[21]	validation_0-auc:0.86849
[22]	validation_0-auc:0.87004
[23]	validation_0-auc:0.86952
[24]	validation_0-auc:0.86996
[25]	validation_0-auc:0.87034
[26]	validation_0-auc:0.86935
[27]	validation_0-auc:0.86904
[28]	validation_0-auc:0.86919
[29]	validation_0-auc:0.87056
[30]	validation_0-auc:0.87029
[31]	validation_0-auc:0.87124
[32]	validation_0-auc:0.87245
[33]	validation_0-au

[I 2024-11-23 21:09:35,617] Trial 17 finished with value: 0.9020063064228123 and parameters: {'n_estimators': 263, 'learning_rate': 0.009487016689593725, 'max_depth': 12, 'subsample': 0.887219833787306, 'colsample_bytree': 0.8889814882805531, 'min_child_weight': 3}. Best is trial 15 with value: 0.9259204139622664.


[0]	validation_0-auc:0.78819
[1]	validation_0-auc:0.82394
[2]	validation_0-auc:0.84416
[3]	validation_0-auc:0.84933
[4]	validation_0-auc:0.86603
[5]	validation_0-auc:0.86857
[6]	validation_0-auc:0.87170
[7]	validation_0-auc:0.87521
[8]	validation_0-auc:0.88110
[9]	validation_0-auc:0.88659
[10]	validation_0-auc:0.88913
[11]	validation_0-auc:0.89124
[12]	validation_0-auc:0.89405
[13]	validation_0-auc:0.89378
[14]	validation_0-auc:0.89281
[15]	validation_0-auc:0.89356
[16]	validation_0-auc:0.89338
[17]	validation_0-auc:0.89434
[18]	validation_0-auc:0.89486
[19]	validation_0-auc:0.89447
[20]	validation_0-auc:0.89476
[21]	validation_0-auc:0.89628
[22]	validation_0-auc:0.89781
[23]	validation_0-auc:0.89981
[24]	validation_0-auc:0.89938
[25]	validation_0-auc:0.89928
[26]	validation_0-auc:0.89914
[27]	validation_0-auc:0.90009
[28]	validation_0-auc:0.90048
[29]	validation_0-auc:0.90192
[30]	validation_0-auc:0.90258
[31]	validation_0-auc:0.90304
[32]	validation_0-auc:0.90312
[33]	validation_0-au

[I 2024-11-23 21:09:46,749] Trial 18 finished with value: 0.930862983068631 and parameters: {'n_estimators': 450, 'learning_rate': 0.03776062081025995, 'max_depth': 13, 'subsample': 0.9246642798284709, 'colsample_bytree': 0.7385527356404329, 'min_child_weight': 1}. Best is trial 18 with value: 0.930862983068631.


[0]	validation_0-auc:0.78380
[1]	validation_0-auc:0.82331
[2]	validation_0-auc:0.84242
[3]	validation_0-auc:0.84949
[4]	validation_0-auc:0.86714
[5]	validation_0-auc:0.86853
[6]	validation_0-auc:0.87114
[7]	validation_0-auc:0.87348
[8]	validation_0-auc:0.87789
[9]	validation_0-auc:0.88483
[10]	validation_0-auc:0.88731
[11]	validation_0-auc:0.88904
[12]	validation_0-auc:0.89001
[13]	validation_0-auc:0.89101
[14]	validation_0-auc:0.88983
[15]	validation_0-auc:0.89078
[16]	validation_0-auc:0.89007
[17]	validation_0-auc:0.89087
[18]	validation_0-auc:0.89077
[19]	validation_0-auc:0.89135
[20]	validation_0-auc:0.89134
[21]	validation_0-auc:0.89309
[22]	validation_0-auc:0.89445
[23]	validation_0-auc:0.89578
[24]	validation_0-auc:0.89571
[25]	validation_0-auc:0.89540
[26]	validation_0-auc:0.89652
[27]	validation_0-auc:0.89762
[28]	validation_0-auc:0.89850
[29]	validation_0-auc:0.90021
[30]	validation_0-auc:0.90053
[31]	validation_0-auc:0.90178
[32]	validation_0-auc:0.90184
[33]	validation_0-au

[I 2024-11-23 21:09:59,003] Trial 19 finished with value: 0.931142099024254 and parameters: {'n_estimators': 482, 'learning_rate': 0.03183318638502882, 'max_depth': 13, 'subsample': 0.92686174878522, 'colsample_bytree': 0.7377212142343996, 'min_child_weight': 1}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.77307
[1]	validation_0-auc:0.80359
[2]	validation_0-auc:0.82664
[3]	validation_0-auc:0.83745
[4]	validation_0-auc:0.85232
[5]	validation_0-auc:0.85301
[6]	validation_0-auc:0.85880
[7]	validation_0-auc:0.86139
[8]	validation_0-auc:0.86416
[9]	validation_0-auc:0.86702
[10]	validation_0-auc:0.87084
[11]	validation_0-auc:0.87284
[12]	validation_0-auc:0.87528
[13]	validation_0-auc:0.87619
[14]	validation_0-auc:0.87779
[15]	validation_0-auc:0.87886
[16]	validation_0-auc:0.87792
[17]	validation_0-auc:0.87842
[18]	validation_0-auc:0.87968
[19]	validation_0-auc:0.87920
[20]	validation_0-auc:0.87965
[21]	validation_0-auc:0.88013
[22]	validation_0-auc:0.88153
[23]	validation_0-auc:0.88155
[24]	validation_0-auc:0.88059
[25]	validation_0-auc:0.88137
[26]	validation_0-auc:0.88067
[27]	validation_0-auc:0.88019
[28]	validation_0-auc:0.88047
[29]	validation_0-auc:0.88154
[30]	validation_0-auc:0.88165
[31]	validation_0-auc:0.88319
[32]	validation_0-auc:0.88367
[33]	validation_0-au

[I 2024-11-23 21:10:08,996] Trial 20 finished with value: 0.9232504142128974 and parameters: {'n_estimators': 461, 'learning_rate': 0.014974100052688712, 'max_depth': 10, 'subsample': 0.948798412829932, 'colsample_bytree': 0.6715677301328, 'min_child_weight': 1}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.78359
[1]	validation_0-auc:0.81117
[2]	validation_0-auc:0.82433
[3]	validation_0-auc:0.83097
[4]	validation_0-auc:0.84791
[5]	validation_0-auc:0.84664
[6]	validation_0-auc:0.84948
[7]	validation_0-auc:0.85254
[8]	validation_0-auc:0.86159
[9]	validation_0-auc:0.86740
[10]	validation_0-auc:0.87182
[11]	validation_0-auc:0.87324
[12]	validation_0-auc:0.87574
[13]	validation_0-auc:0.87754
[14]	validation_0-auc:0.87800
[15]	validation_0-auc:0.87912
[16]	validation_0-auc:0.87992
[17]	validation_0-auc:0.88024
[18]	validation_0-auc:0.87992
[19]	validation_0-auc:0.88058
[20]	validation_0-auc:0.88120
[21]	validation_0-auc:0.88244
[22]	validation_0-auc:0.88385
[23]	validation_0-auc:0.88575
[24]	validation_0-auc:0.88600
[25]	validation_0-auc:0.88609
[26]	validation_0-auc:0.88745
[27]	validation_0-auc:0.88854
[28]	validation_0-auc:0.88835
[29]	validation_0-auc:0.88983
[30]	validation_0-auc:0.89143
[31]	validation_0-auc:0.89251
[32]	validation_0-auc:0.89348
[33]	validation_0-au

[I 2024-11-23 21:10:19,341] Trial 21 finished with value: 0.9299858107685239 and parameters: {'n_estimators': 459, 'learning_rate': 0.03727916677946819, 'max_depth': 13, 'subsample': 0.9161175995716141, 'colsample_bytree': 0.7416777963212172, 'min_child_weight': 2}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.77821
[1]	validation_0-auc:0.82269
[2]	validation_0-auc:0.84678
[3]	validation_0-auc:0.84630
[4]	validation_0-auc:0.85153
[5]	validation_0-auc:0.85652
[6]	validation_0-auc:0.86224
[7]	validation_0-auc:0.86697
[8]	validation_0-auc:0.87340
[9]	validation_0-auc:0.87729
[10]	validation_0-auc:0.88160
[11]	validation_0-auc:0.88526
[12]	validation_0-auc:0.88746
[13]	validation_0-auc:0.88833
[14]	validation_0-auc:0.88703
[15]	validation_0-auc:0.88961
[16]	validation_0-auc:0.88775
[17]	validation_0-auc:0.88809
[18]	validation_0-auc:0.88884
[19]	validation_0-auc:0.88936
[20]	validation_0-auc:0.88941
[21]	validation_0-auc:0.89104
[22]	validation_0-auc:0.89160
[23]	validation_0-auc:0.89142
[24]	validation_0-auc:0.89110
[25]	validation_0-auc:0.89111
[26]	validation_0-auc:0.89105
[27]	validation_0-auc:0.89165
[28]	validation_0-auc:0.89183
[29]	validation_0-auc:0.89199
[30]	validation_0-auc:0.89236
[31]	validation_0-auc:0.89282
[32]	validation_0-auc:0.89303
[33]	validation_0-au

[I 2024-11-23 21:10:31,049] Trial 22 finished with value: 0.9283375167862913 and parameters: {'n_estimators': 510, 'learning_rate': 0.03255548899663179, 'max_depth': 13, 'subsample': 0.9405888707870974, 'colsample_bytree': 0.7180520515757123, 'min_child_weight': 2}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.78944
[1]	validation_0-auc:0.82568
[2]	validation_0-auc:0.84257
[3]	validation_0-auc:0.84735
[4]	validation_0-auc:0.86139
[5]	validation_0-auc:0.86466
[6]	validation_0-auc:0.86727
[7]	validation_0-auc:0.86995
[8]	validation_0-auc:0.87661
[9]	validation_0-auc:0.88385
[10]	validation_0-auc:0.88558
[11]	validation_0-auc:0.88452
[12]	validation_0-auc:0.88693
[13]	validation_0-auc:0.88823
[14]	validation_0-auc:0.88878
[15]	validation_0-auc:0.89052
[16]	validation_0-auc:0.89185
[17]	validation_0-auc:0.89353
[18]	validation_0-auc:0.89553
[19]	validation_0-auc:0.89524
[20]	validation_0-auc:0.89660
[21]	validation_0-auc:0.89784
[22]	validation_0-auc:0.89940
[23]	validation_0-auc:0.90012
[24]	validation_0-auc:0.90058
[25]	validation_0-auc:0.90161
[26]	validation_0-auc:0.90206
[27]	validation_0-auc:0.90285
[28]	validation_0-auc:0.90364
[29]	validation_0-auc:0.90478
[30]	validation_0-auc:0.90506
[31]	validation_0-auc:0.90613
[32]	validation_0-auc:0.90655
[33]	validation_0-au

[I 2024-11-23 21:10:41,041] Trial 23 finished with value: 0.9286864703579514 and parameters: {'n_estimators': 441, 'learning_rate': 0.05067144062534989, 'max_depth': 12, 'subsample': 0.9987137836993056, 'colsample_bytree': 0.7656765332942579, 'min_child_weight': 1}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.78086
[1]	validation_0-auc:0.79923
[2]	validation_0-auc:0.83024
[3]	validation_0-auc:0.84317
[4]	validation_0-auc:0.85980
[5]	validation_0-auc:0.85943
[6]	validation_0-auc:0.86607
[7]	validation_0-auc:0.86801
[8]	validation_0-auc:0.87066
[9]	validation_0-auc:0.87007
[10]	validation_0-auc:0.87122
[11]	validation_0-auc:0.87373
[12]	validation_0-auc:0.87566
[13]	validation_0-auc:0.87816
[14]	validation_0-auc:0.87834
[15]	validation_0-auc:0.87949
[16]	validation_0-auc:0.88289
[17]	validation_0-auc:0.88589
[18]	validation_0-auc:0.88800
[19]	validation_0-auc:0.88909
[20]	validation_0-auc:0.89080
[21]	validation_0-auc:0.89063
[22]	validation_0-auc:0.89175
[23]	validation_0-auc:0.89241
[24]	validation_0-auc:0.89158
[25]	validation_0-auc:0.89254
[26]	validation_0-auc:0.89226
[27]	validation_0-auc:0.89200
[28]	validation_0-auc:0.89274
[29]	validation_0-auc:0.89234
[30]	validation_0-auc:0.89299
[31]	validation_0-auc:0.89349
[32]	validation_0-auc:0.89413
[33]	validation_0-au

[I 2024-11-23 21:10:56,380] Trial 24 finished with value: 0.9270679335533772 and parameters: {'n_estimators': 615, 'learning_rate': 0.013061884829581765, 'max_depth': 13, 'subsample': 0.822709122628493, 'colsample_bytree': 0.6095556489480155, 'min_child_weight': 2}. Best is trial 19 with value: 0.931142099024254.


[0]	validation_0-auc:0.75852
[1]	validation_0-auc:0.78586
[2]	validation_0-auc:0.83120
[3]	validation_0-auc:0.83754
[4]	validation_0-auc:0.85365
[5]	validation_0-auc:0.85224
[6]	validation_0-auc:0.85844
[7]	validation_0-auc:0.86334
[8]	validation_0-auc:0.86830
[9]	validation_0-auc:0.87088
[10]	validation_0-auc:0.87116
[11]	validation_0-auc:0.87555
[12]	validation_0-auc:0.87883
[13]	validation_0-auc:0.87985
[14]	validation_0-auc:0.87890
[15]	validation_0-auc:0.87897
[16]	validation_0-auc:0.88195
[17]	validation_0-auc:0.88481
[18]	validation_0-auc:0.88661
[19]	validation_0-auc:0.88541
[20]	validation_0-auc:0.88826
[21]	validation_0-auc:0.88945
[22]	validation_0-auc:0.89256
[23]	validation_0-auc:0.89315
[24]	validation_0-auc:0.89273
[25]	validation_0-auc:0.89228
[26]	validation_0-auc:0.89294
[27]	validation_0-auc:0.89236
[28]	validation_0-auc:0.89274
[29]	validation_0-auc:0.89306
[30]	validation_0-auc:0.89357
[31]	validation_0-auc:0.89410
[32]	validation_0-auc:0.89473
[33]	validation_0-au

[I 2024-11-23 21:11:06,840] Trial 25 finished with value: 0.9317106571275765 and parameters: {'n_estimators': 467, 'learning_rate': 0.026932723641514514, 'max_depth': 11, 'subsample': 0.7194664019272704, 'colsample_bytree': 0.6912702331695365, 'min_child_weight': 1}. Best is trial 25 with value: 0.9317106571275765.


[0]	validation_0-auc:0.75646
[1]	validation_0-auc:0.78624
[2]	validation_0-auc:0.81751
[3]	validation_0-auc:0.83072
[4]	validation_0-auc:0.85225
[5]	validation_0-auc:0.85502
[6]	validation_0-auc:0.86013
[7]	validation_0-auc:0.86464
[8]	validation_0-auc:0.86863
[9]	validation_0-auc:0.87023
[10]	validation_0-auc:0.87105
[11]	validation_0-auc:0.87620
[12]	validation_0-auc:0.87875
[13]	validation_0-auc:0.88011
[14]	validation_0-auc:0.87865
[15]	validation_0-auc:0.88097
[16]	validation_0-auc:0.88322
[17]	validation_0-auc:0.88596
[18]	validation_0-auc:0.88841
[19]	validation_0-auc:0.88726
[20]	validation_0-auc:0.89078
[21]	validation_0-auc:0.89197
[22]	validation_0-auc:0.89381
[23]	validation_0-auc:0.89420
[24]	validation_0-auc:0.89415
[25]	validation_0-auc:0.89410
[26]	validation_0-auc:0.89479
[27]	validation_0-auc:0.89438
[28]	validation_0-auc:0.89532
[29]	validation_0-auc:0.89586
[30]	validation_0-auc:0.89746
[31]	validation_0-auc:0.89807
[32]	validation_0-auc:0.89916
[33]	validation_0-au

[I 2024-11-23 21:11:19,915] Trial 26 finished with value: 0.9320724549266064 and parameters: {'n_estimators': 606, 'learning_rate': 0.024428272580470995, 'max_depth': 11, 'subsample': 0.7158223881418168, 'colsample_bytree': 0.6456866483960999, 'min_child_weight': 1}. Best is trial 26 with value: 0.9320724549266064.


[0]	validation_0-auc:0.73418
[1]	validation_0-auc:0.75993
[2]	validation_0-auc:0.78824
[3]	validation_0-auc:0.79890
[4]	validation_0-auc:0.81867
[5]	validation_0-auc:0.82506
[6]	validation_0-auc:0.83475
[7]	validation_0-auc:0.83724
[8]	validation_0-auc:0.84402
[9]	validation_0-auc:0.84518
[10]	validation_0-auc:0.84573
[11]	validation_0-auc:0.85015
[12]	validation_0-auc:0.84938
[13]	validation_0-auc:0.84916
[14]	validation_0-auc:0.84980
[15]	validation_0-auc:0.85249
[16]	validation_0-auc:0.85598
[17]	validation_0-auc:0.85529
[18]	validation_0-auc:0.85831
[19]	validation_0-auc:0.85577
[20]	validation_0-auc:0.85871
[21]	validation_0-auc:0.85901
[22]	validation_0-auc:0.86172
[23]	validation_0-auc:0.86109
[24]	validation_0-auc:0.86138
[25]	validation_0-auc:0.86178
[26]	validation_0-auc:0.86121
[27]	validation_0-auc:0.86060
[28]	validation_0-auc:0.86131
[29]	validation_0-auc:0.86007
[30]	validation_0-auc:0.86032
[31]	validation_0-auc:0.85934
[32]	validation_0-auc:0.86095
[33]	validation_0-au

[I 2024-11-23 21:11:33,905] Trial 27 finished with value: 0.9049421323342358 and parameters: {'n_estimators': 700, 'learning_rate': 0.006396538429943633, 'max_depth': 10, 'subsample': 0.7233437332578784, 'colsample_bytree': 0.634554110041051, 'min_child_weight': 4}. Best is trial 26 with value: 0.9320724549266064.


[0]	validation_0-auc:0.77385
[1]	validation_0-auc:0.79942
[2]	validation_0-auc:0.82289
[3]	validation_0-auc:0.83577
[4]	validation_0-auc:0.85447
[5]	validation_0-auc:0.86085
[6]	validation_0-auc:0.86301
[7]	validation_0-auc:0.86592
[8]	validation_0-auc:0.87055
[9]	validation_0-auc:0.87348
[10]	validation_0-auc:0.87421
[11]	validation_0-auc:0.87841
[12]	validation_0-auc:0.88100
[13]	validation_0-auc:0.88266
[14]	validation_0-auc:0.88217
[15]	validation_0-auc:0.88272
[16]	validation_0-auc:0.88517
[17]	validation_0-auc:0.88634
[18]	validation_0-auc:0.88817
[19]	validation_0-auc:0.88993
[20]	validation_0-auc:0.89150
[21]	validation_0-auc:0.89185
[22]	validation_0-auc:0.89293
[23]	validation_0-auc:0.89344
[24]	validation_0-auc:0.89361
[25]	validation_0-auc:0.89455
[26]	validation_0-auc:0.89557
[27]	validation_0-auc:0.89516
[28]	validation_0-auc:0.89502
[29]	validation_0-auc:0.89626
[30]	validation_0-auc:0.89696
[31]	validation_0-auc:0.89692
[32]	validation_0-auc:0.89801
[33]	validation_0-au

[I 2024-11-23 21:11:50,775] Trial 28 finished with value: 0.933148121105479 and parameters: {'n_estimators': 818, 'learning_rate': 0.025519080744339073, 'max_depth': 11, 'subsample': 0.7244069470995078, 'colsample_bytree': 0.5758485018927367, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75235
[1]	validation_0-auc:0.77457
[2]	validation_0-auc:0.79402
[3]	validation_0-auc:0.81406
[4]	validation_0-auc:0.83519
[5]	validation_0-auc:0.84037
[6]	validation_0-auc:0.84003
[7]	validation_0-auc:0.84218
[8]	validation_0-auc:0.84821
[9]	validation_0-auc:0.84949
[10]	validation_0-auc:0.84831
[11]	validation_0-auc:0.85483
[12]	validation_0-auc:0.85642
[13]	validation_0-auc:0.85887
[14]	validation_0-auc:0.85924
[15]	validation_0-auc:0.86270
[16]	validation_0-auc:0.86597
[17]	validation_0-auc:0.86560
[18]	validation_0-auc:0.86683
[19]	validation_0-auc:0.86791
[20]	validation_0-auc:0.87093
[21]	validation_0-auc:0.87193
[22]	validation_0-auc:0.87419
[23]	validation_0-auc:0.87505
[24]	validation_0-auc:0.87511
[25]	validation_0-auc:0.87602
[26]	validation_0-auc:0.87670
[27]	validation_0-auc:0.87641
[28]	validation_0-auc:0.87734
[29]	validation_0-auc:0.87850
[30]	validation_0-auc:0.87856
[31]	validation_0-auc:0.88003
[32]	validation_0-auc:0.88068
[33]	validation_0-au

[I 2024-11-23 21:12:08,462] Trial 29 finished with value: 0.9269255614732588 and parameters: {'n_estimators': 998, 'learning_rate': 0.02449228648427526, 'max_depth': 11, 'subsample': 0.7247448091509786, 'colsample_bytree': 0.5616238457095415, 'min_child_weight': 4}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75084
[1]	validation_0-auc:0.77942
[2]	validation_0-auc:0.82171
[3]	validation_0-auc:0.82344
[4]	validation_0-auc:0.83141
[5]	validation_0-auc:0.82912
[6]	validation_0-auc:0.83722
[7]	validation_0-auc:0.83804
[8]	validation_0-auc:0.84487
[9]	validation_0-auc:0.84785
[10]	validation_0-auc:0.84873
[11]	validation_0-auc:0.85214
[12]	validation_0-auc:0.85455
[13]	validation_0-auc:0.85637
[14]	validation_0-auc:0.85645
[15]	validation_0-auc:0.85732
[16]	validation_0-auc:0.86143
[17]	validation_0-auc:0.86323
[18]	validation_0-auc:0.86578
[19]	validation_0-auc:0.86590
[20]	validation_0-auc:0.86893
[21]	validation_0-auc:0.86938
[22]	validation_0-auc:0.87136
[23]	validation_0-auc:0.87176
[24]	validation_0-auc:0.87270
[25]	validation_0-auc:0.87280
[26]	validation_0-auc:0.87332
[27]	validation_0-auc:0.87289
[28]	validation_0-auc:0.87300
[29]	validation_0-auc:0.87169
[30]	validation_0-auc:0.87204
[31]	validation_0-auc:0.87184
[32]	validation_0-auc:0.87301
[33]	validation_0-au

[I 2024-11-23 21:12:22,590] Trial 30 finished with value: 0.9223831235157871 and parameters: {'n_estimators': 792, 'learning_rate': 0.012765772232551776, 'max_depth': 9, 'subsample': 0.723208938033389, 'colsample_bytree': 0.6786165969432774, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75961
[1]	validation_0-auc:0.78430
[2]	validation_0-auc:0.80745
[3]	validation_0-auc:0.82314
[4]	validation_0-auc:0.85024
[5]	validation_0-auc:0.85318
[6]	validation_0-auc:0.86020
[7]	validation_0-auc:0.86275
[8]	validation_0-auc:0.86862
[9]	validation_0-auc:0.86995
[10]	validation_0-auc:0.87140
[11]	validation_0-auc:0.87124
[12]	validation_0-auc:0.87375
[13]	validation_0-auc:0.87383
[14]	validation_0-auc:0.87237
[15]	validation_0-auc:0.87468
[16]	validation_0-auc:0.87749
[17]	validation_0-auc:0.87894
[18]	validation_0-auc:0.88239
[19]	validation_0-auc:0.88276
[20]	validation_0-auc:0.88671
[21]	validation_0-auc:0.88770
[22]	validation_0-auc:0.89061
[23]	validation_0-auc:0.89119
[24]	validation_0-auc:0.89074
[25]	validation_0-auc:0.89092
[26]	validation_0-auc:0.89221
[27]	validation_0-auc:0.89089
[28]	validation_0-auc:0.89095
[29]	validation_0-auc:0.89034
[30]	validation_0-auc:0.89196
[31]	validation_0-auc:0.89294
[32]	validation_0-auc:0.89398
[33]	validation_0-au

[I 2024-11-23 21:12:34,807] Trial 31 finished with value: 0.9325815846610709 and parameters: {'n_estimators': 574, 'learning_rate': 0.026865839235835628, 'max_depth': 11, 'subsample': 0.704816223641413, 'colsample_bytree': 0.642511202313606, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.76034
[1]	validation_0-auc:0.76948
[2]	validation_0-auc:0.80500
[3]	validation_0-auc:0.82299
[4]	validation_0-auc:0.84555
[5]	validation_0-auc:0.85094
[6]	validation_0-auc:0.85949
[7]	validation_0-auc:0.86299
[8]	validation_0-auc:0.86405
[9]	validation_0-auc:0.86525
[10]	validation_0-auc:0.86689
[11]	validation_0-auc:0.87295
[12]	validation_0-auc:0.87392
[13]	validation_0-auc:0.87413
[14]	validation_0-auc:0.87255
[15]	validation_0-auc:0.87508
[16]	validation_0-auc:0.87475
[17]	validation_0-auc:0.87470
[18]	validation_0-auc:0.87614
[19]	validation_0-auc:0.87899
[20]	validation_0-auc:0.88281
[21]	validation_0-auc:0.88514
[22]	validation_0-auc:0.88717
[23]	validation_0-auc:0.88670
[24]	validation_0-auc:0.88732
[25]	validation_0-auc:0.88625
[26]	validation_0-auc:0.88697
[27]	validation_0-auc:0.88692
[28]	validation_0-auc:0.88707
[29]	validation_0-auc:0.88721
[30]	validation_0-auc:0.88770
[31]	validation_0-auc:0.88744
[32]	validation_0-auc:0.88877
[33]	validation_0-au

[I 2024-11-23 21:12:47,237] Trial 32 finished with value: 0.9308064298720087 and parameters: {'n_estimators': 608, 'learning_rate': 0.024110100443151344, 'max_depth': 11, 'subsample': 0.5911255173996701, 'colsample_bytree': 0.5690708023329701, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.73817
[1]	validation_0-auc:0.75878
[2]	validation_0-auc:0.79057
[3]	validation_0-auc:0.80378
[4]	validation_0-auc:0.82090
[5]	validation_0-auc:0.82523
[6]	validation_0-auc:0.83249
[7]	validation_0-auc:0.83983
[8]	validation_0-auc:0.84992
[9]	validation_0-auc:0.85132
[10]	validation_0-auc:0.85610
[11]	validation_0-auc:0.86060
[12]	validation_0-auc:0.85965
[13]	validation_0-auc:0.86046
[14]	validation_0-auc:0.86066
[15]	validation_0-auc:0.86421
[16]	validation_0-auc:0.86719
[17]	validation_0-auc:0.86785
[18]	validation_0-auc:0.86953
[19]	validation_0-auc:0.87024
[20]	validation_0-auc:0.87365
[21]	validation_0-auc:0.87557
[22]	validation_0-auc:0.87764
[23]	validation_0-auc:0.87828
[24]	validation_0-auc:0.88020
[25]	validation_0-auc:0.88129
[26]	validation_0-auc:0.88206
[27]	validation_0-auc:0.88384
[28]	validation_0-auc:0.88534
[29]	validation_0-auc:0.88659
[30]	validation_0-auc:0.88709
[31]	validation_0-auc:0.88821
[32]	validation_0-auc:0.88939
[33]	validation_0-au

[I 2024-11-23 21:12:55,603] Trial 33 finished with value: 0.9285050868074833 and parameters: {'n_estimators': 561, 'learning_rate': 0.05942268699545747, 'max_depth': 9, 'subsample': 0.6977324928996836, 'colsample_bytree': 0.646649232061754, 'min_child_weight': 3}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.76983
[1]	validation_0-auc:0.78005
[2]	validation_0-auc:0.80840
[3]	validation_0-auc:0.82446
[4]	validation_0-auc:0.84232
[5]	validation_0-auc:0.84484
[6]	validation_0-auc:0.85461
[7]	validation_0-auc:0.85456
[8]	validation_0-auc:0.85807
[9]	validation_0-auc:0.85957
[10]	validation_0-auc:0.86259
[11]	validation_0-auc:0.86738
[12]	validation_0-auc:0.86867
[13]	validation_0-auc:0.86903
[14]	validation_0-auc:0.86827
[15]	validation_0-auc:0.86963
[16]	validation_0-auc:0.87200
[17]	validation_0-auc:0.87404
[18]	validation_0-auc:0.87662
[19]	validation_0-auc:0.87672
[20]	validation_0-auc:0.87952
[21]	validation_0-auc:0.87969
[22]	validation_0-auc:0.88133
[23]	validation_0-auc:0.88049
[24]	validation_0-auc:0.88052
[25]	validation_0-auc:0.88111
[26]	validation_0-auc:0.88144
[27]	validation_0-auc:0.88084
[28]	validation_0-auc:0.88053
[29]	validation_0-auc:0.88020
[30]	validation_0-auc:0.88014
[31]	validation_0-auc:0.88005
[32]	validation_0-auc:0.88144
[33]	validation_0-au

[I 2024-11-23 21:13:12,400] Trial 34 finished with value: 0.9290035610608168 and parameters: {'n_estimators': 828, 'learning_rate': 0.01653142206474494, 'max_depth': 11, 'subsample': 0.7525525551178694, 'colsample_bytree': 0.6277426182563156, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.73992
[1]	validation_0-auc:0.77778
[2]	validation_0-auc:0.80908
[3]	validation_0-auc:0.81238
[4]	validation_0-auc:0.82506
[5]	validation_0-auc:0.82356
[6]	validation_0-auc:0.83417
[7]	validation_0-auc:0.83255
[8]	validation_0-auc:0.83766
[9]	validation_0-auc:0.84108
[10]	validation_0-auc:0.84379
[11]	validation_0-auc:0.84872
[12]	validation_0-auc:0.85138
[13]	validation_0-auc:0.85377
[14]	validation_0-auc:0.85216
[15]	validation_0-auc:0.85500
[16]	validation_0-auc:0.85721
[17]	validation_0-auc:0.85781
[18]	validation_0-auc:0.85789
[19]	validation_0-auc:0.85944
[20]	validation_0-auc:0.86486
[21]	validation_0-auc:0.86636
[22]	validation_0-auc:0.86703
[23]	validation_0-auc:0.86812
[24]	validation_0-auc:0.86736
[25]	validation_0-auc:0.86776
[26]	validation_0-auc:0.86887
[27]	validation_0-auc:0.86757
[28]	validation_0-auc:0.86754
[29]	validation_0-auc:0.86758
[30]	validation_0-auc:0.86825
[31]	validation_0-auc:0.86869
[32]	validation_0-auc:0.86965
[33]	validation_0-au

[I 2024-11-23 21:13:23,977] Trial 35 finished with value: 0.9286529815633795 and parameters: {'n_estimators': 752, 'learning_rate': 0.023611164308538313, 'max_depth': 8, 'subsample': 0.6134811676992394, 'colsample_bytree': 0.6961627038816225, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75661
[1]	validation_0-auc:0.76727
[2]	validation_0-auc:0.80707
[3]	validation_0-auc:0.81610
[4]	validation_0-auc:0.82812
[5]	validation_0-auc:0.83095
[6]	validation_0-auc:0.83340
[7]	validation_0-auc:0.83795
[8]	validation_0-auc:0.84308
[9]	validation_0-auc:0.84587
[10]	validation_0-auc:0.84987
[11]	validation_0-auc:0.85573
[12]	validation_0-auc:0.85598
[13]	validation_0-auc:0.85353
[14]	validation_0-auc:0.85302
[15]	validation_0-auc:0.85702
[16]	validation_0-auc:0.85850
[17]	validation_0-auc:0.85781
[18]	validation_0-auc:0.85999
[19]	validation_0-auc:0.86226
[20]	validation_0-auc:0.86343
[21]	validation_0-auc:0.86346
[22]	validation_0-auc:0.86492
[23]	validation_0-auc:0.86526
[24]	validation_0-auc:0.86410
[25]	validation_0-auc:0.86474
[26]	validation_0-auc:0.86496
[27]	validation_0-auc:0.86387
[28]	validation_0-auc:0.86407
[29]	validation_0-auc:0.86409
[30]	validation_0-auc:0.86477
[31]	validation_0-auc:0.86482
[32]	validation_0-auc:0.86639
[33]	validation_0-au

[I 2024-11-23 21:13:35,835] Trial 36 finished with value: 0.9109402487422159 and parameters: {'n_estimators': 647, 'learning_rate': 0.007266148169732674, 'max_depth': 10, 'subsample': 0.5453604804419829, 'colsample_bytree': 0.5813623250338809, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.71734
[1]	validation_0-auc:0.74833
[2]	validation_0-auc:0.77077
[3]	validation_0-auc:0.79651
[4]	validation_0-auc:0.81313
[5]	validation_0-auc:0.83046
[6]	validation_0-auc:0.83052
[7]	validation_0-auc:0.83717
[8]	validation_0-auc:0.84815
[9]	validation_0-auc:0.85043
[10]	validation_0-auc:0.85305
[11]	validation_0-auc:0.85771
[12]	validation_0-auc:0.86223
[13]	validation_0-auc:0.86223
[14]	validation_0-auc:0.86473
[15]	validation_0-auc:0.86930
[16]	validation_0-auc:0.87288
[17]	validation_0-auc:0.87347
[18]	validation_0-auc:0.87474
[19]	validation_0-auc:0.87597
[20]	validation_0-auc:0.87853
[21]	validation_0-auc:0.88063
[22]	validation_0-auc:0.88368
[23]	validation_0-auc:0.88457
[24]	validation_0-auc:0.88429
[25]	validation_0-auc:0.88566
[26]	validation_0-auc:0.88665
[27]	validation_0-auc:0.88875
[28]	validation_0-auc:0.88955
[29]	validation_0-auc:0.88950
[30]	validation_0-auc:0.88908
[31]	validation_0-auc:0.88963
[32]	validation_0-auc:0.89063
[33]	validation_0-au

[I 2024-11-23 21:13:48,054] Trial 37 finished with value: 0.9278914131723146 and parameters: {'n_estimators': 898, 'learning_rate': 0.09278372744464297, 'max_depth': 8, 'subsample': 0.6924753649913231, 'colsample_bytree': 0.5101157700894704, 'min_child_weight': 3}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.77473
[1]	validation_0-auc:0.79370
[2]	validation_0-auc:0.82099
[3]	validation_0-auc:0.82671
[4]	validation_0-auc:0.84562
[5]	validation_0-auc:0.84717
[6]	validation_0-auc:0.85527
[7]	validation_0-auc:0.85890
[8]	validation_0-auc:0.86479
[9]	validation_0-auc:0.86836
[10]	validation_0-auc:0.87292
[11]	validation_0-auc:0.87422
[12]	validation_0-auc:0.87719
[13]	validation_0-auc:0.87816
[14]	validation_0-auc:0.87658
[15]	validation_0-auc:0.87972
[16]	validation_0-auc:0.88353
[17]	validation_0-auc:0.88432
[18]	validation_0-auc:0.88745
[19]	validation_0-auc:0.88750
[20]	validation_0-auc:0.89015
[21]	validation_0-auc:0.88995
[22]	validation_0-auc:0.89227
[23]	validation_0-auc:0.89249
[24]	validation_0-auc:0.89318
[25]	validation_0-auc:0.89328
[26]	validation_0-auc:0.89475
[27]	validation_0-auc:0.89494
[28]	validation_0-auc:0.89467
[29]	validation_0-auc:0.89454
[30]	validation_0-auc:0.89528
[31]	validation_0-auc:0.89633
[32]	validation_0-auc:0.89746
[33]	validation_0-au

[I 2024-11-23 21:14:02,392] Trial 38 finished with value: 0.9328039573704541 and parameters: {'n_estimators': 658, 'learning_rate': 0.02462645787390766, 'max_depth': 12, 'subsample': 0.6623894670948736, 'colsample_bytree': 0.6056791693930271, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75222
[1]	validation_0-auc:0.76918
[2]	validation_0-auc:0.78886
[3]	validation_0-auc:0.80035
[4]	validation_0-auc:0.82479
[5]	validation_0-auc:0.82891
[6]	validation_0-auc:0.84040
[7]	validation_0-auc:0.84121
[8]	validation_0-auc:0.84302
[9]	validation_0-auc:0.84598
[10]	validation_0-auc:0.84810
[11]	validation_0-auc:0.85006
[12]	validation_0-auc:0.84989
[13]	validation_0-auc:0.84955
[14]	validation_0-auc:0.84690
[15]	validation_0-auc:0.85015
[16]	validation_0-auc:0.85330
[17]	validation_0-auc:0.85429
[18]	validation_0-auc:0.85622
[19]	validation_0-auc:0.85724
[20]	validation_0-auc:0.86011
[21]	validation_0-auc:0.86131
[22]	validation_0-auc:0.86370
[23]	validation_0-auc:0.86314
[24]	validation_0-auc:0.86383
[25]	validation_0-auc:0.86407
[26]	validation_0-auc:0.86452
[27]	validation_0-auc:0.86412
[28]	validation_0-auc:0.86475
[29]	validation_0-auc:0.86350
[30]	validation_0-auc:0.86473
[31]	validation_0-auc:0.86556
[32]	validation_0-auc:0.86628
[33]	validation_0-au

[I 2024-11-23 21:14:15,473] Trial 39 finished with value: 0.9174059585391859 and parameters: {'n_estimators': 674, 'learning_rate': 0.011963320526942479, 'max_depth': 12, 'subsample': 0.6544771950564097, 'colsample_bytree': 0.6132457432923192, 'min_child_weight': 4}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.65287
[1]	validation_0-auc:0.64667
[2]	validation_0-auc:0.68429
[3]	validation_0-auc:0.71253
[4]	validation_0-auc:0.72548
[5]	validation_0-auc:0.74110
[6]	validation_0-auc:0.74155
[7]	validation_0-auc:0.73967
[8]	validation_0-auc:0.75466
[9]	validation_0-auc:0.75794
[10]	validation_0-auc:0.76077
[11]	validation_0-auc:0.76860
[12]	validation_0-auc:0.77256
[13]	validation_0-auc:0.77049
[14]	validation_0-auc:0.76996
[15]	validation_0-auc:0.77188
[16]	validation_0-auc:0.77408
[17]	validation_0-auc:0.77656
[18]	validation_0-auc:0.77664
[19]	validation_0-auc:0.77684
[20]	validation_0-auc:0.77879
[21]	validation_0-auc:0.77981
[22]	validation_0-auc:0.78356
[23]	validation_0-auc:0.78065
[24]	validation_0-auc:0.78300
[25]	validation_0-auc:0.78367
[26]	validation_0-auc:0.78463
[27]	validation_0-auc:0.78459
[28]	validation_0-auc:0.78436
[29]	validation_0-auc:0.78430
[30]	validation_0-auc:0.78592
[31]	validation_0-auc:0.78794
[32]	validation_0-auc:0.79092
[33]	validation_0-au

[I 2024-11-23 21:14:20,912] Trial 40 finished with value: 0.8900239545573289 and parameters: {'n_estimators': 756, 'learning_rate': 0.062236953473848784, 'max_depth': 3, 'subsample': 0.5580120922849433, 'colsample_bytree': 0.5378538298846014, 'min_child_weight': 9}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.74314
[1]	validation_0-auc:0.76712
[2]	validation_0-auc:0.80601
[3]	validation_0-auc:0.81658
[4]	validation_0-auc:0.83921
[5]	validation_0-auc:0.84195
[6]	validation_0-auc:0.85321
[7]	validation_0-auc:0.85570
[8]	validation_0-auc:0.86104
[9]	validation_0-auc:0.86547
[10]	validation_0-auc:0.86761
[11]	validation_0-auc:0.87008
[12]	validation_0-auc:0.87228
[13]	validation_0-auc:0.87274
[14]	validation_0-auc:0.86989
[15]	validation_0-auc:0.87147
[16]	validation_0-auc:0.87503
[17]	validation_0-auc:0.87535
[18]	validation_0-auc:0.87637
[19]	validation_0-auc:0.87597
[20]	validation_0-auc:0.88045
[21]	validation_0-auc:0.88038
[22]	validation_0-auc:0.88278
[23]	validation_0-auc:0.88285
[24]	validation_0-auc:0.88490
[25]	validation_0-auc:0.88548
[26]	validation_0-auc:0.88589
[27]	validation_0-auc:0.88543
[28]	validation_0-auc:0.88570
[29]	validation_0-auc:0.88601
[30]	validation_0-auc:0.88673
[31]	validation_0-auc:0.88768
[32]	validation_0-auc:0.88891
[33]	validation_0-au

[I 2024-11-23 21:14:31,913] Trial 41 finished with value: 0.9304862529392347 and parameters: {'n_estimators': 574, 'learning_rate': 0.024548364390637865, 'max_depth': 10, 'subsample': 0.6721866137276582, 'colsample_bytree': 0.6518899536359626, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.77621
[1]	validation_0-auc:0.80107
[2]	validation_0-auc:0.84042
[3]	validation_0-auc:0.84306
[4]	validation_0-auc:0.85869
[5]	validation_0-auc:0.85770
[6]	validation_0-auc:0.86135
[7]	validation_0-auc:0.86575
[8]	validation_0-auc:0.87206
[9]	validation_0-auc:0.87520
[10]	validation_0-auc:0.87966
[11]	validation_0-auc:0.88249
[12]	validation_0-auc:0.88472
[13]	validation_0-auc:0.88655
[14]	validation_0-auc:0.88708
[15]	validation_0-auc:0.88844
[16]	validation_0-auc:0.88983
[17]	validation_0-auc:0.89054
[18]	validation_0-auc:0.89259
[19]	validation_0-auc:0.89335
[20]	validation_0-auc:0.89738
[21]	validation_0-auc:0.89845
[22]	validation_0-auc:0.90117
[23]	validation_0-auc:0.90210
[24]	validation_0-auc:0.90199
[25]	validation_0-auc:0.90289
[26]	validation_0-auc:0.90372
[27]	validation_0-auc:0.90436
[28]	validation_0-auc:0.90365
[29]	validation_0-auc:0.90476
[30]	validation_0-auc:0.90461
[31]	validation_0-auc:0.90481
[32]	validation_0-auc:0.90569
[33]	validation_0-au

[I 2024-11-23 21:14:43,270] Trial 42 finished with value: 0.9324735504918108 and parameters: {'n_estimators': 531, 'learning_rate': 0.045808913356411886, 'max_depth': 12, 'subsample': 0.7105795470302316, 'colsample_bytree': 0.687571778849608, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.76186
[1]	validation_0-auc:0.78282
[2]	validation_0-auc:0.79648
[3]	validation_0-auc:0.81333
[4]	validation_0-auc:0.83440
[5]	validation_0-auc:0.84328
[6]	validation_0-auc:0.85296
[7]	validation_0-auc:0.85521
[8]	validation_0-auc:0.86362
[9]	validation_0-auc:0.86764
[10]	validation_0-auc:0.87141
[11]	validation_0-auc:0.87376
[12]	validation_0-auc:0.87512
[13]	validation_0-auc:0.87754
[14]	validation_0-auc:0.87613
[15]	validation_0-auc:0.87894
[16]	validation_0-auc:0.88094
[17]	validation_0-auc:0.88171
[18]	validation_0-auc:0.88258
[19]	validation_0-auc:0.88179
[20]	validation_0-auc:0.88653
[21]	validation_0-auc:0.88943
[22]	validation_0-auc:0.89111
[23]	validation_0-auc:0.89196
[24]	validation_0-auc:0.89237
[25]	validation_0-auc:0.89343
[26]	validation_0-auc:0.89421
[27]	validation_0-auc:0.89455
[28]	validation_0-auc:0.89378
[29]	validation_0-auc:0.89325
[30]	validation_0-auc:0.89293
[31]	validation_0-auc:0.89262
[32]	validation_0-auc:0.89381
[33]	validation_0-au

[I 2024-11-23 21:15:00,632] Trial 43 finished with value: 0.9312073490672594 and parameters: {'n_estimators': 873, 'learning_rate': 0.046223947852593456, 'max_depth': 12, 'subsample': 0.6461817112182265, 'colsample_bytree': 0.5988009215248417, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.75454
[1]	validation_0-auc:0.76648
[2]	validation_0-auc:0.81213
[3]	validation_0-auc:0.82297
[4]	validation_0-auc:0.83998
[5]	validation_0-auc:0.84770
[6]	validation_0-auc:0.85165
[7]	validation_0-auc:0.85371
[8]	validation_0-auc:0.85926
[9]	validation_0-auc:0.86208
[10]	validation_0-auc:0.86359
[11]	validation_0-auc:0.86824
[12]	validation_0-auc:0.86994
[13]	validation_0-auc:0.86968
[14]	validation_0-auc:0.86866
[15]	validation_0-auc:0.87175
[16]	validation_0-auc:0.87371
[17]	validation_0-auc:0.87489
[18]	validation_0-auc:0.87546
[19]	validation_0-auc:0.87836
[20]	validation_0-auc:0.88205
[21]	validation_0-auc:0.88245
[22]	validation_0-auc:0.88391
[23]	validation_0-auc:0.88372
[24]	validation_0-auc:0.88428
[25]	validation_0-auc:0.88626
[26]	validation_0-auc:0.88758
[27]	validation_0-auc:0.88618
[28]	validation_0-auc:0.88524
[29]	validation_0-auc:0.88658
[30]	validation_0-auc:0.88783
[31]	validation_0-auc:0.88800
[32]	validation_0-auc:0.88876
[33]	validation_0-au

[I 2024-11-23 21:15:14,835] Trial 44 finished with value: 0.9293228639607738 and parameters: {'n_estimators': 539, 'learning_rate': 0.018612670624402864, 'max_depth': 14, 'subsample': 0.5044493976093773, 'colsample_bytree': 0.5577927119930339, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.77681
[1]	validation_0-auc:0.79223
[2]	validation_0-auc:0.81656
[3]	validation_0-auc:0.82579
[4]	validation_0-auc:0.84137
[5]	validation_0-auc:0.84306
[6]	validation_0-auc:0.85062
[7]	validation_0-auc:0.85196
[8]	validation_0-auc:0.85310
[9]	validation_0-auc:0.85644
[10]	validation_0-auc:0.86186
[11]	validation_0-auc:0.86718
[12]	validation_0-auc:0.86697
[13]	validation_0-auc:0.86962
[14]	validation_0-auc:0.86918
[15]	validation_0-auc:0.87240
[16]	validation_0-auc:0.87629
[17]	validation_0-auc:0.87647
[18]	validation_0-auc:0.87907
[19]	validation_0-auc:0.87966
[20]	validation_0-auc:0.88400
[21]	validation_0-auc:0.88524
[22]	validation_0-auc:0.88715
[23]	validation_0-auc:0.88794
[24]	validation_0-auc:0.88892
[25]	validation_0-auc:0.89018
[26]	validation_0-auc:0.89153
[27]	validation_0-auc:0.89269
[28]	validation_0-auc:0.89240
[29]	validation_0-auc:0.89367
[30]	validation_0-auc:0.89449
[31]	validation_0-auc:0.89527
[32]	validation_0-auc:0.89707
[33]	validation_0-au

[I 2024-11-23 21:15:27,986] Trial 45 finished with value: 0.9309183339205003 and parameters: {'n_estimators': 669, 'learning_rate': 0.04300043564726491, 'max_depth': 12, 'subsample': 0.6239415572195838, 'colsample_bytree': 0.6191874233847017, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.79464
[1]	validation_0-auc:0.82049
[2]	validation_0-auc:0.84023
[3]	validation_0-auc:0.84779
[4]	validation_0-auc:0.86500
[5]	validation_0-auc:0.86788
[6]	validation_0-auc:0.87048
[7]	validation_0-auc:0.87322
[8]	validation_0-auc:0.87749
[9]	validation_0-auc:0.87888
[10]	validation_0-auc:0.88746
[11]	validation_0-auc:0.89074
[12]	validation_0-auc:0.89212
[13]	validation_0-auc:0.89170
[14]	validation_0-auc:0.89348
[15]	validation_0-auc:0.89577
[16]	validation_0-auc:0.89694
[17]	validation_0-auc:0.89742
[18]	validation_0-auc:0.90015
[19]	validation_0-auc:0.89952
[20]	validation_0-auc:0.90100
[21]	validation_0-auc:0.90189
[22]	validation_0-auc:0.90206
[23]	validation_0-auc:0.90319
[24]	validation_0-auc:0.90458
[25]	validation_0-auc:0.90573
[26]	validation_0-auc:0.90650
[27]	validation_0-auc:0.90702
[28]	validation_0-auc:0.90801
[29]	validation_0-auc:0.90822
[30]	validation_0-auc:0.90833
[31]	validation_0-auc:0.90900
[32]	validation_0-auc:0.90891
[33]	validation_0-au

[I 2024-11-23 21:15:39,678] Trial 46 finished with value: 0.9329373531481963 and parameters: {'n_estimators': 586, 'learning_rate': 0.06214575178600849, 'max_depth': 11, 'subsample': 0.8059442743810313, 'colsample_bytree': 0.6537334296938118, 'min_child_weight': 1}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.78127
[1]	validation_0-auc:0.78082
[2]	validation_0-auc:0.80774
[3]	validation_0-auc:0.82773
[4]	validation_0-auc:0.84755
[5]	validation_0-auc:0.85308
[6]	validation_0-auc:0.85936
[7]	validation_0-auc:0.86387
[8]	validation_0-auc:0.86679
[9]	validation_0-auc:0.86656
[10]	validation_0-auc:0.87416
[11]	validation_0-auc:0.87823
[12]	validation_0-auc:0.87822
[13]	validation_0-auc:0.87858
[14]	validation_0-auc:0.88122
[15]	validation_0-auc:0.88397
[16]	validation_0-auc:0.88415
[17]	validation_0-auc:0.88504
[18]	validation_0-auc:0.88633
[19]	validation_0-auc:0.88806
[20]	validation_0-auc:0.89026
[21]	validation_0-auc:0.89220
[22]	validation_0-auc:0.89266
[23]	validation_0-auc:0.89539
[24]	validation_0-auc:0.89590
[25]	validation_0-auc:0.89593
[26]	validation_0-auc:0.89546
[27]	validation_0-auc:0.89658
[28]	validation_0-auc:0.89718
[29]	validation_0-auc:0.89812
[30]	validation_0-auc:0.89795
[31]	validation_0-auc:0.89969
[32]	validation_0-auc:0.89993
[33]	validation_0-au

[I 2024-11-23 21:15:50,851] Trial 47 finished with value: 0.9292724846896467 and parameters: {'n_estimators': 737, 'learning_rate': 0.06713902748214698, 'max_depth': 9, 'subsample': 0.8204260339831457, 'colsample_bytree': 0.5863097114059127, 'min_child_weight': 3}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.80280
[1]	validation_0-auc:0.82083
[2]	validation_0-auc:0.84238
[3]	validation_0-auc:0.85197
[4]	validation_0-auc:0.86045
[5]	validation_0-auc:0.86108
[6]	validation_0-auc:0.86342
[7]	validation_0-auc:0.87153
[8]	validation_0-auc:0.87769
[9]	validation_0-auc:0.87884
[10]	validation_0-auc:0.88042
[11]	validation_0-auc:0.88383
[12]	validation_0-auc:0.88627
[13]	validation_0-auc:0.88805
[14]	validation_0-auc:0.88872
[15]	validation_0-auc:0.88909
[16]	validation_0-auc:0.89158
[17]	validation_0-auc:0.89302
[18]	validation_0-auc:0.89408
[19]	validation_0-auc:0.89410
[20]	validation_0-auc:0.89475
[21]	validation_0-auc:0.89535
[22]	validation_0-auc:0.89663
[23]	validation_0-auc:0.89755
[24]	validation_0-auc:0.89802
[25]	validation_0-auc:0.90010
[26]	validation_0-auc:0.90207
[27]	validation_0-auc:0.90300
[28]	validation_0-auc:0.90287
[29]	validation_0-auc:0.90396
[30]	validation_0-auc:0.90520
[31]	validation_0-auc:0.90596
[32]	validation_0-auc:0.90682
[33]	validation_0-au

[I 2024-11-23 21:16:02,462] Trial 48 finished with value: 0.9316457842524496 and parameters: {'n_estimators': 531, 'learning_rate': 0.05605482630923885, 'max_depth': 14, 'subsample': 0.7901646618276682, 'colsample_bytree': 0.7129387062276996, 'min_child_weight': 2}. Best is trial 28 with value: 0.933148121105479.


[0]	validation_0-auc:0.74763
[1]	validation_0-auc:0.77648
[2]	validation_0-auc:0.80001
[3]	validation_0-auc:0.81303
[4]	validation_0-auc:0.83061
[5]	validation_0-auc:0.83443
[6]	validation_0-auc:0.84341
[7]	validation_0-auc:0.84832
[8]	validation_0-auc:0.85222
[9]	validation_0-auc:0.85416
[10]	validation_0-auc:0.85927
[11]	validation_0-auc:0.86376
[12]	validation_0-auc:0.86477
[13]	validation_0-auc:0.86759
[14]	validation_0-auc:0.87124
[15]	validation_0-auc:0.87671
[16]	validation_0-auc:0.87986
[17]	validation_0-auc:0.88184
[18]	validation_0-auc:0.88371
[19]	validation_0-auc:0.88354
[20]	validation_0-auc:0.88455
[21]	validation_0-auc:0.88521
[22]	validation_0-auc:0.88635
[23]	validation_0-auc:0.88640
[24]	validation_0-auc:0.88823
[25]	validation_0-auc:0.89001
[26]	validation_0-auc:0.89081
[27]	validation_0-auc:0.89120
[28]	validation_0-auc:0.89193
[29]	validation_0-auc:0.89228
[30]	validation_0-auc:0.89277
[31]	validation_0-auc:0.89317
[32]	validation_0-auc:0.89302
[33]	validation_0-au

[I 2024-11-23 21:16:09,234] Trial 49 finished with value: 0.9240277650277339 and parameters: {'n_estimators': 412, 'learning_rate': 0.08423782308579442, 'max_depth': 11, 'subsample': 0.7665335485258027, 'colsample_bytree': 0.6615597582076272, 'min_child_weight': 7}. Best is trial 28 with value: 0.933148121105479.


Best Parameters (XGBoost): {'n_estimators': 818, 'learning_rate': 0.025519080744339073, 'max_depth': 11, 'subsample': 0.7244069470995078, 'colsample_bytree': 0.5758485018927367, 'min_child_weight': 1}
Best AUC (XGBoost): 0.933148121105479


In [42]:
# Predict on the test set
y_test_pred = final_model_xgb.predict(X_test)

# Function to print metrics
def print_metrics(y_true, y_pred, dataset_name):
    print(f"Metrics for {dataset_name}:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, pos_label=1))  # Change 'Yes' to the positive label in your dataset
    print("Recall:", recall_score(y_true, y_pred, pos_label=1))
    print("F1 Score:", f1_score(y_true, y_pred, pos_label=1))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\n" + "="*50 + "\n")

# Print metrics for test set
print_metrics(y_test, y_test_pred, "Test Set")

Metrics for Test Set:
Accuracy: 0.7610169491525424
Precision: 0.8344465648854962
Recall: 0.8896236012207528
F1 Score: 0.8611521418020679

Classification Report:
               precision    recall  f1-score   support

           0       0.18      0.12      0.14       394
           1       0.83      0.89      0.86      1966

    accuracy                           0.76      2360
   macro avg       0.51      0.50      0.50      2360
weighted avg       0.72      0.76      0.74      2360

Confusion Matrix:
 [[  47  347]
 [ 217 1749]]




In [34]:
from catboost import CatBoostClassifier

def objective_catboost(trial):
    # Suggest hyperparameters
    params = {
        'iterations': trial.suggest_int('iterations', 500, 2000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-2, 10.0),
        'bagging_temperature': trial.suggest_uniform('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_uniform('random_strength', 0.0, 10.0)
    }

    # Stratified K-Fold Cross-Validation
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []

    for train_idx, val_idx in skf.split(X_train_res, y_train_res):
        X_train_fold, X_val_fold = X_train_res[train_idx], X_train_res[val_idx]
        y_train_fold, y_val_fold = y_train_res[train_idx], y_train_res[val_idx]

        model = CatBoostClassifier(**params, random_state=42, verbose=0)
        model.fit(X_train_fold, y_train_fold, eval_set=(X_val_fold, y_val_fold), early_stopping_rounds=50)
        preds = model.predict_proba(X_val_fold)[:, 1]
        cv_scores.append(roc_auc_score(y_val_fold, preds))

    # Return the average ROC-AUC across folds
    return np.mean(cv_scores)

# Create Optuna study and optimize
study = optuna.create_study(direction='maximize')
study.optimize(objective_catboost, n_trials=20)

# Best parameters and score
print("Best Parameters (CatBoost):", study.best_params)
print("Best AUC (CatBoost):", study.best_value)

# Train final model with best parameters
best_params = study.best_params
final_model_cat = CatBoostClassifier(**best_params, random_state=42, verbose=0)
final_model_cat.fit(X_train_res, y_train_res)


[I 2024-11-23 20:46:08,766] A new study created in memory with name: no-name-24476f0a-c28a-4a6a-bf0f-041a3dc9863c
[I 2024-11-23 20:46:45,799] Trial 0 finished with value: 0.8443000919615413 and parameters: {'iterations': 1404, 'learning_rate': 0.0012617913705019691, 'depth': 9, 'l2_leaf_reg': 0.5348100709718068, 'bagging_temperature': 0.37712282590604496, 'random_strength': 2.8819301600313665}. Best is trial 0 with value: 0.8443000919615413.
[I 2024-11-23 20:47:25,484] Trial 1 finished with value: 0.9380290662569152 and parameters: {'iterations': 951, 'learning_rate': 0.021773980821340393, 'depth': 10, 'l2_leaf_reg': 0.24817360984164608, 'bagging_temperature': 0.11196524383675577, 'random_strength': 2.3833324334438446}. Best is trial 1 with value: 0.9380290662569152.
[I 2024-11-23 20:47:41,303] Trial 2 finished with value: 0.9344781537964562 and parameters: {'iterations': 1398, 'learning_rate': 0.029647311836170177, 'depth': 7, 'l2_leaf_reg': 0.1064095357782081, 'bagging_temperature': 

Best Parameters (CatBoost): {'iterations': 951, 'learning_rate': 0.021773980821340393, 'depth': 10, 'l2_leaf_reg': 0.24817360984164608, 'bagging_temperature': 0.11196524383675577, 'random_strength': 2.3833324334438446}
Best AUC (CatBoost): 0.9380290662569152


<catboost.core.CatBoostClassifier at 0x1776917e0>

In [35]:
# Predict on the test set
y_test_pred = final_model_cat.predict(X_test)

# Function to print metrics
def print_metrics(y_true, y_pred, dataset_name):
    print(f"Metrics for {dataset_name}:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, pos_label=1))  # Change 'Yes' to the positive label in your dataset
    print("Recall:", recall_score(y_true, y_pred, pos_label=1))
    print("F1 Score:", f1_score(y_true, y_pred, pos_label=1))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("\n" + "="*50 + "\n")

# Print metrics for test set
print_metrics(y_test, y_test_pred, "Test Set")

Metrics for Test Set:
Accuracy: 0.760593220338983
Precision: 0.8321479374110953
Recall: 0.8926754832146491
F1 Score: 0.8613496932515338

Classification Report:
               precision    recall  f1-score   support

           0       0.16      0.10      0.12       394
           1       0.83      0.89      0.86      1966

    accuracy                           0.76      2360
   macro avg       0.50      0.50      0.49      2360
weighted avg       0.72      0.76      0.74      2360

Confusion Matrix:
 [[  40  354]
 [ 211 1755]]


