In [1]:
#imports
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import plotly.io as pio

from helpers.datasetHelper import get_samples, split_healthy_data
from imblearn.over_sampling import SMOTE
from sklearn.metrics import cohen_kappa_score

from helpers.ploting import display_confusion_matrix_pink_variants
from helpers.metaheuristics import run_pso_with_progress, run_ga_with_progress
from models import MyXGboost
import numpy as np
from sklearn.metrics import recall_score, precision_score
from helpers.datasetHelper import split_healthy_data_HxC

# Set the default renderer to 'browser' to ensure plots open in the browser
pio.renderers.default = 'browser'

In [2]:
directory_path_combined = './datasets/Combined'
data_HB_4C = get_samples(os.path.join(directory_path_combined, 'DT.Health_BRCA_4C.csv'))
data_HPB_3C = get_samples(os.path.join(directory_path_combined, 'DT.Consolidated_3C.csv'))

healthy_mt_cases, healthy_wt_cases, brca_mt_cases, brca_wt_cases = split_healthy_data_HxC(data_HB_4C)
healthy_consolidated_cases, pre_brca_consolidated_cases, brca_consolidated_cases = split_healthy_data(data_HPB_3C)

directory_path_binary = './datasets/Binary'
data_HP_2C = get_samples(os.path.join(directory_path_binary, 'Healthy.PRE-BRCA.csv'))
healthy_cases, pre_brca_cases,_ = split_healthy_data(data_HP_2C)


feature_names = np.array(data_HB_4C[0][:-1])

print(f"Healthy with mutation cases: {len(healthy_mt_cases)}")
print(f"Healthy without mutation cases: {len(healthy_wt_cases)}")
print(f"Healthy consolidated cases(adding not specified cases): {(len(healthy_consolidated_cases)) - (len(healthy_mt_cases) + len(healthy_wt_cases))}")

print(f"BRCA with mutation cases: {len(brca_mt_cases)}")
print(f"BRCA without mutation cases: {len(brca_wt_cases)}")
print(f"BRCA consolidated cases: {len(brca_consolidated_cases)}")

print(f"PRE-BRCA cases: {len(pre_brca_cases)}")



Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,

Healthy with mutation cases: 22
Healthy without mutation cases: 57
Healthy consolidated cases(adding not specified cases): 512
BRCA with mutation cases: 50
BRCA without mutation cases: 15
BRCA consolidated cases: 65
PRE-BRCA cases: 134



Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,27

In [3]:
healthy_mt_cases = pd.DataFrame(healthy_mt_cases)
healthy_wt_cases = pd.DataFrame(healthy_wt_cases)
healthy_cases = pd.DataFrame(healthy_cases)
healthy_consolidated_cases = pd.DataFrame(healthy_consolidated_cases)
brca_consolidated_cases = pd.DataFrame(brca_consolidated_cases)

pre_brca_cases = pd.DataFrame(pre_brca_cases)

brca_mt_cases = pd.DataFrame(brca_mt_cases)
brca_wt_cases = pd.DataFrame(brca_wt_cases)

healthy_mt_cases['Tag'] = 'HEALTHY-MUT'
healthy_wt_cases['Tag'] = 'HEALTHY-WT'
healthy_consolidated_cases['Tag'] = 'HEALTHY'
pre_brca_cases['Tag'] = 'PRE-BRCA'
brca_mt_cases['Tag'] = 'BRCA-MUT'
brca_wt_cases['Tag'] = 'BRCA-WT'
brca_consolidated_cases['Tag'] = 'BRCA'

print(f"Healthy MT cases shape: {healthy_mt_cases.shape}")
print(f"Healthy WT cases shape: {healthy_wt_cases.shape}")
print(f"Healthy consolidated cases shape: {healthy_consolidated_cases.shape}")

print(f"PRE-BRCA cases shape: {pre_brca_cases.shape}")
print(f"BRCA MT cases shape: {brca_mt_cases.shape}")
print(f"BRCA WT cases shape: {brca_wt_cases.shape}")    
print(f"BRCA consolidated cases shape: {brca_consolidated_cases.shape}")

Healthy MT cases shape: (22, 27579)
Healthy WT cases shape: (57, 27579)
Healthy consolidated cases shape: (591, 27579)
PRE-BRCA cases shape: (134, 27579)
BRCA MT cases shape: (50, 27579)
BRCA WT cases shape: (15, 27579)
BRCA consolidated cases shape: (65, 27579)


In [4]:
df_cancer = pd.concat([healthy_consolidated_cases, brca_consolidated_cases], ignore_index=True) #blood samples
X = df_cancer.iloc[:, :-1].apply(pd.to_numeric, errors='coerce')
Y = df_cancer.iloc[:, -1]

# Fill missing values with the lowest value of its cpg site
X = X.apply(lambda col: col.fillna(col.min()), axis=0)
label_encoder = LabelEncoder()
label_encoder.fit(Y)
Y_encoded = label_encoder.transform(Y)

print("Label indices and names:")
for idx, name in enumerate(label_encoder.classes_):
    print(f"{idx}: {name}")

X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(
    X, Y_encoded, test_size=0.25, random_state=42
)

smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
X_train_all, y_train_all = smote.fit_resample(X_train_all, y_train_all)

selector = MyXGboost.XGBoostBinary()[0].fit(X_train_all, y_train_all)

# Evaluate the model
y_pred = selector.predict(X_test_all)
y_pred_proba = selector.predict_proba(X_test_all)

accuracy = accuracy_score(y_test_all, y_pred)
f1 = f1_score(y_test_all, y_pred, average='weighted')

# Calculate sensitivity (recall) and specificity for each class
sensitivity = recall_score(y_test_all, y_pred, average='macro')

# Specificity is the recall of the negative class
# For multi-class, we calculate specificity for each class and average
specificities = []
for class_idx in range(len(np.unique(y_test_all))):
    true_neg = np.sum((y_test_all != class_idx) & (y_pred != class_idx))
    total_neg = np.sum(y_test_all != class_idx)
    specificities.append(true_neg / total_neg if total_neg > 0 else 0)
specificity = np.mean(specificities)
precision = precision_score(y_test_all, y_pred, average='weighted')

# Handle binary and multiclass cases for ROC AUC
if y_pred_proba.shape[1] == 2:
    roc_auc = roc_auc_score(y_test_all, y_pred_proba[:, 1])
else:
    roc_auc = roc_auc_score(y_test_all, y_pred_proba, multi_class='ovr')

# Compute Kappa index
kappa = cohen_kappa_score(y_test_all, y_pred)
# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Kappa index: {kappa:.4f}")

# Confusion matrix
display_confusion_matrix_pink_variants(selector, X_test_all, y_test_all, label_encoder.classes_)

Label indices and names:
0: BRCA
1: HEALTHY



`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


Parameters: { "use_label_encoder" } are not used.




Accuracy: 0.8963
F1 Score: 0.9017
ROC AUC: 0.9212
Sensitivity (Recall): 0.8808
Specificity: 0.8808
Precision: 0.9139
Kappa index: 0.6754


In [5]:
def process_dataframes(X, Y):
    n_features = X.shape[1]
    print(f"Loaded dataset with {n_features} features and {len(Y)} samples")

    # Use DecisionTreeClassifier as the estimator
    estimator = MyXGboost.DecisionTreeMultiClass()

    # Run PSO
    best_weights, best_fitness, progress, X_selected = run_pso_with_progress(
        X, Y, estimator, n_features,
        swarmsize=30,
        maxiter=10,
        threshold=0.7
    )

    X_selected_pso = X.iloc[:, X_selected]

    best_weights_ga, best_fitness_ga, progress_ga, X_selected_proc = run_ga_with_progress(
    X, Y, estimator, X.shape[1], 
    pop_size=25, n_generations=10, threshold=0.8
    )

    X_selected_ga = X.iloc[:, X_selected_proc]

    # Use LabelEncoder to encode the target classes
    label_encoder = LabelEncoder()
    label_encoder.fit(Y)
    Y_encoded = label_encoder.transform(Y)
    print("Label indices and names:")
    for idx, name in enumerate(label_encoder.classes_):
        print(f"{idx}: {name}")

    print(f"Encoded target classes: {label_encoder.classes_}")

    # 1) evaluate with all features
    X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(
        X, Y_encoded, test_size=0.1, random_state=42
    )
    # 2) evaluate with selected features
    X_train_ga, X_test_ga, y_train_ga, y_test_ga = train_test_split(
        X_selected_ga, Y_encoded, test_size=0.1, random_state=42
    )
    # 3) evaluate with PSO selected features
    X_train_pso, X_test_pso, y_train_pso, y_test_pso = train_test_split(
        X_selected_pso, Y_encoded, test_size=0.1, random_state=42
    )

    # Apply SMOTE to balance the training instances - ALL
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_all, y_train_all = smote.fit_resample(X_train_all, y_train_all)

    # Apply SMOTE to balance the training instances - GA
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_ga, y_train_ga = smote.fit_resample(X_train_ga, y_train_ga)

    # Apply SMOTE to balance the training instances - PSO
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_pso, y_train_pso = smote.fit_resample(X_train_pso, y_train_pso)

    return [('GA', X_train_ga, X_test_ga, y_train_ga, y_test_ga), 
                   ('PSO', X_train_pso, X_test_pso, y_train_pso, y_test_pso),
                   ('ALL', X_train_all, X_test_all, y_train_all, y_test_all)]

In [6]:
modes_multiclass = [
    {
        'Name': 'Random Forest',
        'Model': MyXGboost.RandomForest300()
    },
    {
        'Name': 'XGBoost',
        'Model': MyXGboost.XGBoostMultiClass()
    },
    {
        'Name': 'Light GBM',
        'Model': MyXGboost.LightGBMMulticlass()
    },
    {
        'Name': 'Gradient Boosting',
        'Model': MyXGboost.GradientBoosting()
    },
    {
        'Name': 'Ada Boost',
        'Model': MyXGboost.AdaBoostMultiClass()
    }
]

modes_binary = [
    {
        'Name': 'Random Forest',
        'Model': MyXGboost.RandomForest300()
    },
    {
        'Name': 'XGBoost',
        'Model': MyXGboost.XGBoostBinary()
    },
    {
        'Name': 'Light GBM',
        'Model': MyXGboost.LightGBMBinary()
    },
    {
        'Name': 'Gradient Boosting',
        'Model': MyXGboost.GradientBoosting()
    },
    {
        'Name': 'Ada Boost',
        'Model': MyXGboost.AdaBoostBinary()
    }
]

In [7]:
def run_models(feature_set, models):
    method, X_train, X_test, y_train, y_test = feature_set
    print(f"\n=== Results for {method} selected features ===")
    results = []
    for m in models:
        # Split training data into train and validation sets for early stopping
        X_train_split, X_val, y_train_split, y_val = train_test_split(
            X_train, y_train, test_size=0.2, random_state=42
        )   

        selector = m['Model'][0].fit(X_train_split, y_train_split)
    
        # Evaluate the model
        y_pred = selector.predict(X_test)
        y_pred_proba = selector.predict_proba(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        
        # Calculate sensitivity (recall) and specificity for each class
        sensitivity = recall_score(y_test, y_pred, average='macro')
        
        # Specificity is the recall of the negative class
        # For multi-class, we calculate specificity for each class and average
        specificities = []
        for class_idx in range(len(np.unique(y_test))):
            true_neg = np.sum((y_test != class_idx) & (y_pred != class_idx))
            total_neg = np.sum(y_test != class_idx)
            specificities.append(true_neg / total_neg if total_neg > 0 else 0)
        specificity = np.mean(specificities)
        precision = precision_score(y_test, y_pred, average='weighted')

        # Handle binary and multiclass cases for ROC AUC
        if y_pred_proba.shape[1] == 2:
            roc_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
        else:
            roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

        # Compute Kappa index
        kappa = cohen_kappa_score(y_test, y_pred)
        

        results.append({
            'Name': m['Name'],
            'Model': f"{method}",
            'Kappa': kappa,
            'Sensitivity': sensitivity,
            'Accuracy': accuracy,
            'F1 Score': f1,
            'ROC AUC': roc_auc,
            'Specificity': specificity,
            'Precision': precision})

    return results

In [None]:

datasets = {
    "HEALTHY-MT-WT-BRCA": pd.concat([healthy_mt_cases, healthy_wt_cases, brca_consolidated_cases], ignore_index=True),
    "HEALTHY-WT-BRCA": pd.concat([healthy_wt_cases, brca_consolidated_cases], ignore_index=True),
    "HEALTHY-MT-BRCA": pd.concat([healthy_mt_cases, brca_mt_cases], ignore_index=True),
    "HEALTHY-BRCA": pd.concat([healthy_consolidated_cases, brca_consolidated_cases], ignore_index=True),
    "HEALTHY-PRE-BRCA": pd.concat([healthy_consolidated_cases, pre_brca_cases], ignore_index=True),
    "HEALTHY-PRE-BRCA-BRCA": pd.concat([healthy_consolidated_cases, pre_brca_cases, brca_consolidated_cases], ignore_index=True),
    "PRE-BRCA-BRCA": pd.concat([pre_brca_cases, brca_consolidated_cases], ignore_index=True),
    "PRE-BRCA-BRCA-MT": pd.concat([pre_brca_cases, brca_mt_cases], ignore_index=True),
    "PRE-BRCA-BRCA-WT": pd.concat([pre_brca_cases, brca_wt_cases], ignore_index=True),
}

for name, df_cancer in datasets.items():
    X = df_cancer.iloc[:, :-1].apply(pd.to_numeric, errors='coerce')
    Y = df_cancer.iloc[:, -1]
    # Fill missing values with the lowest value of its cpg site
    X = X.apply(lambda col: col.fillna(col.min()), axis=0)

    features_set = process_dataframes(X, Y)
    for feature_set in features_set:
        if len(np.unique(feature_set[3])) > 2:
            models_param = modes_multiclass
        else:
            models_param = modes_binary
        result = run_models(feature_set, models_param)
        
        metrics_dict = []
        
        for res in result:
            print(f"Model: {res['Name']} "
                  f"Kappa: {res['Kappa']:.4f}, "
                  f"Sensitivity: {res['Sensitivity']:.4f}, "
                  f"Accuracy: {res['Accuracy']:.4f}, "
                  f"F1 Score: {res['F1 Score']:.4f}, "        
                  f"ROC AUC: {res['ROC AUC']:.4f}, "
                  f"Specificity: {res['Specificity']:.4f}, "
                  f"Precision: {res['Precision']:.4f}")
            
            metrics_dict.append({
            'Dataset': name,
            'Method': res['Name'],
            'Model': f"{res['Model']}",
            'Kappa': res['Kappa'],
            'Accuracy': res['Accuracy'],
            'ROC_AUC': res['ROC AUC'],
            'F1_Score': res['F1 Score'],
            'Sensitivity': res['Sensitivity'],
            'Specificity': res['Specificity'],
            'Precision': res['Precision']
            })
            
        df_metrics = pd.DataFrame(metrics_dict)
        csv_path = f"metrics_consolidated.csv"
        df_metrics.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)


Loaded dataset with 27578 features and 144 samples
Eval 10: best fitness so far = 0.8499
Eval 20: best fitness so far = 0.8499
Eval 30: best fitness so far = 0.8499
Eval 40: best fitness so far = 0.8499
Eval 50: best fitness so far = 0.8499
Eval 60: best fitness so far = 0.8499
Eval 70: best fitness so far = 0.8499
Eval 80: best fitness so far = 0.8499
Eval 90: best fitness so far = 0.8448
Eval 100: best fitness so far = 0.8448
Eval 110: best fitness so far = 0.8448
Eval 120: best fitness so far = 0.8448
Eval 130: best fitness so far = 0.8448
Eval 140: best fitness so far = 0.8438
Eval 150: best fitness so far = 0.8438
Eval 160: best fitness so far = 0.8438
Eval 170: best fitness so far = 0.8438
Eval 180: best fitness so far = 0.8438
Eval 190: best fitness so far = 0.8438
Eval 200: best fitness so far = 0.8438
Eval 210: best fitness so far = 0.8438
Eval 220: best fitness so far = 0.8438
Eval 230: best fitness so far = 0.8438
Eval 240: best fitness so far = 0.8438
Eval 250: best fitness


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007688 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 267504
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 5574
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Model: Random Forest Kappa: 0.4565, Sensitivity: 0.6159, Accuracy: 0.6667, F1 Score: 0.6556, ROC AUC: 0.8665, Specificity: 0.8167, Precision: 0.7238
Model: XGBoost Kappa: 0.5775, Sensitivity: 0.6825, Accuracy: 0.7333, F1 Score: 0.7168, ROC AUC: 0.8277, Specificity: 0.8639, Precision: 0.7270
Model: Light GBM Kappa: 0.5522, Sensitivity: 0.6190, Accuracy: 0.7333, F1 Score: 0.6511, ROC AUC: 0.9398, Specificity: 0.8500, Precision: 0.5881
Model: Gradient Boosting Kappa: 0.6739, Sensitivity: 0.7746, Accuracy: 0.8000, F1 Score: 0.8000, ROC AUC: 0.9424, Specificity: 0.8833, Precision: 0.8167
Model: Ada Boost Kappa: 0.5833, Sensitivity: 0.7460, Accuracy: 0.7333, F1 Score: 0.7275, ROC AUC: 0.9041, Specificity: 0.8583, Precision: 0.7817

=== Results for PSO selected features ===



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008965 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 334681
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 6974
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Model: Random Forest Kappa: 0.3382, Sensitivity: 0.5048, Accuracy: 0.6000, F1 Score: 0.5385, ROC AUC: 0.8277, Specificity: 0.7833, Precision: 0.5000
Model: XGBoost Kappa: 0.3662, Sensitivity: 0.5683, Accuracy: 0.6000, F1 Score: 0.6016, ROC AUC: 0.8502, Specificity: 0.7917, Precision: 0.7215
Model: Light GBM Kappa: 0.2045, Sensitivity: 0.4825, Accuracy: 0.5333, F1 Score: 0.5227, ROC AUC: 0.7565, Specificity: 0.7250, Precision: 0.6000
Model: Gradient Boosting Kappa: -0.0075, Sensitivity: 0.3429, Accuracy: 0.4000, F1 Score: 0.3533, ROC AUC: 0.7141, Specificity: 0.6583, Precision: 0.3179
Model: Ada Boost Kappa: 0.2808, Sensitivity: 0.5651, Accuracy: 0.5333, F1 Score: 0.5333, ROC AUC: 0.6887, Specificity: 0.7556, Precision: 0.5562

=== Results for ALL selected features ===



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1323480
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 27578
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833
Model: Random Forest Kappa: 0.5522, Sensitivity: 0.6190, Accuracy: 0.7333, F1 Score: 0.6511, ROC AUC: 0.8876, Specificity: 0.8500, Precision: 0.5881
Model: XGBoost Kappa: 0.4565, Sensitivity: 0.5714, Accuracy: 0.6667, F1 Score: 0.6111, ROC AUC: 0.8278, Specificity: 0.8222, Precision: 0.5714
Model: Light GBM Kappa: 0.6739, Sensitivity: 0.7302, Accuracy: 0.8000, F1 Score: 0.7778, ROC AUC: 0.9696, Specificity: 0.8917, Precision: 0.8381
Model: Gradient Boosting Kappa: 0.4792, Sensitivity: 0.6603, Accuracy: 0.6667, F1 Score: 0.6741, ROC AUC: 0.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 45, number of negative: 52
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 186966
[LightGBM] [Info] Number of data points in the train set: 97, number of used features: 5500
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.463918 -> initscore=-0.144581
[LightGBM] [Info] Start training from score -0.144581
Model: Random Forest Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.7361, Specificity: 0.7222, Precision: 0.8291
Model: XGBoost Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.6389, Specificity: 0.7222, Precision: 0.8291
Model: Light GBM Kappa: 0.5517, Sensitivity: 0.8333, Accuracy: 0.7692, F1 Score: 0.7776, ROC AUC: 0.8889, Specificity: 0.8333, Precision: 0.8681
Model: Gradient Boosting Kappa: 0.3659, Sensitivity: 0.7083, Accuracy: 0.6


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 45, number of negative: 52
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005801 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 219744
[LightGBM] [Info] Number of data points in the train set: 97, number of used features: 6464
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.463918 -> initscore=-0.144581
[LightGBM] [Info] Start training from score -0.144581
Model: Random Forest Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.7222, Specificity: 0.7222, Precision: 0.8291
Model: XGBoost Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.8333, Specificity: 0.7222, Precision: 0.8291
Model: Light GBM Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.9444, Specificity: 0.7222, Precision: 0.8291
Model: Gradient Boosting Kappa: 0.2529, Sensitivity: 0.6528, Accuracy: 0.6


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 45, number of negative: 52
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031803 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 937459
[LightGBM] [Info] Number of data points in the train set: 97, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.463918 -> initscore=-0.144581
[LightGBM] [Info] Start training from score -0.144581
Model: Random Forest Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.9167, Specificity: 0.7222, Precision: 0.8291
Model: XGBoost Kappa: 0.2353, Sensitivity: 0.6667, Accuracy: 0.5385, F1 Score: 0.5220, ROC AUC: 0.8611, Specificity: 0.6667, Precision: 0.8154
Model: Light GBM Kappa: 0.3299, Sensitivity: 0.7222, Accuracy: 0.6154, F1 Score: 0.6154, ROC AUC: 0.5833, Specificity: 0.7222, Precision: 0.8291
Model: Gradient Boosting Kappa: 0.1522, Sensitivity: 0.5972, Accuracy: 0.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 37, number of negative: 36
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140648
[LightGBM] [Info] Number of data points in the train set: 73, number of used features: 5410
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.506849 -> initscore=0.027399
[LightGBM] [Info] Start training from score 0.027399
Model: Random Forest Kappa: 0.2500, Sensitivity: 0.6250, Accuracy: 0.6250, F1 Score: 0.6190, ROC AUC: 0.6562, Specificity: 0.6250, Precision: 0.6333
Model: XGBoost Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.7500, F1 Score: 0.7333, ROC AUC: 0.8125, Specificity: 0.7500, Precision: 0.8333
Model: Light GBM Kappa: 0.2500, Sensitivity: 0.6250, Accuracy: 0.6250, F1 Score: 0.6190, ROC AUC: 0.6250, Specificity: 0.6250, Precision: 0.6333
Model: Gradient Boosting Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.750


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 37, number of negative: 36
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 196498
[LightGBM] [Info] Number of data points in the train set: 73, number of used features: 7558
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.506849 -> initscore=0.027399
[LightGBM] [Info] Start training from score 0.027399
Model: Random Forest Kappa: -0.2500, Sensitivity: 0.3750, Accuracy: 0.3750, F1 Score: 0.2727, ROC AUC: 0.6250, Specificity: 0.3750, Precision: 0.2143
Model: XGBoost Kappa: 0.0000, Sensitivity: 0.5000, Accuracy: 0.5000, F1 Score: 0.4667, ROC AUC: 0.6250, Specificity: 0.5000, Precision: 0.5000
Model: Light GBM Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.7500, F1 Score: 0.7333, ROC AUC: 1.0000, Specificity: 0.7500, Precision: 0.8333
Model: Gradient Boosting Kappa: 0.2500, Sensitivity: 0.6250, Accuracy: 0.62


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 37, number of negative: 36
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.026069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 716964
[LightGBM] [Info] Number of data points in the train set: 73, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.506849 -> initscore=0.027399
[LightGBM] [Info] Start training from score 0.027399
Model: Random Forest Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.7500, F1 Score: 0.7333, ROC AUC: 0.7500, Specificity: 0.7500, Precision: 0.8333
Model: XGBoost Kappa: 0.2500, Sensitivity: 0.6250, Accuracy: 0.6250, F1 Score: 0.6190, ROC AUC: 0.5000, Specificity: 0.6250, Precision: 0.6333
Model: Light GBM Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.7500, F1 Score: 0.7333, ROC AUC: 0.8750, Specificity: 0.7500, Precision: 0.8333
Model: Gradient Boosting Kappa: 0.5000, Sensitivity: 0.7500, Accuracy: 0.75


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 420, number of negative: 431
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.044049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1385160
[LightGBM] [Info] Number of data points in the train set: 851, number of used features: 5432
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.493537 -> initscore=-0.025853
[LightGBM] [Info] Start training from score -0.025853
Model: Random Forest Kappa: 0.5749, Sensitivity: 0.9322, Accuracy: 0.8788, F1 Score: 0.8964, ROC AUC: 0.9407, Specificity: 0.9322, Precision: 0.9434
Model: XGBoost Kappa: 0.4807, Sensitivity: 0.9068, Accuracy: 0.8333, F1 Score: 0.8614, ROC AUC: 0.9419, Specificity: 0.9068, Precision: 0.9352
Model: Light GBM Kappa: 0.6118, Sensitivity: 0.9407, Accuracy: 0.8939, F1 Score: 0.9083, ROC AUC: 0.9322, Specificity: 0.9407, Precision: 0.9470
Model: Gradient Boosting Kappa: 0.5749, Sensitivity: 0.9322, Accuracy:


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 420, number of negative: 431
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.070369 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2203965
[LightGBM] [Info] Number of data points in the train set: 851, number of used features: 8643
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.493537 -> initscore=-0.025853
[LightGBM] [Info] Start training from score -0.025853
Model: Random Forest Kappa: 0.5749, Sensitivity: 0.9322, Accuracy: 0.8788, F1 Score: 0.8964, ROC AUC: 0.9358, Specificity: 0.9322, Precision: 0.9434
Model: XGBoost Kappa: 0.4807, Sensitivity: 0.9068, Accuracy: 0.8333, F1 Score: 0.8614, ROC AUC: 0.9443, Specificity: 0.9068, Precision: 0.9352
Model: Light GBM Kappa: 0.6520, Sensitivity: 0.9492, Accuracy: 0.9091, F1 Score: 0.9203, ROC AUC: 0.9298, Specificity: 0.9492, Precision: 0.9510
Model: Gradient Boosting Kappa: 0.5749, Sensitivity: 0.9322, Accuracy:


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 420, number of negative: 431
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.240645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7032390
[LightGBM] [Info] Number of data points in the train set: 851, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.493537 -> initscore=-0.025853
[LightGBM] [Info] Start training from score -0.025853
Model: Random Forest Kappa: 0.5749, Sensitivity: 0.9322, Accuracy: 0.8788, F1 Score: 0.8964, ROC AUC: 0.9528, Specificity: 0.9322, Precision: 0.9434
Model: XGBoost Kappa: 0.5097, Sensitivity: 0.9153, Accuracy: 0.8485, F1 Score: 0.8730, ROC AUC: 0.9467, Specificity: 0.9153, Precision: 0.9376
Model: Light GBM Kappa: 0.5746, Sensitivity: 0.8777, Accuracy: 0.8939, F1 Score: 0.9055, ROC AUC: 0.9225, Specificity: 0.8777, Precision: 0.9304
Model: Gradient Boosting Kappa: 0.5360, Sensitivity: 0.8692, Accuracy


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 425, number of negative: 423
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1373940
[LightGBM] [Info] Number of data points in the train set: 848, number of used features: 5388
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501179 -> initscore=0.004717
[LightGBM] [Info] Start training from score 0.004717
Model: Random Forest Kappa: 0.3596, Sensitivity: 0.7520, Accuracy: 0.7534, F1 Score: 0.7811, ROC AUC: 0.8238, Specificity: 0.7520, Precision: 0.8461
Model: XGBoost Kappa: 0.3075, Sensitivity: 0.7104, Accuracy: 0.7397, F1 Score: 0.7677, ROC AUC: 0.8074, Specificity: 0.7104, Precision: 0.8259
Model: Light GBM Kappa: 0.2526, Sensitivity: 0.6516, Accuracy: 0.7534, F1 Score: 0.7717, ROC AUC: 0.8251, Specificity: 0.6516, Precision: 0.7993
Model: Gradient Boosting Kappa: 0.2526, Sensitivity: 0.6516, Accuracy: 0


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 425, number of negative: 423
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1951515
[LightGBM] [Info] Number of data points in the train set: 848, number of used features: 7653
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501179 -> initscore=0.004717
[LightGBM] [Info] Start training from score 0.004717
Model: Random Forest Kappa: 0.3075, Sensitivity: 0.7104, Accuracy: 0.7397, F1 Score: 0.7677, ROC AUC: 0.8367, Specificity: 0.7104, Precision: 0.8259
Model: XGBoost Kappa: 0.4357, Sensitivity: 0.8272, Accuracy: 0.7671, F1 Score: 0.7956, ROC AUC: 0.8265, Specificity: 0.8272, Precision: 0.8844
Model: Light GBM Kappa: 0.3356, Sensitivity: 0.7015, Accuracy: 0.7808, F1 Score: 0.7971, ROC AUC: 0.8251, Specificity: 0.7015, Precision: 0.8236
Model: Gradient Boosting Kappa: 0.3132, Sensitivity: 0.6933, Accuracy: 0


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 425, number of negative: 423
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.731702 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7032390
[LightGBM] [Info] Number of data points in the train set: 848, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501179 -> initscore=0.004717
[LightGBM] [Info] Start training from score 0.004717
Model: Random Forest Kappa: 0.3398, Sensitivity: 0.7439, Accuracy: 0.7397, F1 Score: 0.7699, ROC AUC: 0.8149, Specificity: 0.7439, Precision: 0.8426
Model: XGBoost Kappa: 0.3888, Sensitivity: 0.7855, Accuracy: 0.7534, F1 Score: 0.7829, ROC AUC: 0.8210, Specificity: 0.7855, Precision: 0.8633
Model: Light GBM Kappa: 0.2920, Sensitivity: 0.6851, Accuracy: 0.7534, F1 Score: 0.7756, ROC AUC: 0.8497, Specificity: 0.6851, Precision: 0.8143
Model: Gradient Boosting Kappa: 0.2920, Sensitivity: 0.6851, Accuracy: 


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.049211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1432845
[LightGBM] [Info] Number of data points in the train set: 1267, number of used features: 5619
[LightGBM] [Info] Start training from score -1.082950
[LightGBM] [Info] Start training from score -1.116129
[LightGBM] [Info] Start training from score -1.097035
Model: Random Forest Kappa: 0.5434, Sensitivity: 0.8290, Accuracy: 0.7975, F1 Score: 0.8143, ROC AUC: 0.9211, Specificity: 0.8913, Precision: 0.8675
Model: XGBoost Kappa: 0.4733, Sensitivity: 0.8639, Accuracy: 0.7342, F1 Score: 0.7601, ROC AUC: 0.9081, Specificity: 0.8835, Precision: 0.8661
Model: Light GBM Kappa: 0.4399, Sensitivity: 0.7236, Accuracy: 0.7848, F1 Score: 0.7945, ROC AUC: 0.9073, Specificity: 0.8223, Precision: 0.8108
Model: Gradient Boosting Kappa: 0.4304, Sensitivity: 0.7578, Accuracy: 0.7468, F1 Score: 0.7680, ROC AUC: 0.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.066021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1928310
[LightGBM] [Info] Number of data points in the train set: 1267, number of used features: 7562
[LightGBM] [Info] Start training from score -1.082950
[LightGBM] [Info] Start training from score -1.116129
[LightGBM] [Info] Start training from score -1.097035
Model: Random Forest Kappa: 0.4863, Sensitivity: 0.7934, Accuracy: 0.7722, F1 Score: 0.7914, ROC AUC: 0.9196, Specificity: 0.8656, Precision: 0.8449
Model: XGBoost Kappa: 0.4543, Sensitivity: 0.8025, Accuracy: 0.7342, F1 Score: 0.7595, ROC AUC: 0.9084, Specificity: 0.8676, Precision: 0.8487
Model: Light GBM Kappa: 0.3211, Sensitivity: 0.6722, Accuracy: 0.7215, F1 Score: 0.7393, ROC AUC: 0.8793, Specificity: 0.7822, Precision: 0.7711
Model: Gradient Boosting Kappa: 0.3401, Sensitivity: 0.6774, Accuracy: 0.7342, F1 Score: 0.7486, ROC AUC: 0.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.293199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7032390
[LightGBM] [Info] Number of data points in the train set: 1267, number of used features: 27578
[LightGBM] [Info] Start training from score -1.082950
[LightGBM] [Info] Start training from score -1.116129
[LightGBM] [Info] Start training from score -1.097035
Model: Random Forest Kappa: 0.5059, Sensitivity: 0.8184, Accuracy: 0.7722, F1 Score: 0.7925, ROC AUC: 0.9205, Specificity: 0.8819, Precision: 0.8594
Model: XGBoost Kappa: 0.4532, Sensitivity: 0.8025, Accuracy: 0.7342, F1 Score: 0.7596, ROC AUC: 0.8998, Specificity: 0.8672, Precision: 0.8489
Model: Light GBM Kappa: 0.5039, Sensitivity: 0.7426, Accuracy: 0.7975, F1 Score: 0.8103, ROC AUC: 0.9061, Specificity: 0.8590, Precision: 0.8407
Model: Gradient Boosting Kappa: 0.3954, Sensitivity: 0.6933, Accuracy: 0.7722, F1 Score: 0.7793, ROC AUC: 0


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 94, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009806 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 360928
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 5553
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.489583 -> initscore=-0.041673
[LightGBM] [Info] Start training from score -0.041673
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 94, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014784 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 535645
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 8241
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.489583 -> initscore=-0.041673
[LightGBM] [Info] Start training from score -0.041673
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 94, number of negative: 98
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056877 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1792494
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.489583 -> initscore=-0.041673
[LightGBM] [Info] Start training from score -0.041673
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 95, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008818 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 353091
[LightGBM] [Info] Number of data points in the train set: 190, number of used features: 5438
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 0.8257, Sensitivity: 0.8750, Accuracy: 0.9474, F1 Score: 0.9445, ROC AUC: 0.8750, Specificity: 0.8750, Precision: 0.9507
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 95, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547471
[LightGBM] [Info] Number of data points in the train set: 190, number of used features: 8432
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 95, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1790592
[LightGBM] [Info] Number of data points in the train set: 190, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 0.8550, Sensitivity: 0.9667, Accuracy: 0.9474, F1 Score: 0.9494, ROC AUC: 0.9500, Specificity: 0.9667, Precision: 0.9579
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity:


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 97, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 355346
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 5467
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.505208 -> initscore=0.020834
[LightGBM] [Info] Start training from score 0.020834
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.00


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 97, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015671 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 552468
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 8500
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.505208 -> initscore=0.020834
[LightGBM] [Info] Start training from score 0.020834
Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.00


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Number of positive: 97, number of negative: 95
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055749 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1792486
[LightGBM] [Info] Number of data points in the train set: 192, number of used features: 27578
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.505208 -> initscore=0.020834
[LightGBM] [Info] Start training from score 0.020834



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Model: Random Forest Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: XGBoost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Light GBM Kappa: 0.0000, Sensitivity: 0.5000, Accuracy: 0.9333, F1 Score: 0.9011, ROC AUC: 0.5000, Specificity: 0.5000, Precision: 0.8711
Model: Gradient Boosting Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
Model: Ada Boost Kappa: 1.0000, Sensitivity: 1.0000, Accuracy: 1.0000, F1 Score: 1.0000, ROC AUC: 1.0000, Specificity: 1.0000, Precision: 1.0000
