# Pipeline for feature importance

The idea of this pipeline is to select significant features from a list of features. Presented with a series of rules for feature evaluation the pipeline will run through all of these and record the performance of the models and the importance of features in determining the decisions of the models. Then we should be able to evaluate that data to decide what to include in our model for general prediction. It could be that from each group of features one is particularly important it could also be that there is interaction between groups that makes this significant.

In [170]:
import pandas as pd
import numpy as np
from os import path
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.gaussian_process.kernels import RBF
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    average_precision_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    accuracy_score,
)
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import shap
from sklearn.preprocessing import StandardScaler
import regex as re

## Load and Process Data

In [171]:
data_file = path.join("..", "data", "zoonosis_dataset_full.csv")
target_column = "label"

In [172]:
def prepare_dataframe_for_ml(df, target_column=None, one_hot_encode=True):
    """
    Prepare a pandas DataFrame for machine learning algorithms.
    - Normalizes numerical features
    - Optionally one-hot encodes categorical features
    - Optionally separates target variable

    Parameters:
    -----------
    df : pandas.DataFrame
        The input DataFrame to prepare
    target_column : str, optional
        Name of the target column to separate
    one_hot_encode : bool, optional
        Whether to one-hot encode categorical features

    Returns:
    --------
    df_processed: pandas.DataFrame
        The processed DataFrame
    """

    # Create a copy of the dataframe to avoid modifying the original
    df_processed = df.copy()

    # Separate target if specified
    y = None
    if target_column and target_column in df_processed.columns:
        y = df_processed[target_column].replace({"nz": 0, "hzoon": 1})
        df_processed = df_processed.drop(columns=[target_column])

    # Identify numerical and categorical columns
    numerical_cols = df_processed.select_dtypes(
        include=["int64", "float64"]
    ).columns.tolist()
    categorical_cols = df_processed.select_dtypes(
        include=["object", "category", "bool"]
    ).columns.tolist()

    # Handle missing values
    df_processed[numerical_cols] = df_processed[numerical_cols].fillna(
        df_processed[numerical_cols].median()
    )
    for col in categorical_cols:
        df_processed[col] = df_processed[col].fillna(df_processed[col].mode()[0])

    # Normalize numerical features
    if numerical_cols:
        scaler = StandardScaler()
        df_processed[numerical_cols] = scaler.fit_transform(
            df_processed[numerical_cols]
        )

    # One-hot encode categorical features
    if categorical_cols and one_hot_encode:
        df_processed = pd.get_dummies(
            df_processed, columns=categorical_cols, drop_first=False
        )

    # If we have a target column, add it back to the processed dataframe
    if target_column and y is not None:
        df_processed[target_column] = y

    return df_processed

In [173]:
data = pd.read_csv(data_file)
processed_data = prepare_dataframe_for_ml(
    data, target_column=target_column
)
y = processed_data["label"]
X = processed_data.drop(columns=["label"])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

  y = df_processed[target_column].replace({"nz": 0, "hzoon": 1})


## Dataset Splitting Rules

## Model Definitions

In [174]:
def rmsle(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(np.expm1(y_true), np.expm1(y_pred)))

In [175]:
models = [
    ("XGBoost", XGBClassifier(enable_categorical=True)),
    ("Random Forest", RandomForestClassifier()),
    ("Ridge Classifier", RidgeClassifier()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Support Vector Classification", SVC()),
    ("LightGBM", LGBMClassifier()),
    ("KNN", KNeighborsClassifier(5, weights="uniform")),
    ("Naive Bayes", GaussianNB()),
    ("Neural Network", MLPClassifier()),
    ("Quadratic Discriminant Analysis", QuadraticDiscriminantAnalysis()),
]

In [176]:
def get_feature_importance(model, X):
    try:
        mdi_importances = pd.Series(
            model.feature_importances_, index=X.columns
        ).sort_values(ascending=True)
        return mdi_importances
    except AttributeError:
        pass


def get_permutation_importance(model, X, y):
    result = permutation_importance(model, X, y, n_repeats=10, random_state=42)
    # Create a Series with feature names and their mean importances
    importances = pd.Series(result.importances_mean, index=X.columns)
    # Sort importances from most to least important
    sorted_importances = importances.sort_values(ascending=False)
    return sorted_importances

In [None]:
def get_results_all_models(models, X_train, X_test, y_train, y_test):
    results = {}
    for name, model in models:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        average_precision = average_precision_score(y_test, y_pred)
        feature_importance = get_feature_importance(model, X_test)
        permutation_importance = get_permutation_importance(model, X_test, y_test)
        most_important_permutation = permutation_importance.index[0]
        most_important_permutation = None
        try:
            most_important_feature = feature_importance.index[-1]
        except AttributeError:
            most_important_feature = None
        results[name] = {
            "accuracy": accuracy,
            "roc_auc": roc_auc,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "average_precision": average_precision,
            "feature_importance": feature_importance,
            "most_important_feature": most_important_feature,
            "permutation_importance": permutation_importance,
            "most_important_permutation": most_important_permutation,
            "columns": ",".join(X_test.columns.to_list()),
        }
    return results

In [178]:
def save_results(results, filename):
    results_to_be_saved = pd.DataFrame.from_dict(data=results, orient="index")
    results_to_be_saved.to_csv(path.join("..", "model_comparison_data", filename))

## Pipeline Level Functions

In [179]:
def convert_results_list_to_dataframe(results):
    results_df = pd.DataFrame.from_dict(data=results, orient="index")
    # data_frame = data_frame.append(results_df, ignore_index=True)
    pass

In [180]:
def get_new_test_train_sets(X, y, columns_to_include, test_size=0.2, random_state=42):
    X_dropped = X[columns_to_include]
    X_train, X_test, y_train, y_test = train_test_split(
        X_dropped, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [181]:
def run_pipeline(rules, models, X, y):
    rule_results = {}
    for rule in rules:
        rule_name = rule
        columns_to_include = rules[rule]
        X_train, X_test, y_train, y_test = get_new_test_train_sets(
            X, y, columns_to_include
        )
        results = get_results_all_models(models, X_train, X_test, y_train, y_test)
        rule_results[rule_name] = results
    return rule_results

In [182]:
def extract_columns_from_rule(rule, columns):
    rule_columns = [c for c in columns if re.fullmatch(rule, c)]
    return rule_columns

In [183]:
def get_rules_dict(rules, columns):
    rules_dict = {}
    for rule in rules:
        rule_name = rule
        rule_columns = extract_columns_from_rule(rule, columns)
        rules_dict[rule_name] = rule_columns
    return rules_dict

## Pipeline

In [184]:
genes = [r"HA", r"NA", r"M1", r"NS1", r"NP", r"PA", r"PB1", r"PB2"]
prefix = [
    r"\b([CTGA]){2}_",
    r"\b([CTGA]){3}_",
    r"\b([CTGA]){4}_",
    r"\b([CTGA]){5}_",
    r"\b([CTGA]){6}_",
    r"\bDPC_.*_",
    r"\bCTDC_.*_",
    r"\bCTDD_.*_",
    r"\bCTDT_.*_",
    r"\bCTriad_.*_",
    r"\bPAAC_.*_",
]
rules = [p + g for p in prefix for g in genes]

In [185]:
rules_dict = get_rules_dict(rules, X.columns)

In [186]:

all_rule_results = run_pipeline(rules_dict, models, X, y)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000118 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2549
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000109 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2404
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2490
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2513
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2531
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2549
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2538
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000288 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2303
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000111 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2441
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2455
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2520
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2527
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2484
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2549
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000109 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2547
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000536 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1685
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000294 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1823
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2373
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2465
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000444 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2419
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2377
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000354 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1958
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1965
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000293 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 903
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 877
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1635
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2065
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000520 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2314
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2041
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000486 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 918
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1028
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 250
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000400 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 306
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 568
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000275 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1156
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000757 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 912
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000938 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 656
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1046
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 848
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000722 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 111
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
 

[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000263 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 249
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 190
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000480 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 211
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 300
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000754 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 329
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2544
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2533
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 379
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1667
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 696
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1303
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1201
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1273
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1882
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1982
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 394
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000330 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1468
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 655
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1035
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1031
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1016
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000125 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2531
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000099 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2545
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1933
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightG

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 877
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000418 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1436
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightG

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1521
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000096 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1454
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 281
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 9
[LightGBM] [Info] [binar



[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000294 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 320
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 57
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000311 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) f

[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000488 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 139
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) f

[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 155
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000330 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 110
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 177
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1829
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366




[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2544
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[Light

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2549
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.032066 -> initscore=-3.407366
[LightGBM] [Info] Start training from score -3.407366
[LightGBM] [Info] Number of positive: 501, number of negative: 15123
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000257 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 15624, number of used features: 10
[LightGBM] [Info] [bin

In [187]:
def flatten_dict(nested_dict):
    res = {}
    if isinstance(nested_dict, dict):
        for k in nested_dict:
            flattened_dict = flatten_dict(nested_dict[k])
            for key, val in flattened_dict.items():
                key = list(key)
                key.insert(0, k)
                res[tuple(key)] = val
    else:
        res[()] = nested_dict
    return res


def nested_dict_to_df(values_dict):
    flat_dict = flatten_dict(values_dict)
    df = pd.DataFrame.from_dict(flat_dict, orient="index")
    df.index = pd.MultiIndex.from_tuples(df.index)
    df = df.unstack(level=-1)
    df.columns = df.columns.map("{0[1]}".format)
    return df

In [188]:
output_dataframe = nested_dict_to_df(all_rule_results)
output_dataframe.index.set_names(["Rules", "Models"], inplace=True)

In [189]:
output_dataframe.to_csv(path.join("..", "model_comparison_data", "gene_division_rules_results.csv"))

In [192]:
output_dataframe.to_excel(path.join("..", "model_comparison_data", "gene_division_rules_results.xlsx"))