In [1]:
import os

import numpy as np
import pandas as pd
import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

from data import *


# Shared random seed: used as `random_state` for the train/validation split and
# forwarded as `seed` to the model-building callables.
SEED = 662
# Name of the processed-data directory. NOTE(review): not referenced in the
# cells visible here -- presumably consumed by the `data` module or later cells.
DATA_DIR = "processed"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import warnings

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it also hides pandas
# SettingWithCopyWarning and scikit-learn convergence warnings that the cells
# below may trigger; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [3]:
from itertools import combinations
from pyexpat import model
import traceback

import lightgbm as lgb
from sklearn.ensemble import StackingClassifier
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsTransformer
from sklearn.pipeline import Pipeline

from models import *


def _psu_target_means(train_ids, y_train, valid_ids):
    """Look up the mean training target of each row's PSU.

    The PSU is the first ``_``-separated token of the ``psu_hh_idcode`` string.
    Rows whose PSU is missing or was never seen in training fall back to the
    overall training-target mean, for both splits.

    Returns:
        ``(train_means, valid_means)`` as 1-D float arrays, in row order.
    """
    train_psu = train_ids.str.split("_", n=1).str[0]
    valid_psu = valid_ids.str.split("_", n=1).str[0]
    # Group by raw values so the lookup does not depend on index alignment.
    psu_mean = y_train.groupby(train_psu.to_numpy()).mean()
    fallback = y_train.mean()
    train_means = train_psu.map(psu_mean).fillna(fallback).to_numpy()
    valid_means = valid_psu.map(psu_mean).fillna(fallback).to_numpy()
    return train_means, valid_means


def _align_proba_columns(y_pred, classes, n_classes):
    """Spread ``predict_proba`` output over all ``n_classes`` columns.

    ``classes`` holds the 0-based labels the model actually saw; column ``j`` of
    ``y_pred`` is written to column ``classes[j]`` and every unseen class keeps
    probability 0.
    """
    aligned = np.zeros((y_pred.shape[0], n_classes), dtype=float)
    aligned[:, np.asarray(classes).astype(int)] = y_pred
    return aligned


def load_objective(
    X: pd.DataFrame,
    y: pd.Series,
    y_binarized: pd.DataFrame,
    get_trained_model: callable,
):
    """Build an Optuna objective that scores one preprocessing + model setup.

    Args:
        X: Feature frame; must contain the string column ``psu_hh_idcode``.
            ``-999`` is treated as a missing-value sentinel. The frame passed
            in is not modified.
        y: Target labels (1-based; the model is trained on ``y - 1``).
        y_binarized: One-column-per-class indicator frame for ``y``; its width
            defines the number of classes scored by the log loss.
        get_trained_model: Called as
            ``get_trained_model(trial, seed=..., X_train=..., y_train=...)``;
            must return a fitted classifier exposing ``predict_proba`` and
            ``classes_``.

    Returns:
        ``objective(trial) -> float``, the mean validation log loss on a single
        hold-out split (one third of the rows, fixed by ``SEED``).
    """
    # Work on a cleaned copy so the caller's frame keeps its sentinels.
    X = X.replace(-999, np.nan)
    n_classes = y_binarized.shape[1]

    def objective(trial: "optuna.Trial"):
        chi2_threshold = trial.suggest_int("chi2_threshold", 2, 100)
        cv = 3
        imputer_strategy = trial.suggest_categorical(
            "imputer_strategy", ["mean", "median", "most_frequent"]
        )

        # Single hold-out split instead of a full cv-fold loop.
        X_train, X_valid, y_train, y_valid, _, y_valid_binarized = train_test_split(
            X, y, y_binarized, test_size=1 / cv, random_state=SEED
        )

        # Target mean aggregated by PSU, learned on the training split only.
        # NOTE(review): each training row's own label is part of its PSU mean,
        # so this feature is optimistic in-sample; consider leave-one-out.
        X_train_means, X_valid_means = _psu_target_means(
            X_train["psu_hh_idcode"], y_train, X_valid["psu_hh_idcode"]
        )

        preprocessor = get_preprocessor(
            imputer_strategy=[
                "most_frequent",
                "most_frequent",
                imputer_strategy,
                imputer_strategy,
            ],
            remainder="drop",
        )

        X_train_processed = preprocessor.fit_transform(X_train)
        selector = SelectPercentile(chi2, percentile=chi2_threshold)
        X_train = selector.fit_transform(X_train_processed, y_train)
        X_valid = selector.transform(preprocessor.transform(X_valid))

        # Append the PSU target means as an extra feature column.
        X_train = np.column_stack([X_train, X_train_means])
        X_valid = np.column_stack([X_valid, X_valid_means])

        # Append the mean training target of each row's nearest neighbours.
        p = trial.suggest_int("p", 1, 3)
        k_neighbors = trial.suggest_int("k_neighbors", 2, 15)
        transformer = KNeighborsTransformer(
            mode="connectivity", n_neighbors=k_neighbors, p=p
        )
        y_train_values = y_train.to_numpy()
        neighbor_graph = transformer.fit_transform(X_train)
        neighbor_graph.setdiag(0)  # a training row must not vote for itself
        neighbor_counts = np.asarray(neighbor_graph.sum(axis=1)).ravel()
        # Rows left without neighbours give 0/0 -> NaN, filled just below.
        with np.errstate(divide="ignore", invalid="ignore"):
            knn_train_means = (neighbor_graph @ y_train_values) / neighbor_counts
        knn_fallback = np.nanmean(knn_train_means)
        np.nan_to_num(knn_train_means, nan=knn_fallback, copy=False)
        X_train = np.hstack([X_train, knn_train_means.reshape(-1, 1)])

        knn_valid_means = (
            (transformer.kneighbors_graph(X_valid) @ y_train_values) / k_neighbors
        ).reshape(-1, 1)
        np.nan_to_num(knn_valid_means, nan=knn_fallback, copy=False)
        X_valid = np.hstack([X_valid, knn_valid_means])

        model = get_trained_model(
            trial, seed=SEED, X_train=X_train, y_train=y_train - 1
        )

        # The model was fit on 0-based labels, so classes_ indexes the columns
        # of y_valid_binarized directly; classes absent from the training split
        # get probability 0.
        y_pred = _align_proba_columns(
            model.predict_proba(X_valid), model.classes_, n_classes
        )

        valid_loss = log_loss(y_valid_binarized, y_pred, normalize=False) / len(
            y_valid_binarized
        )
        # Relies on the preprocessor returning a frame with `.columns`.
        trial.set_user_attr(
            "selected_columns", X_train_processed.columns[selector.get_support()]
        )
        return valid_loss

    return objective


# Registry of candidate base learners for stacking. Each value is a factory
# invoked as `factory(trial, max_iter=..., seed=...)` to build one estimator.
# Insertion order matters: it fixes the order in which combinations are tried.
# The gradient_boosting, lightgbm and mlp learners are currently disabled.
models = dict(
    naive_bayes=suggest_naive_bayes,
    knn=suggest_knn_classifier,
    lr=suggest_logistic_regression,
    linear_svc=suggest_linear_svc,
    kernel_svc=suggest_kernel_svc,
    random_forest=suggest_random_forest,
)


# Keep Optuna quiet; the progress bar already reports the running best value.
optuna.logging.set_verbosity(optuna.logging.ERROR)

N_TRIALS = 100  # Optuna trials per model combination

data = remove_boring_columns(
    transform_education_levels(
        transform_all_house(combined_transformed_train_with_num_pov)
    )
)

# Drop columns that carry a single value, plus a fixed set of education
# questions excluded from this experiment.
poi = data
nunique = poi.nunique()
cols_to_drop = nunique[nunique == 1].index
poi = poi.drop(cols_to_drop, axis=1)
poi = poi.drop(columns=["edu_q09", "edu_q10", "edu_q11", "edu_q12", "edu_q13"])
print(poi.shape)

# The feature/target split is the same for every combination, so build it once.
X, y, y_binarized = (
    poi.drop(Y_COLUMNS + ["num_pov"], axis=1),
    poi["num_pov"],
    poi[Y_COLUMNS],
)

# Best value and parameters of every combination, keyed by the model-name
# tuple, so results can be compared after the run instead of only printed.
stacking_results = {}

# Try every pair and every triple of base learners as a stacking ensemble.
for choices in range(2, 4):
    for model_names in combinations(models.keys(), choices):
        best_values, best_params = [], []
        print(model_names)

        def get_trained_model(trial, seed, X_train, y_train, model_names=model_names):
            """Fit a stacking classifier over the current base-learner combination."""
            # `model_names` is bound as a default so the function stays tied to
            # this iteration's combination even if it is called later.
            estimators = [
                (model_name, models[model_name](trial, max_iter=MAX_ITER, seed=seed))
                for model_name in model_names
            ]
            clf = StackingClassifier(
                estimators=estimators, final_estimator=LogisticRegression(max_iter=200)
            ).fit(X_train, y_train)
            return clf

        try:
            # Seed the sampler so suggestions are repeatable; with parallel
            # trials (n_jobs=-1) the completion order can still vary run to run.
            study = optuna.create_study(
                direction="minimize",
                sampler=optuna.samplers.TPESampler(seed=SEED),
            )
            study.optimize(
                load_objective(X, y, y_binarized, get_trained_model),
                n_trials=N_TRIALS,
                n_jobs=-1,
                show_progress_bar=True,
            )
            best_values.append(study.best_value)
            best_params.append(study.best_params)
            stacking_results[model_names] = {
                "best_value": study.best_value,
                "best_params": study.best_params,
            }
            print(optuna.importance.get_param_importances(study))
            print(study.best_trial.user_attrs)
        except Exception:
            # Best effort: report the failure and move on to the next combination.
            print(traceback.format_exc())
        print(best_values)
        print(best_params)
        print("=====================================")
        print()

(5337, 83)
('naive_bayes', 'knn')


Best trial: 35. Best value: 1.77866: 100%|██████████| 100/100 [00:20<00:00,  4.81it/s]


{'chi2_threshold': 0.5227829752564205, 'n_neighbors': 0.3563907129207518, 'leaf_size': 0.053998449100407034, 'k_neighbors': 0.03883109228281512, 'p': 0.010077626862610694, 'algorithm': 0.00990479912274357, 'weights': 0.00477508354562651, 'imputer_strategy': 0.003239260908624691}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q43_2.0',
       'binary__edu_q45_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categorical__edu_q42_infrequent_sklearn',
 

Best trial: 50. Best value: 1.74768: 100%|██████████| 100/100 [09:35<00:00,  5.76s/it]


{'chi2_threshold': 0.9734007031396735, 'l1_ratio': 0.01738177031154062, 'k_neighbors': 0.004793642015515371, 'imputer_strategy': 0.00167908533964254, 'C': 0.0015182236954004763, 'p': 0.0012265754982274848}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categorical__edu_q42_infrequent_sklearn',
       'categorical__edu_q63_infrequent_sklea

Best trial: 99. Best value: 1.72689: 100%|██████████| 100/100 [11:00<00:00,  6.60s/it]


{'intercept_scaling': 0.3606702888520495, 'chi2_threshold': 0.3020745409506756, 'penalty': 0.14279696821441865, 'p': 0.09908621519438018, 'k_neighbors': 0.07386933165418796, 'calibration_method': 0.01389035116815888, 'imputer_strategy': 0.006180688897695257, 'C': 0.001431615068433869}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q19_2.0', 'binary__edu_q25_2.0',
       'binary__edu_q32_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'binary__edu_q46_2.0', 'binary__edu_q50_2.0', 'binary__edu_q57_2.0',
       'binary__edu_q61_2.0', 'binary__edu_q64_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categ

Best trial: 49. Best value: 1.73307: 100%|██████████| 100/100 [01:57<00:00,  1.17s/it]


{'kernel': 0.3397077069006071, 'chi2_threshold': 0.32281397571988835, 'k_neighbors': 0.18928617081547253, 'C': 0.055999294511415595, 'p': 0.05573873016562463, 'imputer_strategy': 0.03645412188699162}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categorical__edu_q42_infrequent_sklearn',
       'categorical__edu_q63_infrequent_sklearn', '

Best trial: 49. Best value: 1.72909: 100%|██████████| 100/100 [04:37<00:00,  2.78s/it]


{'chi2_threshold': 0.40511733155058655, 'max_features': 0.20342460762286152, 'k_neighbors': 0.13177611929865987, 'imputer_strategy': 0.10310355981203673, 'max_depth': 0.07953499241294766, 'n_estimators': 0.03494474837779452, 'min_samples_split': 0.015734063831317452, 'min_samples_leaf': 0.012185780941924748, 'p': 0.00839315142082783, 'criterion': 0.005785644731043307}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infr

Best trial: 81. Best value: 1.75544: 100%|██████████| 100/100 [09:31<00:00,  5.72s/it]


{'chi2_threshold': 0.9671062142094942, 'imputer_strategy': 0.007024641413715986, 'l1_ratio': 0.006664679918391082, 'leaf_size': 0.004190078219596662, 'p': 0.0039509312797308856, 'algorithm': 0.003494266138029674, 'C': 0.0025561859950915585, 'n_neighbors': 0.0018704524812977995, 'weights': 0.0018382872361286894, 'k_neighbors': 0.0013042631085234898}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
   

Best trial: 84. Best value: 1.74167: 100%|██████████| 100/100 [03:37<00:00,  2.18s/it]


{'chi2_threshold': 0.685684085372749, 'k_neighbors': 0.11449462409464503, 'algorithm': 0.06258938646181825, 'n_neighbors': 0.04193749490703517, 'weights': 0.035460420429122896, 'imputer_strategy': 0.017516738981125643, 'leaf_size': 0.015947780000605958, 'intercept_scaling': 0.015550032547302707, 'penalty': 0.00751559028543224, 'p': 0.002280245090576656, 'calibration_method': 0.000778990561700695, 'C': 0.00024461126788569897}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
     

Best trial: 7. Best value: 1.74608: 100%|██████████| 100/100 [01:43<00:00,  1.03s/it]


{'chi2_threshold': 0.9050413874541365, 'leaf_size': 0.0198754593515891, 'p': 0.01713682776174915, 'algorithm': 0.01606363742798086, 'kernel': 0.013739038209407285, 'k_neighbors': 0.00997082431792727, 'n_neighbors': 0.009619410889958931, 'imputer_strategy': 0.004754013238810529, 'weights': 0.0032588331212021586, 'C': 0.0005405682272382755}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categ

Best trial: 89. Best value: 1.74593: 100%|██████████| 100/100 [03:57<00:00,  2.38s/it]


{'chi2_threshold': 0.8328418408176703, 'max_depth': 0.05926077145818906, 'algorithm': 0.027842262525653114, 'min_samples_leaf': 0.022739815496729997, 'imputer_strategy': 0.021082078131289365, 'k_neighbors': 0.011050816514798043, 'max_features': 0.008381924488423698, 'n_estimators': 0.004600836675663303, 'n_neighbors': 0.003153758351237637, 'leaf_size': 0.003103671016304917, 'weights': 0.0023035927413097095, 'min_samples_split': 0.0019184002597321846, 'criterion': 0.0014602605953888592, 'p': 0.00025997092760994013}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'cate

Best trial: 93. Best value: 1.747: 100%|██████████| 100/100 [16:30<00:00,  9.91s/it]


{'k_neighbors': 0.46148177652975353, 'chi2_threshold': 0.4389619263163284, 'l1_ratio': 0.026068397505817572, 'calibration_method': 0.021974559456936344, 'imputer_strategy': 0.020182477981253773, 'p': 0.013873730465532723, 'C': 0.008520536717978379, 'penalty': 0.007599936262452729, 'intercept_scaling': 0.001336658763946398}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q46_2.0',
       'binary__edu_q50_2.0', 'binary__edu_q57_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0

Best trial: 25. Best value: 1.74249: 100%|██████████| 100/100 [12:26<00:00,  7.46s/it]


{'chi2_threshold': 0.5337915832514449, 'kernel': 0.239925547062567, 'l1_ratio': 0.1056029873600533, 'k_neighbors': 0.05658152211570952, 'p': 0.046084416853620025, 'C': 0.014173704556162346, 'imputer_strategy': 0.0038402388004428515}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categorical__edu_q42_infrequent_sklearn',
       'categorical__edu_q63_infrequent_sklearn', 'numerical__house_q05y',
       'numerical__

Best trial: 97. Best value: 1.72955: 100%|██████████| 100/100 [04:46<00:00,  2.87s/it]


{'chi2_threshold': 0.3984012242780856, 'max_features': 0.3941328343417425, 'max_depth': 0.046821042045964135, 'n_estimators': 0.034164310589748545, 'min_samples_leaf': 0.0332028715662743, 'min_samples_split': 0.022214041558219193, 'imputer_strategy': 0.019829447942205373, 'l1_ratio': 0.019802162428581547, 'C': 0.01908705008819858, 'k_neighbors': 0.006116329800492948, 'criterion': 0.005799919513699016, 'p': 0.0004287658467883587}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
 

Best trial: 79. Best value: 1.71985: 100%|██████████| 100/100 [07:27<00:00,  4.48s/it]


{'intercept_scaling': 0.3749793105484931, 'chi2_threshold': 0.24883033492616521, 'kernel': 0.15963362663036376, 'C': 0.08154801775890881, 'imputer_strategy': 0.044251366955054786, 'p': 0.034282187775085404, 'calibration_method': 0.029118076489141068, 'k_neighbors': 0.025207338494761845, 'penalty': 0.002149740422026125}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q19_2.0', 'binary__edu_q25_2.0',
       'binary__edu_q32_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'binary__edu_q46_2.0', 'binary__edu_q50_2.0', 'binary__edu_q57_2.0',
       'binary__edu_q61_2.0', 'binary__edu_q64_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       

Best trial: 53. Best value: 1.71648: 100%|██████████| 100/100 [07:11<00:00,  4.32s/it]


{'max_features': 0.7766261119980978, 'chi2_threshold': 0.06636584509639042, 'max_depth': 0.04158014579434213, 'imputer_strategy': 0.03969082987778749, 'min_samples_leaf': 0.017656191624925047, 'k_neighbors': 0.017357517122760766, 'calibration_method': 0.01443399753848959, 'min_samples_split': 0.008261068574025782, 'p': 0.00608043321959403, 'n_estimators': 0.0044978220281626175, 'criterion': 0.003677595705962069, 'C': 0.0030836639905504385, 'penalty': 0.00035535957566841406, 'intercept_scaling': 0.0003334178532434314}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q43_2.0',
       'binary__edu_q45_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infreque

Best trial: 33. Best value: 1.72662: 100%|██████████| 100/100 [05:02<00:00,  3.03s/it]


{'chi2_threshold': 0.5396178235856665, 'kernel': 0.28176773706175995, 'max_depth': 0.07315077970273377, 'max_features': 0.056583980802941115, 'n_estimators': 0.013754846557638823, 'min_samples_leaf': 0.013266249670700033, 'imputer_strategy': 0.007978635294912835, 'min_samples_split': 0.004623617753440233, 'k_neighbors': 0.003966429742622255, 'criterion': 0.0033514236666828943, 'p': 0.0015588628094223142, 'C': 0.00037961335147947687}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q43_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_skl

Best trial: 90. Best value: 1.75522: 100%|██████████| 100/100 [11:52<00:00,  7.13s/it]


{'chi2_threshold': 0.6780766484749136, 'p': 0.10587368973168734, 'n_neighbors': 0.05089662373473062, 'leaf_size': 0.04530813873007918, 'k_neighbors': 0.04184707862305152, 'weights': 0.02880607291486739, 'l1_ratio': 0.01753571459506538, 'C': 0.012496368812027207, 'imputer_strategy': 0.010640853980092492, 'algorithm': 0.00851881040348536}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categori

Best trial: 96. Best value: 1.74536: 100%|██████████| 100/100 [05:21<00:00,  3.21s/it]


{'chi2_threshold': 0.5452155293242846, 'p': 0.12784982182847177, 'leaf_size': 0.12645559487908847, 'algorithm': 0.07327047733971592, 'k_neighbors': 0.03461062516317183, 'imputer_strategy': 0.02396390569074495, 'weights': 0.02377973555004287, 'C': 0.02016304628046003, 'n_neighbors': 0.012693441585345454, 'penalty': 0.009000129363082447, 'intercept_scaling': 0.0021110620717582932, 'calibration_method': 0.0008866309238335073}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q19_2.0', 'binary__edu_q25_2.0',
       'binary__edu_q32_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'binary__edu_q46_2.0', 'binary__edu_q50_2.0', 'binary__edu_q57_2.0',
       'binary__edu_q61_2.0', 'binary__edu_q64_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_

Best trial: 82. Best value: 1.74201: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]


{'chi2_threshold': 0.4458855395157307, 'leaf_size': 0.12425434634672457, 'n_neighbors': 0.10746498043324462, 'p': 0.07555487044598404, 'algorithm': 0.07191451043300476, 'imputer_strategy': 0.0539939025660382, 'k_neighbors': 0.04912384305361253, 'kernel': 0.042245164912557996, 'C': 0.0204902990368843, 'weights': 0.009072543256218245}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q43_2.0',
       'binary__edu_q45_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklear

Best trial: 92. Best value: 1.75043: 100%|██████████| 100/100 [05:08<00:00,  3.09s/it]


{'chi2_threshold': 0.7873614483963173, 'n_neighbors': 0.06578776208193104, 'min_samples_leaf': 0.03721966230152307, 'max_features': 0.022227723870389425, 'imputer_strategy': 0.018922915839135593, 'k_neighbors': 0.017818321660459848, 'n_estimators': 0.017581216081219004, 'max_depth': 0.015222112773241182, 'leaf_size': 0.00521084685774025, 'algorithm': 0.005208753107559556, 'criterion': 0.002925652886839977, 'min_samples_split': 0.002172952465741423, 'weights': 0.0019570067439105498, 'p': 0.0003836249339919353}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorica

Best trial: 83. Best value: 1.74666: 100%|██████████| 100/100 [12:28<00:00,  7.49s/it]


{'chi2_threshold': 0.8138241715930223, 'k_neighbors': 0.07482612082978474, 'calibration_method': 0.04323433698975936, 'l1_ratio': 0.03027329557394638, 'penalty': 0.024924976775746566, 'imputer_strategy': 0.0062921705173388616, 'p': 0.0024681947766647372, 'C': 0.002373462782497614, 'intercept_scaling': 0.0017832701612393528}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q43_2.0',
       'binary__edu_q45_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
     

Best trial: 87. Best value: 1.73818: 100%|██████████| 100/100 [11:42<00:00,  7.03s/it]


{'p': 0.305543029674999, 'chi2_threshold': 0.23495324451977728, 'kernel': 0.151714940133389, 'imputer_strategy': 0.14162680748059042, 'l1_ratio': 0.09213300619616704, 'C': 0.05255490223000892, 'k_neighbors': 0.021474069765068094}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infrequent_sklearn',
       'categorical__edu_q28_infrequent_sklearn',
       'categorical__edu_

Best trial: 72. Best value: 1.73187: 100%|██████████| 100/100 [04:55<00:00,  2.95s/it]


{'chi2_threshold': 0.3004901299559418, 'max_features': 0.22912559434688615, 'k_neighbors': 0.11336452854899413, 'l1_ratio': 0.1025564548317162, 'min_samples_leaf': 0.10045184738518996, 'max_depth': 0.05661961009760461, 'min_samples_split': 0.04701882769427892, 'n_estimators': 0.0166745340451864, 'imputer_strategy': 0.012106308618250008, 'p': 0.011319132940343538, 'criterion': 0.010022515466548938, 'C': 0.0002505160690593463}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
     

Best trial: 36. Best value: 1.72589: 100%|██████████| 100/100 [05:15<00:00,  3.16s/it]


{'intercept_scaling': 0.4665811797863528, 'C': 0.10695154665771013, 'k_neighbors': 0.09798965717442266, 'chi2_threshold': 0.09279671802231622, 'imputer_strategy': 0.08223259456101711, 'kernel': 0.058568420724673026, 'penalty': 0.04946263895274186, 'calibration_method': 0.034495122058876566, 'p': 0.01092212206188986}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu_q23_infreque

Best trial: 86. Best value: 1.72673: 100%|██████████| 100/100 [08:04<00:00,  4.85s/it]


{'max_features': 0.47058058237427103, 'chi2_threshold': 0.3245678719191069, 'n_estimators': 0.09391961256570629, 'C': 0.04487833559446101, 'criterion': 0.01574000723296101, 'min_samples_split': 0.014577045143656608, 'max_depth': 0.011044333355703774, 'imputer_strategy': 0.00848141149223582, 'min_samples_leaf': 0.006898975125542102, 'k_neighbors': 0.004047367065209089, 'penalty': 0.0025535783693007557, 'calibration_method': 0.001754248193515886, 'p': 0.0009242258435058359, 'intercept_scaling': 3.2405724823502175e-05}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'ca

Best trial: 83. Best value: 1.72055: 100%|██████████| 100/100 [05:32<00:00,  3.33s/it]


{'max_features': 0.3478609938520782, 'chi2_threshold': 0.32142855459677017, 'imputer_strategy': 0.1433728864140104, 'n_estimators': 0.045712390606689246, 'criterion': 0.04135237090877326, 'C': 0.024331895346038253, 'max_depth': 0.022501642214986448, 'min_samples_split': 0.017591959640870222, 'kernel': 0.01122139266259931, 'min_samples_leaf': 0.010512525447247614, 'p': 0.008741374925785054, 'k_neighbors': 0.005372013384151539}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_sklearn',
       'categorical__edu

Best trial: 14. Best value: 1.75532: 100%|██████████| 100/100 [19:52<00:00, 11.92s/it]


{'chi2_threshold': 0.9531892079744658, 'weights': 0.014005292813063636, 'leaf_size': 0.007596302485500043, 'n_neighbors': 0.00722919555941439, 'p': 0.006973444903980325, 'l1_ratio': 0.0025030000456495003, 'algorithm': 0.0023815358817532227, 'imputer_strategy': 0.0020504874422655915, 'penalty': 0.0013623811767154558, 'calibration_method': 0.00133209786168249, 'k_neighbors': 0.0010422196782827547, 'C': 0.00033427072455396897, 'intercept_scaling': 5.634526727494152e-07}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q43_2.0',
       'binary__edu_q45_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'c

Best trial: 89. Best value: 1.75283: 100%|██████████| 100/100 [13:34<00:00,  8.14s/it]


{'chi2_threshold': 0.975251359545269, 'l1_ratio': 0.006282298103030506, 'leaf_size': 0.003987783116924852, 'n_neighbors': 0.003717003288953134, 'weights': 0.0033124600998841033, 'kernel': 0.0022450467759934254, 'p': 0.0018568232401842283, 'k_neighbors': 0.0012698059112788822, 'imputer_strategy': 0.0008903925686502185, 'C': 0.0008533847045910018, 'algorithm': 0.0003336426452409075}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q19_2.0', 'binary__edu_q25_2.0',
       'binary__edu_q32_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'binary__edu_q46_2.0', 'binary__edu_q50_2.0', 'binary__edu_q57_2.0',
       'binary__edu_q61_2.0', 'binary__edu_q64_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.

Best trial: 86. Best value: 1.75316: 100%|██████████| 100/100 [12:53<00:00,  7.73s/it]


{'chi2_threshold': 0.8815508817518702, 'n_estimators': 0.031991360700463774, 'weights': 0.01477692592625544, 'leaf_size': 0.013579011169711953, 'k_neighbors': 0.010994245622368101, 'algorithm': 0.008391492042876583, 'max_features': 0.006744964965929789, 'criterion': 0.005855117183657148, 'l1_ratio': 0.005434392426820606, 'max_depth': 0.005128628260270808, 'n_neighbors': 0.004763905322736148, 'min_samples_leaf': 0.0026661994293543834, 'min_samples_split': 0.002557663542956161, 'p': 0.0024744971837308387, 'imputer_strategy': 0.0016838879795226557, 'C': 0.0014068264914753267}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q

Best trial: 24. Best value: 1.74804: 100%|██████████| 100/100 [08:03<00:00,  4.83s/it]


{'chi2_threshold': 0.7934636881799022, 'k_neighbors': 0.06243862546516715, 'kernel': 0.031028964408264956, 'C': 0.02621711541678286, 'intercept_scaling': 0.019597541601593625, 'n_neighbors': 0.014654373068784119, 'imputer_strategy': 0.014503022350062312, 'algorithm': 0.01426010751552852, 'leaf_size': 0.011905244066025241, 'weights': 0.00697235597704465, 'p': 0.0029101265093780483, 'calibration_method': 0.0016405730442084353, 'penalty': 0.00040826239725781465}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',
       'categorical__edu_q17_13.0', 'categorical__edu_q17_infrequent_

Best trial: 34. Best value: 1.74517: 100%|██████████| 100/100 [04:44<00:00,  2.85s/it]


{'chi2_threshold': 0.7125048428399708, 'intercept_scaling': 0.11104234888809825, 'leaf_size': 0.06070127410030616, 'n_neighbors': 0.030788828806537837, 'max_features': 0.0302755141115104, 'min_samples_split': 0.02144662091203777, 'k_neighbors': 0.007944631119228733, 'p': 0.00727444226820003, 'max_depth': 0.0038012532828484336, 'criterion': 0.0036417507079039356, 'n_estimators': 0.0035395961165684657, 'penalty': 0.0020580425527786978, 'min_samples_leaf': 0.0016817674557482111, 'calibration_method': 0.0013913293076502462, 'algorithm': 0.0009862348489397457, 'imputer_strategy': 0.0007909213751374033, 'weights': 6.926371402628754e-05, 'C': 6.133759250851834e-05}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q

Best trial: 34. Best value: 1.73519: 100%|██████████| 100/100 [05:35<00:00,  3.35s/it]


{'chi2_threshold': 0.8942162972345341, 'kernel': 0.030998003238021384, 'n_estimators': 0.015691999980133795, 'max_features': 0.015609544518348322, 'leaf_size': 0.011034524834231453, 'imputer_strategy': 0.007071070198548521, 'n_neighbors': 0.00575670694892325, 'min_samples_leaf': 0.004411062527401694, 'criterion': 0.003884460506083133, 'max_depth': 0.0038790948522141626, 'weights': 0.0018868730416496404, 'min_samples_split': 0.00175563307319489, 'k_neighbors': 0.0016480853892593041, 'algorithm': 0.0010205664213696038, 'p': 0.0007328935044113985, 'C': 0.00040318373167528627}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent

Best trial: 63. Best value: 1.74098: 100%|██████████| 100/100 [20:59<00:00, 12.60s/it]


{'chi2_threshold': 0.843537645386718, 'kernel': 0.0778040109979024, 'imputer_strategy': 0.034185169867362294, 'l1_ratio': 0.026812457578294393, 'k_neighbors': 0.010679223288936605, 'C': 0.0049531076215377015, 'p': 0.0007801033742593427, 'penalty': 0.0006758573523329035, 'calibration_method': 0.0004958890837805436, 'intercept_scaling': 7.653544887571475e-05}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q19_2.0', 'binary__edu_q25_2.0',
       'binary__edu_q32_2.0', 'binary__edu_q43_2.0', 'binary__edu_q45_2.0',
       'binary__edu_q46_2.0', 'binary__edu_q50_2.0', 'binary__edu_q57_2.0',
       'binary__edu_q61_2.0', 'binary__edu_q64_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical_

Best trial: 75. Best value: 1.74349: 100%|██████████| 100/100 [19:02<00:00, 11.42s/it]


{'chi2_threshold': 0.3867905427396493, 'l1_ratio': 0.15015548007610413, 'max_features': 0.1395106542766929, 'p': 0.07976860445903929, 'max_depth': 0.060344108764801306, 'criterion': 0.04296358829722028, 'min_samples_split': 0.04165521079738258, 'n_estimators': 0.03748283263220745, 'k_neighbors': 0.026828435063417082, 'imputer_strategy': 0.014926888577185627, 'min_samples_leaf': 0.011228564488089572, 'C': 0.0034535315896510803, 'penalty': 0.002847614294690611, 'intercept_scaling': 0.0012916978123532008, 'calibration_method': 0.0007522461315156685}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q46_2.0',
       'binary__edu_q50_2.0', 'binary__edu_q57_2.0', 'binary__edu_q61_2.0',
      

Best trial: 75. Best value: 1.73629: 100%|██████████| 100/100 [14:50<00:00,  8.91s/it]


{'chi2_threshold': 0.7312636135061613, 'max_features': 0.08448780027764755, 'n_estimators': 0.0823318559615283, 'l1_ratio': 0.03418769844909908, 'min_samples_split': 0.018427104993198665, 'p': 0.014070495479315177, 'min_samples_leaf': 0.01193280456841838, 'kernel': 0.00726955829361717, 'max_depth': 0.005835158347943416, 'k_neighbors': 0.00432077670302759, 'C': 0.0027568237112476487, 'criterion': 0.002638527968500804, 'imputer_strategy': 0.0004777817402949007}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q25_2.0', 'binary__edu_q32_2.0',
       'binary__edu_q43_2.0', 'binary__edu_q45_2.0', 'binary__edu_q61_2.0',
       'binary__edu_q64_2.0', 'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__h

Best trial: 83. Best value: 1.7342: 100%|██████████| 100/100 [05:53<00:00,  3.53s/it]


{'chi2_threshold': 0.5662645955414046, 'max_features': 0.201228677632925, 'n_estimators': 0.13354274200014088, 'imputer_strategy': 0.03392393074061137, 'min_samples_split': 0.022470173526911413, 'kernel': 0.015053049711494688, 'k_neighbors': 0.008437995821772622, 'min_samples_leaf': 0.006492635886290935, 'max_depth': 0.005683983694044486, 'penalty': 0.003013942096120324, 'p': 0.0015791540231456958, 'criterion': 0.0012303750248893078, 'C': 0.0004740825343334017, 'calibration_method': 0.00039822329360978065, 'intercept_scaling': 0.00020643847230534775}
{'selected_columns': Index(['binary__house_q02_2.0', 'binary__house_q07_1.0',
       'binary__house_q07_2.0', 'binary__house_q11_2.0',
       'binary__house_q17_1.0', 'binary__edu_q03_2.0', 'binary__edu_q08_2.0',
       'binary__edu_q14_2.0', 'binary__edu_q43_2.0',
       'categorical__house_q03_2.0',
       'categorical__house_q03_infrequent_sklearn',
       'categorical__house_q06_4.0',
       'categorical__house_q06_infrequent_sklearn',