<a href="https://colab.research.google.com/github/Chienstartup/ISIC_2024/blob/main/features_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install optuna

Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [10]:
import optuna
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold,StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import PredefinedSplit
from tqdm import tqdm

In [4]:
df_train = pd.read_csv('https://raw.githubusercontent.com/Chienstartup/ISIC_2024/main/meta_data/df_train.csv')

<h3>Split dataset into 5 Folds</h3>

In [5]:
N_SPLITS = 5
gkf = GroupKFold(n_splits=N_SPLITS)
df_train["fold"] = -1
for idx, (train_idx, val_idx) in enumerate(gkf.split(df_train, df_train["target"], groups=df_train["patient_id"])):
    df_train.loc[val_idx, "fold"] = idx

In [8]:
feature_cols = [x for x in df_train.columns if x not in ["target", "fold", "patient_id", 'isic_id',]]

In [11]:
def custom_metric_binary(y_true, y_pred):
    y_hat = y_pred[:, 1] if y_pred.ndim > 1 else y_pred
    y_true_binary = y_true
    min_tpr = 0.80
    max_fpr = 1 - min_tpr

    v_gt = 1 - y_true_binary
    v_pred = 1 - y_hat
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return partial_auc

In [12]:
def optimize_feature_selection(sampled_df, feature_cols, target_col, n_trials=10):
    X = sampled_df[feature_cols]
    y = sampled_df[target_col]
    fold = sampled_df['fold']

    best_features = []
    best_overall_score = -np.inf
    remaining_features = feature_cols.copy()

    while remaining_features:
        best_score = -np.inf
        best_feature = None

        for feature in tqdm(remaining_features, desc=f"Evaluating features (current best: {len(best_features)})"):
            current_features = best_features + [feature]

            study = optuna.create_study(direction='maximize')
            scores = []

            for _ in range(n_trials):
                study.optimize(lambda trial: lgb_objective(trial, X, y, fold, current_features), n_trials=1)
                scores.append(study.best_value)

            avg_score = np.mean(scores)

            if avg_score > best_score:
                best_score = avg_score
                best_feature = feature

        if best_score > best_overall_score:
            best_features.append(best_feature)
            remaining_features.remove(best_feature)
            best_overall_score = best_score
            print(f"\nAdded feature: {best_feature}")
            print(f"New best score: {best_score:.4f}")
            print("Current feature set:")
            for i, feat in enumerate(best_features, 1):
                print(f"{i}. {feat}")
        else:
            print(f"\nStopping: No improvement (best overall: {best_overall_score:.4f}, current best: {best_score:.4f})")
            break

    print(f"\nFinal best feature combination:")
    for i, feature in enumerate(best_features, 1):
        print(f"{i}. {feature}")
    print(f"Best overall score: {best_overall_score:.4f}")

    return best_features

def lgb_objective(trial, X, y, fold, selected_features):
    params = {
        'objective': 'binary',
        'num_class': 1,
        'n_estimators': 200,
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-3, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-3, 10.0, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 1e-2, 1e-1, log=True),
        'max_depth': trial.suggest_int('max_depth', 4, 8),
        'num_leaves': trial.suggest_int('num_leaves', 16, 256),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1.0),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 0.8, 4.0),
        'device': 'cpu'
    }

    ps = PredefinedSplit(fold)

    cv_scores = []
    for train_index, val_index in ps.split():
        X_train, X_val = X.iloc[train_index][selected_features], X.iloc[val_index][selected_features]
        y_train, y_val = y.iloc[train_index], y.iloc[val_index]

        model = lgb.LGBMClassifier(**params, random_state=42)
        model.fit(X_train, y_train)

        y_pred = model.predict_proba(X_val)
        score = custom_metric_binary(y_val, y_pred)
        cv_scores.append(score)

    cv_score = np.mean(cv_scores)
    return cv_score


In [13]:
best_features = optimize_feature_selection(df_train, feature_cols, 'target', n_trials=1)
print("Best feature combination:", best_features)

Evaluating features (current best: 0):   0%|          | 0/124 [00:00<?, ?it/s][I 2024-09-11 08:35:06,226] A new study created in memory with name: no-name-c3365a5f-9447-44d9-a29a-c224a45b0118
[I 2024-09-11 08:35:06,968] Trial 0 finished with value: 0.03088223288233165 and parameters: {'lambda_l1': 5.294003892553125, 'lambda_l2': 0.07408247868285786, 'learning_rate': 0.06168355492491374, 'max_depth': 8, 'num_leaves': 211, 'colsample_bytree': 0.8402081749780288, 'colsample_bynode': 0.8817645764858415, 'bagging_fraction': 0.9263357375313878, 'bagging_freq': 1, 'min_child_samples': 14, 'scale_pos_weight': 3.0033960674544344}. Best is trial 0 with value: 0.03088223288233165.
Evaluating features (current best: 0):   1%|          | 1/124 [00:00<01:31,  1.34it/s][I 2024-09-11 08:35:06,979] A new study created in memory with name: no-name-e34b2e0e-bb49-42f2-9166-edb43851be4e
[I 2024-09-11 08:35:07,479] Trial 0 finished with value: 0.090252952433932 and parameters: {'lambda_l1': 0.7393605544546


Added feature: clin_size_long_diam_mm
New best score: 0.0903
Current feature set:
1. clin_size_long_diam_mm


Evaluating features (current best: 1):   0%|          | 0/123 [00:00<?, ?it/s][I 2024-09-11 08:36:43,261] A new study created in memory with name: no-name-918230e9-1a9b-4cde-83bf-6f95b0c6b677
[I 2024-09-11 08:36:43,893] Trial 0 finished with value: 0.10265085375095244 and parameters: {'lambda_l1': 0.0020198805325433006, 'lambda_l2': 0.03871505168236007, 'learning_rate': 0.08949626435895061, 'max_depth': 4, 'num_leaves': 31, 'colsample_bytree': 0.8561731674833717, 'colsample_bynode': 0.7006337279115988, 'bagging_fraction': 0.8378134391937835, 'bagging_freq': 2, 'min_child_samples': 47, 'scale_pos_weight': 3.248569531919732}. Best is trial 0 with value: 0.10265085375095244.
Evaluating features (current best: 1):   1%|          | 1/123 [00:00<01:17,  1.58it/s][I 2024-09-11 08:36:43,898] A new study created in memory with name: no-name-05147753-1051-4322-9438-3fc6bf3816d4
[I 2024-09-11 08:36:44,698] Trial 0 finished with value: 0.10161670786997926 and parameters: {'lambda_l1': 0.098671389


Added feature: tbp_lv_H_between_std
New best score: 0.1265
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std


Evaluating features (current best: 2):   0%|          | 0/122 [00:00<?, ?it/s][I 2024-09-11 08:38:22,262] A new study created in memory with name: no-name-40718e97-6378-4fe1-b07c-42918aeb13d5
[I 2024-09-11 08:38:23,330] Trial 0 finished with value: 0.1227718162399793 and parameters: {'lambda_l1': 0.07284983267431701, 'lambda_l2': 0.023711464859400223, 'learning_rate': 0.015517867720591619, 'max_depth': 7, 'num_leaves': 231, 'colsample_bytree': 0.8220437647653114, 'colsample_bynode': 0.6856771192083685, 'bagging_fraction': 0.8993941168224606, 'bagging_freq': 6, 'min_child_samples': 28, 'scale_pos_weight': 1.783994337710762}. Best is trial 0 with value: 0.1227718162399793.
Evaluating features (current best: 2):   1%|          | 1/122 [00:01<02:09,  1.07s/it][I 2024-09-11 08:38:23,335] A new study created in memory with name: no-name-3f44d339-6831-49f9-ae83-83dd34b585ee
[I 2024-09-11 08:38:24,309] Trial 0 finished with value: 0.12192232784966044 and parameters: {'lambda_l1': 0.0027718676


Added feature: lesion_visibility_score
New best score: 0.1399
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score


Evaluating features (current best: 3):   0%|          | 0/121 [00:00<?, ?it/s][I 2024-09-11 08:40:13,840] A new study created in memory with name: no-name-a5a0e486-9b93-4fea-a219-490f9130ff2c
[I 2024-09-11 08:40:14,983] Trial 0 finished with value: 0.14154559739061598 and parameters: {'lambda_l1': 0.017437111595907016, 'lambda_l2': 4.771067251857727, 'learning_rate': 0.012937295617345834, 'max_depth': 5, 'num_leaves': 140, 'colsample_bytree': 0.45538563300413637, 'colsample_bynode': 0.6588825441802131, 'bagging_fraction': 0.9724625114825483, 'bagging_freq': 6, 'min_child_samples': 21, 'scale_pos_weight': 2.277673087395178}. Best is trial 0 with value: 0.14154559739061598.
Evaluating features (current best: 3):   1%|          | 1/121 [00:01<02:17,  1.15s/it][I 2024-09-11 08:40:14,991] A new study created in memory with name: no-name-381f837a-cd3d-4eef-aa93-82081a77dfb8
[I 2024-09-11 08:40:15,861] Trial 0 finished with value: 0.14088521118319125 and parameters: {'lambda_l1': 0.010187393


Added feature: tbp_lv_deltaLBnorm_between_std
New best score: 0.1457
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std


Evaluating features (current best: 4):   0%|          | 0/120 [00:00<?, ?it/s][I 2024-09-11 08:42:09,442] A new study created in memory with name: no-name-b32b54d2-6d7d-4510-bc1c-cc494518825b
[I 2024-09-11 08:42:10,019] Trial 0 finished with value: 0.14043409872705062 and parameters: {'lambda_l1': 0.0017973098412186615, 'lambda_l2': 0.610006814919878, 'learning_rate': 0.022668348567214936, 'max_depth': 4, 'num_leaves': 135, 'colsample_bytree': 0.4056504535937802, 'colsample_bynode': 0.7816720311448572, 'bagging_fraction': 0.7370197355587356, 'bagging_freq': 1, 'min_child_samples': 62, 'scale_pos_weight': 2.264283690122957}. Best is trial 0 with value: 0.14043409872705062.
Evaluating features (current best: 4):   1%|          | 1/120 [00:00<01:08,  1.73it/s][I 2024-09-11 08:42:10,024] A new study created in memory with name: no-name-c456b6f1-9867-4af4-8b99-580415e12f96
[I 2024-09-11 08:42:11,137] Trial 0 finished with value: 0.14493506971668008 and parameters: {'lambda_l1': 1.167155575


Added feature: age_approx_zscore
New best score: 0.1506
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore


Evaluating features (current best: 5):   0%|          | 0/119 [00:00<?, ?it/s][I 2024-09-11 08:44:06,685] A new study created in memory with name: no-name-f60baa14-531c-4892-a87f-9a52e723eec4
[I 2024-09-11 08:44:07,452] Trial 0 finished with value: 0.1504629565450198 and parameters: {'lambda_l1': 0.23798177897972117, 'lambda_l2': 0.04536758227535953, 'learning_rate': 0.06584891088308338, 'max_depth': 4, 'num_leaves': 256, 'colsample_bytree': 0.8870383365440124, 'colsample_bynode': 0.5479503216151043, 'bagging_fraction': 0.49396394065927135, 'bagging_freq': 6, 'min_child_samples': 5, 'scale_pos_weight': 2.1231248745957414}. Best is trial 0 with value: 0.1504629565450198.
Evaluating features (current best: 5):   1%|          | 1/119 [00:00<01:30,  1.30it/s][I 2024-09-11 08:44:07,459] A new study created in memory with name: no-name-d8bb5ed0-e729-44b4-b5c6-b5823a5a1d6b
[I 2024-09-11 08:44:08,219] Trial 0 finished with value: 0.14952905011148737 and parameters: {'lambda_l1': 4.45468475998


Added feature: tbp_lv_B
New best score: 0.1543
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B


Evaluating features (current best: 6):   0%|          | 0/118 [00:00<?, ?it/s][I 2024-09-11 08:46:00,883] A new study created in memory with name: no-name-4591440b-8e10-464e-b831-75b4513523df
[I 2024-09-11 08:46:02,021] Trial 0 finished with value: 0.15122286339231922 and parameters: {'lambda_l1': 0.26079574050029647, 'lambda_l2': 0.013113716186474764, 'learning_rate': 0.015370384795914915, 'max_depth': 5, 'num_leaves': 135, 'colsample_bytree': 0.5152881068707109, 'colsample_bynode': 0.992351468802473, 'bagging_fraction': 0.8977560539808498, 'bagging_freq': 7, 'min_child_samples': 84, 'scale_pos_weight': 1.177329657991617}. Best is trial 0 with value: 0.15122286339231922.
Evaluating features (current best: 6):   1%|          | 1/118 [00:01<02:13,  1.14s/it][I 2024-09-11 08:46:02,027] A new study created in memory with name: no-name-3d56c2ed-f6c1-451f-9109-b62b32e2ca4f
[I 2024-09-11 08:46:03,042] Trial 0 finished with value: 0.15333647341691775 and parameters: {'lambda_l1': 1.296170543


Added feature: avg_contrast
New best score: 0.1563
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast


Evaluating features (current best: 7):   0%|          | 0/117 [00:00<?, ?it/s][I 2024-09-11 08:48:06,547] A new study created in memory with name: no-name-65a84e50-b17d-4408-a2d3-2cac6bb79a30
[I 2024-09-11 08:48:07,441] Trial 0 finished with value: 0.15229156489956247 and parameters: {'lambda_l1': 1.849224549358295, 'lambda_l2': 0.07605414335528049, 'learning_rate': 0.024180326492969837, 'max_depth': 8, 'num_leaves': 151, 'colsample_bytree': 0.5419765587535175, 'colsample_bynode': 0.6647832431479397, 'bagging_fraction': 0.4580258030172143, 'bagging_freq': 3, 'min_child_samples': 81, 'scale_pos_weight': 3.2212257359366445}. Best is trial 0 with value: 0.15229156489956247.
Evaluating features (current best: 7):   1%|          | 1/117 [00:00<01:43,  1.12it/s][I 2024-09-11 08:48:07,447] A new study created in memory with name: no-name-611a3a05-5526-4cf7-b58a-26c5d016bd08
[I 2024-09-11 08:48:08,724] Trial 0 finished with value: 0.15294280118989417 and parameters: {'lambda_l1': 0.5456043372


Added feature: hue_contrast
New best score: 0.1577
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast


Evaluating features (current best: 8):   0%|          | 0/116 [00:00<?, ?it/s][I 2024-09-11 08:50:10,014] A new study created in memory with name: no-name-64bd08ad-0f77-451c-8067-1324b08062aa
[I 2024-09-11 08:50:10,823] Trial 0 finished with value: 0.15328467477336474 and parameters: {'lambda_l1': 0.032473346968029844, 'lambda_l2': 0.04757056178297108, 'learning_rate': 0.01603097702945143, 'max_depth': 4, 'num_leaves': 63, 'colsample_bytree': 0.48008868294285406, 'colsample_bynode': 0.8620602590604702, 'bagging_fraction': 0.916167317823329, 'bagging_freq': 4, 'min_child_samples': 15, 'scale_pos_weight': 1.5597844878415472}. Best is trial 0 with value: 0.15328467477336474.
Evaluating features (current best: 8):   1%|          | 1/116 [00:00<01:33,  1.23it/s][I 2024-09-11 08:50:10,828] A new study created in memory with name: no-name-e56cb289-28e6-4f50-8976-e52d527d96e4
[I 2024-09-11 08:50:11,572] Trial 0 finished with value: 0.15367339029423768 and parameters: {'lambda_l1': 2.242625542


Added feature: anatom_site_general_encoded
New best score: 0.1609
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded


Evaluating features (current best: 9):   0%|          | 0/115 [00:00<?, ?it/s][I 2024-09-11 08:52:13,381] A new study created in memory with name: no-name-09ef7c73-7c48-49f4-9296-8d1a57a5152a
[I 2024-09-11 08:52:14,182] Trial 0 finished with value: 0.16276351826253502 and parameters: {'lambda_l1': 0.006965563519569691, 'lambda_l2': 1.7733586115734565, 'learning_rate': 0.05659480869979412, 'max_depth': 5, 'num_leaves': 185, 'colsample_bytree': 0.4629929134316021, 'colsample_bynode': 0.976976571968829, 'bagging_fraction': 0.9310343179838964, 'bagging_freq': 1, 'min_child_samples': 39, 'scale_pos_weight': 1.3340464540129786}. Best is trial 0 with value: 0.16276351826253502.
Evaluating features (current best: 9):   1%|          | 1/115 [00:00<01:31,  1.25it/s][I 2024-09-11 08:52:14,187] A new study created in memory with name: no-name-26d3249c-41b1-4913-9e24-84062d998f67
[I 2024-09-11 08:52:16,665] Trial 0 finished with value: 0.16135342501001182 and parameters: {'lambda_l1': 0.0082725175


Added feature: attribution_encoded
New best score: 0.1649
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded


Evaluating features (current best: 10):   0%|          | 0/114 [00:00<?, ?it/s][I 2024-09-11 08:54:33,773] A new study created in memory with name: no-name-0b64d1cc-bcf3-4fcc-b582-51554eb2c2b7
[I 2024-09-11 08:54:34,856] Trial 0 finished with value: 0.1637297815453567 and parameters: {'lambda_l1': 0.006643448401904877, 'lambda_l2': 0.006257973589230603, 'learning_rate': 0.031924628163634866, 'max_depth': 6, 'num_leaves': 212, 'colsample_bytree': 0.6831273892383598, 'colsample_bynode': 0.5861903352310891, 'bagging_fraction': 0.6640672081493265, 'bagging_freq': 5, 'min_child_samples': 66, 'scale_pos_weight': 1.0481944236904441}. Best is trial 0 with value: 0.1637297815453567.
Evaluating features (current best: 10):   1%|          | 1/114 [00:01<02:02,  1.09s/it][I 2024-09-11 08:54:34,862] A new study created in memory with name: no-name-2018328f-4ed5-4042-9816-1f0e057698ca
[I 2024-09-11 08:54:36,019] Trial 0 finished with value: 0.16024598418870623 and parameters: {'lambda_l1': 0.803943


Added feature: tbp_lv_C
New best score: 0.1664
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded
11. tbp_lv_C


Evaluating features (current best: 11):   0%|          | 0/113 [00:00<?, ?it/s][I 2024-09-11 08:56:49,791] A new study created in memory with name: no-name-4aa75a54-7949-41b4-9358-d168d09c7964
[I 2024-09-11 08:56:50,968] Trial 0 finished with value: 0.16111159949338455 and parameters: {'lambda_l1': 3.997929068741591, 'lambda_l2': 0.6341550965622448, 'learning_rate': 0.015838939817938588, 'max_depth': 7, 'num_leaves': 83, 'colsample_bytree': 0.6679943408112872, 'colsample_bynode': 0.43898477333387903, 'bagging_fraction': 0.7400770683018449, 'bagging_freq': 4, 'min_child_samples': 81, 'scale_pos_weight': 3.7779743664524164}. Best is trial 0 with value: 0.16111159949338455.
Evaluating features (current best: 11):   1%|          | 1/113 [00:01<02:12,  1.18s/it][I 2024-09-11 08:56:50,975] A new study created in memory with name: no-name-9190f855-8171-46f9-8c70-d71b8be7dd32
[I 2024-09-11 08:56:52,087] Trial 0 finished with value: 0.16295045304703465 and parameters: {'lambda_l1': 0.005880408


Added feature: tbp_lv_nevi_confidence_between_std
New best score: 0.1675
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded
11. tbp_lv_C
12. tbp_lv_nevi_confidence_between_std


Evaluating features (current best: 12):   0%|          | 0/112 [00:00<?, ?it/s][I 2024-09-11 08:59:13,024] A new study created in memory with name: no-name-062811fd-0a79-4b71-be98-6e2fc998ca1a
[I 2024-09-11 08:59:15,020] Trial 0 finished with value: 0.16463617435426586 and parameters: {'lambda_l1': 0.007408056969831584, 'lambda_l2': 0.19327500415217155, 'learning_rate': 0.037281272259325864, 'max_depth': 8, 'num_leaves': 207, 'colsample_bytree': 0.9967767326971615, 'colsample_bynode': 0.9036099937395827, 'bagging_fraction': 0.9395976424240726, 'bagging_freq': 1, 'min_child_samples': 17, 'scale_pos_weight': 2.8446753077271216}. Best is trial 0 with value: 0.16463617435426586.
Evaluating features (current best: 12):   1%|          | 1/112 [00:01<03:41,  2.00s/it][I 2024-09-11 08:59:15,026] A new study created in memory with name: no-name-a7f1c92b-cc6e-4f7c-a65d-87519df1a15d
[I 2024-09-11 08:59:16,327] Trial 0 finished with value: 0.16612615937717018 and parameters: {'lambda_l1': 0.09245


Added feature: tbp_lv_B_between_std
New best score: 0.1677
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded
11. tbp_lv_C
12. tbp_lv_nevi_confidence_between_std
13. tbp_lv_B_between_std


Evaluating features (current best: 13):   0%|          | 0/111 [00:00<?, ?it/s][I 2024-09-11 09:01:43,961] A new study created in memory with name: no-name-55a064c2-572d-4445-a0bd-55d3915b56d3
[I 2024-09-11 09:01:45,354] Trial 0 finished with value: 0.16496232367842525 and parameters: {'lambda_l1': 6.763624554992923, 'lambda_l2': 0.02104668273617002, 'learning_rate': 0.04425678538967845, 'max_depth': 7, 'num_leaves': 144, 'colsample_bytree': 0.83848914945385, 'colsample_bynode': 0.7416508078079678, 'bagging_fraction': 0.4942582550581113, 'bagging_freq': 1, 'min_child_samples': 21, 'scale_pos_weight': 2.8986701520642546}. Best is trial 0 with value: 0.16496232367842525.
Evaluating features (current best: 13):   1%|          | 1/111 [00:01<02:33,  1.40s/it][I 2024-09-11 09:01:45,357] A new study created in memory with name: no-name-14d39080-4f46-4cab-85ce-488af82d857f
[I 2024-09-11 09:01:46,587] Trial 0 finished with value: 0.16020431499433796 and parameters: {'lambda_l1': 4.00724027332


Added feature: tbp_lv_Aext
New best score: 0.1680
Current feature set:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded
11. tbp_lv_C
12. tbp_lv_nevi_confidence_between_std
13. tbp_lv_B_between_std
14. tbp_lv_Aext


Evaluating features (current best: 14):   0%|          | 0/110 [00:00<?, ?it/s][I 2024-09-11 09:04:11,377] A new study created in memory with name: no-name-adb8aa61-39c7-4e98-8f01-623d73c87b8c
[I 2024-09-11 09:04:12,543] Trial 0 finished with value: 0.15599478242640724 and parameters: {'lambda_l1': 0.23120114959191834, 'lambda_l2': 4.222396436327102, 'learning_rate': 0.01284066191587296, 'max_depth': 5, 'num_leaves': 62, 'colsample_bytree': 0.9942596559953474, 'colsample_bynode': 0.42695783340289495, 'bagging_fraction': 0.9531602297263652, 'bagging_freq': 1, 'min_child_samples': 83, 'scale_pos_weight': 1.2812268995366864}. Best is trial 0 with value: 0.15599478242640724.
Evaluating features (current best: 14):   1%|          | 1/110 [00:01<02:07,  1.17s/it][I 2024-09-11 09:04:12,551] A new study created in memory with name: no-name-82d117f0-ef90-43db-9a42-469ef4bcfa05
[I 2024-09-11 09:04:13,452] Trial 0 finished with value: 0.16274327421531035 and parameters: {'lambda_l1': 0.098418785


Stopping: No improvement (best overall: 0.1680, current best: 0.1665)

Final best feature combination:
1. clin_size_long_diam_mm
2. tbp_lv_H_between_std
3. lesion_visibility_score
4. tbp_lv_deltaLBnorm_between_std
5. age_approx_zscore
6. tbp_lv_B
7. avg_contrast
8. hue_contrast
9. anatom_site_general_encoded
10. attribution_encoded
11. tbp_lv_C
12. tbp_lv_nevi_confidence_between_std
13. tbp_lv_B_between_std
14. tbp_lv_Aext
Best overall score: 0.1680
Best feature combination: ['clin_size_long_diam_mm', 'tbp_lv_H_between_std', 'lesion_visibility_score', 'tbp_lv_deltaLBnorm_between_std', 'age_approx_zscore', 'tbp_lv_B', 'avg_contrast', 'hue_contrast', 'anatom_site_general_encoded', 'attribution_encoded', 'tbp_lv_C', 'tbp_lv_nevi_confidence_between_std', 'tbp_lv_B_between_std', 'tbp_lv_Aext']





In [14]:
df_train[best_features].to_csv('df_train_filter.csv', index = False)