В ноутбуке представлен методы **feature selection** из библиотеки **[casualml](https://github.com/uber/causalml)**

In [3]:
pip install causalml scikit-uplift catboost optuna

Collecting causalml
  Downloading causalml-0.14.1.tar.gz (909 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m909.4/909.4 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting scikit-uplift
  Downloading scikit_uplift-0.5.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.1/42.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting forestci==0.6 (from causalml)
  Downloading forestci-0.6-py3-none-any.whl (12 kB)
Collecting pathos==0.2.9 (from causalml)
  Downloading pathos-0.2.9-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting numpy<1.24 (from causalml)
  Using cached numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinu

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
import itertools
from pprint import pprint

import optuna
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from optuna.samplers import TPESampler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklift.models import ClassTransformation
from sklift.metrics import uplift_at_k
from causalml.feature_selection.filters import FilterSelect
from tqdm import tqdm

In [6]:
df_train = pd.read_csv("/kaggle/input/uplift-ai-talent-hub/train_lenta.csv")
df_test = pd.read_csv("/kaggle/input/uplift-ai-talent-hub/test_lenta.csv")

df_train.shape, df_test.shape

((714510, 195), (170297, 193))

In [7]:
df_train.pivot_table(
    values='target',
    index='treatment',
    aggfunc=[np.mean, np.size],
    margins=True
)

Unnamed: 0_level_0,mean,size
Unnamed: 0_level_1,target,target
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2
0,0.102359,177669
1,0.110025,536841
All,0.108119,714510


In [8]:
features_names = df_test.columns.tolist()
target_name = "target"
len(features_names)

193

# Feature Selection

Метод имплементированы в библиотеке **causalml**

Также дополнительно можно почитать статью [Feature Selection Methods for Uplift Modeling and Heterogeneous Treatment Effect](https://arxiv.org/abs/2005.03447)

Так как **filter selection не может работать с пропущенными значениями** обработаем при помощи **SimpleImputer**

Приведем данные в необходимый вид для **filter selection**:

In [9]:
imputer = SimpleImputer(**{'missing_values': np.nan, 'strategy': 'constant', 'fill_value': 0})
df_train_not_nan = imputer.fit_transform(df_train)

df_train_not_nan = pd.DataFrame(df_train_not_nan, columns=df_train.columns.tolist())
df_train_not_nan["treatment_group_key"] = "treatment1"
index_control = df_train_not_nan[df_train_not_nan["treatment"] == 0].index
df_train_not_nan.loc[index_control, "treatment_group_key"] = "control"
df_train_not_nan = df_train_not_nan.drop("treatment", axis=1)

In [10]:
filter_method = FilterSelect()

## F Filter 

The F filter method is named after the F statistic for testing the significance of the interaction between the treatment indicator and a feature in linear regression.

In [11]:
method = "F"

orders = [1, 2, 3]

dfs_f_importances = []
for order in tqdm(orders):
     dfs_f_importances.append(
          filter_method.get_importance(
               df_train_not_nan, features_names, target_name, method,
               treatment_group='treatment1', order=order
          )
     )

100%|██████████| 3/3 [03:26<00:00, 68.79s/it]


In [12]:
dfs_f_importances[0].iloc[:20]

Unnamed: 0,method,feature,rank,score,p_value,misc
0,F filter,sale_sum_12m_g27,1.0,11423.143028,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_6m_g48,2.0,2812.203118,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_6m_g40,3.0,2453.218341,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_6m_g79,4.0,2146.632841,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_12m_g48,5.0,1824.92999,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_3m_g79,6.0,1771.27083,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_6m_g46,7.0,1766.078903,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_6m_g38,8.0,1588.344779,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,cheque_count_12m_g79,9.0,1588.236103,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"
0,F filter,sale_sum_6m_g26,10.0,1493.526396,0.0,"df_num: 1.0, df_denom: 714506.0, order:1"


In [13]:
dfs_f_importances[1].iloc[:20]

Unnamed: 0,method,feature,rank,score,p_value,misc
0,F filter,sale_sum_12m_g27,1.0,2628.679046,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_sum_12m_g26,2.0,1528.845745,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_count_6m_g32,3.0,1476.154477,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_count_12m_g32,4.0,1446.685571,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,cheque_count_6m_g40,5.0,1371.334505,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_sum_3m_g32,6.0,1286.508725,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_sum_12m_g32,7.0,1260.671312,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,sale_sum_6m_g32,8.0,1189.427052,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,cheque_count_12m_g32,9.0,1122.785782,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"
0,F filter,crazy_purchases_goods_count_6m,10.0,1122.029345,0.0,"df_num: 2.0, df_denom: 714504.0, order:2"


In [14]:
dfs_f_importances[-1].iloc[:20]

Unnamed: 0,method,feature,rank,score,p_value,misc
0,F filter,sale_sum_12m_g27,1.0,5136.005458,0.0,"df_num: 2, df_denom: 714504.0, order:3"
0,F filter,sale_sum_12m_g44,2.0,738.047121,6.324e-321,"df_num: 2, df_denom: 714502.0, order:3"
0,F filter,sale_count_6m_g32,3.0,726.136434,0.0,"df_num: 3.0, df_denom: 714502.0, order:3"
0,F filter,sale_sum_12m_g26,4.0,697.426124,2.5523979999999997e-303,"df_num: 2, df_denom: 714502.0, order:3"
0,F filter,sale_count_6m_g33,5.0,672.580973,0.0,"df_num: 3.0, df_denom: 714502.0, order:3"
0,F filter,sale_sum_12m_g24,6.0,658.05291,2.979469e-286,"df_num: 2, df_denom: 714504.0, order:3"
0,F filter,sale_sum_12m_g25,7.0,649.749828,1.184498e-282,"df_num: 2, df_denom: 714502.0, order:3"
0,F filter,sale_sum_6m_g32,8.0,622.433259,8.239176e-271,"df_num: 2, df_denom: 714502.0, order:3"
0,F filter,sale_sum_6m_g24,9.0,614.315881,2.7234810000000002e-267,"df_num: 2, df_denom: 714503.0, order:3"
0,F filter,sale_count_3m_g33,10.0,560.761158,0.0,"df_num: 3.0, df_denom: 714502.0, order:3"


Важные фичи при разных порядках примерно одинаковые, возмьем с порядком 3

In [15]:
f_imp = dfs_f_importances[-1]

## KL divergence filter

It's **bin-based divergence filter metho**

The bin-based method first divides the samples into S (preferably equally sized) bins, where S is a hyperparameter. The importance score is defined as the divergence measure of the treatment effect over these S bins.

In [16]:
method = "KL"

n_bins = [10, 20, 30]

dfs_kl_importances = []
for bins in tqdm(n_bins):
     dfs_kl_importances.append(
          filter_method.get_importance(
               df_train_not_nan, features_names, target_name, method,
               treatment_group='treatment1', n_bins=bins
          )
     )

100%|██████████| 3/3 [09:23<00:00, 187.80s/it]


In [17]:
dfs_kl_importances[0].head(20)

Unnamed: 0,method,feature,rank,score,p_value,misc
0,KL filter,cheque_count_6m_g40,1.0,8.6e-05,,number_of_bins: 6
0,KL filter,months_from_register,2.0,7.4e-05,,number_of_bins: 10
0,KL filter,sale_sum_6m_g25,3.0,7.2e-05,,number_of_bins: 7
0,KL filter,response_viber,4.0,6.7e-05,,number_of_bins: 5
0,KL filter,age,5.0,6.3e-05,,number_of_bins: 10
0,KL filter,sale_count_6m_g54,6.0,5.5e-05,,number_of_bins: 8
0,KL filter,k_var_sku_price_3m_g44,7.0,5.3e-05,,number_of_bins: 3
0,KL filter,sale_sum_12m_g27,8.0,4.9e-05,,number_of_bins: 9
0,KL filter,k_var_disc_share_1m_g54,9.0,4.9e-05,,number_of_bins: 2
0,KL filter,cheque_count_3m_g52,10.0,3.8e-05,,number_of_bins: 3


In [18]:
dfs_kl_importances[1].head(20)

Unnamed: 0,method,feature,rank,score,p_value,misc
0,KL filter,response_viber,1.0,0.000176,,number_of_bins: 9
0,KL filter,age,2.0,0.00016,,number_of_bins: 20
0,KL filter,months_from_register,3.0,0.000129,,number_of_bins: 20
0,KL filter,sale_count_12m_g49,4.0,0.000125,,number_of_bins: 19
0,KL filter,cheque_count_6m_g40,5.0,0.000108,,number_of_bins: 10
0,KL filter,sale_sum_6m_g25,6.0,0.000105,,number_of_bins: 14
0,KL filter,sale_sum_12m_g27,7.0,9.2e-05,,number_of_bins: 17
0,KL filter,sale_count_6m_g24,8.0,9.2e-05,,number_of_bins: 15
0,KL filter,k_var_disc_share_3m_g49,9.0,8.8e-05,,number_of_bins: 10
0,KL filter,sale_count_6m_g44,10.0,8.3e-05,,number_of_bins: 13


In [19]:
dfs_kl_importances[2].head(20)

Unnamed: 0,method,feature,rank,score,p_value,misc
0,KL filter,months_from_register,1.0,0.00029,,number_of_bins: 30
0,KL filter,sale_count_12m_g49,2.0,0.000204,,number_of_bins: 26
0,KL filter,response_viber,3.0,0.000192,,number_of_bins: 13
0,KL filter,age,4.0,0.000191,,number_of_bins: 30
0,KL filter,sale_sum_6m_g25,5.0,0.000142,,number_of_bins: 20
0,KL filter,sale_count_6m_g24,6.0,0.000136,,number_of_bins: 22
0,KL filter,sale_sum_12m_g24,7.0,0.000122,,number_of_bins: 25
0,KL filter,sale_sum_12m_g27,8.0,0.000121,,number_of_bins: 25
0,KL filter,sale_sum_12m_g25,9.0,0.00011,,number_of_bins: 24
0,KL filter,cheque_count_6m_g40,10.0,0.000109,,number_of_bins: 12


На мой взгляд самый правдоподобный порядок фичей (из логических соображений) при n_bins=30 

In [20]:
kl_imp = dfs_kl_importances[-1]

# Select top features

In [21]:
top_n = [10, 25, 50]

FEATURES = {
    "F": {},
    "KL": {},
}
for n in top_n:
    FEATURES["F"][n] = f_imp.feature.iloc[:n]
    FEATURES["KL"][n] = kl_imp.feature.iloc[:n]

FEATURES

{'F': {10: 0     sale_sum_12m_g27
  0     sale_sum_12m_g44
  0    sale_count_6m_g32
  0     sale_sum_12m_g26
  0    sale_count_6m_g33
  0     sale_sum_12m_g24
  0     sale_sum_12m_g25
  0      sale_sum_6m_g32
  0      sale_sum_6m_g24
  0    sale_count_3m_g33
  Name: feature, dtype: object,
  25: 0                  sale_sum_12m_g27
  0                  sale_sum_12m_g44
  0                 sale_count_6m_g32
  0                  sale_sum_12m_g26
  0                 sale_count_6m_g33
  0                  sale_sum_12m_g24
  0                  sale_sum_12m_g25
  0                   sale_sum_6m_g32
  0                   sale_sum_6m_g24
  0                 sale_count_3m_g33
  0                 sale_count_6m_g24
  0                   sale_sum_6m_g44
  0                 sale_count_3m_g24
  0               cheque_count_6m_g40
  0                sale_count_12m_g32
  0                sale_count_12m_g33
  0                  sale_sum_12m_g32
  0                 sale_count_6m_g25
  0                 s

In [22]:
del df_train_not_nan

# Optuna

In [23]:
X, trmnt, target = df_train.drop(["treatment", "target"], axis=1), df_train["treatment"], df_train["target"]

X_train, X_val, trmnt_train, trmnt_val, y_train, y_val = train_test_split(
    X, 
    trmnt, 
    target, 
    random_state=59, 
    stratify=df_train[["treatment", "target"]], 
    test_size=0.2,
)

X_train.shape, X_val.shape, trmnt_train.shape, trmnt_val.shape, y_train.shape, y_val.shape

((571608, 193), (142902, 193), (571608,), (142902,), (571608,), (142902,))

In [24]:
def get_score(model, X_val, y_true, treatment) -> float:
    uplift_pred = model.predict(X_val)
    score = uplift_at_k(y_true=y_true, uplift=uplift_pred, treatment=treatment, k=0.05, strategy='overall')
    return score


def objective(
    trial, 
    config_model: dict, 
    config_imputer: dict,
    config_features: dict,
    features_name: dict, 
    X_train: pd.DataFrame,
    X_val: pd.DataFrame, 
    trmnt_train: pd.Series, 
    trmnt_val: pd.Series, 
    y_train: pd.Series,
    y_val: pd.Series,
) -> float:
    # Model parameters
    learning_rate = trial.suggest_categorical("learning_rate", config_model["learning_rate"])
    depth = trial.suggest_categorical("depth", config_model["depth"])
    l2_leaf_reg = trial.suggest_categorical("l2_leaf_reg", config_model["l2_leaf_reg"])
    bagging_temperature = trial.suggest_categorical("bagging_temperature", config_model["bagging_temperature"])
    grow_policy = trial.suggest_categorical("grow_policy", config_model["grow_policy"])

    # Imputer parameters
    strategy = trial.suggest_categorical("strategy", config_imputer["strategy"])
    add_indicator = trial.suggest_categorical("add_indicator", config_imputer["add_indicator"])
    
    # Feature selection parameters
    method = trial.suggest_categorical("method", config_features["method"])
    top_n_features = trial.suggest_categorical("top_n_features", config_features["top_n"])
    
    X_train_curr = X_train[features_name[method][top_n_features]].copy()
    X_val_curr = X_val[features_name[method][top_n_features]].copy()

    estimator = CatBoostClassifier(
        **{
            'iterations': config_model['iterations'],
            'learning_rate': learning_rate, 
            'depth': depth,
            'l2_leaf_reg': l2_leaf_reg,
            'grow_policy': grow_policy,
            'bagging_temperature': bagging_temperature,
            'verbose': config_model['verbose'],
            'thread_count': config_model['thread_count'],
            'random_state': config_model['random_state'],
            'task_type': config_model['task_type'],
            'devices': config_model['devices'],
        }
    )
    imputer = SimpleImputer(
        missing_values=config_imputer['missing_values'], 
        strategy=strategy, 
        add_indicator=add_indicator
    )

    ct_model = ClassTransformation(estimator=estimator)
    pipeline_uplift = Pipeline(
    steps=[
            ("imputer", imputer), 
            ("model", ct_model)
        ]
    )
    pipeline_uplift.fit(
        X=X_train_curr,
        y=y_train,
        model__treatment=trmnt_train,
    )

    trial.set_user_attr("EST_learning_rate", config_model["learning_rate"])
    trial.set_user_attr("EST_depth", config_model["depth"])
    trial.set_user_attr("EST_l2_leaf_reg", config_model["l2_leaf_reg"])
    trial.set_user_attr("EST_bagging_temperature", config_model["bagging_temperature"])
    trial.set_user_attr("EST_grow_policy", config_model["grow_policy"])
    trial.set_user_attr("IMP_strategy", config_imputer["strategy"])
    trial.set_user_attr("IMP_add_indicator", config_imputer["add_indicator"])
    trial.set_user_attr("FS_strategy", config_features["method"])
    trial.set_user_attr("FS_add_indicator", config_features["top_n"])
    
    uplift_at_5 = get_score(pipeline_uplift, X_val_curr, y_val, trmnt_val, )

    return uplift_at_5

Так как до этого были выбрано оптимальные параметры модели, то зафиксируем их и посмотрим, как она себя ведет с фичами, выбранными при помощи методов библиотеки **causalml** от uber

In [25]:
CONFIG_MODEL = {
    'iterations': 1000,
    'learning_rate': [0.1, ],
    'depth': [16, ],
    'l2_leaf_reg': [0.2, ],
    'bagging_temperature': [0.25, ],
    'grow_policy': ['Depthwise', ],
    'verbose': 100,
    'thread_count': -1,
    'random_state': 59,
    'task_type': 'GPU',
    'devices': '0:1'
}

CONFIG_IMPUTER = {
    "missing_values": np.nan,
    "strategy": ['constant', ],
    "fill_value": 0,
    "add_indicator": [False, ]
}

CONFIG_FEATURES = {
    "method": ["F", "KL", ],
    "top_n": [10, 25, 50]
}

COUNT_ITER_OPTUNA = (
    len(CONFIG_FEATURES["method"])
    * len(CONFIG_FEATURES["top_n"])
)

COUNT_ITER_OPTUNA

6

In [26]:
study_class_transformation = optuna.create_study(
    study_name="class_transformation_with_feature_selection",
    sampler=TPESampler(seed=59),
    direction='maximize'
)

study_class_transformation.optimize(
    lambda trial: objective(
        trial=trial, 
        config_model=CONFIG_MODEL, 
        config_imputer=CONFIG_IMPUTER, 
        config_features=CONFIG_FEATURES,
        features_name=FEATURES,
        X_train=X_train, 
        X_val=X_val, 
        trmnt_train=trmnt_train, 
        trmnt_val=trmnt_val, 
        y_train=y_train,
        y_val=y_val
    ),
    n_trials=COUNT_ITER_OPTUNA,
)

[I 2023-11-12 13:22:07,435] A new study created in memory with name: class_transformation_with_feature_selection


0:	learn: 0.6573518	total: 667ms	remaining: 11m 6s
100:	learn: 0.1116970	total: 1m 7s	remaining: 10m 1s
200:	learn: 0.0595446	total: 2m 14s	remaining: 8m 55s
300:	learn: 0.0522072	total: 3m 19s	remaining: 7m 44s
400:	learn: 0.0494754	total: 4m 24s	remaining: 6m 34s
500:	learn: 0.0480861	total: 5m 26s	remaining: 5m 25s
600:	learn: 0.0472240	total: 6m 28s	remaining: 4m 17s
700:	learn: 0.0467036	total: 7m 27s	remaining: 3m 10s
800:	learn: 0.0462787	total: 8m 26s	remaining: 2m 5s
900:	learn: 0.0459342	total: 9m 25s	remaining: 1m 2s
999:	learn: 0.0456765	total: 10m 22s	remaining: 0us


[I 2023-11-12 13:33:34,471] Trial 0 finished with value: 0.8467441857275504 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 10}. Best is trial 0 with value: 0.8467441857275504.


0:	learn: 0.6618193	total: 525ms	remaining: 8m 44s
100:	learn: 0.0540390	total: 1m 31s	remaining: 13m 34s
200:	learn: 0.0180519	total: 3m 3s	remaining: 12m 11s
300:	learn: 0.0124406	total: 4m 33s	remaining: 10m 34s
400:	learn: 0.0109454	total: 5m 53s	remaining: 8m 48s
500:	learn: 0.0103635	total: 7m 9s	remaining: 7m 8s
600:	learn: 0.0100204	total: 8m 24s	remaining: 5m 34s
700:	learn: 0.0098284	total: 9m 34s	remaining: 4m 5s
800:	learn: 0.0096980	total: 10m 44s	remaining: 2m 40s
900:	learn: 0.0096132	total: 11m 48s	remaining: 1m 17s
999:	learn: 0.0095410	total: 12m 52s	remaining: 0us


[I 2023-11-12 13:47:40,087] Trial 1 finished with value: 0.9839040767006031 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 50}. Best is trial 1 with value: 0.9839040767006031.


0:	learn: 0.6578863	total: 529ms	remaining: 8m 48s
100:	learn: 0.0748397	total: 1m 18s	remaining: 11m 42s
200:	learn: 0.0304250	total: 2m 40s	remaining: 10m 36s
300:	learn: 0.0243772	total: 4m	remaining: 9m 19s
400:	learn: 0.0226667	total: 5m 17s	remaining: 7m 54s
500:	learn: 0.0219485	total: 6m 30s	remaining: 6m 29s
600:	learn: 0.0215379	total: 7m 42s	remaining: 5m 6s
700:	learn: 0.0212978	total: 8m 51s	remaining: 3m 46s
800:	learn: 0.0211356	total: 10m 2s	remaining: 2m 29s
900:	learn: 0.0210157	total: 11m 12s	remaining: 1m 13s
999:	learn: 0.0209384	total: 12m 21s	remaining: 0us


[I 2023-11-12 14:01:26,075] Trial 2 finished with value: 0.9795421334632245 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 25}. Best is trial 1 with value: 0.9839040767006031.


0:	learn: 0.6573518	total: 637ms	remaining: 10m 35s
100:	learn: 0.1132987	total: 1m 9s	remaining: 10m 16s
200:	learn: 0.0597536	total: 2m 18s	remaining: 9m 10s
300:	learn: 0.0521342	total: 3m 24s	remaining: 7m 55s
400:	learn: 0.0494168	total: 4m 29s	remaining: 6m 42s
500:	learn: 0.0480831	total: 5m 31s	remaining: 5m 30s
600:	learn: 0.0472479	total: 6m 33s	remaining: 4m 21s
700:	learn: 0.0467464	total: 7m 31s	remaining: 3m 12s
800:	learn: 0.0463287	total: 8m 30s	remaining: 2m 6s
900:	learn: 0.0459750	total: 9m 30s	remaining: 1m 2s
999:	learn: 0.0457136	total: 10m 28s	remaining: 0us


[I 2023-11-12 14:12:58,353] Trial 3 finished with value: 0.849848435956858 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 10}. Best is trial 1 with value: 0.9839040767006031.


0:	learn: 0.6578863	total: 548ms	remaining: 9m 7s
100:	learn: 0.0739709	total: 1m 19s	remaining: 11m 43s
200:	learn: 0.0301397	total: 2m 41s	remaining: 10m 41s
300:	learn: 0.0241911	total: 4m 3s	remaining: 9m 26s
400:	learn: 0.0226005	total: 5m 20s	remaining: 7m 58s
500:	learn: 0.0219213	total: 6m 32s	remaining: 6m 31s
600:	learn: 0.0215201	total: 7m 42s	remaining: 5m 7s
700:	learn: 0.0212954	total: 8m 50s	remaining: 3m 46s
800:	learn: 0.0211355	total: 9m 59s	remaining: 2m 28s
900:	learn: 0.0210209	total: 11m 7s	remaining: 1m 13s
999:	learn: 0.0209405	total: 12m 15s	remaining: 0us


[I 2023-11-12 14:26:30,265] Trial 4 finished with value: 0.9795221843003413 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 25}. Best is trial 1 with value: 0.9839040767006031.


0:	learn: 0.6578869	total: 606ms	remaining: 10m 5s
100:	learn: 0.0755095	total: 1m 20s	remaining: 11m 52s
200:	learn: 0.0304373	total: 2m 44s	remaining: 10m 53s
300:	learn: 0.0242545	total: 4m 8s	remaining: 9m 37s
400:	learn: 0.0225932	total: 5m 26s	remaining: 8m 7s
500:	learn: 0.0219042	total: 6m 41s	remaining: 6m 39s
600:	learn: 0.0215185	total: 7m 50s	remaining: 5m 12s
700:	learn: 0.0212987	total: 8m 59s	remaining: 3m 50s
800:	learn: 0.0211338	total: 10m 8s	remaining: 2m 31s
900:	learn: 0.0210214	total: 11m 16s	remaining: 1m 14s
999:	learn: 0.0209405	total: 12m 24s	remaining: 0us


[I 2023-11-12 14:40:10,181] Trial 5 finished with value: 0.9796412990790111 and parameters: {'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'strategy': 'constant', 'add_indicator': False, 'method': 'F', 'top_n_features': 25}. Best is trial 1 with value: 0.9839040767006031.


In [27]:
print("Number of finished trials: {}".format(len(study_class_transformation.trials)))

print("Best trial:")
trial = study_class_transformation.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 6
Best trial:
  Value: 0.9839040767006031
  Params: 
    learning_rate: 0.1
    depth: 16
    l2_leaf_reg: 0.2
    bagging_temperature: 0.25
    grow_policy: Depthwise
    strategy: constant
    add_indicator: False
    method: F
    top_n_features: 50


In [28]:
df_optuna_result = (
    study_class_transformation.trials_dataframe()
    .sort_values(by="value", ascending=False)
    .reset_index(drop=True)
)
df_optuna_result

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_add_indicator,params_bagging_temperature,params_depth,params_grow_policy,params_l2_leaf_reg,...,user_attrs_EST_bagging_temperature,user_attrs_EST_depth,user_attrs_EST_grow_policy,user_attrs_EST_l2_leaf_reg,user_attrs_EST_learning_rate,user_attrs_FS_add_indicator,user_attrs_FS_strategy,user_attrs_IMP_add_indicator,user_attrs_IMP_strategy,state
0,1,0.983904,2023-11-12 13:33:34.472919,2023-11-12 13:47:40.086589,0 days 00:14:05.613670,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
1,5,0.979641,2023-11-12 14:26:30.266636,2023-11-12 14:40:10.180315,0 days 00:13:39.913679,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
2,2,0.979542,2023-11-12 13:47:40.088334,2023-11-12 14:01:26.075273,0 days 00:13:45.986939,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
3,4,0.979522,2023-11-12 14:12:58.354811,2023-11-12 14:26:30.264689,0 days 00:13:31.909878,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
4,3,0.849848,2023-11-12 14:01:26.077085,2023-11-12 14:12:58.353008,0 days 00:11:32.275923,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
5,0,0.846744,2023-11-12 13:22:07.436910,2023-11-12 13:33:34.471207,0 days 00:11:27.034297,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE


In [29]:
best_results = df_optuna_result.iloc[:2]
best_results

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_add_indicator,params_bagging_temperature,params_depth,params_grow_policy,params_l2_leaf_reg,...,user_attrs_EST_bagging_temperature,user_attrs_EST_depth,user_attrs_EST_grow_policy,user_attrs_EST_l2_leaf_reg,user_attrs_EST_learning_rate,user_attrs_FS_add_indicator,user_attrs_FS_strategy,user_attrs_IMP_add_indicator,user_attrs_IMP_strategy,state
0,1,0.983904,2023-11-12 13:33:34.472919,2023-11-12 13:47:40.086589,0 days 00:14:05.613670,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE
1,5,0.979641,2023-11-12 14:26:30.266636,2023-11-12 14:40:10.180315,0 days 00:13:39.913679,False,0.25,16,Depthwise,0.2,...,[0.25],[16],[Depthwise],[0.2],[0.1],"[10, 25, 50]","[F, KL]",[False],[constant],COMPLETE


# Submit

In [30]:
def train_all_data(X_train, trmnt_train, y_train, X_test, config_cb, config_imputer) -> pd.DataFrame:
    estimator = CatBoostClassifier(**config_cb)
    imputer = SimpleImputer(**config_imputer)

    ct_model = ClassTransformation(estimator=estimator)
    pipeline_uplift = Pipeline(
    steps=[
            ("imputer", imputer), 
            ("model", ct_model)
        ]
    )
    pipeline_uplift.fit(
        X=X_train,
        y=y_train,
        model__treatment=trmnt_train,
    )
    
    uplift_pred = pipeline_uplift.predict(X_test)
    submit = pd.DataFrame(
        {
            "id": [idx for idx in range(len(uplift_pred))],
            "predicted_uplift": uplift_pred
        }
    )
    
    return submit

In [None]:
k = 0
for idx in range(len(best_results)):
    parameters = best_results.iloc[idx]
    print(f" === Start fit:")
    CONFIG_CATBOOST = {
        'iterations': 1000,
        'learning_rate': parameters['params_learning_rate'],
        'depth': parameters['params_depth'],
        'l2_leaf_reg': parameters['params_l2_leaf_reg'],
        'bagging_temperature': parameters['params_bagging_temperature'],
        'grow_policy': parameters['params_grow_policy'],
        'verbose': 100,
        'thread_count': -1,
        'random_state': 17,
        'task_type': 'GPU',
        'devices': '0:1'
    }
    CONFIG_IMPUTER = {
        "missing_values": np.nan,
        "strategy": parameters['params_strategy'],
        "fill_value": 0,
        "add_indicator": parameters['params_add_indicator']
    }
    
    X_train_curr = X[FEATURES[parameters["params_method"]][parameters["params_top_n_features"]]].copy()
    X_val_curr = df_test[FEATURES[parameters["params_method"]][parameters["params_top_n_features"]]].copy()
    
    print(f"Uplift@5% on valudation: {parameters['value']}")
    print(
        f"=Catbost cfg: {CONFIG_CATBOOST}\n=Imputer cfg:{CONFIG_IMPUTER}\n"
        f"Feature selection method: {parameters['params_method']}\n"
        f"Top_n features: {parameters['params_top_n_features']}"
    )
    submit = train_all_data(
        X_train=X, trmnt_train=trmnt, y_train=target, 
        X_test=df_test, config_cb=CONFIG_CATBOOST, config_imputer=CONFIG_IMPUTER
    )
    
    submit.to_csv(f"/kaggle/working/Class_transformation_v{k}.csv", index=False)
    k += 1
    print("====================================================")
    print()

 === Start fit:
Uplift@5% on valudation: 0.9839040767006031
=Catbost cfg: {'iterations': 1000, 'learning_rate': 0.1, 'depth': 16, 'l2_leaf_reg': 0.2, 'bagging_temperature': 0.25, 'grow_policy': 'Depthwise', 'verbose': 100, 'thread_count': -1, 'random_state': 17, 'task_type': 'GPU', 'devices': '0:1'}
=Imputer cfg:{'missing_values': nan, 'strategy': 'constant', 'fill_value': 0, 'add_indicator': False}
Feature selection method: F
Top_n features: 50
0:	learn: 0.6519248	total: 925ms	remaining: 15m 23s
100:	learn: 0.0649487	total: 2m 14s	remaining: 19m 54s
200:	learn: 0.0153383	total: 4m 23s	remaining: 17m 27s
300:	learn: 0.0057319	total: 6m 11s	remaining: 14m 23s
400:	learn: 0.0028987	total: 7m 45s	remaining: 11m 35s
500:	learn: 0.0016660	total: 9m 16s	remaining: 9m 14s
600:	learn: 0.0011619	total: 10m 36s	remaining: 7m 2s
700:	learn: 0.0008920	total: 11m 51s	remaining: 5m 3s
800:	learn: 0.0007034	total: 13m 4s	remaining: 3m 15s
900:	learn: 0.0005870	total: 14m 16s	remaining: 1m 34s
999:	le