# ✅ Импорты

In [None]:
import warnings

# Install a catch-all "ignore" filter so that no warning category is shown
# from this point on (presumably to keep the notebook output uncluttered).
warnings.filterwarnings(action='ignore')

In [None]:
import numpy as np
import pandas as pd

from catboost import CatBoostClassifier, Pool

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklift.models import SoloModel
from sklift.models import TwoModels
from sklift.metrics import uplift_at_k
from sklift.datasets import fetch_hillstrom

from causalml.inference.meta.slearner import BaseSClassifier as CausalSoloModel
from causalml.inference.meta.tlearner import BaseTClassifier as CausalTwoModels

from upninja.pipelines import DataTransformers
from upninja.pipelines import BasePipeline
from upninja.utils.Score import upliftComparingHist, scoreUpliftAtK
from upninja.models import findBestParams, baseModelSelection
from upninja.models import Spaces

import matplotlib.pyplot as plt
import seaborn as sns

# ✅ Загрузка и обработка Kevin Hillstrom датасета

In [None]:
# Fetch the Kevin Hillstrom dataset through scikit-uplift and pull out the
# feature table, the target and the treatment column.
data = fetch_hillstrom()
X = data['data']
y = data['target']
t = data['treatment']

# Simplify the treatment: collapse both e-mail campaigns into a single
# "e-mail was sent" flag (1) versus "no e-mail" (0).
t = t.map({
    'Womens E-Mail': 1,
    'Mens E-Mail': 1,
    'No E-Mail': 0,
})

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test, t_train, t_test = train_test_split(
    X, y, t, test_size=0.3, random_state=42
)

In [None]:
# Work on a copy of the training target, and on training features passed
# through a freshly fitted HillstromTransformer.
y_prepared = y_train.copy()
X_prepared = DataTransformers.HillstromTransformer().fit_transform(X_train)

In [None]:
# Call findBestParams once per candidate estimator class, each with its
# matching search space from Spaces, on the prepared training data.
# NOTE(review): none of the *_best results are referenced by the other cells
# visible in this notebook — confirm whether they were meant to configure the
# models used further down.
log_reg_best = findBestParams(
    LogisticRegression, X_prepared, y_prepared, Spaces.log_reg_hp_space
)

knn_best = findBestParams(
    KNeighborsClassifier, X_prepared, y_prepared, Spaces.knn_hp_space
)

dt_best = findBestParams(
    DecisionTreeClassifier, X_prepared, y_prepared, Spaces.dt_hp_space
)

rf_best = findBestParams(
    RandomForestClassifier, X_prepared, y_prepared, Spaces.rf_hp_space
)

cb_best = findBestParams(
    CatBoostClassifier, X_prepared, y_prepared, Spaces.cb_hp_space
)

In [None]:
# Trial run of baseModelSelection on a few classifiers with default settings
# (CatBoost only has its logging silenced).
# NOTE(review): the 'LogisticRregressionSklearn' label contains a typo
# ("Rregression"); it is kept unchanged because it is a runtime key.
models = {}
models['LogisticRregressionSklearn'] = LogisticRegression()
models['TreeClassifierSklearn'] = DecisionTreeClassifier()
models['GradientBoostingCatBoost'] = CatBoostClassifier(logging_level='Silent')

res = baseModelSelection(models, X_prepared, y_prepared)

In [None]:
# Bare expression: shows the current value of `res` as the cell output.
res

# ✅ Протестируем scikit-uplift

In [None]:
# scikit-uplift SoloModel wrapped around a silent CatBoost classifier.
s_learner_model = SoloModel(estimator=CatBoostClassifier(logging_level='Silent'))

# Two-step pipeline: Hillstrom feature transformer, then the uplift model.
s_learner_pipeline = BasePipeline.BasePipeline(
    [
        ('hilstrom-transformer', DataTransformers.HillstromTransformer()),
        ('slearner', s_learner_model),
    ]
)

In [None]:
%%time

# Fit the S-learner pipeline on the raw training features, the target and the
# binary treatment flag; the %%time cell magic reports how long the cell took.
s_learner_pipeline.fit(X_train, y_train, t_train)

In [None]:
# scikit-uplift TwoModels in 'vanilla' mode: one silent CatBoost classifier
# for the treatment group and one for the control group.
two_models_model = TwoModels(
    estimator_trmnt=CatBoostClassifier(logging_level='Silent'),
    estimator_ctrl=CatBoostClassifier(logging_level='Silent'),
    method='vanilla',
)

# Two-step pipeline: Hillstrom feature transformer, then the uplift model.
two_models_pipeline = BasePipeline.BasePipeline(
    [
        ('hilstrom-transformer', DataTransformers.HillstromTransformer()),
        ('two-models', two_models_model),
    ]
)

In [None]:
%%time

# Fit the two-model pipeline on the raw training features, the target and the
# binary treatment flag; the %%time cell magic reports how long the cell took.
two_models_pipeline.fit(X_train, y_train, t_train)

In [None]:
# Pair each model label with that pipeline's predictions on the test features.
models_w_preds = tuple(
    (label, pipeline.predict(X_test))
    for label, pipeline in (
        ('s-learner', s_learner_pipeline),
        ('two-models', two_models_pipeline),
    )
)

In [None]:
# Split the two (label, predictions) pairs apart and hand them to the
# comparison-histogram helper.
(first_name, first_preds), (second_name, second_preds) = models_w_preds

res = upliftComparingHist(
    model_name_1=first_name,
    model_predictions_1=first_preds,
    model_name_2=second_name,
    model_predictions_2=second_preds,
)

In [None]:
# Score every (label, predictions) pair against the held-out target and
# treatment flags.
scoreUpliftAtK(models_w_preds, y_test, t_test)

# ✅ Протестируем Causal-ML

In [None]:
# CausalML S-learner (BaseSClassifier) wrapped around a silent CatBoost
# classifier.
s_learner_model = CausalSoloModel(learner=CatBoostClassifier(logging_level='Silent'))

# Two-step pipeline: Hillstrom feature transformer, then the uplift model.
s_learner_pipeline = BasePipeline.BasePipeline(
    [
        ('hilstrom-transformer', DataTransformers.HillstromTransformer()),
        ('slearner', s_learner_model),
    ]
)

In [None]:
%%time

# Fit the CausalML S-learner pipeline on the raw training data; the %%time
# cell magic reports how long the cell took.
# NOTE(review): CausalML meta-learners declare fit(X, treatment, y), whereas
# this call passes (X, y, treatment) — confirm that BasePipeline forwards the
# arguments in the order the wrapped learner expects.
s_learner_pipeline.fit(X_train, y_train, t_train)

In [None]:
# CausalML T-learner (BaseTClassifier) with one silent CatBoost classifier per
# group. BaseTClassifier's leading parameters are
# (learner, control_learner, treatment_learner), so two positional models
# would bind to `learner` and `control_learner`; passing them by keyword makes
# the control/treatment assignment explicit.
two_models_model = CausalTwoModels(
    control_learner=CatBoostClassifier(logging_level='Silent'),
    treatment_learner=CatBoostClassifier(logging_level='Silent'),
)

# Two-step pipeline: Hillstrom feature transformer, then the uplift model.
two_models_pipeline = BasePipeline.BasePipeline([
    ('hilstrom-transformer', DataTransformers.HillstromTransformer()),
    ('two-models', two_models_model),
])

In [None]:
%%time

# Fit the CausalML two-model pipeline on the raw training data; the %%time
# cell magic reports how long the cell took.
# NOTE(review): CausalML meta-learners declare fit(X, treatment, y), whereas
# this call passes (X, y, treatment) — confirm that BasePipeline forwards the
# arguments in the order the wrapped learner expects.
two_models_pipeline.fit(X_train, y_train, t_train)

In [None]:
# Pair each model label with that pipeline's test-set predictions, flattened
# to one dimension.
models_w_preds = tuple(
    (label, pipeline.predict(X_test).flatten())
    for label, pipeline in (
        ('s-learner', s_learner_pipeline),
        ('two-models', two_models_pipeline),
    )
)

In [None]:
# Split the two (label, predictions) pairs apart and hand them to the
# comparison-histogram helper.
(first_name, first_preds), (second_name, second_preds) = models_w_preds

res = upliftComparingHist(
    model_name_1=first_name,
    model_predictions_1=first_preds,
    model_name_2=second_name,
    model_predictions_2=second_preds,
)

In [None]:
# Score every (label, predictions) pair against the held-out target and
# treatment flags.
scoreUpliftAtK(models_w_preds, y_test, t_test)