In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# загружаем данные с предсказанными значениями uplift
uplift_data = pd.read_csv('ab_results.csv')  # замените на путь к вашему файлу

In [2]:
print(uplift_data)

        usage_duration  number_of_bank_cards  money_spent_amount  \
0             4.675074                     1       463182.628027   
1             4.315465                     1       379881.205811   
2             5.135106                     1       528345.551898   
3             4.357843                     1       568219.437346   
4             4.518232                     1       340441.730023   
...                ...                   ...                 ...   
399995        6.591719                     1       529039.127112   
399996        1.389599                     1       525259.159120   
399997        4.220414                     1       519899.700921   
399998        6.497928                     1       548193.088969   
399999        3.576213                     2       371420.902973   

        visit_frequency  number_of_visited_shops  \
0              6.782820                        4   
1              6.947705                        3   
2              8.665648    

In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklift.metrics import uplift_auc_score, qini_auc_score
from catboost import CatBoostClassifier

data = pd.read_csv("ab_results.csv")

# разделим данные на признаки и целевую переменную
X = data.drop(['target'], axis=1)  # все столбцы, кроме целевой переменной
y = data['target']  # целевая переменная (например, количество поездок)

# Разделим данные на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                    stratify=data[['target', 'treatment']],
                                    random_state=42)

# 1. Разделяем обучающую выборку на treatment=1 и treatment=0
X_treat = X_train[X_train['treatment'] == 1].drop('treatment', axis=1)
y_treat = y_train[X_train['treatment'] == 1]

X_control = X_train[X_train['treatment'] == 0].drop('treatment', axis=1)
y_control = y_train[X_train['treatment'] == 0]

# 2. Обучаем outcome-модели для treatment и control
outcome_treat = CatBoostClassifier(random_state=13, iterations=100, verbose=0)
outcome_control = CatBoostClassifier(random_state=13, iterations=100, verbose=0)

outcome_treat.fit(X_treat, y_treat)
outcome_control.fit(X_control, y_control)

# 3. Строим «псевдотаргеты» (imputed treatment effects)
# Для объектов из treatment-группы: tau0 = y1 - M0(x)
mu0_treat = outcome_control.predict_proba(X_treat)[:, 1]
tau0 = y_treat.values - mu0_treat

# Для объектов из контрольной группы: tau1 = M1(x) - y0
mu1_control = outcome_treat.predict_proba(X_control)[:, 1]
tau1 = mu1_control - y_control.values

# 4. Обучаем effect-модели на псевдотаргетах
effect_0 = LinearRegression()
effect_1 = LinearRegression()
effect_0.fit(X_control, tau1)
effect_1.fit(X_treat, tau0)

# 5. Оцениваем склонность к получению воздействия
propensity = CatBoostClassifier(random_state=13, iterations=100, verbose=0)
propensity.fit(X_train.drop('treatment', axis=1), X_train['treatment'])
e_test = propensity.predict_proba(X_test.drop('treatment', axis=1))[:, 1]

# 6. Получаем предсказания uplift для тестовой выборки
X_test_features = X_test.drop('treatment', axis=1)
tau0_pred = effect_0.predict(X_test_features)
tau1_pred = effect_1.predict(X_test_features)

uplift_pred = e_test * tau1_pred + (1 - e_test) * tau0_pred

# 7. Оценка метрик uplift
uplift_score = uplift_auc_score(
    y_test.values,
    uplift_pred,
    X_test['treatment'].values
)
qini_score = qini_auc_score(
    y_test.values,
    uplift_pred,
    X_test['treatment'].values
)

print(f"Uplift AUC: {uplift_score:.2f}")
print(f"Qini AUC: {qini_score:.2f}")

Uplift AUC: 0.16
Qini AUC: 0.20
