In [16]:
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression

# Load the balanced dataset.
# NOTE(review): PROJECT_DIR is an absolute, machine-specific location; it is
# kept as-is so the resolved paths are unchanged, but it is now defined once
# so it only has to be edited in one place when running elsewhere.
PROJECT_DIR = "/Users/rohityadav/Desktop/Git Projects/ml-uplift-modeling-criteo"

path = f"{PROJECT_DIR}/data/criteo-uplift-v2.1-100K-balanced.csv"
df = pd.read_csv(path)

# Define X, Y and T.
# Features are the columns named "f<digits>" (f0, f1, ...). A bare
# startswith("f") test would also pick up any other column whose name merely
# begins with "f", silently training on it and baking it into the saved
# feature list that the bundle ships with.
feature_names = [
    c for c in df.columns
    if c.startswith("f") and c[1:].isdecimal()
]
if not feature_names:
    raise ValueError("No feature columns matching 'f<number>' were found in the dataset")

X = df[feature_names]
Y = df['visit'].astype(int)
T = df['treatment'].astype(int)

# Fit treated/control models: one classifier per treatment arm, each trained
# only on the rows belonging to that arm.
treated_rows = T == 1
control_rows = T == 0

model_treated = LogisticRegression(max_iter = 2000)
model_control = LogisticRegression(max_iter = 2000)

model_treated.fit(X[treated_rows], Y[treated_rows])
model_control.fit(X[control_rows], Y[control_rows])

# Persist both models together with the exact feature order they were fit on,
# so whoever loads the bundle can select columns in the same order.
bundle = {
    'model_treated': model_treated,
    'model_control': model_control,
    'feature_names': feature_names
}

joblib.dump(bundle, f"{PROJECT_DIR}/lambda_app/models/uplift_tlearner_bundle.joblib")
print('Saved -> uplift_tlearner_bundle.joblib')
print('Feature Names:', feature_names)

Saved -> uplift_tlearner_bundle.joblib
Feature Names: ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11']


In [17]:
# Quick local inference test (pre-deployment)

import numpy as np

# Read the persisted bundle back from disk so the check below scores with the
# saved artifact rather than with the models still held in memory.
saved_bundle_path = '/Users/rohityadav/Desktop/Git Projects/ml-uplift-modeling-criteo/lambda_app/models/uplift_tlearner_bundle.joblib'
loaded = joblib.load(saved_bundle_path)
m1, m0, cols = (
    loaded[key] for key in ('model_treated', 'model_control', 'feature_names')
)

# Draw 3 rows with a fixed seed and keep only the bundled feature columns,
# in the order stored in the bundle.
sample = df.sample(n = 3, random_state = 42)[cols]

# Second predict_proba column from each arm's model
# (presumably P(visit == 1), given the 0/1 integer target — confirm via classes_).
p1, p0 = (model.predict_proba(sample)[:, 1] for model in (m1, m0))

# Per-row uplift estimate: treated-model probability minus control-model probability.
uplift_test = p1 - p0

print('p_treated:', p1)
print('p_control:', p0)
print('Uplift:', uplift_test)

p_treated: [0.00963212 0.17479461 0.00991838]
p_control: [0.00844937 0.08550043 0.01129416]
Uplift: [ 0.00118275  0.08929418 -0.00137578]
