In [2]:
!pip install lightgbm


Defaulting to user installation because normal site-packages is not writeable
Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.5 MB ? eta -:--:--
   -------------- ------------------------- 0.5/1.5 MB 524.3 kB/s eta 0:00:02
   --------------------- ------------------ 0.8/1.5 MB 729.2 kB/s eta 0:00:01
   --------------------- ------------------ 0.8/1.5 MB 729.2 kB/s eta 0:00:01
   ---------------------------- ----------- 1.0/1.5 MB 68

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import lightgbm as lgb

# Load your dataset
data = pd.read_csv('criteo-uplift-v2.1.csv')  # Replace with your actual dataset path

# Define features and target variable
features = data.drop(columns=['conversion'])  # All columns except 'conversion'
target = data['conversion']  # Target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [4]:
# Define preprocessing steps
numeric_features = features.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_features = features.select_dtypes(include=['object']).columns.tolist()

numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Create the model pipeline
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', lgb.LGBMRegressor(random_state=42))
])

# Train the model
model_pipeline.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.474412 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1578
[LightGBM] [Info] Number of data points in the train set: 11183673, number of used features: 15
[LightGBM] [Info] Start training from score 0.002909


In [6]:
def simulate_changes(model, original_data, changes):
    """
    Simulate changes in the features and predict new conversion rates.
    
    Parameters:
    - model: Trained model pipeline
    - original_data: Original feature data (DataFrame)
    - changes: Dictionary of changes to apply to the features
    
    Returns:
    - new_conversion_rates: Predicted conversion rates after changes
    """
    # Create a copy of the original data to apply changes
    simulated_data = original_data.copy()
    
    # Apply changes to the specified features
    for feature, change in changes.items():
        if feature in simulated_data.columns:
            simulated_data[feature] += change
    
    # Predict new conversion rates
    new_conversion_rates = model.predict(simulated_data)
    return new_conversion_rates

# Example changes to simulate
hypothetical_changes = {
    'f0': 1,  # Increase f0 by 1
    'f1': -0.5,  # Decrease f1 by 0.5
    'f2': 2,  # Increase f2 by 2
    # Add more changes as needed
}

# Run simulation
predicted_conversion_rates = simulate_changes(model_pipeline, X_test, hypothetical_changes)

# Display the predicted conversion rates
print(predicted_conversion_rates)

[7.59348314e-06 7.59348314e-06 7.59348314e-06 ... 7.59348314e-06
 4.20818732e-02 7.59348314e-06]


In [7]:
# Calculate average conversion rates
original_average = y_test.mean()
simulated_average = predicted_conversion_rates.mean()

print(f"Original Average Conversion Rate: {original_average:.4f}")
print(f"Simulated Average Conversion Rate: {simulated_average:.4f}")

Original Average Conversion Rate: 0.0029
Simulated Average Conversion Rate: 0.0034
