Predict the likelihood of a user clicking on an ad

In [7]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Create a Pandas dataframe with the sample data
df = pd.DataFrame({
    'age': [25, 35, 45, 28, 42, 33],
    'income': [50000, 75000, 90000, 60000, 85000, 70000],
    'gender': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'interests': ['Sports', 'Music', 'Travel', 'Fashion', 'Sports', 'Travel'],
    'clicked_on_ad': [True, False, False, True, False, True]
})

# Label encode the categorical variables
encoder = LabelEncoder()
df['gender'] = encoder.fit_transform(df['gender'])
df['interests'] = encoder.fit_transform(df['interests'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df.drop('clicked_on_ad', axis=1), df['clicked_on_ad'], test_size=0.2)

# Build a logistic regression model
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Predict the likelihood of a user clicking on an ad
y_pred = lr.predict_proba(X_test)[:, 1]

# Evaluate the performance of the model
accuracy = accuracy_score(y_test, y_pred.round())
print('Accuracy:', accuracy)


Accuracy: 0.5


MTA

In [1]:
import pandas as pd

# Load sample data into a Pandas dataframe
df = pd.DataFrame({
    'user_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'source': ['Facebook', 'Facebook', 'Google', 'Google', 'Google', 'Twitter', 'Twitter', 'Twitter', 'Instagram', 'Instagram'],
    'conversion': [False, True, False, True, False, True, False, True, False, True]
})

# First-touch attribution model
first_touch = df.drop_duplicates(subset=['user_id'], keep='first')
first_touch_conversions = first_touch.groupby('source')['conversion'].sum()
first_touch_conversion_rate = first_touch_conversions / len(first_touch)

print('First-touch attribution model:')
print(first_touch_conversion_rate)

# Last-touch attribution model
last_touch = df.drop_duplicates(subset=['user_id'], keep='last')
last_touch_conversions = last_touch.groupby('source')['conversion'].sum()
last_touch_conversion_rate = last_touch_conversions / len(last_touch)

print('\nLast-touch attribution model:')
print(last_touch_conversion_rate)


First-touch attribution model:
source
Facebook     0.1
Google       0.1
Instagram    0.1
Twitter      0.2
Name: conversion, dtype: float64

Last-touch attribution model:
source
Facebook     0.1
Google       0.1
Instagram    0.1
Twitter      0.2
Name: conversion, dtype: float64


Incrementality-based Attribution

In [3]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Define the ad data as a Pandas DataFrame
data = pd.DataFrame({
    'treatment': [0, 0, 1, 1, 1],
    'converted': [0, 0, 0, 1, 0],
    'revenue': [0, 0, 0, 100, 0],
    'cost': [50, 60, 70, 80, 75]
})

# Split data into treatment and control groups
treatment = data[data['treatment'] == 1]
control = data[data['treatment'] == 0]

# Compute the conversion rates for treatment and control groups
conversion_rate_treatment = treatment['converted'].mean()
conversion_rate_control = control['converted'].mean()

# Compute the incremental conversion rate and lift
incremental_conversion_rate = conversion_rate_treatment - conversion_rate_control
incremental_lift = incremental_conversion_rate / conversion_rate_control

# Train a logistic regression model to estimate incremental conversion probability
model = LogisticRegression(random_state=0).fit(data[['treatment']], data['converted'])

# Compute the incremental conversions and revenue
data['prob_control'] = model.predict_proba(control[['treatment']])[:, 1]
data['prob_treatment'] = model.predict_proba(treatment[['treatment']])[:, 1]
data['incremental_conversions'] = data['prob_treatment'] - data['prob_control']
data['incremental_revenue'] = data['incremental_conversions'] * data['revenue']

# Compute the incremental ROI
incremental_profit = data['incremental_revenue'].sum() - treatment['cost'].sum()
incremental_roi = incremental_profit / treatment['cost'].sum()

print('Incremental conversion rate:', incremental_conversion_rate)
print('Incremental lift:', incremental_lift)
print('Incremental ROI:', incremental_roi)




ValueError: Length of values (2) does not match length of index (5)