In [97]:
# Dataset: https://www.kaggle.com/datasets/faviovaz/marketing-ab-testing
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [99]:
# Load the marketing A/B test CSV; the path is relative to the working directory.
csv_path = r"marketing_AB.csv"
df = pd.read_csv(csv_path)

In [100]:
# Show the column labels of the loaded frame.
column_index = df.columns
column_index

Index(['Unnamed: 0', 'user id', 'test group', 'converted', 'total ads',
       'most ads day', 'most ads hour'],
      dtype='object')

In [101]:
# Remove 'Unnamed: 0' (presumably a saved row index - confirm) and 'user id';
# the cells below only read 'test group' and 'converted'.
# Reassigning with errors='ignore' instead of inplace=True keeps this cell
# re-runnable: a second execution no longer raises KeyError for columns that
# have already been dropped.
df = df.drop(columns=['Unnamed: 0', 'user id'], errors='ignore')

In [102]:
# Count missing values per column.
missing_mask = df.isna()
missing_mask.sum()

test group       0
converted        0
total ads        0
most ads day     0
most ads hour    0
dtype: int64

In [103]:
# List the distinct labels found in the 'test group' column.
test_group_labels = df['test group']
test_group_labels.unique()

array(['ad', 'psa'], dtype=object)

In [111]:
# Control arm: rows labelled 'psa' (people who saw the public service announcement).
is_psa = df['test group'] == 'psa'
control_group = df[is_psa]

In [113]:
# Experiment arm: rows labelled 'ad' (people who saw the ad).
is_ad = df['test group'] == 'ad'
experiment_group = df[is_ad]

To apply a t-test for the two groups, the following conditions must be met:
* The two groups are independent.
* The data in each group are approximately normally distributed.
* Variances between the groups are equal (though a Welch's t-test can be used if variances are unequal).

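The two groups are clearly independent given that they are different test groups. The `converted` outcome is binary, so the raw data in each group are not themselves normally distributed; however, by the Central Limit Theorem, the sample mean (i.e., the conversion rate) of each group is approximately normally distributed when the groups are large, which is what the t-test relies on. We test whether the variances are equal. If they are not, we will apply Welch's t-test to account for this.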

In [116]:
# Extract the 'converted' outcome for each arm, cast to int so the test
# receives numeric values (presumably 0/1 - confirm against the raw column).
control_group = df.loc[df['test group'] == 'psa', 'converted'].astype(int)
experiment_group = df.loc[df['test group'] == 'ad', 'converted'].astype(int)

# Levene's test for equality of variances between the two arms.
# Called through the `stats` namespace: the notebook only does
# `from scipy import stats`, so a bare `levene(...)` raises NameError
# on a fresh kernel.
stat, p_value = stats.levene(control_group, experiment_group)

print(f"Levene’s Test Statistic: {stat}")
print(f"P-Value: {p_value}")

# Decide at the 5% significance level whether the variances differ.
if p_value < 0.05:
    print("Variances are significantly different (heterogeneous variances).")
else:
    print("No significant difference in variances (homogeneous variances).")

Levene’s Test Statistic: 54.322884225785735
P-Value: 1.703305262368155e-13
Variances are significantly different (heterogeneous variances).


Since the variances are significantly different, we use Welch's t-test.

In [119]:
# Extract the 'converted' outcome for each arm as integers
# (presumably 0/1 - confirm against the raw column).
control_group = df.loc[df['test group'] == 'psa', 'converted'].astype(int)
experiment_group = df.loc[df['test group'] == 'ad', 'converted'].astype(int)

# Welch's t-test (equal_var=False), which does not assume equal variances.
# Called through the `stats` namespace: the notebook only does
# `from scipy import stats`, so a bare `ttest_ind(...)` raises NameError
# on a fresh kernel.
# The control arm is passed first, so a negative statistic means the control
# mean is below the experiment mean.
t_stat, p_value = stats.ttest_ind(control_group, experiment_group, equal_var=False)

print(f"T-Statistic: {t_stat}")
print(f"P-Value: {p_value}")

# Decide at the 5% significance level whether the conversion rates differ.
if p_value < 0.05:
    print("Reject the null hypothesis: Significant difference in conversion rates between groups.")
else:
    print("Fail to reject the null hypothesis: No significant difference in conversion rates between groups.")

T-Statistic: -8.657162314551679
P-Value: 5.107607598758945e-18
Reject the null hypothesis: Significant difference in conversion rates between groups.


**We conclude that there is a statistically significant difference between the conversion rate of those who saw the PSA and those who saw the ad. Because the t-statistic is negative with the PSA group passed first, the ad group has the higher conversion rate. This suggests that the ad is more effective at converting customers and could be prioritized in future campaigns.**