In [38]:
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

import warnings
warnings.filterwarnings("ignore")


# Load CSV
df = pd.read_csv("marketing_AB.csv")

# Optional: Drop index column
df.drop(columns=['Unnamed: 0'], inplace=True)

# Clean column names (strip spaces)
df.columns = df.columns.str.strip()

# Rename 'test group' to 'group' for convenience
df.rename(columns={'test group': 'group'}, inplace=True)

# Check for unique groups
print("Groups in dataset:", df['group'].unique())
print("Sample:\n", df.head())


Groups in dataset: ['ad' 'psa']
Sample:
    user id group  converted  total ads most ads day  most ads hour
0  1069124    ad      False        130       Monday             20
1  1119715    ad      False         93      Tuesday             22
2  1144181    ad      False         21      Tuesday             18
3  1435133    ad      False        355      Tuesday             10
4  1015700    ad      False        276       Friday             14


In [41]:
# Conversion rate by group
conversion_rate = df.groupby('group')['converted'].mean()
print("\nConversion Rate:\n", conversion_rate)



Conversion Rate:
 group
ad     0.025547
psa    0.017854
Name: converted, dtype: float64


In [44]:
#Performing A/B Hypothesis Test

In [46]:
# Counts for each group
control_converted = df[df['group'] == 'control']['converted'].sum()
control_total = df[df['group'] == 'control']['converted'].count()

treatment_converted = df[df['group'] == 'treatment']['converted'].sum()
treatment_total = df[df['group'] == 'treatment']['converted'].count()

# Z-test
z_stat, p_val = proportions_ztest(
    [treatment_converted, control_converted],
    [treatment_total, control_total]
)

print(f"\nZ-Statistic: {z_stat:.3f}")
print(f"P-Value: {p_val:.4f}")



Z-Statistic: nan
P-Value: nan


In [49]:
#Exporting as Cleaned CSV

In [51]:
df.to_csv("campaign_analysis_cleaned.csv", index=False)