In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Load the dataset
df = pd.read_csv('ab_data.csv')

print("--- Initial Data Inspection ---")
print(f"Dataset shape: {df.shape}")
print(f"Number of unique users: {df.user_id.nunique()}")
print(df.head())

# --- Data Cleaning ---

# Check for users who are in the treatment group but saw the old page, and vice versa
mismatched_users = df[((df['group'] == 'treatment') & (df['landing_page'] == 'old_page')) |
                      ((df['group'] == 'control') & (df['landing_page'] == 'new_page'))]

print(f"\nFound {len(mismatched_users)} mismatched rows to remove.")

# Remove these mismatched rows
df.drop(mismatched_users.index, inplace=True)

# Check for and remove any duplicate user_id entries
duplicate_users = df[df.duplicated('user_id', keep=False)]
print(f"Found {len(duplicate_users)} duplicate user entries to remove.")
df.drop_duplicates(subset='user_id', keep='first', inplace=True)


print("\n--- Data Cleaning Complete ---")
print(f"Final dataset shape: {df.shape}")

--- Initial Data Inspection ---
Dataset shape: (294478, 5)
Number of unique users: 290584
   user_id                   timestamp      group landing_page  converted
0   851104  2017-01-21 22:11:48.556739    control     old_page          0
1   804228  2017-01-12 08:01:45.159739    control     old_page          0
2   661590  2017-01-11 16:55:06.154213  treatment     new_page          0
3   853541  2017-01-08 18:28:03.143765  treatment     new_page          0
4   864975  2017-01-21 01:52:26.210827    control     old_page          1

Found 3893 mismatched rows to remove.
Found 2 duplicate user entries to remove.

--- Data Cleaning Complete ---
Final dataset shape: (290584, 5)


In [2]:
# Separate the data into control and treatment groups
control_group = df[df['group'] == 'control']
treatment_group = df[df['group'] == 'treatment']

# Calculate key metrics for each group
control_users = len(control_group)
control_conversions = control_group['converted'].sum()
control_conversion_rate = control_conversions / control_users

treatment_users = len(treatment_group)
treatment_conversions = treatment_group['converted'].sum()
treatment_conversion_rate = treatment_conversions / treatment_users

print("--- A/B Test Metrics ---")
print(f"Control Group Users: {control_users}")
print(f"Control Group Conversions: {control_conversions}")
print(f"Control Group Conversion Rate: {control_conversion_rate:.4f}\n")

print(f"Treatment Group Users: {treatment_users}")
print(f"Treatment Group Conversions: {treatment_conversions}")
print(f"Treatment Group Conversion Rate: {treatment_conversion_rate:.4f}")

--- A/B Test Metrics ---
Control Group Users: 145274
Control Group Conversions: 17489
Control Group Conversion Rate: 0.1204

Treatment Group Users: 145310
Treatment Group Conversions: 17264
Treatment Group Conversion Rate: 0.1188


In [3]:
# Prepare the data for the Z-test
conversions = np.array([treatment_conversions, control_conversions])
nobs = np.array([treatment_users, control_users]) # nobs = number of observations

# Perform the Z-test
z_stat, p_value = proportions_ztest(conversions, nobs=nobs, alternative='two-sided')

print("--- Statistical Test Results ---")
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

--- Statistical Test Results ---
Z-statistic: -1.3109
P-value: 0.1899


In [4]:
alpha = 0.05

print("--- Conclusion ---")
if p_value <= alpha:
    print("Result: Reject the Null Hypothesis.")
    print("Conclusion: The new design has a statistically significant different conversion rate compared to the old design.")
else:
    print("Result: Fail to Reject the Null Hypothesis.")
    print("Conclusion: We do not have enough evidence to say that the new design has a different conversion rate. The observed difference is likely due to random chance.")

--- Conclusion ---
Result: Fail to Reject the Null Hypothesis.
Conclusion: We do not have enough evidence to say that the new design has a different conversion rate. The observed difference is likely due to random chance.
