In [None]:
import pandas as pd

import numpy as np

from datetime import datetime, timedelta



# Generate mock A/B test data.
# Builds one row per user for a Control and a Treatment group, writes the
# table to ab_test_results_mock_data.csv and prints a 10-row sample.

np.random.seed(42)  # fixed seed so every run regenerates the same dataset
num_users_per_group = 5000
start_date = datetime(2023, 11, 1)
test_duration_days = 14  # 2 week test

# Control Group (A)
control_user_ids = [f'UserA_{10000+i}' for i in range(num_users_per_group)]
control_group_assignment = ['Control'] * num_users_per_group
# Baseline conversion probability of 10% for control (one 0/1 draw per user)
control_conversions = np.random.binomial(1, 0.10, num_users_per_group)
# Clicks per user: integers 0..14 (randint's upper bound is exclusive)
control_clicks = np.random.randint(0, 15, num_users_per_group)
# Page views = clicks + 1..4, so page views always exceed clicks
control_page_views = control_clicks + np.random.randint(1, 5, num_users_per_group)
# Defensive floor of 1 page view (already implied by the +1..4 offset above)
control_page_views = np.maximum(1, control_page_views)

# Treatment Group (B) - let's assume a slight lift
treatment_user_ids = [f'UserB_{20000+i}' for i in range(num_users_per_group)]
treatment_group_assignment = ['Treatment'] * num_users_per_group
# Target conversion probability of 11.5% for treatment, i.e. a 1.5
# percentage-point absolute lift over the 10% control baseline
treatment_conversions = np.random.binomial(1, 0.115, num_users_per_group)
# Slightly more clicks than control: integers 0..16
treatment_clicks = np.random.randint(0, 17, num_users_per_group)
treatment_page_views = treatment_clicks + np.random.randint(1, 5, num_users_per_group)
treatment_page_views = np.maximum(1, treatment_page_views)

# Combine data (control rows first, then treatment rows)
all_user_ids = control_user_ids + treatment_user_ids
all_groups = control_group_assignment + treatment_group_assignment
all_conversions = np.concatenate([control_conversions, treatment_conversions])
all_clicks = np.concatenate([control_clicks, treatment_clicks])
all_page_views = np.concatenate([control_page_views, treatment_page_views])

# Assign each user a random date within the test period (day offsets 0..13).
# NOTE(review): kept as one scalar draw per user; a single vectorised draw may
# consume the seeded stream differently - verify before changing.
all_dates = [
    (start_date + timedelta(days=np.random.randint(0, test_duration_days))).strftime('%Y-%m-%d')
    for _ in range(num_users_per_group * 2)
]

df_ab_test = pd.DataFrame({
    'UserID': all_user_ids,
    'Group': all_groups,
    'Date': all_dates,
    'PageViews': all_page_views,
    'Clicks': all_clicks,
    'Converted': all_conversions
})

# Ensure clicks are not more than page views. Done as a vectorised
# element-wise minimum rather than a row-by-row apply; with the current
# generation this is a no-op safeguard.
df_ab_test['Clicks'] = np.minimum(df_ab_test['Clicks'], df_ab_test['PageViews'])

# Save to CSV
df_ab_test.to_csv('ab_test_results_mock_data.csv', index=False)
print("Mock A/B test data generated: ab_test_results_mock_data.csv")
print(df_ab_test.sample(10, random_state=42))







Mock A/B test data generated: ab_test_results_mock_data.csv
           UserID      Group        Date  PageViews  Clicks  Converted
6252  UserB_21252  Treatment  2023-11-07          8       4          0
4684  UserA_14684    Control  2023-11-10          5       1          0
1731  UserA_11731    Control  2023-11-04          8       6          0
4742  UserA_14742    Control  2023-11-12          3       2          0
4521  UserA_14521    Control  2023-11-12         12       8          0
6340  UserB_21340  Treatment  2023-11-03          7       3          0
576   UserA_10576    Control  2023-11-08          4       0          0
5202  UserB_20202  Treatment  2023-11-05         15      14          1
6363  UserB_21363  Treatment  2023-11-12         16      14          1
439   UserA_10439    Control  2023-11-08          9       8          0


In [3]:
# Basic check of conversion rates:
# the mean of the 0/1 `Converted` flag within each group is that group's rate.
conversion_rate_by_group = df_ab_test.groupby('Group')['Converted'].mean()

print("\nOverall Conversion Rates by Group:")
print(conversion_rate_by_group)


Overall Conversion Rates by Group:
Group
Control      0.0958
Treatment    0.1180
Name: Converted, dtype: float64


# Understand the A/B Test Data

# 🧪 Understanding the A/B Test

## 🎯 Objective
The goal of this A/B test is to determine whether introducing a **new feature** (Treatment group) has a **statistically significant impact** on the website’s **conversion rate** compared to the existing version (Control group).

---

## 📌 Hypotheses

- **Null Hypothesis (H₀):**  
  The new feature has no effect on conversion rate.  
  $$
  H_0: p_{\text{treatment}} = p_{\text{control}}
  $$

- **Alternative Hypothesis (H₁):**  
  The new feature changes the conversion rate.  
  $$
  H_1: p_{\text{treatment}} \neq p_{\text{control}}
  $$

- If testing for **improvement only**:  
  $$
  H_1: p_{\text{treatment}} > p_{\text{control}}
  $$

---

## 🔑 Key Metrics of Interest
- **Primary Metric:**  
  - **Conversion Rate** = (Number of Conversions ÷ Total Users)  
  - Measured using the `Converted` field (1 = converted, 0 = not converted).

- **Secondary Metrics (supporting analysis):**  
  - Average Clicks per user (`Clicks`)  
  - Average Page Views per user (`PageViews`)

---

## 👥 Group Assignment
- **Control Group (A):** 5,000 users (`UserA_*`)  
- **Treatment Group (B):** 5,000 users (`UserB_*`)  
- Users were **randomly assigned** to each group to ensure balance.

---

## ⏳ Test Duration
- **Start Date:** November 1, 2023  
- **End Date:** November 14, 2023  
- **Duration:** 14 days (2 weeks)  
- Each user’s interaction date was randomly distributed within this test period.

---

✅ **In summary:**  
We are testing whether the new feature (Treatment) leads to a **statistically significant improvement in conversion rate** compared to the Control group, using a **two-proportion z-test** as the statistical method.


# Data Loading and Cleaning

In [4]:
import pandas as pd

# Reload the dataset from the CSV written by the generation step
csv_path = "ab_test_results_mock_data.csv"
df_ab_test = pd.read_csv(csv_path)

# Show the first five rows as a quick sanity check
print(df_ab_test.head())

        UserID    Group        Date  PageViews  Clicks  Converted
0  UserA_10000  Control  2023-11-08          5       2          0
1  UserA_10001  Control  2023-11-14          8       7          1
2  UserA_10002  Control  2023-11-03          7       6          0
3  UserA_10003  Control  2023-11-06          6       2          0
4  UserA_10004  Control  2023-11-03          6       5          0


In [5]:
# 1. Check for missing values
# ---------------------------
# Count null entries column by column; a zero everywhere means nothing is missing.
missing_per_column = df_ab_test.isnull().sum()

print("\nMissing values per column:")
print(missing_per_column)


Missing values per column:
UserID       0
Group        0
Date         0
PageViews    0
Clicks       0
Converted    0
dtype: int64


In [6]:
# 2. Check for duplicate users
# ---------------------------
# Flag every UserID that repeats an earlier row, then count the flags.
is_repeat_id = df_ab_test["UserID"].duplicated()
duplicate_users = is_repeat_id.sum()

print(f"\nNumber of duplicate UserIDs: {duplicate_users}")


Number of duplicate UserIDs: 0


In [7]:
# 3. Check if any user is in more than one group
# ---------------------------
# Count the distinct group labels attached to each user. Any user with more
# than one label is contaminated; testing "> 1" (rather than "== 2") also
# catches a user carrying three or more labels, e.g. after a label typo.
groups_per_user = df_ab_test.groupby("UserID")["Group"].nunique()
overlap = groups_per_user.gt(1).sum()
print(f"Users appearing in both groups: {overlap}")

Users appearing in both groups: 0


In [8]:
# 4. Outlier check: Clicks vs PageViews
# ---------------------------
# A row is invalid when it records more clicks than page views.
exceeds_views = df_ab_test["Clicks"].gt(df_ab_test["PageViews"])
invalid_rows = df_ab_test.loc[exceeds_views]

print(f"Rows where Clicks > PageViews: {invalid_rows.shape[0]}")

Rows where Clicks > PageViews: 0


In [9]:
# 5. Summary statistics for Clicks & PageViews
# ---------------------------
# describe() reports count, mean, std, min, quartiles and max per column.
engagement_columns = ["Clicks", "PageViews"]
engagement_stats = df_ab_test[engagement_columns].describe()

print("\nSummary statistics:")
print(engagement_stats)


Summary statistics:
             Clicks     PageViews
count  10000.000000  10000.000000
mean       7.500500     10.004400
std        4.665295      4.793963
min        0.000000      1.000000
25%        3.000000      6.000000
50%        7.500000     10.000000
75%       11.000000     14.000000
max       16.000000     20.000000


In [10]:
# ---------------------------
# Check group assignment rules
# ---------------------------
# The UserID prefix encodes the intended arm, so a prefix/label mismatch
# marks a wrongly assigned row.
user_ids = df_ab_test["UserID"]
group_labels = df_ab_test["Group"]

# Rule: UserA_* should be in Control group
control_mismatch = user_ids.str.startswith("UserA_") & group_labels.ne("Control")
invalid_control = df_ab_test.loc[control_mismatch]

# Rule: UserB_* should be in Treatment group
treatment_mismatch = user_ids.str.startswith("UserB_") & group_labels.ne("Treatment")
invalid_treatment = df_ab_test.loc[treatment_mismatch]

print(f"Invalid Control assignments: {invalid_control.shape[0]}")
print(f"Invalid Treatment assignments: {invalid_treatment.shape[0]}")

# ---------------------------
# Quick verification: Count users per group
# ---------------------------
users_per_group = group_labels.value_counts()
print("\nUser counts by group:")
print(users_per_group)


Invalid Control assignments: 0
Invalid Treatment assignments: 0

User counts by group:
Group
Control      5000
Treatment    5000
Name: count, dtype: int64


# Key metrics calculation

In [11]:
# ---------------------------
# Calculate key metrics per group
# ---------------------------
# One row per group: user count, conversions, click/page-view totals and
# averages, plus the conversion rate (conversions divided by users).
summary = (
    df_ab_test
    .groupby("Group")
    .agg(
        Users=pd.NamedAgg(column="UserID", aggfunc="count"),
        Conversions=pd.NamedAgg(column="Converted", aggfunc="sum"),
        Total_Clicks=pd.NamedAgg(column="Clicks", aggfunc="sum"),
        Total_PageViews=pd.NamedAgg(column="PageViews", aggfunc="sum"),
        Avg_Clicks=pd.NamedAgg(column="Clicks", aggfunc="mean"),
        Avg_PageViews=pd.NamedAgg(column="PageViews", aggfunc="mean"),
    )
    .reset_index()
    .assign(ConversionRate=lambda frame: frame["Conversions"] / frame["Users"])
)

print(summary)


       Group  Users  Conversions  Total_Clicks  Total_PageViews  Avg_Clicks  \
0    Control   5000          479         34674            47263      6.9348   
1  Treatment   5000          590         40331            52781      8.0662   

   Avg_PageViews  ConversionRate  
0         9.4526          0.0958  
1        10.5562          0.1180  


In [12]:
# ---------------------------
# Observed differences between groups
# ---------------------------

# Index the summary by group so each arm can be pulled out as a single row
metrics = summary.set_index("Group")
control_row = metrics.loc["Control"]
treatment_row = metrics.loc["Treatment"]

# Absolute difference in conversion rate (treatment minus control)
diff_cr = treatment_row["ConversionRate"] - control_row["ConversionRate"]

# Relative lift (%), expressed against the control rate
rel_lift = (diff_cr / control_row["ConversionRate"]) * 100

# Differences in the secondary engagement metrics
diff_clicks = treatment_row["Avg_Clicks"] - control_row["Avg_Clicks"]
diff_pageviews = treatment_row["Avg_PageViews"] - control_row["Avg_PageViews"]

print(f"Observed Difference in Conversion Rate: {diff_cr:.4f} ({rel_lift:.2f} % lift)")
print(f"Observed Difference in Avg Clicks: {diff_clicks:.4f}")
print(f"Observed Difference in Avg PageViews: {diff_pageviews:.4f}")


Observed Difference in Conversion Rate: 0.0222 (23.17 % lift)
Observed Difference in Avg Clicks: 1.1314
Observed Difference in Avg PageViews: 1.1036


# Statistical significance testing 

In [13]:
import numpy as np
from statsmodels.stats.proportion import proportions_ztest, confint_proportions_2indep

# Conversion counts and sample sizes for each arm, read from the metrics table
treatment_success = metrics.at["Treatment", "Conversions"]
treatment_total = metrics.at["Treatment", "Users"]
control_success = metrics.at["Control", "Conversions"]
control_total = metrics.at["Control", "Users"]

# ---------------------------
# Z-test for proportions
# ---------------------------
# Treatment is listed first, so a positive z means treatment converts more.
count = np.array([treatment_success, control_success])
nobs = np.array([treatment_total, control_total])

z_stat, p_val = proportions_ztest(count=count, nobs=nobs, alternative="two-sided")

print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_val:.5f}")

# ---------------------------
# Confidence Interval for difference
# ---------------------------
# Wald interval for (treatment rate - control rate).
ci_bounds = confint_proportions_2indep(
    treatment_success, treatment_total,
    control_success, control_total,
    method='wald',
)
ci_low, ci_high = ci_bounds

print(f"95% Confidence Interval for Difference: [{ci_low:.4f}, {ci_high:.4f}]")


Z-statistic: 3.5924
P-value: 0.00033
95% Confidence Interval for Difference: [0.0101, 0.0343]


In [14]:
# Decision rule: reject the null hypothesis when the p-value is below alpha.
alpha = 0.05
is_significant = p_val < alpha
verdict = (
    "✅ Reject Null Hypothesis: Treatment has a statistically significant effect."
    if is_significant
    else "❌ Fail to Reject Null Hypothesis: No significant effect detected."
)
print(verdict)


✅ Reject Null Hypothesis: Treatment has a statistically significant effect.


# Result Interpretation

## 📊 Interpretation of Results
- The two-proportion Z-test compared conversion rates between the **Control group (A)** and the **Treatment group (B)**.  
- The **p-value** was below the significance threshold (α = 0.05), indicating that the difference in conversion rates is **statistically significant**.  
- The **95% confidence interval** for the difference in conversion rates did not include zero, further supporting the conclusion that the new feature had an effect.  
- The observed difference was approximately **+2.2 percentage points** (9.58% → 11.80%), meaning the treatment group’s conversion rate was about **23% higher relative to the control group**.  

## ✅ Conclusion
- The new feature had a **positive and statistically significant impact** on conversion rates compared to the existing version.  
- This suggests that rolling out the feature could lead to higher conversions across the user base.  

## 💡 Recommendations
1. **Roll Out the Feature Broadly**  
   Since the effect is statistically and practically significant, the feature should be deployed to all users.  

2. **Monitor Long-Term Impact**  
   Continue tracking conversion rates after rollout to confirm the uplift persists outside of the controlled test environment.  

3. **Iterate for Optimization**  
   Even with positive results, A/B testing should be an iterative process. Additional variants of the feature can be tested to maximize performance.  
