In [1]:
#Project: A/B Testing for Homepage Promotion Optimization
#Module: Data Simulation for A/B Experiment

#Description:
#This script simulates user-level exposure data for a homepage promotion A/B test
#in a mobile coffee ordering app. The data-generating process reflects realistic
#business assumptions, including user segmentation, click-through behavior,
#and conversion dynamics.

#Author: Nan Zhang


In [None]:
import numpy as np
import pandas as pd

In [2]:

#1.Experiment parameters

# Seed NumPy's global generator up front so every random draw made later in
# the notebook is reproducible.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Number of simulated users exposed to the homepage promotion.
N_USERS = 30_000

# Click-through rates fed into the click simulation, one per experiment arm.
BASELINE_CTR_A = 0.085  # control (group A)
BASELINE_CTR_B = 0.10   # treatment (group B)


In [3]:
#2.User attributes (one independent draw per user for each attribute)

# Sequential IDs running from 1 to N_USERS inclusive.
user_id = np.arange(N_USERS) + 1

# Experiment arm: every user is assigned to A or B with probability 0.5 each,
# so the realised group sizes are only approximately equal.
experiment_group = np.random.choice(a=['A', 'B'], p=[0.5, 0.5], size=N_USERS)

# User type: 40% new, 60% returning.
user_type = np.random.choice(a=['new', 'returning'], p=[0.4, 0.6], size=N_USERS)

# Time slot of the exposure: 35% morning, 40% afternoon, 25% evening.
time_slot = np.random.choice(
    a=['morning', 'afternoon', 'evening'],
    p=[0.35, 0.40, 0.25],
    size=N_USERS,
)


In [4]:
#3.Simulate clicks
def get_click_prob(group):
    """Return the click-through probability for one experiment group.

    Group 'A' maps to BASELINE_CTR_A; any other value maps to BASELINE_CTR_B.
    """
    if group == 'A':
        return BASELINE_CTR_A
    return BASELINE_CTR_B

# Per-user click probability, looked up from each user's experiment group.
click_prob = np.array(list(map(get_click_prob, experiment_group)))
# One Bernoulli draw per user: 1 = clicked the promotion, 0 = did not.
is_clicked = np.random.binomial(n=1, p=click_prob)


In [5]:
#4.Simulate conversion rate
def conversion_probability(group, user_type, time_slot):
    """Return the conversion probability for a single user.

    Args:
        group: experiment arm; 'A' starts from 0.20, any other value from 0.24.
        user_type: 'new' lowers the probability by 0.05; other values leave it
            unchanged.
        time_slot: 'morning' adds 0.03, 'evening' adds 0.01; other values add
            nothing.

    Returns:
        The adjusted probability as a float, clamped to the range [0.01, 0.95].
    """
    # Group B converts more efficiently than group A.
    if group == 'A':
        prob = 0.20
    else:
        prob = 0.24

    # New users are less likely to convert.
    if user_type == 'new':
        prob = prob - 0.05

    # Time-of-day uplift; adding 0.0 leaves the value untouched.
    prob = prob + (
        0.03 if time_slot == 'morning'
        else 0.01 if time_slot == 'evening'
        else 0.0
    )

    # Keep the result a valid, non-degenerate probability.
    if prob < 0.01:
        return 0.01
    if prob > 0.95:
        return 0.95
    return prob


In [6]:
# Conversion probability for every user, derived from that user's
# (experiment group, user type, time slot) triple.
conversion_prob = np.array([
    conversion_probability(*attrs)
    for attrs in zip(experiment_group, user_type, time_slot)
])

# A conversion outcome is drawn for every user, then forced to 0 for anyone
# who did not click: only clicked users can convert.
is_converted = np.where(
    is_clicked != 1,
    0,
    np.random.binomial(n=1, p=conversion_prob),
)


In [7]:
#5.Order amount and subsidy

# Values are drawn for all N_USERS and then masked, so non-converted users
# end up with 0 for both columns.

# Order amount: normal(mean=22, sd=4), rounded to 2 decimals.
order_amount = np.where(
    is_converted != 1,
    0,
    np.random.normal(22, 4, N_USERS).round(2),
)

# Subsidy: uniform on [2, 5), rounded to 2 decimals.
subsidy = np.where(
    is_converted != 1,
    0,
    np.random.uniform(2, 5, N_USERS).round(2),
)


In [8]:
#6.Assemble the user-level table

# One row per simulated user; keyword order fixes the column order.
df = pd.DataFrame(dict(
    user_id=user_id,
    experiment_group=experiment_group,
    user_type=user_type,
    time_slot=time_slot,
    is_clicked=is_clicked,
    is_converted=is_converted,
    order_amount=order_amount,
    subsidy=subsidy,
))

# Preview the first five rows.
df.head(n=5)


Unnamed: 0,user_id,experiment_group,user_type,time_slot,is_clicked,is_converted,order_amount,subsidy
0,1,A,returning,afternoon,0,0,0.0,0.0
1,2,B,returning,evening,1,0,0.0,0.0
2,3,B,returning,afternoon,0,0,0.0,0.0
3,4,B,new,morning,0,0,0.0,0.0
4,5,A,returning,morning,0,0,0.0,0.0


In [10]:

# Persist the simulated dataset to the working directory, without the
# DataFrame index column.
df.to_csv(path_or_buf='luckin_ab_test_simulated_data.csv', index=False)



In [12]:
from statsmodels.stats.proportion import proportions_ztest


In [13]:

#7.Overall A/B test

# Per-group totals: exposed users, clicks and conversions.
summary = df.groupby("experiment_group").agg(
    users=("user_id", "count"),
    clicks=("is_clicked", "sum"),
    conversions=("is_converted", "sum"),
)

# CTR & CVR
# Both rates divide by exposed users, so CVR here means conversions per
# exposed user, not conversions per click.
summary = summary.assign(
    CTR=summary["clicks"] / summary["users"],
    CVR=summary["conversions"] / summary["users"],
)

print(summary)


                  users  clicks  conversions       CTR       CVR
experiment_group                                                
A                 14990    1274          254  0.084990  0.016945
B                 15010    1422          345  0.094737  0.022985


In [14]:
#7.1)CTR Z-test
# Two-sample proportions z-test on clicks out of exposed users; the inputs
# carry one entry per experiment group, in the row order of `summary`.
# NOTE(review): the sign of z presumably reflects first-row minus second-row
# proportion -- confirm against the statsmodels documentation.
z_ctr, p_ctr = proportions_ztest(
    summary.loc[:, "clicks"],
    summary.loc[:, "users"],
)

print(f"CTR z-score: {z_ctr:.3f}, p-value: {p_ctr:.4f}")


CTR z-score: -2.952, p-value: 0.0032


In [15]:
#7.2)CVR Z-test
# Two-sample proportions z-test on conversions out of exposed users; the
# inputs carry one entry per experiment group, in the row order of `summary`.
z_cvr, p_cvr = proportions_ztest(
    summary.loc[:, "conversions"],
    summary.loc[:, "users"],
)

print(f"CVR z-score: {z_cvr:.3f}, p-value: {p_cvr:.4f}")


CVR z-score: -3.739, p-value: 0.0002


In [16]:
#8.Segmented A/B test (by user type)
#8.1) Summarise each (user type, experiment group) segment
# One row per segment with its exposed users, conversions, and CVR
# (conversions per exposed user).
seg_summary = (
    df.groupby(["user_type", "experiment_group"])
      .agg(users=("user_id", "count"), conversions=("is_converted", "sum"))
      .reset_index()
      .assign(CVR=lambda seg: seg["conversions"] / seg["users"])
)

print(seg_summary)


   user_type experiment_group  users  conversions       CVR
0        new                A   6019           84  0.013956
1        new                B   6018          110  0.018278
2  returning                A   8971          170  0.018950
3  returning                B   8992          235  0.026134


In [17]:
#Segment-level z-test: compare conversion between groups A and B separately
#for new and for returning users.
seg_results = []

# The loop variable is deliberately NOT named `user_type`. At notebook scope a
# `for` target rebinds the global name, which would replace the per-user
# `user_type` array created during data simulation with the string
# "returning" and silently break any later re-run of the cells that use it.
for segment in ["new", "returning"]:
    # Rows belonging to the current user-type segment only.
    subset = df[df["user_type"] == segment]

    # Exposed users and conversions per experiment group within the segment.
    agg = (
        subset.groupby("experiment_group")
              .agg(
                  users=("user_id", "count"),
                  conversions=("is_converted", "sum")
              )
    )

    # Two-sample proportions z-test on conversions out of exposed users.
    z, p = proportions_ztest(
        count=agg["conversions"],
        nobs=agg["users"]
    )

    # Collect one record per segment; the frame is built once after the loop.
    seg_results.append({
        "user_type": segment,
        "A_users": agg.loc["A", "users"],
        "B_users": agg.loc["B", "users"],
        "A_CVR": agg.loc["A", "conversions"] / agg.loc["A", "users"],
        "B_CVR": agg.loc["B", "conversions"] / agg.loc["B", "users"],
        "z_score": z,
        "p_value": p
    })

seg_ztest = pd.DataFrame(seg_results)
print(seg_ztest)


   user_type  A_users  B_users     A_CVR     B_CVR   z_score   p_value
0        new     6019     6018  0.013956  0.018278 -1.883085  0.059689
1  returning     8971     8992  0.018950  0.026134 -3.243120  0.001182
