In [4]:
# Project: A/B Testing for Homepage Promotion Optimization
# Module: Data Simulation for A/B Experiment 

# Research goal / hypotheses:
# - Variant B adds a "low-price alert" on the homepage.
# - Hypothesis H1 (engagement): The alert increases attention and thus CTR (more users click into the product page).
# - Hypothesis H2 (conversion): The alert may not increase conversion:
#     (a) Users click because price looks attractive, but then discover constraints (e.g., minimum delivery threshold),
#         leading to higher clicks but lower post-click conversion; OR
#     (b) Price-sensitive segments (often tier3&4 cities) benefit more from the alert than tier1&2 cities.
#
# What this notebook includes:
# 1) Stable hash-based traffic allocation.
# 2) An AA test sanity check.
# 3) City tier dimension + heterogeneous treatment effect analysis.


## Experiment Goal
This experiment evaluates a redesigned homepage (variant **B**) that introduces a *low-price alert*.

**Key question:** Does highlighting low prices increase both clicks and overall conversion at the same time?

We decompose conversion into **CTR** (click-through rate) and **PCVR** (post-click conversion rate) to distinguish these effects.

## Metrics
- **CTR** = clicks / exposed users (page attractiveness)
- **PCVR** = conversions / clicks (post-click efficiency)
- **CVR** = conversions / exposed users (overall business outcome)

We report all three metrics to understand where gains or losses occur along the funnel.

## Overall Results (A/B Test)
We first compare overall performance between control and treatment.

- CTR captures the immediate impact of the new page design.
- PCVR helps identify potential post-click friction.
- CVR reflects the net effect on conversions.

Statistical significance is assessed using two-sided, two-sample z-tests for proportions.

## Segmentation Analysis
To understand heterogeneous treatment effects, we analyze results by key business segments **independently**:

- **City tier**: tier 1–2 vs. tier 3–4
- **User type**: new vs. returning users

Segments are analyzed separately to preserve statistical power and interpretability.

## Interpretation Notes
- An increase in CTR without a corresponding PCVR lift suggests improved attraction but unchanged or weaker post-click efficiency.
- Segment-level results help identify where the low-price alert is most effective and where alternative optimizations may be needed.

All segmentation analyses are exploratory and used for insight generation rather than strict decision making.

## Limitations
- The data are simulated, and the true treatment effects are known by construction.
- Results are intended to demonstrate experimental design and analysis practices rather than discover unknown effects.
- Long-term effects and behavioral changes are not modeled.

In [2]:
import numpy as np
import pandas as pd
import hashlib

from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import chisquare


In [3]:
# 1) Experiment parameters
# Every tunable knob of the simulation lives in this cell.

N_USERS = 30000
RANDOM_SEED = 42

# Baseline metrics (control A)
BASELINE_CTR_A = 0.085            # impression -> click
BASELINE_PCVR_A = 0.20            # click -> conversion (post-click conversion rate)

# Variant B intent: low-price alert increases CTR but may hurt post-click conversion for some segments
# NOTE: these are B's starting values only. click_probability() and
# post_click_conversion_probability() apply further city-tier adjustments
# on top of them for group B, so the realised B rates differ from these numbers.
BASELINE_CTR_B = 0.095
BASELINE_PCVR_B = 0.19

# Hash split configuration (stable assignment by user_id)
# SPLIT_B is the share of hash buckets routed to variant B; SALT is mixed into
# the hashed key so this experiment's split is specific to this salt string.
SPLIT_B = 0.50
SALT = "luckin_homepage_low_price_alert_v1"

# Seed NumPy's legacy global RNG; the np.random.* draws in later cells are
# reproducible only when the cells are executed in order from a fresh kernel.
np.random.seed(RANDOM_SEED)


In [8]:
# 2) User-level attributes + stable hash-based assignment

# Sequential user IDs 1..N_USERS (inclusive)
user_id = np.arange(N_USERS) + 1

def assign_group_by_hash(uid: int, salt: str = SALT, split_b: float = SPLIT_B) -> str:
    """Map a user to "A" or "B" deterministically from an MD5 hash.

    The string "<uid>_<salt>" is hashed with MD5; the first 8 hex digits are
    read as an integer and scaled into [0, 1). Users whose bucket value is
    below ``split_b`` get "B", everyone else gets "A".

    Because the result depends only on ``uid`` and ``salt``, the same user
    always lands in the same group on every rerun.

    Args:
        uid: User identifier (anything that formats cleanly into a string).
        salt: Experiment-specific string mixed into the hashed key.
        split_b: Fraction of the [0, 1) bucket range assigned to "B".

    Returns:
        "B" if the user's bucket is below ``split_b``, otherwise "A".
    """
    hashed_key = f"{uid}_{salt}".encode("utf-8")
    digest_prefix = hashlib.md5(hashed_key).hexdigest()[:8]
    # 8 hex digits -> integer in [0, 16**8), scaled to [0, 1)
    bucket = int(digest_prefix, 16) / 16**8
    if bucket < split_b:
        return "B"
    return "A"

# Deterministic A/B label per user (hash-based, does not consume the RNG)
experiment_group = np.array(list(map(assign_group_by_hash, user_id)))


def _sample_labels(labels, weights):
    """Draw one label per user from NumPy's global RNG with the given probabilities."""
    return np.random.choice(labels, size=N_USERS, p=weights)


# NOTE: the three draws below must stay in this order; they share the global RNG stream.

# City tier (proxy for price sensitivity)
# - tier_1: generally less price-sensitive
# - tier_2: medium
# - tier_3_4: more price-sensitive
city_tier = _sample_labels(["tier_1", "tier_2", "tier_3_4"], [0.25, 0.35, 0.40])

# User type
user_type = _sample_labels(["new", "returning"], [0.4, 0.6])

# Time slot
time_slot = _sample_labels(["morning", "afternoon", "evening"], [0.35, 0.40, 0.25])


In [14]:
# 3) Simulate clicks (CTR)
def click_probability(group: str, city_tier: str) -> float:
    """Return the click probability for one user.

    Group "A" starts from BASELINE_CTR_A; any other group starts from
    BASELINE_CTR_B. Group "B" additionally receives a city-tier uplift
    (price-sensitive tiers react more to the low-price alert). The result is
    clamped to [0.001, 0.95].

    Args:
        group: Experiment arm label ("A" or "B").
        city_tier: One of "tier_1", "tier_2", "tier_3_4".

    Returns:
        Click probability as a float in [0.001, 0.95].
    """
    # Uplift applied only to variant B; anything that is not tier_3_4 / tier_2
    # falls back to the tier_1 value.
    uplift_by_tier = {"tier_3_4": 0.020, "tier_2": 0.010}

    rate = BASELINE_CTR_A if group == "A" else BASELINE_CTR_B
    if group == "B":
        rate += uplift_by_tier.get(city_tier, 0.005)

    floored = max(rate, 0.001)
    return min(floored, 0.95)

# Per-user click probability, followed by one Bernoulli draw per user
click_prob = np.array(list(map(click_probability, experiment_group, city_tier)))
is_clicked = np.random.binomial(n=1, p=click_prob)


In [15]:
# 4) Simulate conversions (post-click conversion rate, PCVR)
def post_click_conversion_probability(group: str, user_type: str, time_slot: str, city_tier: str) -> float:
    """Return the probability that a user who clicked goes on to convert.

    Starts from BASELINE_PCVR_A for group "A" and BASELINE_PCVR_B otherwise,
    then applies, in order:
      1. a penalty for new users,
      2. a time-of-day bonus (morning > evening > afternoon),
      3. for group "B" only, a city-tier penalty modelling price-driven clicks
         that are later blocked by constraints such as a delivery threshold.
    The result is clamped to [0.01, 0.95].

    Args:
        group: Experiment arm label ("A" or "B").
        user_type: "new" or "returning".
        time_slot: "morning", "afternoon" or "evening".
        city_tier: "tier_1", "tier_2" or "tier_3_4".

    Returns:
        Post-click conversion probability as a float in [0.01, 0.95].
    """
    time_slot_bonus = {"morning": 0.03, "evening": 0.01}
    # tier_1 (and any unlisted tier) is treated as neutral for variant B
    variant_b_tier_penalty = {"tier_3_4": 0.03, "tier_2": 0.01}

    rate = BASELINE_PCVR_A if group == "A" else BASELINE_PCVR_B

    if user_type == "new":
        rate -= 0.05

    rate += time_slot_bonus.get(time_slot, 0.0)

    if group == "B":
        rate -= variant_b_tier_penalty.get(city_tier, 0.0)

    floored = max(rate, 0.01)
    return min(floored, 0.95)


In [16]:
# Per-user post-click conversion probability
pcvr = np.array(list(map(
    post_click_conversion_probability,
    experiment_group, user_type, time_slot, city_tier,
)))

# A conversion outcome is drawn for every user, then zeroed for users who
# never clicked: only clicked users can convert.
conversion_draw = np.random.binomial(n=1, p=pcvr)
is_converted = np.where(is_clicked == 1, conversion_draw, 0)


In [17]:
# 5) Order amount and subsidy (only for converted users)
# Both the typical basket size and the typical subsidy vary by city tier.
# NOTE: the two normal draws share the global RNG stream; keep amount before subsidy.

mean_by_city = {"tier_1": 24, "tier_2": 22, "tier_3_4": 20}
std_amount = 4

converted_mask = is_converted == 1

# Order amount: normal around the tier mean, rounded to cents; 0 for non-converters
mean_amount = np.array([mean_by_city[tier] for tier in city_tier])
raw_amount = np.random.normal(loc=mean_amount, scale=std_amount, size=N_USERS)
order_amount = np.where(converted_mask, np.round(raw_amount, 2), 0)

# Subsidy: slightly higher in lower-tier cities (more promotion);
# any tier other than tier_1 / tier_2 gets the 4.0 base.
subsidy_by_city = {"tier_1": 3.0, "tier_2": 3.5}
subsidy_base = np.array([subsidy_by_city.get(tier, 4.0) for tier in city_tier])
raw_subsidy = np.random.normal(loc=subsidy_base, scale=0.8, size=N_USERS)
# 0 for non-converters, and never negative for converters
subsidy = np.clip(np.where(converted_mask, np.round(raw_subsidy, 2), 0), 0, None)


In [18]:
# 6) Generate table
# One row per simulated user; column order is fixed by the keyword order below.
df = pd.DataFrame(dict(
    user_id=user_id,
    experiment_group=experiment_group,
    city_tier=city_tier,
    user_type=user_type,
    time_slot=time_slot,
    is_clicked=is_clicked,
    is_converted=is_converted,
    order_amount=order_amount,
    subsidy=subsidy,
))

df.head()


Unnamed: 0,user_id,experiment_group,city_tier,user_type,time_slot,is_clicked,is_converted,order_amount,subsidy
0,1,A,tier_3_4,new,morning,0,0,0.0,0.0
1,2,A,tier_3_4,returning,afternoon,0,0,0.0,0.0
2,3,A,tier_2,returning,evening,0,0,0.0,0.0
3,4,A,tier_1,returning,evening,0,0,0.0,0.0
4,5,A,tier_1,returning,evening,0,0,0.0,0.0


In [19]:
# Persist the simulated dataset. The path is defined once so the file that is
# written and the file name reported in the message cannot drift apart.
OUTPUT_CSV = "luckin_ab_test_simulated_data.csv"
df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved: {OUTPUT_CSV}  | rows =", len(df))


Saved: luckin_ab_test_simulated_data.csv  | rows = 30000


In [20]:
# 7) Utility functions: SRM / proportion z-tests

def srm_chisquare_test(df, group_col="experiment_group", expected_split=(0.5, 0.5), groups=("A", "B")):
    """Sample Ratio Mismatch check: does the observed traffic split deviate from the expected one?

    Runs a chi-square goodness-of-fit test of the observed group sizes against
    the sizes implied by ``expected_split``.

    Args:
        df: DataFrame with one row per exposed user.
        group_col: Column holding the group label.
        expected_split: Expected share (or ratio) per group, aligned with
            ``groups``. It is normalised to sum to 1, so ``(0.5, 0.5)`` and
            ``(1, 1)`` are equivalent.
        groups: Group labels to test, in the same order as ``expected_split``.
            Defaults to ("A", "B"); pass e.g. ("A1", "A2") for an AA split.
            Labels missing from the data count as 0 users.

    Returns:
        (counts, chi2, p) where ``counts`` is a {label: n_users} dict,
        ``chi2`` the test statistic and ``p`` its p-value.

    Raises:
        ValueError: if ``expected_split`` and ``groups`` differ in length or
            the expected shares do not have a positive sum.
    """
    labels = list(groups)
    shares = np.asarray(expected_split, dtype=float)
    if shares.ndim != 1 or len(shares) != len(labels):
        raise ValueError("expected_split must have exactly one entry per group label")
    total_share = shares.sum()
    if not total_share > 0:
        raise ValueError("expected_split must have a positive sum")
    # scipy's chisquare requires observed and expected totals to agree, so
    # ratios such as (1, 1) are rescaled to proportions first.
    shares = shares / total_share

    counts = df[group_col].value_counts().reindex(labels).fillna(0).astype(int)
    observed = counts.values
    expected = shares * observed.sum()
    chi2, p = chisquare(f_obs=observed, f_exp=expected)
    return counts.to_dict(), chi2, p

def proportion_z_test(success_a, n_a, success_b, n_b):
    """Two-sample z-test comparing the success proportions of groups A and B.

    Thin wrapper around statsmodels' ``proportions_ztest`` called with its
    default settings.

    Args:
        success_a: Number of successes in group A.
        n_a: Number of observations in group A.
        success_b: Number of successes in group B.
        n_b: Number of observations in group B.

    Returns:
        (z, p): the z statistic and its p-value, as returned by
        ``proportions_ztest`` for the inputs ordered (A, B).
    """
    successes = np.array([success_a, success_b])
    trials = np.array([n_a, n_b])
    z_stat, p_value = proportions_ztest(count=successes, nobs=trials)
    return z_stat, p_value


In [21]:
# 8) Overall A/B test summary + SRM check

# Funnel counts per arm, then the three rates derived from them.
summary = (
    df.groupby("experiment_group")
      .agg(
          users=("user_id", "count"),
          clicks=("is_clicked", "sum"),
          conversions=("is_converted", "sum"),
          revenue=("order_amount", "sum"),
          subsidy=("subsidy", "sum"),
      )
      .assign(
          CTR=lambda t: t["clicks"] / t["users"],
          # overall conversion per exposed user
          CVR=lambda t: t["conversions"] / t["users"],
          # post-click conversion; an arm with zero clicks yields NaN
          PCVR=lambda t: t["conversions"] / t["clicks"].replace(0, np.nan),
      )
)

print(summary)

# SRM check (should NOT be significant)
counts_dict, chi2, p_srm = srm_chisquare_test(df)
print("\nSRM check (A vs B traffic split):", counts_dict, f"chi2={chi2:.3f}, p={p_srm:.4f}")


                  users  clicks  conversions  revenue  subsidy       CTR  \
experiment_group                                                           
A                 15053    1267          228  4980.94   814.08  0.084169   
B                 14947    1650          280  5947.48  1017.30  0.110390   

                       CVR      PCVR  
experiment_group                      
A                 0.015146  0.179953  
B                 0.018733  0.169697  

SRM check (A vs B traffic split): {'A': 15053, 'B': 14947} chi2=0.375, p=0.5405


In [22]:
# 8.1) CTR z-test (A vs B)

# Per-arm rows of the summary table; reused by the CVR / PCVR tests in the next cell.
a, b = summary.loc["A"], summary.loc["B"]

z_ctr, p_ctr = proportion_z_test(
    success_a=a["clicks"], n_a=a["users"],
    success_b=b["clicks"], n_b=b["users"],
)
print(f"CTR | z={z_ctr:.3f}, p={p_ctr:.4f}  | A={a['CTR']:.4%}, B={b['CTR']:.4%}")


CTR | z=-7.664, p=0.0000  | A=8.4169%, B=11.0390%


In [23]:
# 8.2) Overall conversion z-test (CVR, per exposed user)

z_cvr, p_cvr = proportion_z_test(
    success_a=a["conversions"], n_a=a["users"],
    success_b=b["conversions"], n_b=b["users"],
)
print(f"CVR | z={z_cvr:.3f}, p={p_cvr:.4f}  | A={a['CVR']:.4%}, B={b['CVR']:.4%}")

# 8.3) Post-click conversion z-test (PCVR) - denominator is clicks, not exposed users
z_pcvr, p_pcvr = proportion_z_test(
    success_a=a["conversions"], n_a=a["clicks"],
    success_b=b["conversions"], n_b=b["clicks"],
)
print(f"PCVR | z={z_pcvr:.3f}, p={p_pcvr:.4f}  | A={a['PCVR']:.4%}, B={b['PCVR']:.4%}")


CVR | z=-2.407, p=0.0161  | A=1.5146%, B=1.8733%
PCVR | z=0.724, p=0.4691  | A=17.9953%, B=16.9697%


In [24]:
# 9) AA Test (Sanity Check)
# Purpose:
# - Validate randomization (hash split) and metric computation pipeline.
# - In an AA test, there is NO true treatment effect; p-values should be ~Uniform(0,1).
#   Therefore, with alpha=0.05 we EXPECT ~5% false positives over many repeated AA runs.

AA_SALT = "luckin_homepage_AA_sanity_v1"

def assign_aa_group(uid: int, salt: str = AA_SALT, split_a2: float = 0.5) -> str:
    """Deterministically assign a user to one of two identical arms, "A1" or "A2".

    Delegates to ``assign_group_by_hash`` so the AA check exercises the very
    same hashing code that performs the real A/B split, rather than a
    copy of it. A different salt gives an assignment that differs from the
    A/B one.

    Args:
        uid: User identifier.
        salt: Salt for the AA split (defaults to AA_SALT).
        split_a2: Fraction of the hash-bucket range assigned to "A2".

    Returns:
        "A2" if the user's hash bucket is below ``split_a2``, otherwise "A1".
    """
    # assign_group_by_hash returns "B" for buckets below the split and "A" otherwise
    label = assign_group_by_hash(uid, salt=salt, split_b=split_a2)
    return "A2" if label == "B" else "A1"

# AA arm label per user (hash-based, fixed across Monte-Carlo runs)
aa_group = np.array(list(map(assign_aa_group, user_id)))

# Control-arm PCVR per user: same conversion model with the group forced to "A",
# so neither AA arm carries a treatment effect.
aa_pcvr = np.array([
    post_click_conversion_probability("A", *user_attrs)
    for user_attrs in zip(user_type, time_slot, city_tier)
])

def aa_run(seed: int = 0):
    """Simulate one AA experiment and test A1 vs A2 on CTR and CVR.

    Clicks are drawn for every user at BASELINE_CTR_A and conversions at the
    precomputed control probabilities ``aa_pcvr`` (kept only for users who
    clicked). Outcomes are then aggregated by the fixed ``aa_group`` labels.

    Args:
        seed: Seed for a run-local ``np.random.default_rng`` generator, so the
            run is reproducible and independent of the global NumPy RNG.

    Returns:
        (p_ctr, p_cvr, agg): the z-test p-values for CTR and CVR, and the
        per-arm table with ``users``, ``clicks`` and ``conversions``.
    """
    rng = np.random.default_rng(seed)

    # Draw order matters for reproducibility: clicks first, then conversions.
    clicked = rng.binomial(1, BASELINE_CTR_A, size=N_USERS)
    conversion_draw = rng.binomial(1, aa_pcvr)
    converted = np.where(clicked == 1, conversion_draw, 0)

    agg = (
        pd.DataFrame({
            "aa_group": aa_group,
            "is_clicked": clicked,
            "is_converted": converted,
        })
        .groupby("aa_group")
        .agg(
            users=("aa_group", "count"),
            clicks=("is_clicked", "sum"),
            conversions=("is_converted", "sum"),
        )
    )

    arm_1, arm_2 = agg.loc["A1"], agg.loc["A2"]
    # Only the p-values are needed; index [1] of the (z, p) result
    p_ctr = proportion_z_test(arm_1["clicks"], arm_1["users"], arm_2["clicks"], arm_2["users"])[1]
    p_cvr = proportion_z_test(arm_1["conversions"], arm_1["users"], arm_2["conversions"], arm_2["users"])[1]
    return p_ctr, p_cvr, agg

# Show one AA run (seed fixed for reproducibility)
p_ctr_1, p_cvr_1, agg_1 = aa_run(seed=2025)
agg_1["CTR"] = agg_1["clicks"]/agg_1["users"]
agg_1["CVR"] = agg_1["conversions"]/agg_1["users"]
print(agg_1)
print(f"One AA run (seed=2025) | CTR p={p_ctr_1:.4f} | CVR p={p_cvr_1:.4f}")

# Monte-Carlo: verify false positive rate ~ 5%
# With 200 runs the standard error of the observed rate is about
# sqrt(0.05 * 0.95 / 200) ~= 0.015, so values roughly in 0.02-0.08 are expected.
N_AA_RUNS = 200

p_ctrs, p_cvrs = [], []
for s in range(N_AA_RUNS):
    # Use AA-specific names here: assigning to p_ctr / p_cvr at notebook scope
    # would overwrite the A/B test p-values computed in cells 8.1 / 8.2.
    aa_p_ctr, aa_p_cvr, _aa_agg = aa_run(seed=s)
    p_ctrs.append(aa_p_ctr)
    p_cvrs.append(aa_p_cvr)

print(f"\nAA Monte Carlo ({N_AA_RUNS} runs):")
print(f"CTR:  mean p={np.mean(p_ctrs):.3f},  frac(p<0.05)={np.mean(np.array(p_ctrs)<0.05):.3f}")
print(f"CVR:  mean p={np.mean(p_cvrs):.3f},  frac(p<0.05)={np.mean(np.array(p_cvrs)<0.05):.3f}")


          users  clicks  conversions       CTR       CVR
aa_group                                                
A1        15053    1262          248  0.083837  0.016475
A2        14947    1230          242  0.082291  0.016191
One AA run (seed=2025) | CTR p=0.6275 | CVR p=0.8458

AA Monte Carlo (200 runs):
CTR:  mean p=0.513,  frac(p<0.05)=0.030
CVR:  mean p=0.516,  frac(p<0.05)=0.055


In [16]:
# 10) Segmentation / heterogeneous treatment effect (HTE)
# We keep segmentation *simple and decision-oriented* given total N=30,000:
# - City tier collapsed into 2 bins as a proxy for price sensitivity: tier_1-2 vs tier_3-4
# - User type: new vs returning
# NOTE: We intentionally avoid cross-segmentation (city x user_type) here to reduce cell sparsity and multiple-comparison noise.

def summarize_funnel_by_segment(data: pd.DataFrame, segment_col: str) -> pd.DataFrame:
    """Funnel counts and rates per (segment value, experiment group).

    Args:
        data: User-level frame with ``user_id``, ``experiment_group``,
            ``is_clicked``, ``is_converted`` and the ``segment_col`` column.
        segment_col: Name of the column to segment by.

    Returns:
        One row per (segment value, experiment group) with ``users``,
        ``clicks``, ``conversions`` and the derived ``CTR``, ``CVR`` and
        ``PCVR`` (NaN where a cell has zero clicks).
    """
    funnel = (
        data.groupby([segment_col, "experiment_group"])
            .agg(
                users=("user_id", "count"),
                clicks=("is_clicked", "sum"),
                conversions=("is_converted", "sum")
            )
            .reset_index()
    )
    funnel["CTR"] = funnel["clicks"] / funnel["users"]
    funnel["CVR"] = funnel["conversions"] / funnel["users"]
    funnel["PCVR"] = funnel["conversions"] / funnel["clicks"].replace(0, np.nan)
    return funnel

# 10.1) By city tier (collapsed into 2 bins)
# NOTE: this adds a column to df in place; the per-segment z-tests in 10.3 rely on it.
df["city_bin"] = np.where(df["city_tier"].isin(["tier_1", "tier_2"]), "tier_1_2", "tier_3_4")

city_bin_summary = summarize_funnel_by_segment(df, "city_bin")

print("=== By city_bin (tier_1-2 vs tier_3-4) ===")
print(city_bin_summary.sort_values(["city_bin", "experiment_group"]))

# 10.2) By user type (new vs returning)
user_summary = summarize_funnel_by_segment(df, "user_type")

print("\n=== By user_type (new vs returning) ===")
print(user_summary.sort_values(["user_type", "experiment_group"]))


   user_type experiment_group  users  conversions       CVR
0        new                A   6019           84  0.013956
1        new                B   6018          110  0.018278
2  returning                A   8971          170  0.018950
3  returning                B   8992          235  0.026134


In [17]:
# 10.3) Statistical tests by city_bin and by user_type (CTR / CVR / PCVR)

def segment_ztests(df_sub, segment_name: str, segment_value: str):
    """Run A-vs-B z-tests on CTR, CVR and PCVR within one segment.

    Args:
        df_sub: User-level rows of a single segment; must contain both
            experiment groups "A" and "B".
        segment_name: Name of the segmenting dimension (used as the first key
            of the result).
        segment_value: Value of that dimension for this segment.

    Returns:
        A flat dict (one row of a results table) with the segment label, the
        per-arm user counts, the per-arm CTR / CVR / PCVR and the p-value of
        each comparison. The PCVR entries are NaN when either arm has no clicks.
    """
    funnel = df_sub.groupby("experiment_group").agg(
        users=("user_id", "count"),
        clicks=("is_clicked", "sum"),
        conversions=("is_converted", "sum"),
    )
    ctrl, treat = funnel.loc["A"], funnel.loc["B"]

    # Only the p-values are reported; index [1] of the (z, p) result
    p_ctr = proportion_z_test(ctrl["clicks"], ctrl["users"], treat["clicks"], treat["users"])[1]
    p_cvr = proportion_z_test(ctrl["conversions"], ctrl["users"], treat["conversions"], treat["users"])[1]

    # PCVR is only defined when both arms have at least one click
    ctrl_pcvr = treat_pcvr = p_pcvr = np.nan
    if ctrl["clicks"] > 0 and treat["clicks"] > 0:
        p_pcvr = proportion_z_test(ctrl["conversions"], ctrl["clicks"], treat["conversions"], treat["clicks"])[1]
        ctrl_pcvr = ctrl["conversions"] / ctrl["clicks"]
        treat_pcvr = treat["conversions"] / treat["clicks"]

    # Key order defines the column order of the resulting table
    row = {segment_name: segment_value}
    row["A_users"] = int(ctrl["users"])
    row["B_users"] = int(treat["users"])
    row["A_CTR"] = ctrl["clicks"] / ctrl["users"]
    row["B_CTR"] = treat["clicks"] / treat["users"]
    row["p_CTR"] = p_ctr
    row["A_CVR"] = ctrl["conversions"] / ctrl["users"]
    row["B_CVR"] = treat["conversions"] / treat["users"]
    row["p_CVR"] = p_cvr
    row["A_PCVR"] = ctrl_pcvr
    row["B_PCVR"] = treat_pcvr
    row["p_PCVR"] = p_pcvr
    return row

# --- City bin tests ---
# One result row per city bin, collected into a table
city_bin_results = [
    segment_ztests(df.loc[df["city_bin"] == bin_label], "city_bin", bin_label)
    for bin_label in ("tier_1_2", "tier_3_4")
]

city_bin_tests = pd.DataFrame(city_bin_results)
print("=== Z-tests by city_bin ===")
print(city_bin_tests)

# --- User type tests ---
# Same procedure, segmented by user type
user_results = [
    segment_ztests(df.loc[df["user_type"] == type_label], "user_type", type_label)
    for type_label in ("new", "returning")
]

user_tests = pd.DataFrame(user_results)
print("\n=== Z-tests by user_type ===")
print(user_tests)


   user_type  A_users  B_users     A_CVR     B_CVR   z_score   p_value
0        new     6019     6018  0.013956  0.018278 -1.883085  0.059689
1  returning     8971     8992  0.018950  0.026134 -3.243120  0.001182
