In [1]:
import numpy as np, pandas as pd
from statsmodels.stats.proportion import proportions_ztest

# --- Simulated population -------------------------------------------------
# A seeded generator keeps every draw below reproducible. The order of the
# draws (segment -> region -> age -> passed) determines the simulated values,
# so it must not be rearranged.
rng = np.random.default_rng(seed=7)
N = 10_000

# Categorical covariates drawn with fixed mixing proportions.
segment = rng.choice(['free', 'basic', 'pro'], size=N, p=[0.5, 0.3, 0.2])
region = rng.choice(['NE', 'SE', 'MW', 'W'], size=N, p=[0.2, 0.3, 0.3, 0.2])

# Age ~ Normal(30, 8), limited to the 18-65 range.
age = np.clip(rng.normal(loc=30, scale=8, size=N), 18, 65)

# The true pass probability carries real segment/region effects, which is
# what creates the risk of bias: a 0.40 baseline shifted by additive bumps
# for segment, region and age.
base = (
    0.40
    + 0.12 * (segment == 'pro')
    + 0.05 * (segment == 'basic')
    + 0.03 * (region == 'W')
    - 0.02 * (age > 40)
)
p = base.clip(0.05, 0.95)

# One Bernoulli trial per individual, stored as a boolean outcome.
passed = rng.binomial(n=1, p=p, size=N).astype(bool)

df_pop = pd.DataFrame(
    dict(segment=segment, region=region, age=age, passed=passed)
)

# Population-level reference values: overall pass rate and the share of
# each segment / region, sorted by label.
pop_rates = df_pop['passed'].mean()
pop_mix = (
    df_pop['segment']
    .value_counts(normalize=True)
    .rename('pop_share')
    .sort_index()
)
pop_mix_reg = (
    df_pop['region']
    .value_counts(normalize=True)
    .rename('pop_share')
    .sort_index()
)

print(f"Population pass rate: {pop_rates:.3f}")
pop_mix


Population pass rate: 0.444


segment
basic    0.2964
free     0.4983
pro      0.2053
Name: pop_share, dtype: float64

In [3]:
# Display the simulated population frame as the cell's output.
# The recorded output elides the middle rows, showing only the first and last few.
df_pop

Unnamed: 0,segment,region,age,passed
0,basic,MW,21.492670,False
1,pro,W,33.891262,True
2,basic,W,28.375208,False
3,free,SE,25.701928,True
4,free,NE,35.298793,True
...,...,...,...,...
9995,free,MW,32.992896,False
9996,free,NE,21.277348,True
9997,pro,NE,35.914968,True
9998,free,NE,28.240867,False
