In [1]:
import numpy as np
import pandas as pd

# Size of the simulated population, named so the experiment can be re-run at another scale
NUM_USERS = 1000

# Simulated user IDs: consecutive integers 0 .. NUM_USERS - 1
user_ids = np.arange(NUM_USERS)


In [2]:
# Coin-flip assignment: each user is independently drawn into 'A' or 'B' with equal
# probability, so the realized split is close to, but not exactly, 50/50.
# The legacy global NumPy seed is pinned first so the draw is identical on every run.
np.random.seed(42)
assignment = np.random.choice(['A', 'B'], size=user_ids.shape[0])


In [3]:
# Tabulate the experiment design: one row per user with the variant they were assigned
user_assignments = pd.DataFrame(dict(user_id=user_ids, variant=assignment))


In [4]:
# Dummy implementations for illustration purposes
def model_A_recommendations(user_id):
    """Return model A's product recommendations for a user.

    Placeholder implementation: `user_id` is ignored and the same three
    products are returned, as a new list, on every call.
    """
    recommended_products = ['product1', 'product2', 'product3']
    return recommended_products

def model_B_recommendations(user_id):
    """Return model B's product recommendations for a user.

    Placeholder implementation: `user_id` is ignored and the same three
    products are returned, as a new list, on every call.
    """
    recommended_products = ['product4', 'product5', 'product6']
    return recommended_products


In [5]:
def get_recommendations(variant, user_id):
    """Route a user to the recommendation model of their A/B variant.

    Args:
        variant: Experiment arm, either 'A' or 'B'.
        user_id: Identifier passed through to the selected model.

    Returns:
        Whatever the selected model function returns for `user_id`.

    Raises:
        ValueError: If `variant` is neither 'A' nor 'B'.
    """
    if variant == 'A':
        return model_A_recommendations(user_id)
    if variant == 'B':
        return model_B_recommendations(user_id)
    # Fail loudly: an implicit None here would only surface later as a
    # TypeError when the caller iterates over the result.
    raise ValueError(f"Unknown variant {variant!r}; expected 'A' or 'B'")


In [6]:

# Simulate user interactions
import random

# Dedicated, seeded generator so the simulated clicks are the same on every run.
# np.random.seed() does not seed the stdlib `random` module, so without this the
# click-through rates and the test result below would change between runs.
click_rng = random.Random(42)

# Probability of clicking each individual recommendation. A user walks the list in
# order and stops at the first click, so the chance of clicking at least one of
# n items is 1 - (1 - click_probability) ** n (about 0.657 for three items).
click_probability = 0.3

interactions = []

# itertuples keeps column dtypes and avoids building a Series per row
for row in user_assignments.itertuples(index=False):
    user_id = row.user_id
    variant = row.variant

    recommendations = get_recommendations(variant, user_id)

    # First recommendation that gets clicked, or None when every one is passed over.
    # The generator is lazy, so no further random draws happen after a click.
    clicked_product = next(
        (product for product in recommendations if click_rng.random() < click_probability),
        None,
    )

    interactions.append({'user_id': user_id, 'variant': variant, 'clicked_product': clicked_product})

# Explicit columns keep the schema stable even when there are no users to simulate
interaction_df = pd.DataFrame(interactions, columns=['user_id', 'variant', 'clicked_product'])


In [7]:
# Inspect the simulated interactions; clicked_product is empty for users who clicked nothing
interaction_df

Unnamed: 0,user_id,variant,clicked_product
0,0,A,product1
1,1,B,product5
2,2,A,
3,3,A,product1
4,4,A,product1
...,...,...,...
995,995,A,product2
996,996,A,product3
997,997,B,
998,998,B,product5


In [9]:
# Click-through rate per variant: rows with a click / users assigned to the variant
clicked_interactions = interaction_df.loc[interaction_df['clicked_product'].notna()]

# Numerators: interactions that ended in a click, split by variant
clicks_A = len(clicked_interactions.loc[clicked_interactions['variant'].eq('A')])
clicks_B = len(clicked_interactions.loc[clicked_interactions['variant'].eq('B')])

# Denominators: everyone assigned to the variant, whether or not they clicked
users_A = len(user_assignments.loc[user_assignments['variant'].eq('A')])
users_B = len(user_assignments.loc[user_assignments['variant'].eq('B')])

ctr_A = clicks_A / users_A
ctr_B = clicks_B / users_B

print(f'CTR for Variant A: {ctr_A}')
print(f'CTR for Variant B: {ctr_B}')


CTR for Variant A: 0.6755102040816326
CTR for Variant B: 0.6764705882352942


In [10]:
from statsmodels.stats.proportion import proportions_ztest

In [11]:
# Two-proportion z-test: does the CTR of variant A differ from that of variant B?
count = np.array([clicks_A, clicks_B])  # successes (clicks) per variant
nobs = np.array([users_A, users_B])     # trials (assigned users) per variant

z_stat, p_value = proportions_ztest(count=count, nobs=nobs)
print(f'Z-statistic: {z_stat}')
print(f'P-value: {p_value}')

# Compare the p-value against the significance threshold and report the verdict
alpha = 0.05
print(
    "The difference in CTRs is statistically significant."
    if p_value < alpha
    else "The difference in CTRs is not statistically significant."
)

Z-statistic: -0.03244010569744088
P-value: 0.9741210795848765
The difference in CTRs is not statistically significant.
