In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Seed both the stdlib RNG and NumPy's global RNG so that every run of the
# script produces the same synthetic dataset.
random.seed(42)
np.random.seed(42)

# Experiment configuration: population size, first day of the observation
# window, and the two arms with their assignment probabilities.
n_users = 5000
start_date = datetime(year=2025, month=4, day=1)
groups = ['A', 'B']
group_probs = [0.5, 0.5]

# Consecutive user ids starting at 100000, each independently assigned to
# one of the arms according to group_probs.
user_ids = np.arange(100000, n_users + 100000)
user_groups = np.random.choice(groups, p=group_probs, size=n_users)

def generate_events(user_id, group, *, start=None):
    """Simulate one user's view -> click -> purchase funnel.

    Every user gets a 'view' event at a random minute inside a 30-day
    window. With a group-dependent probability the user then clicks and,
    only after a click, purchases with a second group-dependent
    probability. Each follow-up event is stamped a random number of
    seconds after the previous one.

    All randomness is drawn from NumPy's global RNG (``np.random``); seed
    it before calling for reproducible output.

    Args:
        user_id: Identifier copied into every event tuple.
        group: Experiment arm. ``'A'`` uses click/purchase rates of
            0.30 / 0.08; any other value uses 0.38 / 0.12.
        start: Beginning of the 30-day window for the view event.
            Defaults to the module-level ``start_date``.

    Returns:
        A list of ``(user_id, group, event_type, timestamp)`` tuples in
        chronological order whose event types form a prefix of
        ``['view', 'click', 'purchase']``.
    """
    if start is None:
        start = start_date

    # View lands on a random minute within 30 days of `start`.
    # int() keeps timedelta happy even if the RNG hands back a NumPy
    # integer scalar rather than a built-in int.
    t = start + timedelta(minutes=int(np.random.randint(0, 60 * 24 * 30)))
    events = [(user_id, group, 'view', t)]

    click_prob = 0.30 if group == 'A' else 0.38
    if np.random.rand() >= click_prob:
        return events

    # Click follows the view by 5-299 seconds.
    t += timedelta(seconds=int(np.random.randint(5, 300)))
    events.append((user_id, group, 'click', t))

    purchase_prob = 0.08 if group == 'A' else 0.12
    if np.random.rand() >= purchase_prob:
        return events

    # Purchase follows the click by 10-599 seconds.
    t += timedelta(seconds=int(np.random.randint(10, 600)))
    events.append((user_id, group, 'purchase', t))

    return events

# Simulate every user's funnel in order and flatten the per-user event
# lists into a single list of (user_id, group, event_type, timestamp) rows.
all_events = [
    event
    for uid, grp in zip(user_ids, user_groups)
    for event in generate_events(uid, grp)
]

# Build the event log and order it chronologically with a fresh 0..n-1 index.
event_columns = ["user_id", "group", "event_type", "timestamp"]
df = (
    pd.DataFrame(all_events, columns=event_columns)
    .sort_values(by="timestamp")
    .reset_index(drop=True)
)

# Persist the log without the index column and report success.
df.to_csv("ab_test_event_data.csv", index=False)
print("✅ 已保存 ab_test_event_data.csv")
