Test data generation

In [None]:
import pandas as pd
import numpy as np

# Synthetic A/B-test dataset: one push notification per user, and for the
# users who opened the app, the time at which they opened it.

# Generation parameters
seed = 42  # fixed so every run yields the same dataset; set to None for fresh randomness
n_users = 1000
days = pd.date_range(start='2025-05-01', periods=7, freq='D')
# Probability that a user opens the app after the push, per group
open_probs = {'A': 0.25, 'B': 0.35}

# A local Generator makes the draws reproducible without touching NumPy's
# global random state.
rng = np.random.default_rng(seed)

groups = rng.choice(['A', 'B'], size=n_users, p=[0.5, 0.5])
user_ids = [f'user_{i}' for i in range(n_users)]
# Draw positions and index into `days`, so the result stays a DatetimeIndex.
user_start_days = days[rng.integers(0, len(days), size=n_users)]

df = pd.DataFrame({
    'user_id': user_ids,
    'group': groups,
    'day': user_start_days,
})

# The push is sent at a whole minute in [10:00, 20:00), i.e. 600..1199
# minutes after midnight of the user's day.
minutes = rng.integers(600, 1200, size=n_users)
df['notification_sent_at'] = pd.to_datetime(df['day']) + pd.to_timedelta(minutes, unit='m')

# One uniform draw per user, compared against that user's group probability.
opened = rng.random(n_users) < df['group'].map(open_probs).to_numpy()

# Delay between push and app open: exponential with a 60-minute mean.
# It is drawn for every user and then masked, which keeps the code vectorized.
delay_minutes = rng.exponential(scale=60, size=n_users)
open_times = df['notification_sent_at'] + pd.to_timedelta(delay_minutes, unit='m')
timestamp_open_app = open_times.where(opened)  # NaT for users who did not open

df['timestamp_open_app'] = timestamp_open_app

df = df[['user_id', 'group', 'day', 'notification_sent_at', 'timestamp_open_app']]

df.head()


Unnamed: 0,user_id,group,day,notification_sent_at,timestamp_open_app
0,user_0,B,2025-05-01,2025-05-01 15:41:00,NaT
1,user_1,B,2025-05-04,2025-05-04 10:53:00,NaT
2,user_2,A,2025-05-05,2025-05-05 16:15:00,NaT
3,user_3,A,2025-05-02,2025-05-02 18:24:00,NaT
4,user_4,B,2025-05-05,2025-05-05 12:19:00,NaT


In [None]:
# Persist the generated dataset; the DataFrame's integer index is not written.
df.to_csv(path_or_buf='ab_test_push_notifications.csv', index=False)