In [1]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Simulate user and time range
n_users = 5000
dates = pd.date_range("2024-01-01", periods=30)

# Initialize list to hold all records
data = []

# Simulate daily user activity for both campaigns
for date in dates:
    for user_id in range(n_users):
        campaign = np.random.choice(['A', 'B'])  # equally distributed
        impressions = np.random.randint(5, 100)  # users see 5–100 ads

        # Campaign B has slightly better click-through rate
        ctr = 0.20 if campaign == 'A' else 0.25
        clicks = np.random.binomial(impressions, ctr)

        # Install rate from clicks
        install_rate = 0.30
        installs = np.random.binomial(clicks, install_rate)

        # Purchase rate from installs
        purchase_rate = 0.10
        purchases = np.random.binomial(installs, purchase_rate)

        # Revenue per purchase: random value €5–25
        revenue = purchases * np.random.uniform(5, 25)

        # Retention Day 7: Campaign B retains better
        retained_day_7 = np.random.binomial(1, 0.20 if campaign == 'A' else 0.30)

        data.append([
            user_id,
            campaign,
            date,
            impressions,
            clicks,
            installs,
            purchases,
            round(revenue, 2),
            retained_day_7
        ])

# Create DataFrame
df = pd.DataFrame(data, columns=[
    'user_id',
    'campaign',
    'date',
    'impressions',
    'clicks',
    'installs',
    'purchases',
    'revenue',
    'retained_day_7'
])

# Save to CSV
output_path = "../data/mobile_funnel_data.csv"
df.to_csv(output_path, index=False)
print(f"✅ Data generated and saved to: {output_path}")

# Preview
df.head()

✅ Data generated and saved to: ../data/mobile_funnel_data.csv


Unnamed: 0,user_id,campaign,date,impressions,clicks,installs,purchases,revenue,retained_day_7
0,0,A,2024-01-01,56,16,6,1,8.12,0
1,1,A,2024-01-01,79,20,6,1,5.41,1
2,2,B,2024-01-01,34,6,1,0,0.0,0
3,3,B,2024-01-01,93,21,7,0,0.0,0
4,4,B,2024-01-01,66,19,4,0,0.0,0
