In [3]:
# pip install ace_tools

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Pin NumPy's global RNG and the stdlib RNG to the same seed so that
# re-running the notebook produces the same draws.
np.random.seed(42)
random.seed(42)

# Size of the synthetic dataset and the earliest possible notification date.
num_rows = 20_000
start_date = datetime(year=2024, month=1, day=1)

def generate_time():
    """Return a random timestamp, at minute resolution, offset from ``start_date``.

    Three integers are drawn from NumPy's global RNG, in this order: a day
    offset in [0, 90), an hour in [0, 24) and a minute in [0, 60).

    Returns:
        datetime: ``start_date`` shifted forward by the drawn offset.
    """
    # The generator is consumed lazily, left to right, so the draw order
    # (days, hours, minutes) -- and therefore the seeded sequence -- is fixed.
    days, hours, minutes = (
        np.random.randint(0, upper_bound) for upper_bound in (90, 24, 60)
    )
    offset = timedelta(days=days, hours=hours, minutes=minutes)
    return start_date + offset

# Option lists that the categorical columns are sampled from.
categories = [
    'Food',
    'Grocery',
    'Books',
    'Electronics',
    'Clothing',
]
notification_types = [
    'Reminder',
    'New Arrival',
    'Discount',
    'Urgent',
]
user_types = [
    'New',
    'Returning',
    'Loyal',
]
device_types = [
    'Mobile',
    'Tablet',
    'Desktop',
]
os_types = [
    'Android',
    'iOS',
]
# The empty string is one of the four options (presumably "no urgency phrase").
urgency_words = [
    'Limited Time',
    'Hurry',
    'Exclusive',
    '',
]
emoji_used = [
    True,
    False,
]

# Build the synthetic dataset one notification event (row) at a time, then
# persist it as CSV.
#
# NOTE: every value comes from NumPy's global RNG, so the order of the
# np.random calls below determines the seeded output. Reordering them (or
# adding draws in between) changes the generated data.
data = []

# English weekday names indexed by datetime.weekday() (Monday == 0).
# strftime('%A') returns the *locale's* weekday name, so under a non-English
# LC_TIME the day names would differ and an English-name weekend check would
# never match. Indexing this list keeps both columns locale-independent.
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

for i in range(num_rows):
    notification_time = generate_time()
    session_delay = np.random.randint(0, 180)  # in minutes, 0..179
    # The session always starts at or after the notification.
    session_time = notification_time + timedelta(minutes=session_delay)

    row = {
        'UserID': np.random.randint(1000, 10000),
        'SessionTime': session_time,
        'ProductID': np.random.randint(100, 1000),
        'Category': np.random.choice(categories),
        'NotificationType': np.random.choice(notification_types),
        'NotificationTime': notification_time,
        # Drawn independently of every other column (60% 'Yes' / 40% 'No').
        'Response': np.random.choice(['Yes', 'No'], p=[0.6, 0.4]),
        'UserType': np.random.choice(user_types, p=[0.3, 0.4, 0.3]),
        'DeviceType': np.random.choice(device_types),
        'OS': np.random.choice(os_types),
        'DiscountOffered': np.random.choice([0, 10, 20, 30, 50], p=[0.2, 0.2, 0.3, 0.2, 0.1]),
        'UrgencyWordUsed': np.random.choice(urgency_words),
        'EmojiUsed': np.random.choice(emoji_used)
    }

    # Enrich time-based features (all derived from the notification time).
    weekday_index = notification_time.weekday()  # Monday == 0 ... Sunday == 6
    row['DayOfWeek'] = weekday_names[weekday_index]
    row['HourOfDay'] = notification_time.hour
    row['IsWeekend'] = weekday_index >= 5  # Saturday (5) or Sunday (6)
    row['TimeDelayMin'] = session_delay
    row['NotificationTextLength'] = np.random.randint(20, 150)

    data.append(row)

# One DataFrame construction from the list of dicts; columns follow the
# key insertion order used above.
df = pd.DataFrame(data)
df.to_csv("enhanced_push_notification_data.csv", index=False)
