# In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate a synthetic mobile-game analytics dataset and write it to
# 'mobile_game_analytics_data.csv'.
#
# All random values come from NumPy's global RNG, seeded once below.  Adding,
# removing or reordering any np.random call therefore changes every value
# drawn after it, which is why the draws keep a fixed order and why columns
# that never reach the DataFrame are still generated.
np.random.seed(42)

# Number of event records generated for the main (repeating) user population
num_records = 50000

# Registration and event dates are drawn from 2021-01-01 up to, but not
# including, 2021-06-30 (np.random.randint excludes its upper bound).
# NOTE(review): the previous comment claimed the range ran to 31.12.2023,
# which the code does not do -- confirm the intended end date.
start_date = datetime(2021, 1, 1)
end_date = datetime(2021, 6, 30)
date_span_days = (end_date - start_date).days

# Non-unique user IDs in 1..4099 (upper bound exclusive), one per record,
# so the same ID shows up on several records.
user_ids = np.random.randint(1, 4100, size=num_records)

# Maps each user_id to the first registration date drawn for it
registration_dates_dict = {}

# Per-record random attributes
age = np.random.randint(18, 60, size=num_records)
gender = np.random.choice(['Male', 'Female'], size=num_records)
location = np.random.choice(['USA', 'Europe', 'Asia'], size=num_records)
session_duration = np.random.uniform(5, 120, size=num_records)
num_sessions = np.random.randint(1, 50, size=num_records)
level = np.random.randint(1, 100, size=num_records)
tasks_completed = np.random.randint(0, 50, size=num_records)
revenue = np.random.uniform(0, 100, size=num_records)
purchases = np.random.randint(0, 2, size=num_records)
# The values below are not written to the DataFrame in this script.  They are
# kept because the random draws among them advance the RNG; dropping them
# would change every value generated afterwards.
average_revenue_per_user = revenue / (purchases + 1)
game_version = np.random.choice(['1.0', '2.0', '3.0'], size=num_records)
platform = np.random.choice(['iOS', 'Android'], size=num_records)
os_version = np.random.choice(['10.0', '11.0', '12.0'], size=num_records)
ad_campaign_cost = np.random.uniform(0, 50, size=num_records)
new_users_from_ads = np.random.randint(0, 50, size=num_records)
retention_rate = np.random.uniform(0.1, 0.9, size=num_records)
# game_rating IS used below; it stays here to keep its place in the RNG order
game_rating = np.random.uniform(1, 5, size=num_records)
player_reviews = np.random.randint(0, 100, size=num_records)

# One registration date per record, consistent for a given user_id: the first
# date drawn for an ID is stored and reused for every later record of that ID.
# A candidate date is still drawn for every record (setdefault evaluates its
# default eagerly); this is kept on purpose so the seeded output is unchanged.
registration_dates = [
    registration_dates_dict.setdefault(
        user_id,
        start_date + timedelta(days=np.random.randint(date_span_days)),
    )
    for user_id in user_ids
]

# One random event date and event type per record.
# NOTE: event dates are drawn independently of registration dates, so an
# event can fall before the same user's registration date.
event_types = ['Logged In', 'Clicked Play Button', 'Clicked Shop Button']
event_dates = start_date + pd.to_timedelta(
    np.random.randint(date_span_days, size=num_records), unit='D'
)
user_events = np.random.choice(event_types, size=num_records)

# Assemble the main DataFrame.  event_dates and user_events are already
# aligned with the records row by row, so they are used as-is; indexing them
# by user_id would treat an ID as a row position.
df = pd.DataFrame({
    'user_id': user_ids,
    'age': age,
    'gender': gender,
    'location': location,
    'session_duration': session_duration,
    'num_sessions': num_sessions,
    'level': level,
    'tasks_completed': tasks_completed,
    'revenue': revenue,
    'purchases': purchases,
    'game_rating': game_rating,
    'registration_date': registration_dates,
    'event_date': event_dates,
    'user_event': user_events,
})

# Additional users that have exactly one event each.  Their IDs start at 4101,
# above the range used for the main population, so they never collide.
size_id = 600
additional_users = pd.DataFrame({
    'user_id': range(4101, 4101 + size_id),
    'age': np.random.randint(18, 60, size=size_id),
    'gender': np.random.choice(['Male', 'Female'], size=size_id),
    'location': np.random.choice(['USA', 'Europe', 'Asia'], size=size_id),
    'session_duration': np.random.uniform(5, 120, size=size_id),
    'num_sessions': np.random.randint(1, 50, size=size_id),
    'level': np.random.randint(1, 100, size=size_id),
    'tasks_completed': np.random.randint(0, 50, size=size_id),
    'revenue': np.random.uniform(0, 100, size=size_id),
    'purchases': np.random.randint(0, 2, size=size_id),
    'game_rating': np.random.uniform(1, 5, size=size_id),
    'registration_date': start_date + pd.to_timedelta(
        np.random.randint(date_span_days, size=size_id), unit='D'
    ),
    'event_date': start_date + pd.to_timedelta(
        np.random.randint(date_span_days, size=size_id), unit='D'
    ),
    'user_event': np.random.choice(event_types, size=size_id),
})

# Append the single-event users after the main records
df = pd.concat([df, additional_users], ignore_index=True)

# Save to CSV file (without the row index)
df.to_csv('mobile_game_analytics_data.csv', index=False)