In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Seed NumPy's global generator so every run reproduces the same dataset
np.random.seed(42)

# Total number of rows (events) to synthesize
num_records = 50000

# Date window used when drawing registration dates
start_date = datetime(2021, 1, 1)
end_date = datetime(2021, 6, 30)

# One user id per row, drawn from [1, 3000); the same id appears on many rows
user_ids = np.random.randint(low=1, high=3000, size=num_records)

# Cache mapping user_id -> the registration date kept for that user
registration_dates_dict = {}

# Per-row attribute columns. The order of these draws determines the seeded
# values, so it must not be rearranged.
age = np.random.randint(low=18, high=60, size=num_records)
gender = np.random.choice(a=['Male', 'Female'], size=num_records)
location = np.random.choice(a=['USA', 'Europe', 'Asia'], size=num_records)
session_duration = np.random.uniform(low=5, high=120, size=num_records)
num_sessions = np.random.randint(low=1, high=50, size=num_records)
level = np.random.randint(low=1, high=100, size=num_records)
tasks_completed = np.random.randint(low=0, high=50, size=num_records)
revenue = np.random.uniform(low=0, high=250, size=num_records)
purchases = np.random.randint(low=0, high=5, size=num_records)
platform = np.random.choice(a=['iOS', 'Android'], size=num_records)
ad_campaign_cost = np.random.uniform(low=0, high=50, size=num_records)
game_rating = np.random.uniform(low=1, high=5, size=num_records)

# Build the row-aligned list of registration dates. A candidate date is drawn
# for every row, but setdefault only stores it the first time a user id is
# seen; later rows for that id get the stored date back, which keeps the
# registration date consistent per user.
registration_window_days = (end_date - start_date).days
registration_dates = []
for uid in user_ids:
    candidate_date = start_date + timedelta(days=np.random.randint(registration_window_days))
    registration_dates.append(registration_dates_dict.setdefault(uid, candidate_date))

In [3]:
# Registration dates are not rebuilt here: `registration_dates` is already
# computed, one entry per row, in the cell that defines
# `registration_dates_dict`. Repeating that comprehension returned the same
# cached dates while still consuming one discarded random draw per row.

# Draw one random event date per record, as a whole-day offset from start_date
# in [0, (end_date - start_date).days). The dates are drawn independently of
# the user id, so they are neither unique nor tied to the registration date.
event_dates = start_date + pd.to_timedelta(np.random.randint((end_date - start_date).days, size=num_records), unit='D')

# Draw one event type per record
user_events = np.random.choice(['Logged In', 'Clicked Play Button', 'Clicked Shop Button'], size=num_records)

In [4]:
# Assemble the per-event table, one row per generated record.
#
# Every sequence used below is already row-aligned (length num_records), so it
# is passed as a column directly. Indexing these sequences by user_id value
# would pick the entry stored at row position `user_id`, which belongs to an
# unrelated record, and would make registration_date disagree with
# registration_dates_dict.
#
# NOTE(review): `platform` and `ad_campaign_cost` are generated with the other
# column arrays but are not part of this table - confirm whether they should be.
df = pd.DataFrame({
    'user_id': user_ids,
    'age': age,
    'gender': gender,
    'location': location,
    'session_duration': session_duration,
    'num_sessions': num_sessions,
    'level': level,
    'tasks_completed': tasks_completed,
    'revenue': revenue,
    'purchases': purchases,
    'game_rating': game_rating,
    'registration_date': registration_dates,
    'event_date': event_dates,
    'user_event': user_events
})

In [5]:
# Set the event columns from the row-aligned per-record arrays, replacing any
# values those columns were given when the DataFrame was constructed
df = df.assign(event_date=event_dates, user_event=user_events)

In [6]:
# Generate additional users that each contribute exactly one event row
num_additional_users = 10

# Allocate ids above every id already in the table. A fixed range such as
# 100-109 lies inside the id range drawn for the main table, so those rows
# would be attached to existing users instead of creating single-event users.
first_new_user_id = int(df['user_id'].max()) + 1

# Revenue and purchases use narrower ranges here than in the main table.
# This frame has no 'game_rating' column, so after the concat below these
# rows hold NaN in that column.
# NOTE(review): registration_date and event_date are drawn independently, so
# an event can fall before the registration date - confirm this is acceptable.
additional_users = pd.DataFrame({
    'user_id': range(first_new_user_id, first_new_user_id + num_additional_users),
    'age': np.random.randint(18, 60, size=num_additional_users),
    'gender': np.random.choice(['Male', 'Female'], size=num_additional_users),
    'location': np.random.choice(['USA', 'Europe', 'Asia'], size=num_additional_users),
    'session_duration': np.random.uniform(5, 120, size=num_additional_users),
    'num_sessions': np.random.randint(1, 50, size=num_additional_users),
    'level': np.random.randint(1, 100, size=num_additional_users),
    'tasks_completed': np.random.randint(0, 50, size=num_additional_users),
    'revenue': np.random.uniform(0, 100, size=num_additional_users),
    'purchases': np.random.randint(0, 2, size=num_additional_users),
    'registration_date': start_date + pd.to_timedelta(np.random.randint((end_date - start_date).days, size=num_additional_users), unit='D'),
    'event_date': start_date + pd.to_timedelta(np.random.randint((end_date - start_date).days, size=num_additional_users), unit='D'),
    'user_event': np.random.choice(['Logged In', 'Clicked Play Button', 'Clicked Shop Button'], size=num_additional_users)
})

# Append the new rows to the main table and renumber the row index
df = pd.concat([df, additional_users], ignore_index=True)

In [7]:
# Write the final table to CSV, leaving out the DataFrame's row index
output_csv_path = 'mobile_game_analytics_data.csv'
df.to_csv(output_csv_path, index=False)