In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Configuration and per-record random columns for the synthetic
# mobile-game analytics data set.

# Seed NumPy's global RNG so every Restart & Run All reproduces the same data
np.random.seed(42)

# Number of records (rows) to generate
num_records = 20000

# Calendar window for generated dates; both endpoints are treated as inclusive
start_date = datetime(2021, 1, 1)
end_date = datetime(2021, 12, 31)

# One user_id per record. There are fewer possible ids than records, so a
# user normally owns several rows.
user_ids = np.random.randint(1, 4000, size=num_records)

# Per-record random columns. The draw order below is kept fixed: with the
# seeded global RNG, reordering these lines would change every later value.
age = np.random.randint(18, 60, size=num_records)
gender = np.random.choice(['Male', 'Female'], size=num_records)
location = np.random.choice(['USA', 'Europe', 'Asia'], size=num_records)
session_duration = np.random.uniform(5, 120, size=num_records)
num_sessions = np.random.randint(1, 50, size=num_records)
level = np.random.randint(1, 100, size=num_records)
tasks_completed = np.random.randint(0, 50, size=num_records)
revenue = np.random.uniform(0, 100, size=num_records)
purchases = np.random.randint(0, 2, size=num_records)
# +1 in the denominator avoids dividing by zero for records with no purchase
average_revenue_per_user = revenue / (purchases + 1)
game_version = np.random.choice(['1.0', '2.0', '3.0'], size=num_records)
platform = np.random.choice(['iOS', 'Android'], size=num_records)
os_version = np.random.choice(['10.0', '11.0', '12.0'], size=num_records)
ad_campaign_cost = np.random.uniform(0, 50, size=num_records)
new_users_from_ads = np.random.randint(0, 50, size=num_records)
game_rating = np.random.uniform(1, 5, size=num_records)
player_reviews = np.random.randint(0, 100, size=num_records)

# Registration dates: exactly one draw per distinct user, so every record of
# a user shares the same date. `randint` excludes its upper bound, hence the
# +1 that makes end_date itself reachable.
unique_user_ids = np.unique(user_ids)
days_in_window = (end_date - start_date).days + 1
registration_offsets = np.random.randint(0, days_in_window, size=unique_user_ids.size)

# .tolist() yields plain Python ints, which timedelta accepts directly
registration_dates_dict = {
    user_id: start_date + timedelta(days=offset)
    for user_id, offset in zip(unique_user_ids.tolist(), registration_offsets.tolist())
}

# Per-record view of the per-user mapping, aligned with user_ids
registration_dates = [registration_dates_dict[user_id] for user_id in user_ids.tolist()]

In [3]:
# Per-user attributes, keyed by user_id.

# Reuse the per-user registration dates already drawn into
# registration_dates_dict instead of drawing a second, conflicting set.
# A copy is taken so the two names do not alias the same dict.
registration_dates = dict(registration_dates_dict)

# Day offset of end_date from start_date; `randint` excludes its upper bound,
# so the draws below use last_day_offset + 1 to keep end_date reachable.
last_day_offset = (end_date - start_date).days

# One event date per user, drawn between that user's registration date and
# end_date (inclusive), so an event never precedes the registration.
event_dates = {
    user_id: start_date + timedelta(
        days=np.random.randint((registered_on - start_date).days, last_day_offset + 1)
    )
    for user_id, registered_on in registration_dates.items()
}

# One event type per user
user_events = {
    user_id: np.random.choice(['Logged In', 'Clicked Play Button', 'Clicked Shop Button'])
    for user_id in registration_dates
}

In [4]:
# Assemble the analytics table: one row per generated record.

# Columns that already hold one value per record
record_columns = {
    'user_id': user_ids,
    'age': age,
    'gender': gender,
    'location': location,
    'session_duration': session_duration,
    'num_sessions': num_sessions,
    'level': level,
    'tasks_completed': tasks_completed,
    'revenue': revenue,
    'purchases': purchases,
    'game_version': game_version,
    'platform': platform,
    'os_version': os_version,
    'game_rating': game_rating,
}

# Per-user lookups, expanded to one value per record via the row's user_id,
# so every row belonging to the same user carries identical values here.
per_user_lookups = {
    'registration_date': registration_dates,
    'event_date': event_dates,
    'user_event': user_events,
}
for column_name, lookup in per_user_lookups.items():
    record_columns[column_name] = [lookup[user_id] for user_id in user_ids]

# Dict insertion order determines the column order of the frame
df = pd.DataFrame(record_columns)

In [5]:
# Seasons and the date on which each one starts
season_names = ['Season 1', 'Season 2', 'Season 3', 'Season 4']
season_start_months = (1, 4, 7, 10)
season_start_dates = {
    name: datetime(2021, month, 1)
    for name, month in zip(season_names, season_start_months)
}

# Tag every row with a randomly chosen season (independent of the row's
# dates), then look up that season's start date.
df['season'] = np.random.choice(season_names, size=num_records)
df['start_season'] = df['season'].map(season_start_dates)

In [6]:
# Write the generated table to CSV; the row index is left out of the file
output_csv_path = 'mobile_game_analytics_data.csv'
df.to_csv(output_csv_path, index=False)