In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Fix the global NumPy seed so that every run yields the same synthetic data.
# NOTE: all draws below consume this single global stream, so their order
# determines the generated values; do not reorder them casually.
np.random.seed(42)

# Size of the dataset: one row per user
num_records = 20000

# Bounds of the date range used for timestamps (01.01.2021 and 31.12.2023)
start_date = datetime(year=2021, month=1, day=1)
end_date = datetime(year=2023, month=12, day=31)

# Sequential, hence unique, user identifiers 1..num_records
user_ids = np.arange(1, num_records + 1)

# --- Random per-user columns -------------------------------------------------
# randint draws integers from [low, high) (upper bound excluded);
# uniform draws floats from [low, high).

# Demographics
age = np.random.randint(low=18, high=60, size=num_records)
gender = np.random.choice(a=['Male', 'Female'], size=num_records)
location = np.random.choice(a=['USA', 'Europe', 'Asia'], size=num_records)

# Engagement
session_duration = np.random.uniform(low=5, high=120, size=num_records)
num_sessions = np.random.randint(low=1, high=50, size=num_records)
level = np.random.randint(low=1, high=100, size=num_records)
tasks_completed = np.random.randint(low=0, high=50, size=num_records)

# Monetization (purchases is either 0 or 1)
revenue = np.random.uniform(low=0, high=100, size=num_records)
purchases = np.random.randint(low=0, high=2, size=num_records)
# The "+ 1" keeps the divisor non-zero for users without purchases
average_revenue_per_user = revenue / (purchases + 1)

# Client environment
game_version = np.random.choice(a=['1.0', '2.0', '3.0'], size=num_records)
platform = np.random.choice(a=['iOS', 'Android'], size=num_records)
os_version = np.random.choice(a=['10.0', '11.0', '12.0'], size=num_records)

# Marketing
ad_campaign_cost = np.random.uniform(low=0, high=50, size=num_records)
new_users_from_ads = np.random.randint(low=0, high=50, size=num_records)
retention_rate = np.random.uniform(low=0.1, high=0.9, size=num_records)

# Feedback
game_rating = np.random.uniform(low=1, high=5, size=num_records)
player_reviews = np.random.randint(low=0, high=100, size=num_records)

In [3]:
# Number of whole days between the first and the last allowed date
total_days = (end_date - start_date).days

# Random registration day offset per user, keyed by user_id.
# np.random.randint excludes its upper bound, so `total_days + 1` is required
# for end_date itself to be a possible outcome.
registration_offsets = {
    user_id: int(np.random.randint(0, total_days + 1))
    for user_id in user_ids
}

# Registration dates for users, tied to user_id
registration_dates = {
    user_id: start_date + timedelta(days=offset)
    for user_id, offset in registration_offsets.items()
}

# Random event dates for users. Each event is drawn between the user's own
# registration date and end_date (both inclusive), so an event can never
# precede the registration of the user it belongs to.
event_dates = {
    user_id: start_date + timedelta(
        days=int(np.random.randint(registration_offsets[user_id], total_days + 1))
    )
    for user_id in user_ids
}

# One random event type per user
user_events = {
    user_id: np.random.choice(['Logged In', 'Clicked Play Button', 'Clicked Shop Button'])
    for user_id in user_ids
}

In [4]:
# Assemble every generated column into a single DataFrame, one row per user.
# The per-user dictionaries are looked up in user_ids order so that dates and
# events line up with the array-based columns.
df = pd.DataFrame(dict(
    user_id=user_ids,
    age=age,
    gender=gender,
    location=location,
    session_duration=session_duration,
    num_sessions=num_sessions,
    level=level,
    tasks_completed=tasks_completed,
    revenue=revenue,
    purchases=purchases,
    average_revenue_per_user=average_revenue_per_user,
    game_version=game_version,
    platform=platform,
    os_version=os_version,
    ad_campaign_cost=ad_campaign_cost,
    new_users_from_ads=new_users_from_ads,
    retention_rate=retention_rate,
    game_rating=game_rating,
    player_reviews=player_reviews,
    registration_date=[registration_dates[uid] for uid in user_ids],
    event_date=[event_dates[uid] for uid in user_ids],
    user_event=[user_events[uid] for uid in user_ids],
))

In [5]:
# Season number of each user: consecutive 90-day windows counted from
# start_date, numbered cyclically 1 through 4
df['seasons'] = (
    df['registration_date']
    .sub(start_date)
    .floordiv(timedelta(days=90))
    .mod(4)
    .add(1)
)
# Human-readable season label, e.g. "Season 3"
df['season_name'] = df['seasons'].astype(str).radd('Season ')

# Registered players: new_users_from_ads scaled by a random factor in [0.8, 1.2)
df['registered_players'] = df['new_users_from_ads'] * np.random.uniform(low=0.8, high=1.2, size=num_records)

# Returning players: new_users_from_ads scaled by a second, independently
# drawn random factor in [0.8, 1.2)
df['returned_players'] = df['new_users_from_ads'] * np.random.uniform(low=0.8, high=1.2, size=num_records)

In [6]:
# Persist the generated dataset as CSV, leaving out the DataFrame index column
df.to_csv(path_or_buf='mobile_game_analytics_data.csv', index=False)