# Video Game purchase generator

# In [2]:
import random
import pandas as pd
from datetime import datetime, timedelta

# Catalogue of the 30 distinct game titles that purchases are sampled from.
game_titles_list = [
    "dragon_quest",
    "cyber_racer",
    "galaxy_battle",
    "pixel_hero",
    "dungeon_crawler",
    "mystic_quest",
    "retro_runner",
    "space_invaderz",
    "alien_attack",
    "cyber_ninja",
    "knights_valor",
    "shadow_strike",
    "blaze_of_glory",
    "thunder_force",
    "phantom_fighter",
    "arcade_master",
    "pixel_warrior",
    "galaxy_defender",
    "retro_blaster",
    "legend_of_shadows",
    "mystic_warriors",
    "blade_runner",
    "cosmic_adventure",
    "star_fighter",
    "quest_of_legends",
    "dungeon_master",
    "alien_invaders",
    "battle_arena",
    "sky_heroes",
    "cyber_force",
]

# Value pools for the categorical columns of a purchase record.
genres = [
    "action", "adventure", "rpg", "puzzle",
    "shooter", "platformer", "strategy",
]
platforms = [
    "arcade", "nes", "sega_genesis", "pc",
    "atari", "snes", "commodore_64",
]
developers = [
    "pixel_forge_studios",
    "retro_wave_entertainment",
    "high_score_games",
    "8_bit_games_inc",
]
publishers = [
    "8_bit_games_inc",
    "high_score_productions",
    "arcade_legends",
    "synth_wave_records",
]
country_codes = [
    "us", "jp", "de", "fr", "gb",
    "it", "es", "ca", "au", "ru",
]
devices = ["console", "pc", "mobile"]
payment_methods = ["credit_card", "check", "cash"]

# Country code -> continent lookup, expanded from per-continent groups.
# Every entry of country_codes has a key here.
country_to_continent = {
    code: continent
    for continent, codes in (
        ("north_america", ("us", "ca")),
        ("europe", ("gb", "fr", "de", "it", "es")),
        ("asia", ("jp",)),
        ("australia", ("au",)),
        ("europe_asia", ("ru",)),
    )
    for code in codes
}

# Generate the dataset
def generate_dataset(num_records: int = 1000000, seed: "int | None" = None) -> pd.DataFrame:
    """Build a DataFrame of synthetic retro video-game purchase records.

    Every row is sampled independently from the module-level pools
    (``game_titles_list``, ``genres``, ``platforms``, ``developers``,
    ``publishers``, ``country_codes``, ``devices``, ``payment_methods``).
    Purchase dates fall between 1980 and 1999; the day of month is capped at
    28 so that every month/day combination is a valid date.

    Within a row the fields are kept mutually consistent:

    * ``release_year`` is never later than the year of ``purchase_date``.
    * ``game_description`` names the same genre as the ``genre`` column.
    * ``review_date`` is 0 to 60 days after ``purchase_date``.
    * ``continent`` is looked up from ``country_code``.

    Args:
        num_records: Number of rows to generate. Zero or a negative value
            yields an empty frame that still carries every column.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used, so the same seed reproduces the same table without
            touching the global random state. When ``None`` (the default)
            the shared module-level ``random`` generator is used.

    Returns:
        A DataFrame with one row per purchase; ``purchase_id`` runs from 1
        to ``num_records``.
    """
    # The random module exposes the same choice/randint/uniform functions as
    # a Random instance, so either can serve as the source below.
    rng = random if seed is None else random.Random(seed)

    first_year, last_year = 1980, 1999

    # Loop-invariant pools, built once instead of once per record.
    actions = ('fight', 'solve_puzzles', 'race', 'explore_dungeons', 'save_the_world')
    keywords = ("8_bit", "multiplayer", "side_scroller", "fantasy", "sci_fi", "adventure")

    # Explicit schema so an empty result still has the expected columns.
    columns = [
        "purchase_id",
        "game_title",
        "genre",
        "platform",
        "release_year",
        "developer_name",
        "publisher_name",
        "game_description",
        "purchase_date",
        "price_at_purchase",
        "user_rating",
        "review_date",
        "game_keywords",
        "legacy_score",
        "country_code",
        "continent",
        "purchase_device",
        "payment_method",
        "discount_applied",
    ]

    data = []
    for i in range(num_records):
        game_title = rng.choice(game_titles_list)

        # Purchase date in the 1980s/1990s, review up to 60 days afterwards.
        purchase_year = rng.randint(first_year, last_year)
        purchase_date = datetime(purchase_year, rng.randint(1, 12), rng.randint(1, 28))
        review_date = purchase_date + timedelta(days=rng.randint(0, 60))

        # A base price in [19.99, 59.99] jittered by up to +/-10, so the final
        # price lies roughly between 9.99 and 69.99.
        base_price = rng.uniform(19.99, 59.99)
        country_code = rng.choice(country_codes)

        # One genre per record, shared by the column and the description.
        genre = rng.choice(genres)

        data.append({
            "purchase_id": i + 1,
            "game_title": game_title,
            "genre": genre,
            "platform": rng.choice(platforms),
            # A game cannot be bought before the year it was released.
            "release_year": rng.randint(first_year, purchase_year),
            "developer_name": rng.choice(developers),
            "publisher_name": rng.choice(publishers),
            "game_description": f"A thrilling {genre} game where you {rng.choice(actions)}.",
            "purchase_date": purchase_date.strftime('%Y-%m-%d'),
            "price_at_purchase": round(rng.uniform(base_price - 10, base_price + 10), 2),
            "user_rating": round(rng.uniform(1.0, 5.0), 1),
            "review_date": review_date.strftime('%Y-%m-%d'),
            "game_keywords": rng.choice(keywords),
            "legacy_score": rng.randint(1, 10),
            "country_code": country_code,
            "continent": country_to_continent[country_code],
            "purchase_device": rng.choice(devices),
            "payment_method": rng.choice(payment_methods),
            "discount_applied": rng.choice([True, False]),
        })

    return pd.DataFrame(data, columns=columns)

# Build a 500,000-row purchase table.
df = generate_dataset(num_records=500000)

# Take the first five rows as a preview; the result is not stored or printed here.
df.head(n=5)

# Write the full table to CSV, leaving out the DataFrame index column.
df.to_csv(path_or_buf='retro_arcade_purchases_dataset.csv', index=False)