In [1]:
import requests
import polars as pl
import re
import json
import os
import time

In [2]:
def get_steam_gamesIDs():
    url = f"http://api.steampowered.com/ISteamApps/GetAppList/v2/"
    response = requests.get(url)
    data = response.json()
    return data['applist']['apps']  # This is a list of dictionaries with 'appid' and 'name'

In [3]:
games_list = get_steam_gamesIDs()

In [4]:
print(len(games_list))

198470


In [5]:
def clean_app_list(games_list):
    # List of keywords to exclude
    excluded = ['test', 'client', 'server', 'soundtrack', 'demo']
    
    # RegEx pattern for checking European characters
    european_chars_pattern = re.compile(r'^[a-zA-Z0-9 \-\'!@#$%^&*()_+={}[\]|\\:;"<>,.?/~`€£±§]+$')
    
    # Filter the list by removing dictionaries whose 'name' is empty,
    # contains 'test', 'client', 'server', 'soundtrack', or non-European characters.
    filtered_games = [
        app for app in games_list 
        if app['name'] and all(exclude not in app['name'].lower() for exclude in excluded)
        and european_chars_pattern.match(app['name'])
    ]
    return filtered_games

In [6]:
filtered_games = clean_app_list(games_list)

In [7]:
print(len(filtered_games))

150501


In [8]:
filtered_games[-10:]

[{'appid': 2927880, 'name': 'H.E.N.T.A.L.K.E.R.'},
 {'appid': 1569820, 'name': 'Shades Of Rayna'},
 {'appid': 2418490, 'name': 'Trakonius'},
 {'appid': 1313140, 'name': 'Cult of the Lamb'},
 {'appid': 2012260, 'name': 'Blade Runner 2033: Labyrinth'},
 {'appid': 2676440, 'name': 'Glitch Daddy'},
 {'appid': 2471090, 'name': 'Gah!'},
 {'appid': 1954130, 'name': 'Meifumado'},
 {'appid': 490110, 'name': 'The Precinct'},
 {'appid': 2958730, 'name': 'Project Rope Tool'}]

In [10]:
def find_apps_by_term(apps, term):
    # Überprüfe, ob der Begriff im 'name' jedes Dictionaries enthalten ist. Ignoriere dabei Groß- und Kleinschreibung.
    matching_apps = [app for app in apps if term.lower() in app['name'].lower()]
    return matching_apps

In [11]:
search = find_apps_by_term(filtered_games, " ")
search[:5]

[{'appid': 2944660,
  'name': 'Fantasy Grounds - Savage Saturday Cinema: Thunder on the Mountain'},
 {'appid': 2944690, 'name': 'Oh, Flock!'},
 {'appid': 2944780, 'name': 'Souls Chess'},
 {'appid': 2944990, 'name': 'Reality patrol: Prologue'},
 {'appid': 2945110, 'name': 'Skill Random Defense'}]

In [12]:
def find_game_name(games, appid):
    for game in games:
        if game['appid'] == appid:
            return game['name']
    return "Spiel mit dieser AppID nicht gefunden."

In [40]:
appid = 10
find_game_name(filtered_games, appid)

'Spiel mit dieser AppID nicht gefunden.'

In [14]:
def save_progress(game_list, games_details, filename='data\\game_progress.json'):
    """Saves the current processing state including unprocessed games and details of processed games."""
    data_to_save = {
        'remaining_games': game_list,
        'processed_details': games_details
    }
    with open(filename, 'w') as f:
        json.dump(data_to_save, f)

In [15]:
def load_progress(filename='data\\game_progress.json'):
    """Loads the saved processing state if it exists, including both unprocessed and processed games."""
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            return json.load(f)
    return None

In [16]:
def get_nested(dictionary, keys, default=None):
    """Safely retrieves a nested value from a dictionary given a list of keys."""
    for key in keys:
        dictionary = dictionary.get(key) if dictionary is not None else None
        if dictionary is None:
            return default
    return dictionary

In [28]:
def format_game_data(game_info, game_name, app_id):
    """Formats and extracts the necessary fields from the game info data, including the appid, using safe dictionary access for top-level data and handling nested data where applicable."""
    data = game_info.get('data', {})
    return {
        "name": game_name,
        "appid": app_id,
        "required_age": data.get("required_age", 0),
        "is_free": data.get("is_free", False),
        "detailed_description": data.get("detailed_description", ""),
        "supported_languages": [lang.split('<')[0] for lang in data.get("supported_languages", "").split(',')],
        "developers": data.get("developers", []),
        "publishers": data.get("publishers", []),
        "price": 0 if data.get("is_free", False) else get_nested(data, ["price_overview", "final"], 0) / 100,
        "platforms": [key for key, value in data.get("platforms", {}).items() if value],
        "metacritic_score": get_nested(data, ["metacritic", "score"], None),
        "categories": [category["description"] for category in data.get("categories", [])],
        "genres": [genre["description"] for genre in data.get("genres", [])],
        "release_date": get_nested(data, ["release_date", "date"], ""),
        "content_descriptors": data.get("content_descriptors", {}).get("notes", ""),
        "usk_rating": get_nested(data, ["ratings", "usk", "rating"], None),
        "number_of_reviews": get_nested(data, ["recommendations", "total"], 0)  # Use get_nested to safely access nested data
    }


In [47]:
def fetch_recommendations(game_list):
    """Fetches information for each game in the list and updates the game dictionaries with the data."""
    base_url = "https://store.steampowered.com/api/appdetails"
    games_details = []  # List to store all processed games details
    processed_count = 0

    while game_list:  # Process until the list is empty
        game = game_list.pop(0)  # Remove and return the first game from the list
        app_id = game['appid']
        
        # Construct the URL for the API request
        params = {'appids': app_id}

        # Make the API request
        try:
            response = requests.get(base_url, params=params)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            data = response.json()
            game_info = data[str(app_id)]
            game_name = data.get('name')
            if game_info['success']:
                games_details.append(format_game_data(game_info, game_name, app_id))
                processed_count += 1

                if processed_count % 10 == 0:
                    print(f"Processed {processed_count} games so far.")

        except requests.RequestException as e:
            print(f"Failed to fetch data for {game_name}: {str(e)}")
            exit()

        # Save progress periodically to avoid data loss
        if processed_count % 10 == 0:
            save_progress(game_list, games_details)
            time.sleep(5)

    # Save final progress
    save_progress(game_list, games_details)

In [41]:
topseller = pl.read_json('data/SteamTopSellers.json').to_dicts()

In [46]:
# Start of the script execution
saved_data = load_progress()
if saved_data:
    game_list = saved_data['remaining_games']
    processed_games = saved_data['processed_details']
else:
    # Define your initial list of games, replace with actual game list
    game_list = topseller  # Ensure 'filtered_games' is defined elsewhere in your project
    processed_games = []

# Begin fetching additional data
fetch_recommendations(game_list)

Processed 10 games so far.
Processed 20 games so far.
Processed 30 games so far.
Processed 40 games so far.
Processed 50 games so far.
Processed 60 games so far.
Processed 70 games so far.
Processed 80 games so far.
Processed 90 games so far.
Processed 100 games so far.
Processed 110 games so far.
Processed 120 games so far.
Processed 130 games so far.
Processed 140 games so far.
Processed 150 games so far.
Processed 160 games so far.
Processed 170 games so far.
Processed 180 games so far.
Processed 190 games so far.
Processed 200 games so far.
Processed 210 games so far.
Processed 220 games so far.
Failed to fetch data for None: 429 Client Error: Too Many Requests for url: https://store.steampowered.com/api/appdetails?appids=1182900
Failed to fetch data for None: 429 Client Error: Too Many Requests for url: https://store.steampowered.com/api/appdetails?appids=431240
Failed to fetch data for None: 429 Client Error: Too Many Requests for url: https://store.steampowered.com/api/appdetail

KeyboardInterrupt: 