# 3. Extract Games Data<a id='extract'></a>

Lastly, it is time to start extracting the necessary data for our project.

## 3.2. Extract Data<a id='final_extract'></a>

Before we start extracting the data, we expected this to be a long process, so we wanted to guard against anything that might go wrong along the way. To do so, we decided to create a safety net that lets us continue where the computer left off if the algorithm goes down. Now we are ready to start extracting data.

In [1]:
import polars as pl
from helper_functions import save_data_to_json
import time
import requests

ModuleNotFoundError: No module named 'utils'

In [None]:
def get_nested(dictionary, keys, default=None):
    """Safely retrieve a nested value from a dictionary given a list of keys.

    Walks ``dictionary`` one key at a time. The walk stops and ``default`` is
    returned as soon as a level is missing, is ``None``, or cannot be looked
    into at all (for example a string or a list where a dict was expected).

    Args:
        dictionary: The outermost mapping to search; may be ``None``.
        keys: Iterable of keys, ordered from the outermost level inwards.
        default: Value returned when the path cannot be followed to the end.

    Returns:
        The value found at the end of the path, or ``default``. With an empty
        ``keys`` the input itself is returned unchanged.
    """
    current = dictionary
    for key in keys:
        # Objects without a ``.get`` (None, str, list, int, ...) cannot be
        # descended into, so treat them like a missing level instead of
        # letting an AttributeError escape.
        lookup = getattr(current, "get", None)
        if lookup is None:
            return default
        current = lookup(key)
        if current is None:
            return default
    return current

In [None]:
def format_game_data(game_info, game_name, app_id):
    """Build a flat record of the fields we keep for one game.

    Top-level fields are read with ``dict.get`` and a fallback value; fields
    that live one or more levels down are read through ``get_nested`` so a
    missing level yields the fallback instead of raising.

    Args:
        game_info: Mapping whose ``'data'`` entry holds the game's details
            (presumably one app entry of the store API response -- the exact
            schema is not verified here).
        game_name: Name stored under ``"name"`` in the result.
        app_id: Identifier stored under ``"appid"`` in the result.

    Returns:
        A dict with the keys ``name``, ``appid``, ``required_age``,
        ``is_free``, ``detailed_description``, ``supported_languages``,
        ``developers``, ``publishers``, ``price``, ``platforms``,
        ``metacritic_score``, ``categories``, ``genres``, ``release_date``,
        ``content_descriptors``, ``usk_rating`` and ``number_of_reviews``.
    """
    data = game_info.get('data', {})
    is_free = data.get("is_free", False)

    # The languages arrive as one comma-separated string that may carry
    # markup after a name (everything from the first '<' is dropped).
    # Strip the whitespace left behind by ", " separators and skip empty
    # entries, so a missing field gives [] rather than [''].
    supported_languages = []
    for raw_language in data.get("supported_languages", "").split(','):
        language = raw_language.split('<')[0].strip()
        if language:
            supported_languages.append(language)

    return {
        "name": game_name,
        "appid": app_id,
        "required_age": data.get("required_age", 0),
        "is_free": is_free,
        "detailed_description": data.get("detailed_description", ""),
        "supported_languages": supported_languages,
        "developers": data.get("developers", []),
        "publishers": data.get("publishers", []),
        # "final" is divided by 100 (presumably it is given in minor currency
        # units -- confirm); free games are priced at 0 without a lookup.
        "price": 0 if is_free else get_nested(data, ["price_overview", "final"], 0) / 100,
        # Keep only the platform names whose flag is truthy.
        "platforms": [key for key, value in data.get("platforms", {}).items() if value],
        "metacritic_score": get_nested(data, ["metacritic", "score"], None),
        "categories": [category["description"] for category in data.get("categories", [])],
        "genres": [genre["description"] for genre in data.get("genres", [])],
        "release_date": get_nested(data, ["release_date", "date"], ""),
        "content_descriptors": data.get("content_descriptors", {}).get("notes", ""),
        "usk_rating": get_nested(data, ["ratings", "usk", "rating"], None),
        "number_of_reviews": get_nested(data, ["recommendations", "total"], 0),
    }

In [None]:
def fetch_game_details(game_list, file_path):
    """Fetch store details for every game in ``game_list`` and save them as JSON.

    Games are popped off the front of ``game_list`` one at a time, so the
    caller's list is consumed in place and always holds the games that have
    not been handled yet. If a request fails, the failed game is put back at
    the front of the list, processing stops, and the details gathered so far
    are still written to ``file_path`` -- nothing already fetched is lost and
    the remaining games can be retried with the same list.

    Args:
        game_list: Mutable list of dicts, each with an ``'appid'`` and a
            ``'name'`` entry. It is emptied as games are processed.
        file_path: Destination handed to ``save_data_to_json``.

    Returns:
        None. The collected details are passed to ``save_data_to_json``.
    """
    base_url = "https://store.steampowered.com/api/appdetails"
    games_details = []  # List to store all processed games details
    processed_count = 0

    while game_list:  # Process until the list is empty
        game = game_list.pop(0)  # Remove and return the first game from the list
        app_id = game['appid']
        game_name = game["name"]

        # Query parameters for the API request
        params = {'appids': app_id}

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON on requests
            # versions where that error is not a RequestException.
            print(f"Failed to fetch data for {game_name}: {str(e)}")
            # Put the game back so the caller's list still contains it, then
            # stop and fall through to saving what was collected so far.
            game_list.insert(0, game)
            break

        # Tolerate a body that is not a dict or lacks this app id / 'success'.
        game_info = data.get(str(app_id)) if isinstance(data, dict) else None
        if isinstance(game_info, dict) and game_info.get('success'):
            games_details.append(format_game_data(game_info, game_name, app_id))
            processed_count += 1

            if processed_count % 10 == 0:
                print(f"Processed {processed_count} games so far.")

        # Pause between requests.
        time.sleep(1)

    save_data_to_json(games_details, file_path)

## 3.1. Select Games<a id='select_data'></a>  

In [None]:
# Load the top-sellers list and turn it into a list of dicts, one per game;
# fetch_game_details reads the 'appid' and 'name' entries of each dict.
topseller = pl.read_json('data/jsons/SteamTopSellers.json').to_dicts()
# Output location for the details collected for these games.
file_path = "data/game_details/SteamTopSellers_game_details.json"

# NOTE: fetch_game_details pops games off `topseller` as it goes, so the list
# is consumed by this call.
fetch_game_details(topseller, file_path)

In [None]:
# Load the most-played list and turn it into a list of dicts, one per game;
# fetch_game_details reads the 'appid' and 'name' entries of each dict.
mostplayed = pl.read_json('data/jsons/SteamMostPlayed.json').to_dicts()
# Output location for the details collected for these games (rebinds the
# `file_path` name used by the previous cell).
file_path = "data/game_details/SteamMostPlayed_game_details.json"

# NOTE: fetch_game_details pops games off `mostplayed` as it goes, so the list
# is consumed by this call.
fetch_game_details(mostplayed, file_path)