In [2]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import json
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

Example of loading details from steamspy and steamcharts by id:

In [None]:
# Example lookup for a single app: raw SteamSpy details plus the time span
# covered by the SteamCharts player-count series.
id = 1086940  # bg3

steamspy_response = requests.get(f"https://steamspy.com/api.php?request=appdetails&appid={id}")
details = json.loads(steamspy_response.text)
print("details:", details)

steamcharts_response = requests.get(f"https://steamcharts.com/app/{id}/chart-data.json")
timestamps = json.loads(steamcharts_response.text)
# Each entry's first element is divided by 1000 before being read as a Unix timestamp.
first_seen = datetime.fromtimestamp(timestamps[0][0]/1000)
last_seen = datetime.fromtimestamp(timestamps[-1][0]/1000)
print(f"{len(timestamps)} timestamps total, from {first_seen}, to {last_seen}")

Find the default maximum number of worker threads used by `ThreadPoolExecutor`:

In [12]:
# The default pool size is only readable through a private attribute, so it is
# taken from a throwaway executor. The `with` block shuts that executor down
# again instead of leaving an unused pool alive in the kernel.
with ThreadPoolExecutor() as executor:
    print(executor._max_workers)

20


Crawl game data from steamspy and steamcharts into CSV files:

In [19]:
def try_parse_date(date):
    """Parse a date string using the first matching known format.

    Formats are tried in order: ``"%d %b, %Y"``, ``"%d %b. %Y"`` and
    ``"%b %Y"``.

    Args:
        date: The date string to parse.

    Returns:
        The parsed ``datetime``, or ``None`` when no format matches.
    """
    known_formats = ("%d %b, %Y", "%d %b. %Y", "%b %Y")
    for pattern in known_formats:
        try:
            parsed = datetime.strptime(date, pattern)
        except ValueError:
            # Not this format; fall through to the next candidate.
            continue
        return parsed
    return None

def load_game(id, timeout=30):
    """Fetch SteamSpy details and SteamCharts monthly player peaks for one app.

    The two sources are queried independently: a failure in one is printed and
    reported as ``None`` for that element without affecting the other.

    Args:
        id: Steam app id; interpolated as-is into both request URLs.
        timeout: Seconds to wait on each HTTP request, so a stalled connection
            cannot block the calling (worker) thread indefinitely.

    Returns:
        A tuple ``(id, details, timestamps)`` where

        * ``details`` is the SteamSpy ``appdetails`` dict with ``owners`` split
          into integer ``min_owners``/``max_owners``, ``genre`` and
          ``languages`` split into lists, ``tags`` reduced to a list of tag
          names, ``price``/``initialprice``/``discount`` converted to ``int``
          (NaN when the value is ``None``) and the ``score_rank``,
          ``userscore`` and ``owners`` keys removed; ``None`` on any error.
        * ``timestamps`` maps the first day of each month (naive datetime in
          the machine's local time zone) to the maximum value recorded in that
          month, as ``float``; ``None`` on any error.
    """
    try:
        response = requests.get(f"https://steamspy.com/api.php?request=appdetails&appid={id}", timeout=timeout)
        details = json.loads(response.text)
        # 'owners' looks like '10,000,000 .. 20,000,000': strip separators, split the range.
        owners = details['owners'].replace(',', '').split(' .. ')
        details['min_owners'] = int(owners[0])
        details['max_owners'] = int(owners[1])
        details['genre'] = details['genre'].split(', ')
        details['languages'] = details['languages'].split(', ')
        # Only the tag names are kept; an empty 'tags' value becomes an empty list.
        details['tags'] = list(details['tags'].keys()) if len(details['tags']) > 0 else []
        for key in ('price', 'initialprice', 'discount'):
            # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
            details[key] = int(details[key]) if details[key] is not None else np.nan
        for key in ('score_rank', 'userscore', 'owners'):
            details.pop(key, None)
        # Disabled: optional enrichment from the Steam store API.
        # try:
        #     info = json.loads(requests.get(f"https://store.steampowered.com/api/appdetails?appids={id}").text)[str(id)]['data']
        #     details['platforms'] = info['platforms'].keys() if 'platforms' in info else None
        #     details['metacritic_score'] = info['metacritic']['score'] if 'metacritic' in info else None
        #     details['recommendations'] = info['recommendations']['total'] if 'recommendations' in info else None
        #     details['relese_date'] = try_parse_date(info['release_date']['date']) if 'release_date' in info else None
        #     details['required_age'] = info['required_age'] if 'required_age' in info else None
        #     details['controller_support'] = info['controller_support'] if 'controller_support' in info else None
        # except Exception as e:
        #     print(f"(steampowered), id: {id}", e)

    except Exception as e:
        details = None
        print(f"(steamspy), id: {id}", e)
    try:
        response = requests.get(f"https://steamcharts.com/app/{id}/chart-data.json", timeout=timeout)
        samples = json.loads(response.text)
        # Bucket every sample under the first day of its month.
        monthly_samples = defaultdict(list)
        for pair in samples:
            date = datetime.fromtimestamp(pair[0] / 1000)
            monthly_samples[datetime(date.year, date.month, 1)].append(pair[1])
        # Keep the peak of each month.
        timestamps = {month: float(np.max(values)) for month, values in monthly_samples.items()}
    except Exception as e:
        timestamps = None
        print(f"(steamcharts), id: {id}", e)
    return id, details, timestamps

# Smoke test: fetch one app and show the resulting (id, details, timestamps) tuple.
sample_result = load_game(4000)
print(sample_result)

(4000, {'appid': 4000, 'name': "Garry's Mod", 'developer': 'Facepunch Studios', 'publisher': 'Valve', 'positive': 988692, 'negative': 33403, 'average_forever': 11183, 'average_2weeks': 362, 'median_forever': 1406, 'median_2weeks': 106, 'price': 999, 'initialprice': 999, 'discount': 0, 'ccu': 23545, 'languages': ['English', 'French', 'Italian', 'German', 'Spanish - Spain', 'Bulgarian', 'Czech', 'Danish', 'Dutch', 'Finnish', 'Greek', 'Hungarian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese - Portugal', 'Portuguese - Brazil', 'Russian', 'Simplified Chinese', 'Swedish', 'Thai', 'Traditional Chinese', 'Turkish', 'Ukrainian'], 'genre': ['Casual', 'Indie', 'Simulation'], 'tags': ['Sandbox', 'Physics', 'Moddable', 'Multiplayer', 'Building', 'Casual', 'Singleplayer', 'Co-op', 'Online Co-Op', 'Simulation', 'First-Person', 'FPS', 'Action', 'Shooter', 'Comedy', 'Funny', 'Indie', 'PvP', 'Realistic', 'Exploration'], 'min_owners': 20000000, 'max_owners': 50000000}, {datetime.datetime(201

In [20]:
def load(page=0, overwrite=True, max_workers=10, details_name="details.csv", timestamps_name="timestamps.csv", page_timeout=60):
    """Crawl SteamSpy's paginated app list and collect per-app data into two frames.

    For every page of ``request=all``, each listed app is fetched with
    ``load_game`` on a thread pool. Both frames are written to CSV at the start
    of every page, so an interrupted crawl keeps all fully processed pages.

    Args:
        page: Index of the first SteamSpy page to load.
        overwrite: When true, start from empty frames (the CSV files are
            overwritten); otherwise continue from the existing CSV files.
        max_workers: Number of worker threads used per page.
        details_name: Path of the details CSV.
        timestamps_name: Path of the timestamps CSV.
        page_timeout: Seconds to wait for each page-list request.

    Returns:
        ``(details_df, timestamps_df)``, both indexed by app id. The function
        only returns once a page request yields a non-200 status or a page
        without any app ids.

    Raises:
        RuntimeError: If ``overwrite`` is true and the reference app used to
            derive the details columns cannot be fetched.
    """
    if overwrite:
        # Probe one known app (id 70) to discover the columns of the details frame.
        probe = load_game(70)[1]  # https://steamspy.com/api.php
        if probe is None:
            raise RuntimeError("Could not fetch reference app 70 to derive the details columns")
        details_df = pd.DataFrame(columns=probe.keys())
        timestamps_df = pd.DataFrame(columns=pd.date_range(start=datetime(2012, 7, 1), end=datetime.now(), freq='MS'), dtype=int)  # https://steamcharts.com/about
    else:
        details_df = pd.read_csv(details_name, index_col=0)
        timestamps_df = pd.read_csv(timestamps_name, index_col=0)
        # A CSV round-trip turns the month columns into strings and the app ids
        # into integers. Restore the types used while crawling (datetime
        # columns, string ids from the JSON keys) so new rows align with the
        # existing ones instead of producing all-NaN rows and duplicate ids.
        details_df.index = details_df.index.astype(str)
        timestamps_df.index = timestamps_df.index.astype(str)
        timestamps_df.columns = pd.to_datetime(timestamps_df.columns)
    print(details_df.shape, timestamps_df.shape)

    while True:
        # Checkpoint before each page.
        details_df.to_csv(details_name)
        timestamps_df.to_csv(timestamps_name)
        print("Starting to load page:", page)
        request = requests.get(f"https://steamspy.com/api.php?request=all&page={page}", timeout=page_timeout)
        if request.status_code != 200:
            print("Exiting on page:", page)
            return details_df, timestamps_df
        try:
            app_ids = list(json.loads(request.text))
        except ValueError:
            app_ids = []
        if not app_ids:
            # Defensive: treat an empty or unparseable page as the end of the
            # list rather than looping forever or crashing mid-crawl.
            print("Exiting on page:", page)
            return details_df, timestamps_df

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(load_game, app_id) for app_id in app_ids]
            # Results are consumed on the calling thread, so the frames are
            # never mutated concurrently.
            for future in as_completed(futures):
                try:
                    app_id, details, timestamps = future.result()
                    if details is not None:
                        details_df.loc[app_id] = details
                    if timestamps is not None:
                        timestamps_df.loc[app_id] = timestamps
                except Exception as e:
                    print("(processing)",future, e)

        page += 1
        

# Run the crawl into dedicated test files, then report dtypes and contents.
details_df, timestamps_df = load(
    max_workers=20,
    details_name="test_details.csv",
    timestamps_name="test_timestamps.csv",
)

divider = "---------------------------------------------------------------"
print(details_df.dtypes, timestamps_df.dtypes, sep="\n")
for frame in (details_df, timestamps_df):
    print(divider)
    print(frame)

(0, 19) (0, 138)
Starting to load page: 0
(steamcharts), id: 901583 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 774171 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 900883 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 56437 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 774861 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 774361 Expecting value: line 1 column 1 (char 0)
Starting to load page: 1
(steamcharts), id: 33229 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 774241 Expecting value: line 1 column 1 (char 0)
(steamcharts), id: 773951 Expecting value: line 1 column 1 (char 0)


KeyboardInterrupt: 

In [None]:
# from steam import Steam
# from os import environ
# from dotenv import load_dotenv

# load_dotenv()
# KEY = environ["STEAM_API_KEY"]
# steam = Steam(KEY)

# print(steam.apps.get_app_details(70))
# print(steam.apps.search_games("baldurs gate 3"))

In [14]:
# id = 70
# r = requests.get(f"https://steamspy.com/app/{id}").text

(steampowered), id: 70 'dict_values' object is not subscriptable
(70, {'appid': 70, 'name': 'Half-Life', 'developer': 'Valve', 'publisher': 'Valve', 'score_rank': '', 'positive': 110243, 'negative': 3856, 'userscore': 0, 'owners': '10,000,000 .. 20,000,000', 'average_forever': 564, 'average_2weeks': 105, 'median_forever': 106, 'median_2weeks': 62, 'price': 999, 'initialprice': 999, 'discount': 0, 'ccu': 3847, 'languages': ['English', 'French', 'German', 'Italian', 'Spanish - Spain', 'Simplified Chinese', 'Traditional Chinese', 'Korean'], 'genre': ['Action'], 'tags': ['FPS', 'Classic', 'Sci-fi', "1990's", 'Multiplayer', 'Singleplayer', 'Action', 'Shooter', 'First-Person', 'Aliens', 'Silent Protagonist', 'Story Rich', 'Atmospheric', 'Moddable', 'Adventure', 'Gore', 'Retro', 'Action-Adventure', 'Difficult', 'PvP'], 'min_owners': 10000000, 'max_owners': 20000000}, {datetime.datetime(2012, 7, 1, 0, 0): 674.0, datetime.datetime(2012, 8, 1, 0, 0): 419.0, datetime.datetime(2012, 9, 1, 0, 0): 4