In [2]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests

# display tweak: allow wide tables to render up to 100 columns before truncating
pd.options.display.max_columns = 100

In [18]:
def get_request(url, parameters=None):
    """Return json-formatted response of a get request using optional parameters.

    The request is retried until it succeeds: after an SSL error the function
    counts down 30 seconds, and after an unsuccessful (falsy) response it waits
    50 seconds, before issuing the request again. There is no retry limit.

    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request

    Returns
    -------
    json_data
        json-formatted response (dict-like)
    """
    ssl_wait = 30          # seconds to count down after an SSL error
    no_response_wait = 50  # seconds to wait after an unsuccessful response

    # Retry in a loop rather than by recursion so that a long outage cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            response = requests.get(url=url, params=parameters)
        except requests.exceptions.SSLError as s:
            # SSLError is referenced through the already-imported `requests`
            # package; a bare `SSLError` name is not defined in this notebook.
            print('SSL Error:', s)

            for i in range(ssl_wait, 0, -1):
                print('\rWaiting... ({})'.format(i), end='')
                time.sleep(1)
            print('\rRetrying.' + ' '*10)
            continue

        # A requests Response is truthy only when its status code signals
        # success, so this also filters HTTP error replies (e.g. rate limiting).
        if response:
            return response.json()

        # unsuccessful response usually means too many requests: wait and retry
        print('No response, waiting {} seconds...'.format(no_response_wait))
        time.sleep(no_response_wait)
        print('Retrying.')

In [19]:
# SteamSpy endpoint and query asking for the full app listing
url = "https://steamspy.com/api.php"
parameters = dict(request="all")

# fetch the listing and turn the {appid: record} mapping into one row per app
json_data = get_request(url, parameters=parameters)
steam_spy_all = pd.DataFrame.from_dict(json_data, orient="index")

# keep only the identifying columns, ordered by appid with a fresh 0..n-1 index
app_list = (
    steam_spy_all
    .loc[:, ["appid", "name"]]
    .sort_values(by="appid")
    .reset_index(drop=True)
)

# export disabled to keep consistency across download sessions
# app_list.to_csv('../data/download/app_list.csv', index=False)

# alternative: read the previously stored list instead of downloading it
# app_list = pd.read_csv('../data/download/app_list.csv')

# display the app list (pandas truncates long frames to the first and last rows)
app_list

Unnamed: 0,appid,name
0,33760,The Search for Amelia Earhart
1,34275,Gain Ground
2,34277,Shinobi III: Return of the Ninja Master
3,34317,Columns III
4,36900,Angle of Attack
...,...,...
995,2196100,Barren Depths
996,2196150,Castle Defense Battles
997,2196370,Cherry Lady
998,2196440,Gray


In [6]:
def get_app_data(start, stop, parser, pause, app_list=None):
    """Return list of app data generated from parser.

    start : position of the first row to process
    stop : position one past the last row to process
    parser : function to handle request, called as parser(appid, name)
    pause : seconds to sleep after each parser call
    app_list : dataframe with 'appid' and 'name' columns. When omitted, the
        notebook-level ``app_list`` variable is used, which keeps the original
        four-argument call working.
    """
    if app_list is None:
        # Backward compatibility: fall back to the notebook-global app list.
        try:
            app_list = globals()['app_list']
        except KeyError:
            raise NameError("name 'app_list' is not defined") from None

    app_data = []

    # iterate through each row of app_list, confined by start and stop;
    # .iloc makes the slice explicitly positional whatever the index type
    for index, row in app_list.iloc[start:stop].iterrows():
        print('Current index: {}'.format(index), end='\r')

        # retrieve app data for a row, handled by supplied parser, and append to list
        data = parser(row['appid'], row['name'])
        app_data.append(data)

        time.sleep(pause)  # prevent overloading api with requests

    return app_data


def process_batches(parser, app_list, download_path, data_filename, index_filename,
                    columns, begin=0, end=-1, batchsize=100, pause=1):
    """Process app data in batches, writing directly to file.

    After each batch the rows are appended to the data file and the position
    of the next unprocessed row is written to the index file, so an
    interrupted download can be resumed from that position.

    parser : custom function to format request, called as parser(appid, name)
    app_list : dataframe of appid and name
    download_path : path to store data
    data_filename : filename to save app data
    index_filename : filename to store highest index written
    columns : column names for file

    Keyword arguments:

    begin : starting index (get from index_filename, default 0)
    end : index to finish, exclusive (defaults to end of app_list; values
        beyond the end of app_list are clamped to it)
    batchsize : number of apps to write in each batch (default 100)
    pause : time to wait after each api request (default 1)

    returns: none
    """
    print('Starting at index {}:\n'.format(begin))

    # by default, process all apps in app_list; never run past its last row so
    # the stored resume index always points at a real (or the next new) row
    total_apps = len(app_list)
    if end == -1 or end > total_apps:
        end = total_apps

    # output locations do not change between batches
    data_path = os.path.join(download_path, data_filename)
    idx_path = os.path.join(download_path, index_filename)

    # first row of every batch; the final batch may be shorter than batchsize
    batch_starts = range(begin, end, batchsize)
    n_batches = len(batch_starts)

    apps_written = 0
    batch_times = []

    for i, start in enumerate(batch_starts):
        start_time = time.time()

        stop = min(start + batchsize, end)

        # download from the app_list that was passed in, not a notebook global
        app_data = get_app_data(start, stop, parser, pause, app_list=app_list)

        # writing app data to file
        with open(data_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')

            # short countdown warning the user not to interrupt during the write
            for j in range(3, 0, -1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end='')
                time.sleep(0.5)

            writer.writerows(app_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end=' ')

        apps_written += len(app_data)

        # writing last index to file (position of the next row to process)
        with open(idx_path, 'w') as f:
            print(stop, file=f)

        # logging time taken
        time_taken = time.time() - start_time

        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)

        # batches still to run after this one, priced at the running average
        est_remaining = (n_batches - i - 1) * mean_time

        remaining_td = dt.timedelta(seconds=round(est_remaining))
        time_td = dt.timedelta(seconds=round(time_taken))
        mean_td = dt.timedelta(seconds=round(mean_time))

        print('Batch {} time: {} (avg: {}, remaining: {})'.format(i, time_td, mean_td, remaining_td))

    print('\nProcessing batches complete. {} apps written'.format(apps_written))

In [7]:
def reset_index(download_path, index_filename):
    """Reset index in file to 0.

    The index file is created (or overwritten) so that it contains the single
    line ``0``. The download directory is created first if it does not exist,
    so the call also works on a fresh checkout.

    download_path : directory holding the index file
    index_filename : name of the index file inside download_path
    """
    # an empty download_path means "current directory": nothing to create
    if download_path:
        os.makedirs(download_path, exist_ok=True)

    rel_path = os.path.join(download_path, index_filename)

    with open(rel_path, 'w') as f:
        print(0, file=f)
        

def get_index(download_path, index_filename):
    """Read the stored index from file; a missing file counts as index 0.

    The first line of the file is parsed as an integer, so a file whose first
    line is not a valid integer raises ValueError.
    """
    index_path = os.path.join(download_path, index_filename)

    try:
        handle = open(index_path, 'r')
    except FileNotFoundError:
        # nothing downloaded yet
        return 0

    with handle:
        return int(handle.readline())


def prepare_data_file(download_path, filename, index, columns):
    """Create file and write headers if index is 0.

    When index is 0 any existing data file is overwritten with a file that
    holds only the CSV header row; for any other index nothing is touched.

    download_path : directory holding the data file (created if missing)
    filename : name of the data file inside download_path
    index : resume index; only 0 triggers (re)creation of the file
    columns : column names written as the header row
    """
    if index == 0:
        # an empty download_path means "current directory": nothing to create
        if download_path:
            os.makedirs(download_path, exist_ok=True)

        rel_path = os.path.join(download_path, filename)

        # utf-8 to match the encoding used when rows are appended later
        with open(rel_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()

In [20]:
def parse_steam_request(appid, name):
    """Unique parser to handle data from Steam Store API.

    Requests the app details for ``appid`` and returns the ``data`` section of
    the reply. If the reply reports no success, or does not have the expected
    structure (not a mapping, no entry for the appid, no ``data`` mapping), a
    placeholder holding only the supplied name and appid is returned, so that
    one bad reply cannot abort a whole batch.

    appid : id of the app to look up
    name : app name, used only for the placeholder row

    Returns : json formatted data (dict-like)
    """
    url = "http://store.steampowered.com/api/appdetails/"
    parameters = {"appids": appid}

    json_data = get_request(url, parameters=parameters)

    # minimal row recorded when no usable details are available
    placeholder = {'name': name, 'steam_appid': appid}

    if not isinstance(json_data, dict):
        return placeholder

    # the reply is keyed by the appid as a string
    json_app_data = json_data.get(str(appid))
    if not isinstance(json_app_data, dict) or not json_app_data.get('success'):
        return placeholder

    data = json_app_data.get('data')
    if not isinstance(data, dict):
        return placeholder

    return data


# Set file parameters: directory for downloads, data file name and the name of
# the file that stores the resume index
download_path = 'data'
steam_app_data = 'steam_app_data.csv'
steam_index = 'steam_index.txt'

# Columns written to the data file; process_batches writes rows with
# extrasaction='ignore', so keys not listed here are dropped
steam_columns = [
    'type', 'name', 'steam_appid', 'required_age', 'is_free', 'controller_support',
    'dlc', 'detailed_description', 'about_the_game', 'short_description', 'fullgame',
    'supported_languages', 'header_image', 'website', 'pc_requirements', 'mac_requirements',
    'linux_requirements', 'legal_notice', 'drm_notice', 'ext_user_account_notice',
    'developers', 'publishers', 'demos', 'price_overview', 'packages', 'package_groups',
    'platforms', 'metacritic', 'reviews', 'categories', 'genres', 'screenshots',
    'movies', 'recommendations', 'achievements', 'release_date', 'support_info',
    'background', 'content_descriptors'
]

# Overwrites last index for demonstration (would usually keep the stored index so the
# download can continue across sessions). Because the index is reset to 0, the data
# file is wiped and re-created below.
reset_index(download_path, steam_index)

# Retrieve last index downloaded from file
index = get_index(download_path, steam_index)

# Wipe or create data file and write headers if index is 0
prepare_data_file(download_path, steam_app_data, index, steam_columns)

# Set end and batchsize for demonstration - remove both to run through the entire app list
process_batches(
    parser=parse_steam_request,
    app_list=app_list,
    download_path=download_path,
    data_filename=steam_app_data,
    index_filename=steam_index,
    columns=steam_columns,
    begin=index,
    end=30,
    batchsize=5
)

Starting at index 0:

Exported lines 0-4 to steam_app_data.csv. Batch 0 time: 0:00:30 (avg: 0:00:30, remaining: 0:02:29)
Exported lines 5-9 to steam_app_data.csv. Batch 1 time: 0:00:15 (avg: 0:00:22, remaining: 0:01:29)
Exported lines 10-14 to steam_app_data.csv. Batch 2 time: 0:00:31 (avg: 0:00:25, remaining: 0:01:16)
Exported lines 15-19 to steam_app_data.csv. Batch 3 time: 0:00:20 (avg: 0:00:24, remaining: 0:00:48)
Exported lines 20-24 to steam_app_data.csv. Batch 4 time: 0:00:26 (avg: 0:00:24, remaining: 0:00:24)
Exported lines 25-29 to steam_app_data.csv. Batch 5 time: 0:00:17 (avg: 0:00:23, remaining: 0:00:00)

Processing batches complete. 30 apps written


In [22]:
# inspect downloaded data; the path is built from the download settings so it
# always points at the file written by process_batches
pd.read_csv(os.path.join(download_path, steam_app_data))

Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,detailed_description,about_the_game,short_description,fullgame,supported_languages,header_image,website,pc_requirements,mac_requirements,linux_requirements,legal_notice,drm_notice,ext_user_account_notice,developers,publishers,demos,price_overview,packages,package_groups,platforms,metacritic,reviews,categories,genres,screenshots,movies,recommendations,achievements,release_date,support_info,background,content_descriptors
0,game,The Search for Amelia Earhart,33760,0,False,,,Follow Amelia Earhart’s life through space and...,Follow Amelia Earhart’s life through space and...,Follow Amelia Earhart’s life through space and...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,"{'minimum': '<ul class=""bb_ul""><li><strong>OS:...",[],[],,,,['Bamtang'],['Cosmi Valusoft'],,"{'currency': 'USD', 'initial': 699, 'final': 6...",[3009],"[{'name': 'default', 'title': 'Buy The Search ...","{'windows': True, 'mac': False, 'linux': False}",,“The Search for Amelia Earhart offers a fun wa...,"[{'id': 2, 'description': 'Single-player'}]","[{'id': '4', 'description': 'Casual'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '22 Feb, 2010'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
1,game,Gain Ground™,34275,0,False,,,The battle-simulation game has gone haywire!<b...,The battle-simulation game has gone haywire!<b...,The battle-simulation game has gone haywire! A...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.sega.com,{'minimum': '<strong>Minimum:</strong><br>\t\t...,{'recommended': 'OS: OS Sierra Version 10.12.6...,{'recommended': 'OS: Ubuntu 16.10 or higher<br...,"Europe: © SEGA. SEGA, the SEGA logo and GAIN G...",,,['SEGA'],['SEGA'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[4395],"[{'name': 'default', 'title': 'Buy Gain Ground...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '2', 'description': 'Strategy'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...","[{'id': 256663650, 'name': 'SMDC PEGI', 'thumb...",,,"{'coming_soon': False, 'date': '1 Jun, 2010'}","{'url': 'https://support.sega.co.uk', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
2,game,Shinobi™ III: Return of the Ninja Master,34277,0,False,,,"Joe Musashi’s sworn enemy, the Neo Zeed are se...","Joe Musashi’s sworn enemy, the Neo Zeed are se...","Joe Musashi’s sworn enemy, the Neo Zeed are se...",,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.sega.com,{'minimum': '<strong>Minimum:</strong><br>\t\t...,{'recommended': 'OS: OS Sierra Version 10.12.6...,{'recommended': 'OS: Ubuntu 16.10 or higher<br...,"Europe: © SEGA. SEGA, the SEGA logo and SHINOB...",,,['SEGA'],['SEGA'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[4397],"[{'name': 'default', 'title': 'Buy Shinobi™ II...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...","[{'id': 256663652, 'name': 'SMDC ESRB', 'thumb...",,,"{'coming_soon': False, 'date': '1 Jun, 2010'}","{'url': 'https://support.sega.co.uk', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
3,game,Columns™ III,34317,0,False,,,Arrange the blocks of jewels falling from the ...,Arrange the blocks of jewels falling from the ...,Arrange the blocks of jewels falling from the ...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.sega.com,{'minimum': '<strong>Minimum:</strong><br>\t\t...,{'recommended': 'OS: OS Sierra Version 10.12.6...,{'recommended': 'OS: Ubuntu 16.10 or higher<br...,"Europe: © SEGA. SEGA, the SEGA logo and COLUMN...",,,['SEGA'],['SEGA'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[6489],"[{'name': 'default', 'title': 'Buy Columns™ II...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '4', 'description': 'Casual'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '26 Oct, 2010'}","{'url': 'https://support.sega.co.uk', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
4,game,Angle of Attack,36900,0,False,,,The Terrans have been at war with the Gammulan...,The Terrans have been at war with the Gammulan...,Angle Of Attack is the aerial combat action co...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://3000ad.com/games/angle-of-attack/,{'minimum': '<strong>Minimum:</strong><br>\t\t...,[],[],"Angle Of Attack © 2009, 3000AD, Inc",,,['3000AD'],['3000AD'],"[{'appid': 36930, 'description': ''}]","{'currency': 'USD', 'initial': 999, 'final': 9...",[1939],"[{'name': 'default', 'title': 'Buy Angle of At...","{'windows': True, 'mac': False, 'linux': False}",,&quot;Those who enjoy flight simulations but n...,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}, {'id': ...","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...","[{'id': 2031371, 'name': 'Air Combat', 'thumbn...",,,"{'coming_soon': False, 'date': '17 Aug, 2009'}","{'url': 'http://3000ad.com/support/', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
5,game,All Aspect Warfare,36910,0,False,,,An elite strike team — with a thermo-nuclear w...,An elite strike team — with a thermo-nuclear w...,All Aspect Warfare is a thrilling planetary co...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://3000ad.com/games/all-aspect-warfare/,{'minimum': '<strong>Minimum:</strong><br>\t\t...,[],[],"All Aspect Warfare © 2009, 3000AD, Inc",,,['3000AD'],['3000AD'],"[{'appid': 36920, 'description': ''}]","{'currency': 'USD', 'initial': 999, 'final': 9...",[1938],"[{'name': 'default', 'title': 'Buy All Aspect ...","{'windows': True, 'mac': False, 'linux': False}",,&quot;It's a welcome change from shooters-on-r...,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}, {'id': ...","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...","[{'id': 900984, 'name': 'Gameplay Tutorial 1',...",,,"{'coming_soon': False, 'date': '17 Aug, 2009'}","{'url': 'http://3000ad.com/support/', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
6,game,Hamilton's Great Adventure - Retro Fever DLC,42147,0,False,,,This map pack contains new levels that bring b...,This map pack contains new levels that bring b...,This map pack contains new levels that bring b...,,"English, French, Italian, German, Spanish - Sp...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://hamilton.fatshark.se/,{'minimum': '<strong>Minimum:</strong><br><ul ...,[],[],©Fatshark AB. 2011 ALL RIGHTS RESERVED.,,,['Fatshark'],['Fatshark'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[11813],"[{'name': 'default', 'title': ""Buy Hamilton's ...","{'windows': True, 'mac': False, 'linux': False}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '25', 'description': 'Adventure'}, {'i...","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '3 Oct, 2011'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
7,game,Hearts of Iron III: Semper Fi,42900,0,False,,"[42905, 42903, 42902]",It’s time to dust off the field marshal's bato...,It’s time to dust off the field marshal's bato...,It’s time to dust off the field marshal's bato...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.heartsofirongame.com,"{'minimum': '<ul class=""bb_ul""><li><strong>OS:...",[],[],© 2009 Paradox Interactive AB. Hearts of Iron ...,,,['Paradox Development Studio'],['Paradox Interactive'],,"{'currency': 'USD', 'initial': 499, 'final': 4...",[4484],"[{'name': 'default', 'title': 'Buy Hearts of I...","{'windows': True, 'mac': False, 'linux': False}","{'score': 65, 'url': 'https://www.metacritic.c...",,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '2', 'description': 'Strategy'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '7 Jun, 2010'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
8,game,ToeJam & Earl in Panic on Funkotron,71167,0,False,,,<h1>Just Updated</h1><p>All SEGA MegaDrive/Gen...,The peaceful planet of Funkotron has been inva...,The peaceful planet of Funkotron has been inva...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.sega.com,{'minimum': '<strong>Minimum:</strong><br>\t\t...,{'recommended': 'OS: OS Sierra Version 10.12.6...,{'recommended': 'OS: Ubuntu 16.10 or higher<br...,"Europe: © TOE JAM & EARL PRODUCTIONS, INC. © S...",,,['SEGA'],['SEGA'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[7093],"[{'name': 'default', 'title': 'Buy ToeJam & Ea...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...",,"[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '14 Mar, 2012'}","{'url': 'https://support.sega.co.uk', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
9,game,The Revenge of Shinobi,211207,0,False,,,"After being reduced to rubble by Joe Musashi, ...","After being reduced to rubble by Joe Musashi, ...","After being reduced to rubble by Joe Musashi, ...",,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '<strong>Minimum:</strong><br>\t\t...,{'recommended': 'OS: OS Sierra Version 10.12.6...,{'recommended': 'OS: Ubuntu 16.10 or higher<br...,© SEGA. All rights reserved. SEGA is registere...,,,,['SEGA'],,"{'currency': 'USD', 'initial': 99, 'final': 99...",[14453],"[{'name': 'default', 'title': 'Buy The Revenge...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}]",,"[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,,,"{'coming_soon': False, 'date': '2 May, 2012'}","{'url': 'https://support.sega.co.uk', 'email':...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
