In [58]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests

# ensure tables show all columns
# (fully qualified option name: the bare "max_columns" pattern can match more
# than one option in newer pandas releases, which makes set_option raise)
pd.set_option("display.max_columns", 100)

In [59]:
def get_request(url, parameters=None, max_retries=None):
    """Return json-formatted response of a get request using optional parameters.
    
    The request is retried after an SSL error (5 second countdown) and after a
    falsy response, i.e. an HTTP error status (10 second wait). Retries run in
    a loop rather than by recursion, so a long outage cannot exhaust the
    interpreter's recursion limit.
    
    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request
    max_retries : int, optional
        maximum number of retries before giving up; None (default) retries
        indefinitely, which matches the previous behaviour
    
    Returns
    -------
    json_data
        json-formatted response (dict-like)
    
    Raises
    ------
    requests.exceptions.SSLError
        if the final permitted attempt fails with an SSL error
    requests.exceptions.HTTPError
        if the final permitted attempt returns an error status
    """
    retries = 0
    
    while True:
        try:
            response = requests.get(url=url, params=parameters)
        except requests.exceptions.SSLError as s:
            # SSLError is referenced through the requests package because the
            # bare name is never imported in this notebook
            print('SSL Error:', s)
            
            if max_retries is not None and retries >= max_retries:
                raise
            retries += 1
            
            for i in range(5, 0, -1):
                print('\rWaiting... ({})'.format(i), end='')
                time.sleep(1)
            print('\rRetrying.' + ' '*10)
            
            # try again
            continue
        
        if response:
            return response.json()
        
        # a Response is falsy when its status is an HTTP error, so
        # raise_for_status() raises here once the retry budget is spent
        if max_retries is not None and retries >= max_retries:
            response.raise_for_status()
        retries += 1
        
        # a falsy response usually means too many requests. Wait and try again
        print('No response, waiting 10 seconds...')
        time.sleep(10)
        print('Retrying.')

In [60]:
# the query string is supplied through `parameters`, so it is not repeated in the url
url = "https://steamspy.com/api.php"
parameters = {"request": "all"}
app_list_path = 'steam_appid_list.csv'

# Only download and export the app list when no stored copy exists. The batch
# downloads further down resume from a stored row position, so the list must
# stay identical across download sessions.
if not os.path.exists(app_list_path):
    # request 'all' from steam spy and parse into dataframe
    json_data = get_request(url, parameters=parameters)
    steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')

    # generate sorted app_list from steamspy data
    app_list_fresh = steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)
    app_list_fresh.to_csv(app_list_path, index=False)

# always read from stored csv
app_list = pd.read_csv(app_list_path)

# display first few rows
app_list.head()

Unnamed: 0,appid,name
0,1313,SiN: Gold
1,1630,Disciples II: Rise of the Elves
2,1640,Disciples II: Gallean's Return
3,1690,Space Empires V
4,2340,QUAKE II Mission Pack: Ground Zero


In [61]:
def get_app_data(start, stop, parser, pause, apps=None):
    """Return list of app data generated from parser.
    
    start : position of the first row to process
    stop : position one past the last row to process
    parser : function to handle request, called as parser(appid, name)
    pause : time to wait after each parser call
    apps : dataframe with 'appid' and 'name' columns; when omitted, the
        notebook-level app_list is used (previous behaviour)
    
    returns: list with one parser result per processed row
    """
    if apps is None:
        # backward-compatible fallback to the frame defined at notebook level
        apps = app_list
    
    app_data = []
    
    # iterate through each row of apps, confined by start and stop
    for index, row in apps[start:stop].iterrows():
        print('Current index: {}'.format(index), end='\r')
        
        # retrieve app data for a row, handled by supplied parser, and append to list
        data = parser(row['appid'], row['name'])
        app_data.append(data)

        time.sleep(pause) # prevent overloading api with requests
    
    return app_data


def process_batches(parser, app_list, download_path, data_filename, index_filename,
                    columns, begin=0, end=-1, batchsize=100, pause=1):
    """Process app data in batches, writing directly to file.
    
    After every batch the rows are appended to the data file and the position
    of the next unprocessed row is written to the index file, so an interrupted
    run can be resumed by passing that stored value as `begin`.
    
    parser : custom function to format request
    app_list : dataframe of appid and name
    download_path : path to store data
    data_filename : filename to save app data
    index_filename : filename to store highest index written
    columns : column names for file
    
    Keyword arguments:
    
    begin : starting index (get from index_filename, default 0)
    end : index to finish (defaults to end of app_list; larger values are clamped)
    batchsize : number of apps to write in each batch (default 100)
    pause : time to wait after each api request (default 1)
    
    returns: none
    """
    print('Starting at index {}:\n'.format(begin))
    
    # by default, process all apps in app_list; never run past its last row,
    # otherwise the stored index and the logged line numbers overshoot
    total = len(app_list)
    if end == -1 or end > total:
        end = total
    
    # first row of every batch; empty when there is nothing left to process
    batch_starts = range(begin, end, batchsize)
    n_batches = len(batch_starts)
    
    # both paths are loop-invariant
    data_path = os.path.join(download_path, data_filename)
    idx_path = os.path.join(download_path, index_filename)
    
    apps_written = 0
    batch_times = []
    
    for i, start in enumerate(batch_starts):
        start_time = time.time()
        
        stop = min(start + batchsize, end)
        
        # pass the frame explicitly so the argument, not a global, is used
        app_data = get_app_data(start, stop, parser, pause, apps=app_list)
        
        # writing app data to file
        with open(data_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')
            
            for j in range(3,0,-1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end='')
                time.sleep(0.5)
            
            writer.writerows(app_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end=' ')
            
        apps_written += len(app_data)
        
        # writing last index to file
        with open(idx_path, 'w') as f:
            print(stop, file=f)
            
        # logging time taken
        end_time = time.time()
        time_taken = end_time - start_time
        
        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)
        
        est_remaining = (n_batches - i - 1) * mean_time
        
        remaining_td = dt.timedelta(seconds=round(est_remaining))
        time_td = dt.timedelta(seconds=round(time_taken))
        mean_td = dt.timedelta(seconds=round(mean_time))
        
        print('Batch {} time: {} (avg: {}, remaining: {})'.format(i, time_td, mean_td, remaining_td))
            
    print('\nProcessing batches complete. {} apps written'.format(apps_written))

In [62]:
def reset_index(download_path, index_filename):
    """Overwrite the index file so that it holds 0 again."""
    index_file = os.path.join(download_path, index_filename)
    
    # truncate (or create) the file and store a single line with the value 0
    with open(index_file, 'w') as handle:
        handle.write('0\n')
        

def get_index(download_path, index_filename):
    """Read the stored index from file; a missing file counts as index 0.
    
    The first line of the file is parsed as an integer, so a file whose first
    line is not a number raises ValueError.
    """
    index_file = os.path.join(download_path, index_filename)
    
    try:
        with open(index_file, 'r') as handle:
            first_line = handle.readline()
    except FileNotFoundError:
        # nothing stored yet
        return 0
    
    return int(first_line)


def prepare_data_file(download_path, filename, index, columns):
    """Create file and write headers if index is 0.
    
    download_path : directory holding the data file
    filename : name of the data file
    index : stored download index; any value other than 0 leaves the file untouched
    columns : column names written as the header row
    
    An existing file is overwritten when index is 0.
    """
    if index == 0:
        rel_path = os.path.join(download_path, filename)

        # utf-8 matches the encoding used when data rows are appended later, so
        # header and rows never end up in different encodings
        with open(rel_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()

In [54]:
def parse_steam_request(appid, name):
    """Unique parser to handle data from Steam Store API.
    
    appid : id of the app to look up
    name : app name, only used for the fallback record
    
    Returns : json formatted data (dict-like). When the response is empty, has
    no entry for the appid, or reports no success, a minimal record holding
    just the name and appid is returned instead.
    """
    url = "https://store.steampowered.com/api/appdetails/"
    parameters = {"appids": appid}
    
    json_data = get_request(url, parameters=parameters)
    
    # tolerate a null body or a missing appid key instead of raising
    json_app_data = (json_data or {}).get(str(appid)) or {}
    
    if json_app_data.get('success') and 'data' in json_app_data:
        return json_app_data['data']
    
    return {'name': name, 'steam_appid': appid}


# Set file parameters
# (raw string: the backslashes in the Windows path are not escape sequences)
download_path = r'F:\VSCode Final\env'
steam_app_data = 'steam_app_data.csv'
steam_index = 'steam_index.txt'

steam_columns = [
    'type', 'name', 'steam_appid', 'required_age', 'is_free', 'controller_support',
    'dlc', 'detailed_description', 'about_the_game', 'short_description', 'fullgame',
    'supported_languages', 'header_image', 'website', 'pc_requirements', 'mac_requirements',
    'linux_requirements', 'legal_notice', 'drm_notice', 'ext_user_account_notice',
    'developers', 'publishers', 'demos', 'price_overview', 'packages', 'package_groups',
    'platforms', 'metacritic', 'reviews', 'categories', 'genres', 'screenshots',
    'movies', 'recommendations', 'achievements', 'release_date', 'support_info',
    'background', 'content_descriptors'
]

# Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
#KEEP DISABLED
#reset_index(download_path, steam_index)

# Retrieve last index downloaded from file
index = get_index(download_path, steam_index)

# Wipe or create data file and write headers if index is 0
prepare_data_file(download_path, steam_app_data, index, steam_columns)

# batchsize is reduced for demonstration; end is left at its default, so the
# run continues through the entire app list
process_batches(
    parser=parse_steam_request,
    app_list=app_list,
    download_path=download_path,
    data_filename=steam_app_data,
    index_filename=steam_index,
    columns=steam_columns,
    begin=index,
    batchsize=20
)

Starting at index 0:

Exported lines 0-19 to steam_app_data_new4.csv. Batch 0 time: 0:00:31 (avg: 0:00:31, remaining: 0:25:42)
Exported lines 20-39 to steam_app_data_new4.csv. Batch 1 time: 0:00:29 (avg: 0:00:30, remaining: 0:24:27)
Exported lines 40-59 to steam_app_data_new4.csv. Batch 2 time: 0:00:31 (avg: 0:00:30, remaining: 0:24:12)
Exported lines 60-79 to steam_app_data_new4.csv. Batch 3 time: 0:00:29 (avg: 0:00:30, remaining: 0:23:32)
Exported lines 80-99 to steam_app_data_new4.csv. Batch 4 time: 0:00:30 (avg: 0:00:30, remaining: 0:22:58)
Exported lines 100-119 to steam_app_data_new4.csv. Batch 5 time: 0:00:29 (avg: 0:00:30, remaining: 0:22:23)
Exported lines 120-139 to steam_app_data_new4.csv. Batch 6 time: 0:00:30 (avg: 0:00:30, remaining: 0:21:53)
Exported lines 140-159 to steam_app_data_new4.csv. Batch 7 time: 0:00:30 (avg: 0:00:30, remaining: 0:21:23)
Exported lines 160-179 to steam_app_data_new4.csv. Batch 8 time: 0:00:30 (avg: 0:00:30, remaining: 0:20:55)
Exported lines 18

In [55]:
# inspect downloaded data (read from the location the download wrote to)
pd.read_csv(os.path.join(download_path, steam_app_data)).head()

Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,detailed_description,about_the_game,short_description,fullgame,supported_languages,header_image,website,pc_requirements,mac_requirements,linux_requirements,legal_notice,drm_notice,ext_user_account_notice,developers,publishers,demos,price_overview,packages,package_groups,platforms,metacritic,reviews,categories,genres,screenshots,movies,recommendations,achievements,release_date,support_info,background,content_descriptors
0,game,SiN Episodes: Emergence,1300,0,False,,,"You are John Blade, commander of HardCorps, an...","You are John Blade, commander of HardCorps, an...","You are John Blade, commander of HardCorps, an...",,"English, Russian, French",https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,[],[],,,,['Ritual Entertainment'],['Ritual Entertainment'],,"{'currency': 'USD', 'initial': 999, 'final': 9...",[443649],"[{'name': 'default', 'title': 'Buy SiN Episode...","{'windows': True, 'mac': False, 'linux': False}","{'score': 75, 'url': 'https://www.metacritic.c...",,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 445},,"{'coming_soon': False, 'date': 'May 10, 2006'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
1,game,Heretic: Shadow of the Serpent Riders,2390,0,False,,,"<p>In a twisted medieval dimension, undead cre...","<p>In a twisted medieval dimension, undead cre...","In a twisted medieval dimension, undead creatu...",,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '<p><strong>Minimum: </strong>A 10...,[],[],,,,['Raven Software'],['id Software'],,"{'currency': 'USD', 'initial': 499, 'final': 4...","[435, 439]","[{'name': 'default', 'title': 'Buy Heretic: Sh...","{'windows': True, 'mac': False, 'linux': False}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 637},,"{'coming_soon': False, 'date': 'Aug 3, 2007'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
2,game,Bloody Good Time,2450,0,False,,,"<p>Congratulations, you’ve just been cast in y...","<p>Congratulations, you’ve just been cast in y...","Congratulations, you’ve just been cast in your...",,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '<strong>Minimum</strong>\n\t\t\t\...,[],[],© 2010 Ubisoft Entertainment. All Rights Reser...,,,['Outerlight Ltd.'],['Ubisoft'],,"{'currency': 'USD', 'initial': 499, 'final': 4...",[6530],"[{'name': 'default', 'title': 'Buy Bloody Good...","{'windows': True, 'mac': False, 'linux': False}","{'score': 73, 'url': 'https://www.metacritic.c...",,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 790},"{'total': 12, 'highlighted': [{'name': 'First ...","{'coming_soon': False, 'date': 'Oct 29, 2010'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
3,game,RIP - Trilogy™,2540,0,False,,,With the completion of the third title in the ...,With the completion of the third title in the ...,With the completion of the third title in the ...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '<strong>Minimum: </strong>Windows...,[],[],,,,['Elephant Games'],[''],,"{'currency': 'USD', 'initial': 499, 'final': 4...",[346],"[{'name': 'default', 'title': 'Buy RIP - Trilo...","{'windows': True, 'mac': False, 'linux': False}",,<strong>GameTunnel gives RIP 3 a 73%</strong><...,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '4', 'description': 'Casual'}, {'id': ...","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 234},,"{'coming_soon': False, 'date': 'Jun 1, 2007'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
4,game,GUN™,2610,17,False,,,When life robs Colton White of all that matter...,When life robs Colton White of all that matter...,When life robs Colton White of all that matter...,,English,https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '<strong>Minimum</strong> 3D hardw...,[],[],,,,['Neversoft'],['Activision'],,"{'currency': 'USD', 'initial': 1999, 'final': ...",[174],"[{'name': 'default', 'title': 'Buy GUN™', 'des...","{'windows': True, 'mac': False, 'linux': False}","{'score': 76, 'url': 'https://www.metacritic.c...",,"[{'id': 2, 'description': 'Single-player'}]","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 1059},,"{'coming_soon': False, 'date': 'Oct 13, 2006'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"


In [56]:
def parse_steamspy_request(appid, name):
    """Fetch the SteamSpy 'appdetails' record for one app.
    
    appid : id sent as the 'appid' query parameter
    name : not used here; accepted so the signature matches the
        parser(appid, name) call made by the batch downloader
    
    Returns : the response of get_request, unchanged
    """
    return get_request(
        "https://steamspy.com/api.php",
        {"request": "appdetails", "appid": appid},
    )


# set files and columns
# (raw string: the backslashes in the Windows path are not escape sequences)
download_path = r'F:\VSCode Final\env'
steamspy_data = 'steamspy_data.csv'
steamspy_index = 'steamspy_index.txt'

steamspy_columns = [
    'appid', 'name', 'developer', 'publisher', 'score_rank', 'positive',
    'negative', 'userscore', 'owners', 'average_forever', 'average_2weeks',
    'median_forever', 'median_2weeks', 'price', 'initialprice', 'discount',
    'languages', 'genre', 'ccu', 'tags'
]

# Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
#KEEP DISABLED
#reset_index(download_path, steamspy_index)

# Retrieve last index downloaded from file
index = get_index(download_path, steamspy_index)

# Wipe or create data file and write headers if index is 0
prepare_data_file(download_path, steamspy_data, index, steamspy_columns)

# batchsize and pause are reduced for demonstration; end is left at its
# default, so the run continues through the entire app list
process_batches(
    parser=parse_steamspy_request,
    app_list=app_list,
    download_path=download_path, 
    data_filename=steamspy_data,
    index_filename=steamspy_index,
    columns=steamspy_columns,
    begin=index,
    batchsize=20,
    pause=0.3
)

Starting at index 0:

Exported lines 0-19 to steamspy_data_new4.csv. Batch 0 time: 0:00:13 (avg: 0:00:13, remaining: 0:11:05)
Exported lines 20-39 to steamspy_data_new4.csv. Batch 1 time: 0:00:14 (avg: 0:00:14, remaining: 0:11:12)
Exported lines 40-59 to steamspy_data_new4.csv. Batch 2 time: 0:00:16 (avg: 0:00:14, remaining: 0:11:30)
Exported lines 60-79 to steamspy_data_new4.csv. Batch 3 time: 0:00:14 (avg: 0:00:14, remaining: 0:11:10)
Exported lines 80-99 to steamspy_data_new4.csv. Batch 4 time: 0:00:13 (avg: 0:00:14, remaining: 0:10:42)
Exported lines 100-119 to steamspy_data_new4.csv. Batch 5 time: 0:00:14 (avg: 0:00:14, remaining: 0:10:31)
Exported lines 120-139 to steamspy_data_new4.csv. Batch 6 time: 0:00:13 (avg: 0:00:14, remaining: 0:10:11)
Exported lines 140-159 to steamspy_data_new4.csv. Batch 7 time: 0:00:14 (avg: 0:00:14, remaining: 0:09:56)
Exported lines 160-179 to steamspy_data_new4.csv. Batch 8 time: 0:00:13 (avg: 0:00:14, remaining: 0:09:39)
Exported lines 180-199 to 

In [57]:
# inspect downloaded steamspy data (read from the location the download wrote to)
pd.read_csv(os.path.join(download_path, steamspy_data)).head()

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,languages,genre,ccu,tags
0,1300,SiN Episodes: Emergence,Ritual Entertainment,Ritual Entertainment,,672,81,0,"100,000 .. 200,000",2,0,2,0,999,999,0,"English, Russian, French",Action,3,"{'Action': 94, 'FPS': 69, 'Cyberpunk': 48, 'Sc..."
1,2390,Heretic: Shadow of the Serpent Riders,Raven Software,id Software,,646,36,0,"100,000 .. 200,000",978,0,1954,0,499,499,0,English,Action,11,"{'Classic': 67, 'FPS': 64, 'Action': 54, 'Fant..."
2,2450,Bloody Good Time,Outerlight Ltd.,Ubisoft,,711,220,0,"100,000 .. 200,000",431,0,645,0,499,499,0,English,Action,4,"{'Action': 74, 'Multiplayer': 36, 'FPS': 34, '..."
3,2540,RIP - Trilogy,Elephant Games,Dreamatrix,,209,144,0,"100,000 .. 200,000",30,0,34,0,499,499,0,English,"Casual, Indie",0,"{'Indie': 40, 'Casual': 37, 'Arcade': 17, 'Sho..."
4,2610,GUN,Neversoft,Activision,,1061,118,0,"100,000 .. 200,000",85,0,85,0,1999,1999,0,English,Action,8,"{'Western': 125, 'Action': 82, 'Open World': 7..."
