In [11]:
import csv
import datetime as dt
import os
import statistics
import time
from ssl import SSLError

import numpy as np
import pandas as pd
import requests

pd.set_option("display.max_columns", 100)

In [12]:
def get_request(url, parameters = None):
    """Return the JSON-decoded body of a GET request, retrying until it succeeds.

    url : address the GET request is sent to
    parameters : optional dict of query-string parameters

    An SSL failure triggers a 5 second countdown before the next attempt.
    A falsy response (requests evaluates a Response as False when its status
    code signals an error, e.g. while the API is rate limiting) triggers a
    10 second wait. The number of retries is not limited.

    returns: the value produced by response.json()
    """
    # Retry in a loop rather than by recursion, so a long outage cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            response = requests.get(url=url, params=parameters)
        except (SSLError, requests.exceptions.SSLError) as s:
            # requests wraps SSL failures in its own SSLError class, which is
            # not a subclass of ssl.SSLError, so both have to be caught.
            print("SSL Error:", s)

            for i in range(5, 0, -1):
                print("\rWaiting.. ({})".format(i), end = "")
                time.sleep(1)
            print("\rRetrying." + " " * 10)
            continue

        if response:
            return response.json()

        # an error status usually means too many requests; wait, then retry
        print("No response, Waiting 10 seconds...")
        time.sleep(10)
        print("Retrying.")



In [13]:
## Getting actual data from Steam Spy
url = "https://steamspy.com/api.php"
parameters = {"request": "all"}

# request 'all' from steam spy and parse into dataframe
json_data = get_request(url, parameters = parameters)
steam_spy_all = pd.DataFrame.from_dict(json_data, orient = "index")

# generate sorted app_list from steamspy data
app_list = steam_spy_all[["appid", "name"]].sort_values("appid").reset_index(drop = True)

# make sure the target folder exists; to_csv does not create missing directories
os.makedirs("downloads", exist_ok = True)

# export to keep consistency across download sessions
app_list.to_csv("downloads/steam_appid_list.csv", index = False)

# read from stored csv
app_list = pd.read_csv("downloads/steam_appid_list.csv")

# display first few rows
print(app_list.head())


   appid                       name
0     10             Counter-Strike
1     20      Team Fortress Classic
2     30              Day of Defeat
3     40         Deathmatch Classic
4     50  Half-Life: Opposing Force


In [14]:
## Getting actual data from Steam Spy
# NOTE(review): this cell repeats the preceding download cell statement for
# statement, so running both requests the full app list from SteamSpy twice.
url = "https://steamspy.com/api.php"
parameters = {"request": "all"}

# request 'all' from steam spy and parse into dataframe
json_data = get_request(url, parameters = parameters)
steam_spy_all = pd.DataFrame.from_dict(json_data, orient = "index")

# generate sorted app_list from steamspy data
app_list = steam_spy_all[["appid", "name"]].sort_values("appid").reset_index(drop = True)

# to_csv raises if the folder is missing, so create it before exporting
os.makedirs("downloads", exist_ok = True)

# export to keep consistency across download sessions
app_list.to_csv("downloads/steam_appid_list.csv", index = False)

# read from stored csv
app_list = pd.read_csv("downloads/steam_appid_list.csv")

# display first few rows
print(app_list.head())


   appid                       name
0     10             Counter-Strike
1     20      Team Fortress Classic
2     30              Day of Defeat
3     40         Deathmatch Classic
4     50  Half-Life: Opposing Force


In [15]:
def get_app_data(start, stop, parser, pause, app_list = None):
    """Return list of app data generated from parser.

    start : first row position of app_list to process (inclusive)
    stop : row position to stop at (exclusive)
    parser : function called as parser(appid, name) for every row
    pause : seconds to sleep after each parser call

    Keyword arguments:

    app_list : dataframe with "appid" and "name" columns; when omitted the
               module-level app_list is used, which is what existing
               four-argument calls rely on

    returns: list holding one parser result per processed row
    """
    if app_list is None:
        # backward-compatible fallback to the notebook-level dataframe
        try:
            app_list = globals()["app_list"]
        except KeyError:
            raise NameError("name 'app_list' is not defined") from None

    app_data = []

    # Iterate through each row of app_list, confined by start and stop
    for index, row in app_list[start:stop].iterrows():
        print("Current index: {}".format(index), end = "\r")

        appid = row["appid"]
        name = row["name"]

        # retrieve app data for a row, handled by supplied parser, and append to list
        data = parser(appid, name)
        app_data.append(data)

        time.sleep(pause) # prevent overloading api with requests

    return app_data


def process_batches(parser, app_list, download_path, data_filename, index_filename, columns, begin = 0, end = -1, batchsize = 100, pause = 1):
    """Process app data in batches, writing directly to file.

    parser : custom function to format request
    app_list : dataframe of appid and name
    download_path : path to store data
    data_filename : filename to save app data
    index_filename : filename to store highest index written
    columns : column names for file

    Keyword arguments:

    begin : starting index (get from index_filename, default 0)
    end : index to finish (defaults to end of app_list; larger values are
          capped at the length of app_list)
    batchsize : number of apps to write in each batch (default 100)
    pause : time to wait after each api request (default 1)

    After every batch the rows are appended to the data file and the index
    file is overwritten with the position the next run should begin at.

    returns: none
    """
    print("Starting at index {}:\n".format(begin))

    # by default, process all apps in app_list; never run past the last row,
    # otherwise the stored index would point beyond the end of the list
    total_apps = len(app_list)
    if end == -1 or end > total_apps:
        end = total_apps

    # generate array of batch begin and end points
    batches = np.arange(begin, end, batchsize)
    batches = np.append(batches, end)

    # both output paths are the same for every batch
    rel_path = os.path.join(download_path, data_filename)
    idx_path = os.path.join(download_path, index_filename)

    apps_written = 0
    batch_times = []

    for i in range(len(batches) - 1):
        start_time = time.time()

        start = batches[i]
        stop = batches[i+1]

        # pass the dataframe through so the rows read match the app_list argument
        app_data = get_app_data(start, stop, parser, pause, app_list = app_list)

        # writing app data to file
        with open(rel_path, "a", newline = "", encoding = "utf-8") as f:
            # extrasaction="ignore" drops any keys that are not listed in columns
            writer = csv.DictWriter(f, fieldnames = columns, extrasaction = "ignore")

            for j in range(3,0,-1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end="")
                time.sleep(0.5)

            writer.writerows(app_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end = " ")

        apps_written += len(app_data)

        # writing last index to file
        with open(idx_path, "w") as f:
            print(stop, file = f)

        # logging time taken
        end_time = time.time()
        time_taken = end_time - start_time

        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)

        # batches still to run after this one, times the average batch duration
        est_remaining = (len(batches) - i - 2) * mean_time

        remaining_td = dt.timedelta(seconds = round(est_remaining))
        time_td = dt.timedelta(seconds = round(time_taken))
        mean_td = dt.timedelta(seconds = round(mean_time))

        print("Batch {} time: {} (avg: {}, remaining: {})".format(i, time_td, mean_td, remaining_td))

    print("\nProcessing batches complete. {} apps written".format(apps_written))

In [16]:
def reset_index(download_path, index_filename):
    """Overwrite the stored index file so that it holds 0."""
    index_file = os.path.join(download_path, index_filename)

    # "w" truncates whatever index was stored before
    with open(index_file, "w") as handle:
        handle.write("0\n")
        

def get_index(download_path, index_filename):
    """Read the stored index from file; a missing file counts as index 0."""
    index_file = os.path.join(download_path, index_filename)

    try:
        handle = open(index_file, "r")
    except FileNotFoundError:
        return 0

    # only the first line is parsed; anything after it is ignored
    with handle:
        return int(handle.readline())


def prepare_data_file(download_path, filename, index, columns):
    """Create file and write headers if index is 0.

    download_path : folder holding the data file
    filename : name of the data file
    index : stored download index; any value other than 0 leaves the file untouched
    columns : column names written as the header row

    An existing file is truncated when index is 0.

    returns: none
    """
    if index == 0:
        rel_path = os.path.join(download_path, filename)

        # utf-8 matches the encoding process_batches uses when appending rows,
        # so header and data never end up in different encodings
        with open(rel_path, "w", newline = "", encoding = "utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()

In [17]:
def parse_steam_request(appid, name):
    """Fetch one app's details from the Steam Store API.

    appid : app id, sent as the "appids" query parameter
    name : app name, used only for the fallback record

    Returns : the "data" entry when the response reports success, otherwise
    a minimal dict holding just name and steam_appid
    """
    response_json = get_request(
        "http://store.steampowered.com/api/appdetails/",
        parameters={"appids": appid},
    )
    # the response is keyed by the app id as a string
    app_entry = response_json[str(appid)]

    if not app_entry["success"]:
        return {"name": name, "steam_appid": appid}

    return app_entry["data"]


# Set file parameters
download_path = "Downloads"
steam_app_data = "steam_app_data.csv"
steam_index = "steam_index.txt"

steam_columns = [
    "type", "name", "steam_appid", "required_age", "is_free", "controller_support", "dlc", 
    "detailed_description", "about_the_game", "short_description", "fullgame", "supported_languages", 
    "header_image", "website", "pc_requirements", "mac_requirements", "linux_requirements", "legal_notice", 
    "drm_notice", "ext_user_account_notice", "developers", "publishers", "demos", "price_overview", 
    "packages", "package_groups", "platforms", "metacritic", "reviews", "categories", "genres", "screenshots", 
    "movies", "recommendations", "achievements", "release_date", "support_info", "background", "content_descriptors"
    ]

# Create the download folder if needed; the index and data files below are opened for writing inside it
os.makedirs(download_path, exist_ok = True)

# Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
reset_index(download_path, steam_index)

# Retrieve last index downloaded from file
index = get_index(download_path, steam_index)

# Wipe or create data file and write headers if index is 0
prepare_data_file(download_path, steam_app_data, index, steam_columns)

# Set end and batchsize for demonstration - remove to run through entire app list
process_batches(
    parser = parse_steam_request,
    app_list = app_list,
    download_path = download_path,
    data_filename = steam_app_data,
    index_filename = steam_index,
    columns = steam_columns,
    begin = index,
    ## The following two lines, "end=50" and "batchsize=5", can be kept or commented out to download either a small sample or the full data.
    ## Keeping the lines downloads the small sample, while removing them downloads all the data.
    ## It is recommended to keep these two lines so that the downloading process doesn't take too long.
    ## A zip file including all the downloaded data will also be provided. You can unzip the file and copy the contents into their respective folder.
    end=50,
    batchsize=5

)

Starting at index 0:

Exported lines 0-4 to steam_app_data.csv. Batch 0 time: 0:00:11 (avg: 0:00:11, remaining: 0:01:35)
Exported lines 5-9 to steam_app_data.csv. Batch 1 time: 0:00:10 (avg: 0:00:10, remaining: 0:01:23)
Exported lines 10-14 to steam_app_data.csv. Batch 2 time: 0:00:10 (avg: 0:00:10, remaining: 0:01:13)
Exported lines 15-19 to steam_app_data.csv. Batch 3 time: 0:00:10 (avg: 0:00:10, remaining: 0:01:02)
Exported lines 20-24 to steam_app_data.csv. Batch 4 time: 0:00:10 (avg: 0:00:10, remaining: 0:00:52)
Exported lines 25-29 to steam_app_data.csv. Batch 5 time: 0:00:10 (avg: 0:00:10, remaining: 0:00:41)
Exported lines 30-34 to steam_app_data.csv. Batch 6 time: 0:00:10 (avg: 0:00:10, remaining: 0:00:31)
Exported lines 35-39 to steam_app_data.csv. Batch 7 time: 0:00:11 (avg: 0:00:10, remaining: 0:00:21)
Exported lines 40-44 to steam_app_data.csv. Batch 8 time: 0:00:11 (avg: 0:00:10, remaining: 0:00:10)
Exported lines 45-49 to steam_app_data.csv. Batch 9 time: 0:00:10 (avg: 0

In [18]:
# preview the first five rows of the downloaded Steam Store data
pd.read_csv(filepath_or_buffer = "Downloads/steam_app_data.csv").head(n = 5)

Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,detailed_description,about_the_game,short_description,fullgame,supported_languages,header_image,website,pc_requirements,mac_requirements,linux_requirements,legal_notice,drm_notice,ext_user_account_notice,developers,publishers,demos,price_overview,packages,package_groups,platforms,metacritic,reviews,categories,genres,screenshots,movies,recommendations,achievements,release_date,support_info,background,content_descriptors
0,game,Counter-Strike,10,0,False,,,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,,"English<strong>*</strong>, French<strong>*</st...",https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'EUR', 'initial': 819, 'final': 8...","[574941, 7]","[{'name': 'default', 'title': 'Buy Counter-Str...","{'windows': True, 'mac': True, 'linux': True}","{'score': 88, 'url': 'https://www.metacritic.c...",,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 148903},,"{'coming_soon': False, 'date': '1 Nov, 2000'}","{'url': 'http://steamcommunity.com/app/10', 'e...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
1,game,Team Fortress Classic,20,0,False,,,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...,,"English, French, German, Italian, Spanish - Sp...",https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'GBP', 'initial': 429, 'final': 4...",[29],"[{'name': 'default', 'title': 'Buy Team Fortre...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 6000},,"{'coming_soon': False, 'date': '1 Apr, 1999'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
2,game,Day of Defeat,30,0,False,,,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,,"English, French, German, Italian, Spanish - Spain",https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.dayofdefeat.com/,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'GBP', 'initial': 429, 'final': 4...","[30, 944613]","[{'name': 'default', 'title': 'Buy Day of Defe...","{'windows': True, 'mac': True, 'linux': True}","{'score': 79, 'url': 'https://www.metacritic.c...",,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 4008},,"{'coming_soon': False, 'date': '1 May, 2003'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
3,game,Deathmatch Classic,40,0,False,,,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,,"English, French, German, Italian, Spanish - Sp...",https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Valve'],['Valve'],,"{'currency': 'GBP', 'initial': 429, 'final': 4...",[31],"[{'name': 'default', 'title': 'Buy Deathmatch ...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 2104},,"{'coming_soon': False, 'date': '1 Jun, 2001'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
4,game,Half-Life: Opposing Force,50,0,False,,,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,,"English, French, German, Korean",https://cdn.akamai.steamstatic.com/steam/apps/...,,{'minimum': '\r\n\t\t\t<p><strong>Minimum:</st...,{'minimum': 'Minimum: OS X Snow Leopard 10.6....,"{'minimum': 'Minimum: Linux Ubuntu 12.04, Dual...",,,,['Gearbox Software'],['Valve'],,"{'currency': 'GBP', 'initial': 429, 'final': 4...",[32],"[{'name': 'default', 'title': 'Buy Half-Life: ...","{'windows': True, 'mac': True, 'linux': True}",,,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 18753},,"{'coming_soon': False, 'date': '1 Nov, 1999'}","{'url': 'https://help.steampowered.com', 'emai...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"


In [23]:
def parse_steamspy_request(appid, name):
    """Fetch one app's details from the SteamSpy API.

    appid : app id, sent as the "appid" query parameter
    name : not used here; accepted because get_app_data calls every parser as parser(appid, name)

    Returns : whatever get_request returns for the "appdetails" request
    """
    return get_request(
        "https://steamspy.com/api.php",
        {"request": "appdetails", "appid": appid},
    )

# set files and columns again
download_path = "Downloads"
steamspy_app_data = "steamspy_app_data.csv"
steamspy_index = "steamspy_index.txt"

steamspy_columns = ["appid", "name", "developer", "publisher", "score_rank", "positive", "negative", "userscore", 
                    "owners", "average_forever", "average_2weeks", "median_forever", "median_2weeks", "price", 
                    "initialprice", "discount", "languages", "genre", "ccu", "tags"]

# Create the download folder if needed; the index and data files below are opened for writing inside it
os.makedirs(download_path, exist_ok = True)

# Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
reset_index(download_path, steamspy_index)

# Read the SteamSpy index from its own file instead of reusing whatever
# value `index` still holds from the Steam Store download cell
index = get_index(download_path, steamspy_index)

# Wipe the data file and write headers if index is 0
prepare_data_file(download_path, steamspy_app_data, index, steamspy_columns)

process_batches(
    parser = parse_steamspy_request,
    app_list = app_list,
    download_path = download_path, 
    data_filename = steamspy_app_data,
    index_filename = steamspy_index,
    columns = steamspy_columns,
    begin = index,
    ## The following two lines, "end= 50" and "batchsize=5", can be kept or commented out to download either a small sample or the full data.
    ## Keeping the lines downloads the small sample, while removing them downloads all the data.
    ## It is recommended to keep these two lines so that the downloading process doesn't take too long.
    ## A zip file including all the downloaded data will also be provided. You can unzip the file and copy the contents into their respective folder.
    end= 50,
    batchsize=5,

    pause = 0.3
)

Starting at index 0:

Exported lines 0-4 to steamspy_app_data.csv. Batch 0 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:56)
Exported lines 5-9 to steamspy_app_data.csv. Batch 1 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:49)
Exported lines 10-14 to steamspy_app_data.csv. Batch 2 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:42)
Exported lines 15-19 to steamspy_app_data.csv. Batch 3 time: 0:00:07 (avg: 0:00:06, remaining: 0:00:37)
Exported lines 20-24 to steamspy_app_data.csv. Batch 4 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:31)
Exported lines 25-29 to steamspy_app_data.csv. Batch 5 time: 0:00:05 (avg: 0:00:06, remaining: 0:00:24)
Exported lines 30-34 to steamspy_app_data.csv. Batch 6 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:18)
Exported lines 35-39 to steamspy_app_data.csv. Batch 7 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:12)
Exported lines 40-44 to steamspy_app_data.csv. Batch 8 time: 0:00:06 (avg: 0:00:06, remaining: 0:00:06)
Exported lines 45-49 to steamspy_app_data.csv.

In [24]:
# inspect downloaded steamspy data
# The folder name must match the "Downloads" download_path the data was written to;
# the lowercase spelling only resolves on case-insensitive file systems.
pd.read_csv("Downloads/steamspy_app_data.csv").head()

## This entire file has been referenced from the following source and modified to match my program's needs.
##Davis, N. (2019) Steam-data-science-project/notebooks at master · Nik-Davis/steam-data-science-project, GitHub. Available at: https://github.com/nik-davis/steam-data-science-project/tree/master/notebooks (Accessed: 23 April 2024). 

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,languages,genre,ccu,tags
0,10,Counter-Strike,Valve,Valve,,230243,6010,0,"10,000,000 .. 20,000,000",8663,169,154,95,999,999,0,"English, French, German, Italian, Spanish - Sp...",Action,13122,"{'Action': 5470, 'FPS': 4896, 'Multiplayer': 3..."
1,20,Team Fortress Classic,Valve,Valve,,7032,1065,0,"5,000,000 .. 10,000,000",68,0,20,0,499,499,0,"English, French, German, Italian, Spanish - Sp...",Action,68,"{'Action': 763, 'FPS': 327, 'Multiplayer': 277..."
2,30,Day of Defeat,Valve,Valve,,6062,667,0,"5,000,000 .. 10,000,000",619,0,29,0,499,499,0,"English, French, German, Italian, Spanish - Spain",Action,90,"{'FPS': 798, 'World War II': 269, 'Multiplayer..."
3,40,Deathmatch Classic,Valve,Valve,,2423,514,0,"5,000,000 .. 10,000,000",15,0,13,0,499,499,0,"English, French, German, Italian, Spanish - Sp...",Action,3,"{'Action': 635, 'FPS': 150, 'Classic': 115, 'M..."
4,50,Half-Life: Opposing Force,Gearbox Software,Valve,,20819,1061,0,"2,000,000 .. 5,000,000",215,0,94,0,499,499,0,"English, French, German, Korean",Action,103,"{'FPS': 915, 'Action': 349, 'Classic': 280, 'S..."
