In [None]:
import requests
import grequests
import pandas as pd
import numpy as np
import pickle
import time

Load the Steam API key (optional, but it may help avoid being rate limited)

In [None]:
# Read the API key inside a context manager so the file handle is closed,
# and strip surrounding whitespace so a trailing newline in key.txt does not
# end up in the 'key' request parameter.
with open('key.txt') as key_file:
    key = key_file.read().strip()

Load the app list from the pickle file (app IDs are stored as integers)

In [None]:
# Deserialize the previously saved app list.
# NOTE(review): pickle.load can execute arbitrary code; only load an
# all_apps.pkl that you produced yourself or otherwise trust.
with open('all_apps.pkl', mode='rb') as apps_file:
    all_apps = pickle.load(apps_file)

Group the apps into 20 smaller chunks

In [None]:
# Partition the full app list into 20 chunks so that each run of this
# notebook only has to collect one of them.
app_chunks = np.array_split(ary=all_apps, indices_or_sections=20)

Set the number of the chunk you want to collect here (must be an integer in the range [0, 19])

In [None]:
# Index into app_chunks of the chunk collected by this run; it is also used
# to name the output file (df{CHUNK_NUM}.pkl).
CHUNK_NUM = 5

Data collection: define the fields to extract, then fetch the app details and review summaries for the selected chunk

In [None]:
# Key paths to extract from each app's details. A one-element path reads a
# top-level field; a two-element path reads a field nested one level down.
categories = [
    ['name'], ['type'], ['steam_appid'],
    ['developers'], ['publishers'],
    ['is_free'],
    ['price_overview', 'initial'],
    ['achievements', 'total'],
    ['release_date', 'date'],
    ['metacritic', 'score'],
]

# Comma-separated value for the 'filters' request parameter: the top-level
# key of every path above, followed by the three fixed extra sections.
top_level_keys = [path[0] for path in categories]
filters = ','.join(top_level_keys + ['basic', 'genres', 'categories'])

In [None]:
def fetch(game_info, categories):
    """Flatten one app's details into a single-level record.

    Parameters
    ----------
    game_info : dict
        Details of one app (the caller passes the ``data`` object of an
        appdetails response). Must be a dict; otherwise the genre/category
        lookup raises ``AttributeError``.
    categories : list of list of str
        Key paths to extract. Each path is followed through nested dicts,
        e.g. ``['price_overview', 'initial']`` reads
        ``game_info['price_overview']['initial']``.

    Returns
    -------
    dict
        One entry per path, keyed by the path parts joined with a space.
        The value is ``np.nan`` when any step of the path is missing or when
        an intermediate value is not a dict. In addition, one
        ``'Genre: <description>'`` / ``'Category: <description>'`` entry set
        to ``True`` is added per item in ``game_info['genres']`` /
        ``game_info['categories']`` when those keys are present.
    """
    info = {}
    for category in categories:
        cur_info = game_info
        for subpart in category:
            # Only descend into dicts: an intermediate value of another type
            # (for example an empty list) has no keys to look up, so the
            # path is treated as missing instead of raising.
            if isinstance(cur_info, dict) and subpart in cur_info:
                cur_info = cur_info[subpart]
            else:
                # np.nan is the supported spelling; the np.NaN alias was
                # removed in NumPy 2.0.
                cur_info = np.nan
                break
        info[' '.join(category)] = cur_info

    # One boolean flag column per genre and per store category.
    for prefix, section in (('Genre: ', 'genres'), ('Category: ', 'categories')):
        for entry in game_info.get(section, []):
            info[prefix + entry['description']] = True

    return info

In [None]:
# Collect details and review summaries for every app in the selected chunk.
# Records accumulate in all_info (one dict per app whose details request
# reported success).
all_info = []

# Number of request batches the chunk is split into. Keeping it in one place
# keeps the split and the progress messages in agreement.
NUM_BATCHES = 70

my_chunk = app_chunks[CHUNK_NUM]
batches = np.array_split(my_chunk, NUM_BATCHES)

# The request parameters are the same for every batch, so build them once.
# strip() guards against a trailing newline left over from reading the key
# file.
detail_params = {'filters': filters, 'key': key.strip()}
review_params = {'num_per_page': 0, 'language': 'all', 'purchase_type': 'all'}

for batch_num, batch in enumerate(batches, start=1):
    start = time.time()
    print(f"batch {batch_num}/{NUM_BATCHES} start")

    # batch_info and review_urls are appended to together, so they stay
    # index-aligned; the review responses are matched back by position.
    batch_info = []
    review_urls = []

    app_ids = [str(app_id) for app_id in batch]
    # https so the API key is not sent in clear text.
    detail_urls = [f"https://store.steampowered.com/api/appdetails/?appids={app_id}" for app_id in app_ids]

    rs = (grequests.get(url=u, params=detail_params) for u in detail_urls)
    results = grequests.map(rs)

    for app_id, r in zip(app_ids, results):
        try:
            # Parse the body once. NOTE(review): grequests.map is expected to
            # yield None for a request that failed, which makes r.json()
            # raise and lands in the handler below - confirm against the
            # grequests docs.
            payload = r.json()[app_id]
            if payload['success']:
                info = fetch(payload['data'], categories)
                batch_info.append(info)
                review_urls.append(f"https://store.steampowered.com/appreviews/{app_id}?json=1")
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C
            # (KeyboardInterrupt) can still stop the collection.
            print(f"Detail request for app_id {app_id} failed. Copy this entire message to failedIDs.txt")

    rs = (grequests.get(url=u, params=review_params) for u in review_urls)
    results = grequests.map(rs)

    for info, r in zip(batch_info, results):
        try:
            summary = r.json()
            if 'query_summary' in summary.keys():
                q_summary = summary['query_summary']
                # Missing counters are recorded as NaN (np.nan; the np.NaN
                # alias was removed in NumPy 2.0).
                info['total_positive'] = q_summary.get('total_positive', np.nan)
                info['total_reviews'] = q_summary.get('total_reviews', np.nan)
        except Exception:
            print(f"Summary request for app_id {info['steam_appid']} failed. Copy this entire message to failedIDs.txt")

    all_info += batch_info
    end = time.time()
    print(f"batch {batch_num}/{NUM_BATCHES} done. Took {end - start} seconds")

In [None]:
# Build one row per collected record; a key that is absent from a record
# shows up as a missing value in that row.
df = pd.DataFrame(all_info)
row_count = len(df)
print(f"Length: {row_count}") # should be around 7000
df.head()

In [None]:
# Save this chunk's frame under a file name that carries the chunk index.
output_path = f"df{CHUNK_NUM}.pkl"
df.to_pickle(output_path)

Once all 20 chunks are created, the complete dataframe can be created with `df = pd.concat([df0, df1, df2, ... , df19], ignore_index=True, sort=False)`