In [23]:
import requests
import grequests
import pandas as pd
import numpy as np
import time
import pickle

Multiple partial dataframes should be combined with `df = pd.concat([df1, df2], ignore_index=True, sort=False)`, where the first argument is a list of all the partial dataframes.

Now to test the fetching code on a small sample of apps.

In [2]:
# Read the API key from key.txt instead of hard-coding it in the notebook.
# The context manager closes the file handle deterministically, and strip()
# removes surrounding whitespace (e.g. a trailing newline) that would
# otherwise be sent as part of the key.
with open('key.txt') as key_file:
    key = key_file.read().strip()

In [3]:
# Download the app list from the Steam Web API. Later cells read
# app['appid'] from each entry of `apps`.
# HTTPS is used so the API key in the query string is not sent in clear text.
URL = "https://api.steampowered.com/ISteamApps/GetAppList/v2"
PARAMS = {'key': key}
# NOTE: `re` is the response object here, not the stdlib regex module.
# The timeout keeps the cell from hanging forever on a stalled connection.
re = requests.get(url=URL, params=PARAMS, timeout=30)
# Fail loudly on an HTTP error instead of with an opaque JSON/KeyError below.
re.raise_for_status()
apps = re.json()['applist']['apps']

In [33]:
# Work on only the first 200 entries of the app list while testing.
apps1 = apps[:200]

To store the apps list:

```python
with open('apps_list.pkl', 'wb') as fp:
    pickle.dump(apps, fp)
```

To read the stored apps list (only unpickle files you created yourself):

```python
with open('apps_list.pkl', 'rb') as fp:
    apps = pickle.load(fp)
```

In [5]:
# Key paths to pull out of each app-details payload: a one-element path names
# a top-level field, a two-element path names a field nested one level down.
categories = [
    ['name'], ['type'], ['steam_appid'],
    ['developers'], ['publishers'],
    ['is_free'],
    ['price_overview', 'initial'],
    ['achievements', 'total'],
    ['release_date', 'date'],
    ['metacritic', 'score'],
]

# Comma-separated value for the `filters` request parameter: the top-level
# key of every path above, followed by three fixed extra sections.
filters = ','.join(
    [path[0] for path in categories] + ['basic', 'genres', 'categories']
)

In [6]:
def fetch(game_info, categories):
    """Flatten one app-details payload into a single flat record.

    Parameters
    ----------
    game_info : dict
        Mapping describing one app. It may contain nested mappings, plus
        optional ``'genres'`` and ``'categories'`` lists whose items each
        have a ``'description'`` key.
    categories : list of list of str
        Key paths to extract. Each path is followed one key at a time
        starting from ``game_info``.

    Returns
    -------
    dict
        One entry per path, keyed by the path joined with spaces (e.g.
        ``'price_overview initial'``), holding the value found or ``np.nan``
        when the path cannot be followed. Also holds one
        ``'Genre: <description>'`` / ``'Category: <description>'`` entry set
        to ``True`` per listed genre / category.
    """
    info = {}
    for category in categories:
        cur_info = game_info
        for subpart in category:
            # Only descend through dicts: a non-dict intermediate value (or a
            # missing key) means the path is absent, not an error.
            if isinstance(cur_info, dict) and subpart in cur_info:
                cur_info = cur_info[subpart]
            else:
                # np.nan, not np.NaN: the latter alias was removed in NumPy 2.0.
                cur_info = np.nan
                break
        info[' '.join(category)] = cur_info

    # One boolean flag per genre / category; `or ()` tolerates a missing key
    # as well as an explicit null.
    for section, prefix in (('genres', 'Genre: '), ('categories', 'Category: ')):
        for tag in game_info.get(section) or ():
            info[prefix + tag['description']] = True

    return info

In [34]:
# Concurrent version: fetch details for every app in `apps1`, then fetch the
# review summary for each app whose details lookup succeeded.
start = time.time()

all_info = []

app_ids = [str(app['appid']) for app in apps1]
detail_urls = [f"http://store.steampowered.com/api/appdetails/?appids={app_id}" for app_id in app_ids]
detail_params = {'filters': filters}

# grequests.map returns responses in request order, with None in place of any
# request that failed. The timeout stops a stalled connection from blocking
# the whole batch.
rs = (grequests.get(url=u, params=detail_params, timeout=30) for u in detail_urls)
results = grequests.map(rs)

review_urls = []
review_params = {'num_per_page': 0, 'language': 'all', 'purchase_type': 'all'}
for i, (app_id, r) in enumerate(zip(app_ids, results)):
    try:
        # Parse the body once; r is None when the request itself failed.
        payload = r.json()[app_id]
        if payload['success']:
            info = fetch(payload['data'], categories)
            # all_info and review_urls are appended together so that entry k
            # of one always belongs to entry k of the other.
            all_info.append(info)
            review_urls.append(f"http://store.steampowered.com/appreviews/{app_id}?json=1")
    except (AttributeError, KeyError, TypeError, ValueError) as exc:
        # Best effort: report the skipped app and carry on. Unlike a bare
        # `except:`, this lets KeyboardInterrupt stop the cell.
        print(f"{i}: skipped app {app_id} ({exc!r})")

rs = (grequests.get(url=u, params=review_params, timeout=30) for u in review_urls)
results = grequests.map(rs)
for info, r in zip(all_info, results):
    # A failed request (None) or a non-JSON body leaves the review fields
    # unset for this app; they become NaN when the DataFrame is built.
    try:
        summary = r.json() if r is not None else None
    except ValueError:
        summary = None
    if not isinstance(summary, dict) or 'query_summary' not in summary:
        continue

    q_summary = summary['query_summary']
    info['total_positive'] = q_summary.get('total_positive', np.nan)
    info['total_reviews'] = q_summary.get('total_reviews', np.nan)

end = time.time()
print(end - start)

77.45303297042847


In [35]:
# One row per record in all_info; a key absent from a record shows up as NaN
# in that row.
df1 = pd.DataFrame(all_info)
df1.head()

Unnamed: 0,name,type,steam_appid,developers,publishers,is_free,price_overview initial,achievements total,release_date date,metacritic score,...,Category: Shared/Split Screen Co-op,Category: Includes Source SDK,Genre: Design & Illustration,Genre: Web Publishing,Genre: Education,Genre: Software Training,Genre: Game Development,Category: Additional High-Quality Audio,Category: Remote Play on Phone,Category: Valve Anti-Cheat enabled
0,Gales of Nayeli,game,1878490,[Blindcoco Studios],[Blindcoco Studios],False,,,2023,,...,,,,,,,,,,
1,升级打怪换装备,game,1878530,[此世工作室],[此世工作室],False,99.0,,"Feb 13, 2022",,...,,,,,,,,,,
2,Infinite Construction,game,1878580,[BackSnow Games],[BackSnow Games],False,999.0,,"Mar 3, 2022",,...,,,,,,,,,,
3,Aboard the Adventure Demo,demo,1878590,[Chenke Games],[Chenke Games],True,,,"Jan 20, 2022",,...,,,,,,,,,,
4,Ink and Paper: Wandering,game,1878660,[The Mumbling Mammoth],[The Mumbling Mammoth],False,999.0,1.0,"Feb 10, 2022",,...,,,,,,,,,,


In [9]:
# Sequential version of the same collection, used as a reference to compare
# against the concurrent run.
start = time.time()

all_info2 = []

details_url = "http://store.steampowered.com/api/appdetails"
review_query = {'num_per_page': 0, 'language': 'all', 'purchase_type': 'all'}

for app in apps1:
    app_id = app['appid']

    # Main API query. Any failure here skips the app (with a message) rather
    # than aborting the whole run.
    try:
        details_resp = requests.get(
            url=details_url,
            params={'appids': app_id, 'filters': filters},
            timeout=30,
        )
        payload = details_resp.json()[str(app_id)]  # parse the body once
        if not payload['success']:
            continue
        info = fetch(payload['data'], categories)
    except (requests.RequestException, AttributeError, KeyError, TypeError, ValueError) as exc:
        print(f"skipped app {app_id} ({exc!r})")
        continue

    # Review API query. A failed request or non-JSON body leaves the review
    # fields unset; they become NaN when the DataFrame is built.
    try:
        review_resp = requests.get(
            url=f"http://store.steampowered.com/appreviews/{app_id}?json=1",
            params=review_query,
            timeout=30,
        )
        summary = review_resp.json()
    except (requests.RequestException, ValueError):
        summary = None

    if isinstance(summary, dict) and 'query_summary' in summary:
        q_summary = summary['query_summary']
        # np.nan, not np.NaN: the latter alias was removed in NumPy 2.0.
        info['total_positive'] = q_summary.get('total_positive', np.nan)
        info['total_reviews'] = q_summary.get('total_reviews', np.nan)

    all_info2.append(info)

end = time.time()
print(end - start)

7.5939953327178955


In [10]:
# One row per record in all_info2; a key absent from a record shows up as NaN
# in that row.
df2 = pd.DataFrame(all_info2)
df2.head()

Unnamed: 0,name,type,steam_appid,developers,publishers,is_free,price_overview initial,achievements total,release_date date,metacritic score,...,Category: Multi-player,Category: Co-op,Category: LAN Co-op,Genre: Action,Category: MMO,Category: PvP,Category: Online PvP,Category: Online Co-op,Category: In-App Purchases,Category: Partial Controller Support
0,Gales of Nayeli,game,1878490,[Blindcoco Studios],[Blindcoco Studios],False,,,2023,,...,,,,,,,,,,
1,升级打怪换装备,game,1878530,[此世工作室],[此世工作室],False,99.0,,"Feb 13, 2022",,...,,,,,,,,,,
2,Infinite Construction,game,1878580,[BackSnow Games],[BackSnow Games],False,999.0,,"Mar 3, 2022",,...,,,,,,,,,,
3,Aboard the Adventure Demo,demo,1878590,[Chenke Games],[Chenke Games],True,,,"Jan 20, 2022",,...,,,,,,,,,,
4,Ink and Paper: Wandering,game,1878660,[The Mumbling Mammoth],[The Mumbling Mammoth],False,999.0,1.0,"Feb 10, 2022",,...,True,True,True,,,,,,,


In [11]:
# Sanity check: the sequential run (df2) should produce the same table as the
# concurrent run (df1). DataFrame.equals treats NaNs in the same location as
# equal.
df2.equals(df1)

True

In [12]:
# Split the full app list into 20 chunks. np.array_split accepts a length that
# is not divisible by 20 and returns a list of NumPy arrays.
# NOTE(review): presumably each chunk is fetched separately and the partial
# dataframes are concatenated afterwards — confirm against the cells that follow.
app_chunks = np.array_split(apps, 20)