In [1]:
import requests
import re
import json
from pathlib import Path
from multiprocessing import Pool

# Function defined in another file to get it to work with multiprocessing
from get_game_metadata import get_game_metadata

# Load the Steam Web API credentials from the local auth.json file.
# The file is expected to hold a JSON object with a 'key' entry.
with open('auth.json', 'r') as auth:
    auth_data = json.loads(auth.read())

# API key used by every Steam Web API request issued below.
key = auth_data['key']

def get_friend_ids(steam_id):
    """Return the Steam IDs found on ``steam_id``'s friend list.

    Queries the ISteamUser/GetFriendList endpoint using the module-level
    ``key``. The lookup is best-effort: a failed request or a body that is
    not valid JSON yields an empty list instead of raising.

    Args:
        steam_id: Steam ID whose friend list is requested.

    Returns:
        list: the ``'steamid'`` value of every entry under
        ``friendslist -> friends``; empty when that structure is absent.
    """
    # NOTE(review): the endpoint is addressed over plain http, so the API key
    # is sent unencrypted -- consider switching to https.
    url = 'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/'
    params = {'key': key, 'steamid': steam_id, 'relationship': 'friend'}

    try:
        res = json.loads(requests.get(url, params=params, timeout=60).text)
    except (requests.RequestException, ValueError):
        # Network/HTTP-layer failure or undecodable body. Deliberately narrower
        # than a bare ``except`` so Ctrl-C can still stop a long crawl.
        return []

    # Tolerate bodies that are valid JSON but not the expected object shape.
    if not isinstance(res, dict):
        return []
    friendslist = res.get('friendslist')
    if not isinstance(friendslist, dict):
        return []

    return [friend['steamid'] for friend in friendslist.get('friends', [])]

# Modifies complete_set input with unique Steam IDs
def run_friend_traversal(start_id, levels, complete_set, max_friend_search=30000):
    """Walk the friend graph outward from ``start_id``, collecting Steam IDs.

    ``complete_set`` is updated in place. It is first seeded with the IDs
    stored in ``steam_ids.json`` (when that file exists) and with ``start_id``.
    IDs already present in ``complete_set`` are never looked up again.

    Args:
        start_id: Steam ID the traversal starts from.
        levels: number of passes of the level loop. Friend lists are only
            expanded on passes where the decremented counter is still
            positive, so ``levels=1`` adds nothing beyond the seed IDs.
        complete_set: set that receives the collected Steam IDs.
        max_friend_search: cap on the friend-list lookup counter (which
            starts at 1 for ``start_id``); the function returns as soon as
            another lookup would be needed once the cap is reached.

    Returns:
        None. Results are delivered through ``complete_set``.
    """
    ids_file = Path('steam_ids.json')
    if ids_file.is_file():
        with open(ids_file) as file:
            complete_set.update(json.load(file)['steam_ids'])

    complete_set.add(start_id)
    current_list = get_friend_ids(start_id)

    friend_count = 1
    while levels > 0 and friend_count <= max_friend_search:
        levels -= 1
        new_ids = set()

        # NOTE(review): on the final pass nothing is expanded and the frontier
        # is replaced by an empty set, so IDs fetched on the last expanding
        # pass never reach complete_set -- confirm this is intended.
        if levels > 0:
            for friend in current_list:
                if friend in complete_set:
                    continue
                if friend_count >= max_friend_search:
                    return
                print('Getting friends list [{0} / {1}]'.format(friend_count, max_friend_search), end='\r')
                complete_set.add(friend)
                new_ids.update(get_friend_ids(friend))
                friend_count += 1

        # new_ids is rebuilt on every pass, so it can become the frontier as-is.
        current_list = new_ids
    
def get_games(steam_id):
    """Return the list of games owned by ``steam_id``.

    Queries the IPlayerService/GetOwnedGames endpoint (with app info and
    played free games included) using the module-level ``key``. The lookup is
    best-effort: on a failed or timed-out request, an undecodable body, or a
    body without a ``'response'`` entry, it prints ``'Error getting games'``
    and returns an empty list.

    Args:
        steam_id: Steam ID whose library is requested.

    Returns:
        list: the ``'games'`` entries of the response, or ``[]`` when the
        response carries no ``'games'`` key.
    """
    # NOTE(review): the endpoint is addressed over plain http, so the API key
    # is sent unencrypted -- consider switching to https.
    url = 'http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/'
    params = {
        'key': key,
        'steamid': steam_id,
        'format': 'json',
        'include_appinfo': 'true',
        'include_played_free_games': 'true',
    }

    try:
        # Same 60 s timeout as get_friend_ids: without one, a single stalled
        # connection would block the caller's whole loop indefinitely.
        res = json.loads(requests.get(url, params=params, timeout=60).text)
        res = res['response']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Narrower than a bare ``except`` so Ctrl-C still interrupts the run.
        print('Error getting games')
        return []

    if isinstance(res, dict) and 'games' in res:
        return res['games']
    return []

In [60]:
# Get User IDs (already retrieved ~150,000 users, check Google Drive)

# complete_set = set([])
# run_friend_traversal('76561198017054389', 5, complete_set)

# steam_ids = {'steam_ids': list(complete_set)}
# with open('Data/etc.json', 'w') as out:
#     json.dump(steam_ids, out)
# print('etc.json file saved with {0} entries!'.format(len(complete_set)))

etc.json file saved with 187697 entries!


In [13]:
# Get All Steam Games (very quick run time)

# with open('Data/all_games_basic.json', 'w') as out:
#     all_games = json.loads(requests.get('https://api.steampowered.com/ISteamApps/GetAppList/v2/').text)
#     json.dump(json.loads(requests.get('https://api.steampowered.com/ISteamApps/GetAppList/v2/').text), out)
#     print('all_games_basic.json saved with {0} entries'.format(len(all_games)))

In [5]:
# Get Metadata for All Games (will take ~19 hours but provides data for ALL games currently on the Steam store)

# with open('Data/all_games_basic.json', 'r') as file:
#     all_games = json.load(file)['applist']['apps']

# updated_games = {}
# count = 1
# for game in all_games:
#     if count > 100:
#         break
#     print('Processing games [{0} / {1}]'.format(count, len(all_games)))
#     rating, tags, dlc = get_game_metadata(game['appid'])
#     if not dlc:
#         updated_games[game['appid']] = {'name': game['name'], 'rating': rating, 'tags': tags}
#     count += 1

# with open('Data/all_games.json', 'w') as out:
#     json.dump(updated_games, out)
#     print('all_games.json saved with {0} entries'.format(len(updated_games)))

In [2]:
import datetime

# Build two datasets from the stored Steam IDs:
#   users_with_games      steam_id -> {appid: {'name', 'playtime'}}
#   all_games_from_users  appid    -> {'name', 'rating', 'tags'}
with open('steam_ids.json', 'r') as file:
    steam_ids = json.load(file)['steam_ids']

all_games_from_users = {}
users_with_games = {}
checked_set = set()   # appids whose store metadata has already been requested
count = 1             # 1-based index of the account being processed
MAX_USERS = 20000     # cap on accounts queried (counts accounts without games too)

print_lines = 1  # not read anywhere in this cell

# A single worker pool serves the whole run; creating and tearing down
# 15 processes for every account is pure overhead.
with Pool(15) as p:
    for steam_id in steam_ids:
        if count > MAX_USERS:
            break
        print('Getting games for account [{0} / {1}]'.format(count, MAX_USERS), end='\r')
        current_games = get_games(steam_id)
        if len(current_games) > 0:
            users_with_games[steam_id] = {}
            new_games = []
            urls = []
            for game in current_games:
                users_with_games[steam_id][game['appid']] = {'name': game['name'], 'playtime': game['playtime_forever']}
                # Only request store metadata the first time an appid is seen.
                if game['appid'] not in checked_set:
                    checked_set.add(game['appid'])
                    new_games.append(game)
                    urls.append('https://store.steampowered.com/app/{0}'.format(game['appid']))

            # Nothing to dispatch when every game of this account was seen before.
            records = p.map(get_game_metadata, urls) if urls else []

            # map() preserves input order, so records line up with new_games.
            for game, record in zip(new_games, records):
                if record:
                    rating, tags = record
                    # Games that came back without tags are left out.
                    if len(tags) > 0:
                        all_games_from_users[game['appid']] = {'name': game['name'], 'rating': rating, 'tags': tags}
        count += 1

# Report each save only after its file has been written and closed.
with open('all_games_from_users.json', 'w') as out:
    json.dump(all_games_from_users, out)
print('all_games_from_users.json saved with {0} entries'.format(len(all_games_from_users)))

with open('users_with_games.json', 'w') as out:
    json.dump(users_with_games, out)
print('users_with_games.json saved with {0} entries'.format(len(users_with_games)))

all_games_from_users.json saved with 21608 entries
users_with_games.json saved with 3151 entries
