## Load the artur steam dataset

In [1]:
from pandas import DataFrame
import pickle


with open("data/full_user_data.pkl", "rb") as dataPickle:
    data: DataFrame = pickle.load(dataPickle, fix_imports=True)

data.set_index("steamid", inplace=True)

## Augmenting the dataset

This block makes it simple to collect additional data without the need for constant supervision. Re-run as many times as needed to collect a ridiculous amount of data.

First the augmented dataset is loaded if possible

In [None]:
from collections import deque

apiKey = "5C9F6A3AB40F423F4BF70D5C055749FA"

try:
    with open("data/bloomData.pkl", "rb") as dataPickle:
        data2: DataFrame = pickle.load(dataPickle, fix_imports=True)
except:
    data2 = DataFrame()


All users from Artur's dataset not yet in the augmented dataset is added to a processing queue. And each user's friend is added to the queue if it is not processed yet

In [3]:
processedIDs = set[str]([id for id in data2.index])

unprocessedFriends \
    = set([id for id in data.index if id not in processedIDs]) |\
    set[str]([f["steamid"] for list in data2["friendsList"] for f in list if f["steamid"] not in processedIDs])

processingQueue = deque[str](unprocessedFriends)
batchesDone = 0


We query the steam web api to collect the data and append them into the bloom dataset.

In [4]:
import pandas
from gather_user_data import (
    get_friend_list,
    get_player_bans,
    get_player_summaries,
    get_recent_playtime,
    get_user_group_list,
    get_owned_games
)

while len(processingQueue) > 0:
    processingBatches = list[set[str]]()
    while len(processingQueue) > 0:
        batch = set[str]()

        while len(batch) < 100 and len(processingQueue) > 0:
            nextItem = processingQueue.popleft()
            if nextItem in processedIDs:
                continue

            batch.add(nextItem)
            processedIDs.add(nextItem)

        if len(batch) > 0:
            processingBatches.append(batch)

    for batch in processingBatches:
        batchIDStr = ",".join([id for id in batch])

        summaries = get_player_summaries(apiKey, batchIDStr)
        banInfo = get_player_bans(apiKey, batchIDStr)

        summaries = {s["steamid"]: s for s in summaries}

        # Combine ban info into same dict
        for b in banInfo:
            id = b["SteamId"]
            if(id not in summaries):
                print(f"ID {id} somehow not in fetched summaries... skipping")
                batch.remove(id)
                continue

            summaries[id] = summaries[id] | b

        for id in batch:
            friends = get_friend_list(apiKey, id, "all")
            ownedGames = get_owned_games(apiKey, id)
            userGroups = get_user_group_list(apiKey, id)
            recentlyPlayed = get_recent_playtime(apiKey, id)

            summaries[id]["friendsList"] = friends
            summaries[id]["ownedGames"] = ownedGames
            summaries[id]["groups"] = userGroups
            summaries[id]["recentlyPlayed"] = recentlyPlayed

            for f in [f for f in friends if f["steamid"] not in processedIDs]:
                processingQueue.append(f["steamid"])


        df = DataFrame.from_dict(summaries, orient="index")

        data2: DataFrame = pandas.concat([data2, df]) # type: ignore
        with open("data/bloomData.pkl", "wb") as blmdataPickle:
            pickle.dump(data2, blmdataPickle)
        
        batchesDone += 1

ID 76561199019268990 somehow not in fetched summaries... skipping
ID 76561197975234542 somehow not in fetched summaries... skipping
ID 76561198000246996 somehow not in fetched summaries... skipping
ID 76561198003287723 somehow not in fetched summaries... skipping
ID 76561198023607383 somehow not in fetched summaries... skipping
ID 76561198049669501 somehow not in fetched summaries... skipping
ID 76561197975268863 somehow not in fetched summaries... skipping
ID 76561198042820093 somehow not in fetched summaries... skipping


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
with open("data/bloomData.pkl", "wb") as blmdataPickle:
    pickle.dump(data2, blmdataPickle)

In [None]:
"76561198095778157" in data2.index

True

In [None]:
data2.loc["76561198095778157"]

Nickname                                                            Flow
Privacy Setting                                                   Public
Friends List           [76561197984297977, 76561197986866340, 7656119...
Group Memberships      [12455, 5888848, 6023441, 6250936, 10006645, 1...
Owned Games                                                           []
Country Code                                                          DE
State Code                                                            04
City ID                                                            13004
VAC Banned                                                          True
Community Banned                                                   False
Economy Banned                                                      none
Num of Vac Bans                                                        3
Days Since Last Ban                                                 2947
Num Of Game Bans                                   