In [1]:
from dotenv import load_dotenv
load_dotenv()

# Use the variable with:
import os
API_KEY = os.getenv("API_KEY")
STEAM_ID = os.getenv("STEAM_ID")

import requests
import pandas as pd
from tqdm.auto import tqdm

General flow:
- Start with my Steam ID.
- Get a list of my games and save it in a dataframe.
- Every time the dataframe is updated, save it to a CSV file.
- Append the SteamID to the list of already visited SteamIDs.
- For each SteamID, get a list of their friends, check if they are already in the list of visited SteamIDs, and if not, add them to the list of SteamIDs to visit.
- Save the list of SteamIDs to visit in a csv file.
- Repeat the process.

In [118]:
def get_games(steam_id):
    """Fetch a user's owned games and pack them into a one-row DataFrame.

    Games with zero recorded playtime are dropped. The returned row is indexed
    by ``steam_id`` and holds the total game count, the number of played games
    and the list of ``{'id', 'time'}`` records. Returns ``None`` when the
    response carries no ``games`` entry.
    """
    url = f'http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key={API_KEY}&steamid={steam_id}&format=json&include_played_free_games=1'
    payload = requests.get(url).json()['response']
    if 'games' not in payload:
        return None
    # Keep only the app id and lifetime playtime of games that were actually played
    played = [
        {'id': entry['appid'], 'time': entry['playtime_forever']}
        for entry in payload['games']
        if entry['playtime_forever'] != 0
    ]
    summary = {'Total Games': payload['game_count'], 'Played Games': len(played), 'Games': [played]}
    return pd.DataFrame(summary, index=[steam_id])

def get_games_2(steam_id):
    """Fetch a user's owned games as a summary row plus per-game playtime rows.

    Args:
        steam_id: Steam ID of the user to look up.

    Returns:
        A ``(games, games_data)`` tuple. ``games`` is a one-row DataFrame indexed
        by ``steam_id`` with ``total_games`` and ``played_games`` columns.
        ``games_data`` has one ``steam_id``/``id``/``time`` row per game with
        non-zero playtime. When the response has no ``games`` entry,
        ``(None, None)`` is returned so callers can always unpack the result.
    """
    url = f'http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key={API_KEY}&steamid={steam_id}&format=json&include_played_free_games=1'
    response = requests.get(url)
    data = response.json()['response']
    if 'games' not in data:
        # Keep the two-tuple shape: a bare None breaks `games, games_data = get_games_2(...)`
        return None, None
    # Collect plain records and build the frame once; concatenating one row at a
    # time copies the whole frame on every iteration.
    records = [
        {'steam_id': steam_id, 'id': game['appid'], 'time': game['playtime_forever']}
        for game in data['games']
        if game['playtime_forever'] != 0
    ]
    games_data = pd.DataFrame(records, columns=['steam_id', 'id', 'time'])
    games = pd.DataFrame({'total_games': data['game_count'], 'played_games': len(games_data)}, index=[steam_id])

    return games, games_data


In [119]:
def get_friends(steam_id, visited_friends):
    """Return the Steam IDs of a user's friends that are not in ``visited_friends``.

    Returns ``None`` when the response contains no ``friendslist`` entry.
    """
    url = f'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key={API_KEY}&steamid={steam_id}&relationship=friend'
    payload = requests.get(url).json()
    if 'friendslist' not in payload:
        return None
    return [
        entry['steamid']
        for entry in payload['friendslist']['friends']
        if entry['steamid'] not in visited_friends
    ]

In [120]:
def save_visited_friends(visited_friends):
    """Rewrite visited_friends.txt with one visited Steam ID per line."""
    with open('visited_friends.txt', 'w') as out:
        out.writelines(f'{steam_id}\n' for steam_id in visited_friends)

def save_friends_queue(friends_queue):
    """Rewrite friends_queue.txt with one pending Steam ID per line, in queue order."""
    with open('friends_queue.txt', 'w') as out:
        out.writelines(f'{steam_id}\n' for steam_id in friends_queue)

def save_games(games, filename, overwrite=False):
    """Write the per-user summary frame to ``filename``.

    Rows are appended without a header when the file already exists and
    ``overwrite`` is falsy; otherwise the file is (re)created with a
    ``steam_id,total_games,played_games`` header.
    """
    append_to_existing = os.path.isfile(filename) and not overwrite
    if append_to_existing:
        games.to_csv(filename, mode='a', header=False)
        return
    games.to_csv(filename, header=['total_games', 'played_games'], index_label='steam_id')
    
def save_game_time(games_data, filename, overwrite=False):
    """Write the per-game playtime rows to ``filename`` without the index.

    Rows are appended without a header when the file already exists and
    ``overwrite`` is falsy; otherwise the file is (re)created with a
    ``steam_id,id,time`` header.
    """
    append_to_existing = os.path.isfile(filename) and not overwrite
    if append_to_existing:
        games_data.to_csv(filename, mode='a', header=False, index=False)
        return
    games_data.to_csv(filename, header=['steam_id', 'id', 'time'], index=False)

In [121]:
overwrite = True # Set to True to start from scratch

# Upper bound on the number of pending Steam IDs; a friend list that would push
# the queue to this size or beyond is not added.
MAX_QUEUE_SIZE = 3000

# Resume the crawl queue from friends_queue.txt if it exists, otherwise seed it with STEAM_ID
if os.path.exists('friends_queue.txt') and not overwrite:
    with open('friends_queue.txt', 'r') as f:
        friends_queue = f.read().splitlines()
else:
    friends_queue = [STEAM_ID]

# Resume the list of already visited Steam IDs from visited_friends.txt if it exists
if os.path.exists('visited_friends.txt') and not overwrite:
    with open('visited_friends.txt', 'r') as f:
        visited_friends = f.read().splitlines()
else:
    visited_friends = []

if overwrite:
    # The save helpers append to existing files, so stale output must go first
    for stale_file in ('user_games_data.csv', 'games_time.csv'):
        if os.path.exists(stale_file):
            os.remove(stale_file)

iterations = 2

for i in tqdm(range(iterations)):
    if not friends_queue:
        # Nothing left to crawl; pop(0) on an empty list would raise IndexError
        break

    user = friends_queue.pop(0)
    visited_friends.append(user)

    if (i+1)%100 == 0:
        print(f"Queue: {len(friends_queue)} | Visited total: {len(visited_friends)}")

    # Get games; a user without visible games may yield None instead of a tuple
    result = get_games_2(user)
    games, games_data = result if result is not None else (None, None)

    # Get (not yet visited) friends of the user and append them to the queue.
    # IDs that are already waiting in the queue are skipped, otherwise a friend
    # shared by several users would be crawled (and written to the CSVs) repeatedly.
    friends = get_friends(user, visited_friends)
    if friends is not None:
        already_queued = set(friends_queue)
        new_friends = [friend for friend in friends if friend not in already_queued]
        if len(new_friends) + len(friends_queue) < MAX_QUEUE_SIZE:
            friends_queue.extend(new_friends)

    # The summary row is labelled by the Steam ID, so read it by position with .iloc
    if games is not None and games['played_games'].iloc[0] > 0:
        save_games(games, 'user_games_data.csv')
        save_game_time(games_data, 'games_time.csv')

    # Save the visited friends and the friends queue
    save_visited_friends(visited_friends)
    save_friends_queue(friends_queue)
    

  0%|          | 0/2 [00:00<?, ?it/s]

In [122]:
# Show the per-game playtime frame left over from the last crawl-loop iteration
games_data

Unnamed: 0,steam_id,id,time
0,76561197962287503,10,3
1,76561197962287503,70,77
2,76561197962287503,220,125
3,76561197962287503,240,472
4,76561197962287503,80,13
...,...,...,...
3809,76561197962287503,2562730,8985
3810,76561197962287503,1485590,798
3811,76561197962287503,2017080,109
3812,76561197962287503,440,539


In [123]:
# Reload the per-user summary CSV; its first column (steam_id) becomes the index.
# NOTE: this rebinds `games`, which the crawl loop also uses for its per-user row.
games = pd.read_csv('user_games_data.csv', index_col=0)

In [127]:
# Users whose total_games value is above 50
games.loc[games.total_games > 50]

Unnamed: 0_level_0,total_games,played_games
steam_id,Unnamed: 1_level_1,Unnamed: 2_level_1
76561198280830500,1156,887
76561197962287503,5624,3814


In [128]:
# Preview the first rows of the per-user summary
games.head()

Unnamed: 0_level_0,total_games,played_games
steam_id,Unnamed: 1_level_1,Unnamed: 2_level_1
76561198280830500,1156,887
76561197962287503,5624,3814


In [130]:
# Reload the per-game playtime rows (steam_id, id, time) from games_time.csv
games_time = pd.read_csv('games_time.csv')

In [140]:
# Rows belonging to STEAM_ID. The env value is a string, so it is cast to int for
# the comparison (presumably read_csv parsed steam_id as integers; confirm via dtypes).
games_time.loc[games_time.steam_id == int(STEAM_ID)]

Unnamed: 0,steam_id,id,time
0,76561198280830500,4000,77748
1,76561198280830500,400,637
2,76561198280830500,20900,498
3,76561198280830500,22000,632
4,76561198280830500,17410,396
...,...,...,...
882,76561198280830500,2667970,5
883,76561198280830500,70,616
884,76561198280830500,2716140,31
885,76561198280830500,1035510,201


(numpy.int64, str)

TODO:
- GetOwnedGames - include_played_free_games
dunno, so far this looks pretty nice