In [3]:
# Load environment variables (expected to come from a local .env file, so that
# credentials are never hard-coded in the notebook).
from dotenv import load_dotenv
load_dotenv()

# Steam Web API credentials and the crawl's starting account.
# NOTE: os.getenv returns None when a variable is missing, so both values
# may be None if the environment is not configured.
import os
API_KEY = os.getenv("API_KEY")
STEAM_ID = os.getenv("STEAM_ID")

import requests
import pandas as pd
from tqdm.notebook import tqdm

# Folder name used for the crawler's on-disk data.
data_folder = "data"

General flow:
- Start with my Steam ID.
- Get a list of my games and save it in a dataframe.
- Every time the dataframe is updated, save it in a CSV file.
- Append the SteamID to the list of already visited SteamIDs.
- For each SteamID, get a list of their friends, check if they are already in the list of visited SteamIDs, and if not, add them to the list of SteamIDs to visit.
- Save the list of SteamIDs to visit in a csv file.
- Repeat the process.

In [None]:
class DataBase():
    """Breadth-first crawler over Steam friend lists that persists play-time data.

    Every call to :meth:`cycle` takes one Steam ID from the front of
    ``friends_queue``, queues that user's not-yet-seen friends, records the
    games the user has played and writes the updated state to ``data_folder``.

    Files kept inside ``data_folder``:

    * ``user_data.csv``     - columns ``user_id``, ``total_games``
    * ``game_data.csv``     - columns ``game_steam_id``, ``game_name``
    * ``time_data.csv``     - columns ``user_id``, ``game_steam_id``, ``playtime``
    * ``game_list.csv``     - cached app list (``game_steam_id``, ``game_name``)
    * ``friends_queue.txt`` - Steam IDs still to visit, one per line
    * ``visited.txt``       - Steam IDs already processed, one per line

    Args:
        data_folder: Directory holding the files above (created if missing).
        overwrite: If True, delete ``data_folder`` and start from scratch.
        max_queue_size: Upper bound on the length of ``friends_queue``;
            ``None`` disables the bound.
        api_key: Steam Web API key. Defaults to the module-level ``API_KEY``.
        seed_steam_id: Steam ID used to seed a fresh queue. Defaults to the
            module-level ``STEAM_ID``.
    """

    # HTTPS endpoints, so the API key is not sent in clear text.
    _OWNED_GAMES_URL = 'https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/'
    _FRIEND_LIST_URL = 'https://api.steampowered.com/ISteamUser/GetFriendList/v0001/'
    _APP_LIST_URL = 'https://api.steampowered.com/ISteamApps/GetAppList/v0002/'
    # Seconds to wait for the server before giving up on a request.
    _REQUEST_TIMEOUT = 30

    def __init__(self, data_folder, overwrite=False, max_queue_size=1000,
                 api_key=None, seed_steam_id=None):
        self.data_folder = None
        self._create_data_folder(data_folder)

        if overwrite:
            self._overwrite()

        self.max_queue_size = max_queue_size
        # The module-level names are only looked up when no explicit value is given.
        self.api_key = API_KEY if api_key is None else api_key
        self.seed_steam_id = STEAM_ID if seed_steam_id is None else seed_steam_id

        self.user_data_file = os.path.join(self.data_folder, "user_data.csv")
        self.game_data_file = os.path.join(self.data_folder, "game_data.csv")
        self.time_data_file = os.path.join(self.data_folder, "time_data.csv")
        self.game_list_file = os.path.join(self.data_folder, "game_list.csv")
        self.friends_queue_file = os.path.join(self.data_folder, "friends_queue.txt")
        self.visited_file = os.path.join(self.data_folder, "visited.txt")

        self.user_data = None
        self.game_data = None
        self.time_data = None

        self._load_user_data(self.user_data_file)
        self._load_game_data(self.game_data_file)
        self._load_time_data(self.time_data_file)

        self.friends_queue = None
        self._load_friends_queue(self.friends_queue_file)
        self.visited = None
        self._load_visited_friends(self.visited_file)

        self.game_list = None
        self._load_game_list(self.game_list_file)

    def cycle(self):
        """Process the next Steam ID in the queue.

        Returns:
            True if an entry was consumed from the queue, False if the queue
            was empty and nothing was done.

        Network errors propagate to the caller. Both API calls are made
        before the queue or the visited list is touched, so a failed cycle
        leaves the same user at the front of the queue for a retry.
        """
        if not self.friends_queue:
            return False

        user = self.friends_queue[0]
        visited = set(self.visited)

        # A queue written by an older run may contain already-processed IDs;
        # drop them instead of recording the same user twice.
        if user in visited:
            self.friends_queue.pop(0)
            self._save_friends_queue(self.friends_queue_file)
            return True

        # Fetch everything first, commit afterwards.
        friends = self._get_friends(user)
        time_data, user_data = self._get_games(user)

        self.friends_queue.pop(0)
        self.visited.append(user)
        visited.add(user)

        # Queue friends that are neither visited nor already waiting.
        queued = set(self.friends_queue)
        for friend in friends or []:
            if self.max_queue_size is not None and len(self.friends_queue) >= self.max_queue_size:
                break
            friend_id = friend['steamid']
            if friend_id in visited or friend_id in queued:
                continue
            self.friends_queue.append(friend_id)
            queued.add(friend_id)
        self._save_friends_queue(self.friends_queue_file)

        # Only users with at least one played game are recorded.
        if time_data is not None and user_data['total_games'].iloc[0] > 0:
            self.time_data = self._append_rows(self.time_data, time_data)
            self.user_data = self._append_rows(self.user_data, user_data)
            self._save_time_data(self.time_data_file)
            self._save_user_data(self.user_data_file)

        self._save_visited_data(self.visited_file)
        self._save_game_data(self.game_data_file)
        return True

    @staticmethod
    def _append_rows(existing, new_rows):
        """Return ``existing`` with ``new_rows`` appended and a fresh 0..n-1 index."""
        if new_rows.empty:
            return existing
        if existing.empty:
            # Skip concatenating with an empty frame, which only degrades dtypes.
            return new_rows.reset_index(drop=True)
        return pd.concat([existing, new_rows], ignore_index=True, axis=0)

    def _request_json(self, url, params=None):
        """GET ``url`` and return the decoded JSON body, or None if unavailable.

        Rate-limit (429) and server (5xx) responses raise
        ``requests.HTTPError`` so the caller can retry later. Any other
        non-success status, or a body that is not valid JSON, yields None.
        """
        response = requests.get(url, params=params, timeout=self._REQUEST_TIMEOUT)
        if response.status_code == 429 or response.status_code >= 500:
            response.raise_for_status()
        if not response.ok:
            return None
        try:
            return response.json()
        except ValueError:
            return None

    def _lookup_game_name(self, appid):
        """Return the name stored in ``game_list`` for ``appid``, or 'unknown game'."""
        matches = self.game_list.loc[self.game_list['game_steam_id'] == appid, 'game_name']
        if matches.empty:
            return 'unknown game'
        name = matches.iloc[0]
        # A name read back from CSV may be NaN; treat that as unknown too.
        return name if isinstance(name, str) and name else 'unknown game'

    def _get_games(self, steam_id):
        """Fetch the games owned by ``steam_id`` and build its data rows.

        Games with a positive ``playtime_forever`` that are not yet in
        ``self.game_data`` are appended to it as a side effect.

        Returns:
            ``(time_data, user_data)`` DataFrames, or ``(None, None)`` when
            the response contains no ``games`` entry. ``time_data`` has one
            row per played game; ``user_data`` has a single row holding the
            newly assigned ``user_id`` and the number of played games.
        """
        params = {
            'key': self.api_key,
            'steamid': steam_id,
            'format': 'json',
            'include_played_free_games': 1,
        }
        payload = self._request_json(self._OWNED_GAMES_URL, params) or {}
        games = (payload.get('response') or {}).get('games')
        if games is None:
            return None, None

        # Sequential surrogate id: one more than the largest id assigned so far.
        user_id = self.user_data['user_id'].max() + 1 if not self.user_data.empty else 1

        known_ids = set(self.game_data['game_steam_id'].tolist())
        time_rows = []
        new_games = []
        for game in games:
            if game.get('playtime_forever', 0) <= 0:
                continue
            appid = game['appid']
            time_rows.append({
                'user_id': user_id,
                'game_steam_id': appid,
                'playtime': game['playtime_forever'],
            })
            if appid not in known_ids:
                known_ids.add(appid)
                new_games.append({
                    'game_steam_id': appid,
                    'game_name': self._lookup_game_name(appid),
                })

        # Append all new games in one go rather than once per game.
        if new_games:
            self.game_data = self._append_rows(self.game_data, pd.DataFrame(new_games))

        time_data = pd.DataFrame(time_rows)
        user_data = pd.DataFrame([{'user_id': user_id, 'total_games': len(time_rows)}])
        return time_data, user_data

    def _get_friends(self, steam_id):
        """Return the friend entries (dicts with a 'steamid' key) of ``steam_id``.

        Returns None when the response has no ``friendslist`` entry.
        """
        params = {'key': self.api_key, 'steamid': steam_id, 'relationship': 'friend'}
        data = self._request_json(self._FRIEND_LIST_URL, params)
        if not data or 'friendslist' not in data:
            return None
        return data['friendslist'].get('friends')

    def _save_user_data(self, user_data_file):
        """Write ``self.user_data`` to ``user_data_file`` as CSV."""
        self.user_data.to_csv(user_data_file, index=False)

    def _save_game_data(self, game_data_file):
        """Write ``self.game_data`` to ``game_data_file`` as CSV."""
        self.game_data.to_csv(game_data_file, index=False)

    def _save_time_data(self, time_data_file):
        """Write ``self.time_data`` to ``time_data_file`` as CSV."""
        self.time_data.to_csv(time_data_file, index=False)

    def _save_game_list(self, game_list_file):
        """Write ``self.game_list`` to ``game_list_file`` as CSV."""
        self.game_list.to_csv(game_list_file, index=False)

    def _save_friends_queue(self, friends_data_file):
        """Write the pending Steam IDs to ``friends_data_file``, one per line."""
        with open(friends_data_file, 'w') as f:
            f.writelines(f"{friend}\n" for friend in self.friends_queue)

    def _save_visited_data(self, visited_file):
        """Write the visited Steam IDs to ``visited_file``, one per line."""
        with open(visited_file, 'w') as f:
            f.writelines(f"{friend}\n" for friend in self.visited)

    def _create_data_folder(self, data_folder):
        """Create ``data_folder`` if needed and remember it on the instance."""
        os.makedirs(data_folder, exist_ok=True)
        self.data_folder = data_folder

    def _load_user_data(self, user_data):
        """Load the user table from the CSV path ``user_data`` (empty table if absent)."""
        if os.path.exists(user_data):
            user_data = pd.read_csv(user_data)
        else:
            user_data = pd.DataFrame(columns=['user_id', 'total_games'])
        self.user_data = user_data
        self._save_user_data(self.user_data_file)

    def _load_game_data(self, game_data):
        """Load the game table from the CSV path ``game_data`` (empty table if absent)."""
        if os.path.exists(game_data):
            game_data = pd.read_csv(game_data)
        else:
            game_data = pd.DataFrame(columns=['game_steam_id', 'game_name'])
        self.game_data = game_data
        self._save_game_data(self.game_data_file)

    def _load_time_data(self, time_data):
        """Load the play-time table from the CSV path ``time_data`` (empty table if absent)."""
        if os.path.exists(time_data):
            time_data = pd.read_csv(time_data)
        else:
            time_data = pd.DataFrame(columns=['user_id', 'game_steam_id', 'playtime'])
        self.time_data = time_data
        self._save_time_data(self.time_data_file)

    def _load_friends_queue(self, friends_queue):
        """Load the pending queue from ``friends_queue`` or seed it with the start ID.

        Raises:
            ValueError: if there is no queue file and no seed Steam ID is set.
        """
        if os.path.exists(friends_queue):
            with open(friends_queue, 'r') as f:
                # Skip blank lines so an empty string is never treated as an ID.
                friends_queue = [line.strip() for line in f if line.strip()]
        else:
            if not self.seed_steam_id:
                raise ValueError(
                    "No seed Steam ID available: set the STEAM_ID environment "
                    "variable or pass seed_steam_id."
                )
            friends_queue = [self.seed_steam_id]
        self.friends_queue = friends_queue
        self._save_friends_queue(self.friends_queue_file)

    def _load_visited_friends(self, visited):
        """Load the visited Steam IDs from ``visited`` (empty list if absent)."""
        if os.path.exists(visited):
            with open(visited, 'r') as f:
                visited = [line.strip() for line in f if line.strip()]
        else:
            visited = []
        self.visited = visited
        self._save_visited_data(self.visited_file)

    def _load_game_list(self, game_list):
        """Load the app-id to name table from ``game_list``, downloading it if absent.

        Raises:
            RuntimeError: if the list has to be downloaded and the response
                does not contain it.
        """
        if os.path.exists(game_list):
            self.game_list = pd.read_csv(game_list)
            return

        data = self._request_json(self._APP_LIST_URL)
        if not data or 'applist' not in data:
            raise RuntimeError("Could not download the Steam app list")

        game_list = pd.DataFrame(data['applist']['apps']).rename(
            columns={'appid': 'game_steam_id', 'name': 'game_name'}
        )
        # Remove carriage returns. The pattern is a real '\r' character matched
        # literally, so the result does not depend on pandas' regex default.
        game_list['game_name'] = game_list['game_name'].str.replace('\r', '', regex=False)
        # rename games with no name to 'unknown game'
        game_list['game_name'] = game_list['game_name'].replace('', 'unknown game')
        self.game_list = game_list
        self._save_game_list(self.game_list_file)

    def _overwrite(self):
        """Delete the data folder with all its files and recreate it empty."""
        if os.path.exists(self.data_folder):
            import shutil
            shutil.rmtree(self.data_folder)
        os.makedirs(self.data_folder)

In [None]:
# Use the shared `data_folder` setting rather than repeating the literal path,
# so the storage location is configured in a single place.
database = DataBase(data_folder)

In [178]:
cycles_count = 5000

for i in tqdm(range(cycles_count)):
    # Stop once there is nobody left to visit; calling cycle() on an empty
    # queue would otherwise fail when it pops the next user.
    if not database.friends_queue:
        print("Queue is empty - stopping early.")
        break
    database.cycle()
    if i % 100 == 0:
        print(f"Queue length: {len(database.friends_queue)}")

  0%|          | 0/5000 [00:00<?, ?it/s]

Queue length: 189
Queue length: 8719
Queue length: 13211
Queue length: 21445
Queue length: 41807
Queue length: 62648
Queue length: 71239
Queue length: 77295
Queue length: 86750
Queue length: 98431
Queue length: 122595
Queue length: 153209
Queue length: 178143
Queue length: 200848
Queue length: 218085
Queue length: 235429
Queue length: 253449
Queue length: 263675
Queue length: 270652
Queue length: 278398
Queue length: 288379
Queue length: 300711
Queue length: 309344
Queue length: 319729
Queue length: 335331
Queue length: 354333
Queue length: 374381
Queue length: 383531
Queue length: 398953
Queue length: 405954
Queue length: 412290
Queue length: 417517
Queue length: 424906
Queue length: 447305
Queue length: 459083
Queue length: 468287
Queue length: 476967
Queue length: 486522
Queue length: 493803
Queue length: 504187
Queue length: 517533
Queue length: 530406
Queue length: 549602
Queue length: 570158
Queue length: 591428
Queue length: 610389
Queue length: 626495
Queue length: 637326
Queue