In [1]:
import requests 
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import random
from tqdm import tqdm
from dbclient import DBClient
from steam_game_info import SteamGameInfo
from rawg import RAWG
from bs4 import BeautifulSoup

class SteamCrawl:
    def __init__(self):
        self.session = requests.Session()
        self.base_url = f'https://store.steampowered.com/search/?category1=998&supportedlang=english'
        self.urls = []
        self.dbc = DBClient('games.db')
        self.dbc.create_table()

    def crawl(self, fetch_urls=False):
        # get list of urls
        if fetch_urls:
            self.__download_urls_page_source()
            self.__parse_urls()
        else:
            self.__parse_urls()
        # loop through list and 
        for url in tqdm(self.urls):
            # get features for each url
            game = {}
            game.update(self.__get_steam_features(url))
            game.update(self.__get_rawg_features(url))
            # save features in db
            self.dbc.add_game(game)

        self.dbc.to_csv('games.csv')

        return 'finished'
        
    def __download_urls_page_source(self):
        self.browser = webdriver.Safari()
        results_page = self.browser.get(self.base_url)
        self.__short_pause()
        lastHeight = self.browser.execute_script("return document.body.scrollHeight")
        while True:
            self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            self.__short_pause()
            newHeight = self.browser.execute_script("return document.body.scrollHeight")
            if newHeight == lastHeight:
                break
            lastHeight = newHeight
        self.__save_game_list_source()
        self.browser.close()
    
    def __parse_urls(self):
        html = self.__load_game_list_source()
        soup = BeautifulSoup(html)
        a_tags = soup.find('div', id='search_results').find_all('a')
        self.urls = [ a_tag.get('href') for a_tag in a_tags ]

    def __save_game_list_source(self):
        with open("game_list.html", "w") as f:
            f.write(self.browser.page_source)

    def __load_game_list_source(self):
        with open("game_list.html", "r") as f:
            game_list_source = f.read()
        return game_list_source

    def __short_pause(self):
        duration = random.uniform(0, 3)
        time.sleep(duration)

    def __get_steam_features(self, url):
        sgi = SteamGameInfo()
        html = sgi.get_game_html(url)
        if html:
            features = sgi.strip_features(html)
            return features 

    def __get_rawg_features(self, url):
        name = url.split('/')[5].replace('_', ' ')
        rawg = RAWG()
        features = rawg.get_game(name)
        return features
                

# Test

In [2]:
sc = SteamCrawl()

sc.crawl()

ion Humans has an all new language that lots of workers can all execute at the same time.You'll be taught everything you need to know. Even useless skills can be put to work!From the creators of Human Resource Machine, Little Inferno, and World of Goo.Includes a new soundtrack by Kyle Gabler.  7 Billion Humans will release in early 2018. Wishlist or follow to receive updates on its progress!"}
 17%|█▋        | 5040/30254 [1:36:08<6:50:29,  1.02it/s]error adding game {'Franchise': None, 'Genres': 'Action, RPG, Indie', 'OriginalCost': '$49.97', 'DiscountedCost': None, 'SteamURL': '/', 'Tags': '', 'Name': '7 Billion Humans', 'RawgID': 52005, 'Metacritic': None, 'Presence': 53317, 'Platform': 'PC, macOS, iOS, Linux, Nintendo Switch', 'RatingsBreakdown': 'recommended: 22, exceptional: 8, meh: 4, skip: 2', 'ReleaseDate': '2018-08-22', 'ESRB': None, 'Achievements': 19, 'CreatorsCount': 3, 'Description': "Automate swarms of office workers to solve puzzles inside your very own parallel computer

TypeError: object of type 'NoneType' has no len()

In [3]:
sc.dbc.drop_table()

In [4]:
sc.dbc.close()

In [3]:
sc.dbc.get_all_games()

units\nissue orders before the battle\nMODS:\nCreate mods and new units using Steam Workshop'),
 (991,
  None,
  'Adventure, Indie, RPG',
  '$19.99',
  None,
  'singleplayer',
  1,
  'English, French, Italian, German, Japanese, Korean, Russian, Hungarian, Turkish',
  'https://store.steampowered.com/app/746850/?snr=1_5_9__205',
  0,
  0,
  'AMD / NVIDIA dedicated graphics card',
  '7 GB available space',
  '8 GB RAM',
  'Cyberpunk, Sci-fi, Futuristic, Atmospheric, Open World, Story Rich, Exploration, RPG, Adventure, Mature, Singleplayer, Driving, Third Person, First-Person, Walking Simulator, Female Protagonist, Great Soundtrack, Masterpiece, Choices Matter, Voxel',
  'Cloudpunk',
  262389,
  74,
  432,
  'PlayStation 4, PC',
  'exceptional: 6, recommended: 1, meh: 1, skip: 1',
  '2020-04-23',
  None,
  71,
  1,
  'Your name is Rania. This is your first night working for Cloudpunk, the semi-legal delivery company based in the sprawling city of Nivalis. You go everywhere, from the Marrow