
# Steam Analysis — Trend of Video Games before and during COVID-19

Scraping based on and adapted from Jack Etheredge's Steam Webscraping Linear Regression
(See: https://www.github.com/Jack-Etheredge/Steam-Webscraping-Linear-Regression)


# Libraries

Note: Install any dependencies needed before running the notebook
- pip install selenium
- pip install webdriver-manager beautifulsoup4 requests

In [108]:
import urllib
import urllib.request
import requests
from bs4 import BeautifulSoup


# Get list of games

In [8]:
# Collect the title of every game listed on the first 19 Steam search pages.
list_games = []

search_url = 'http://store.steampowered.com/search/?category1=998%2C996&page='

for page in range(1, 20):
    # Use a context manager so the HTTP response is closed once the body is read.
    with urllib.request.urlopen(search_url + str(page)) as response:
        bs = BeautifulSoup(response.read(), "html.parser")

    # Each search result carries its name in a <span class="title">.
    # get_text() yields '' for an empty span, where contents[0] would raise IndexError.
    games = [span.get_text() for span in bs.find_all("span", {"class": "title"})]

    list_games.extend(games)
    print(games)
print(list_games)

['Back 4 Blood', 'Destiny 2', 'Counter-Strike: Global Offensive', 'New World', 'Apex Legends™', 'Demon Slayer -Kimetsu no Yaiba- The Hinokami Chronicles', 'FINAL FANTASY XIV Online', 'Dead by Daylight', 'Halo: The Master Chief Collection', 'Grand Theft Auto V', 'Sid Meier’s Civilization® VI', 'Team Fortress 2', 'The Elder Scrolls® Online', "Tom Clancy's Rainbow Six® Siege", 'Warframe', 'The Riftbreaker', 'Phasmophobia', 'Rust', 'War Thunder', 'Red Dead Redemption 2', 'Fallout 4', 'Sea of Thieves', 'Madden NFL 22', 'The Jackbox Party Pack 8', 'Dota 2']
['The Sims™ 4', "Baldur's Gate 3", 'Borderlands 3', 'Bloons TD 6', 'NBA 2K22', 'Disco Elysium - The Final Cut', 'Black Desert', 'PUBG: BATTLEGROUNDS', 'Tales of Arise', 'Fallout 76', 'RimWorld', 'STAR WARS™: The Old Republic™', 'SMITE®', 'Timberborn', 'It Takes Two', 'Battlefield V', 'House Flipper', 'Pathfinder: Wrath of the Righteous', 'NARAKA: BLADEPOINT', 'Valheim', 'Microsoft Flight Simulator', 'Battlefield 4™', 'The Outer Worlds', '

['Chernobylite', 'MX Bikes', 'DRAGON BALL XENOVERSE 2', 'Dark Deception', 'Rift Wizard', 'The Jackbox Party Pack 4', 'Call of Duty: World at War', 'Rogue Company', 'Black Mesa', 'Transport Fever 2', 'Football Manager 2022', 'Warhammer 40,000: Inquisitor - Martyr', 'Cooking Simulator VR', 'Company of Heroes 2', 'Dragon Age™ Inquisition', 'ENDER LILIES: Quietus of the Knights', 'The Survivalists', 'Crusader Kings II', 'Maneater', 'METAL GEAR SOLID V: THE PHANTOM PAIN', 'Barotrauma', 'Destroy All Humans!', 'Crush Crush', 'Castlevania Advance Collection', 'EARTH DEFENSE FORCE 5']
['Tricky Towers', 'Fishing Planet', 'Dishonored 2', 'RollerCoaster Tycoon® 3: Complete Edition', 'DOOM', 'Kingdoms Reborn', 'Sid Meier’s Civilization® VI Anthology', 'Cooking Simulator', 'Lost in Random™', 'World of Tanks', 'Skater XL - The Ultimate Skateboarding Game', 'Grand Theft Auto V: Premium Edition', 'Fallout 4: Game of the Year Edition', 'Borderlands 3 Ultimate Edition', 'Battlefield Bundle', 'A Dance of 


# Get App IDs for those games

In [32]:
# Collect the data-ds-appid value of every result anchor on the first 19 Steam search pages.
# NOTE(review): titles are scraped in a separate pass (list_games), so the two lists are
# only aligned if the listing does not change between requests -- verify before pairing.
list_ids = []

search_url = 'http://store.steampowered.com/search/?category1=998%2C996&page='

for page in range(1, 20):
    # Use a context manager so the HTTP response is closed once the body is read.
    with urllib.request.urlopen(search_url + str(page)) as response:
        bs = BeautifulSoup(response.read(), "html.parser")

    # Keep only anchors that actually carry a data-ds-appid attribute.
    ids = [r['data-ds-appid'] for r in bs.find_all(name="a", attrs={"data-ds-appid": True})]

    list_ids.extend(ids)
    print(ids)
print(list_ids)

['924970', '1085660', '730', '1063730', '1172470', '1490890', '39210', '381210', '976730', '271590', '289070', '440', '306130', '359550', '230410', '780310', '739630', '252490', '236390', '1174180', '377160', '1172620', '1519350', '570', '1552350']
['1222670', '1086940', '397540', '960090', '632470', '1644960', '578080', '740130', '582660', '1151340', '1286830', '294100', '1238810', '1426210', '386360', '1062090', '613100', '892970', '1184370', '1238860', '1250410', '1203220', '578650', '346110', '238960']
['812140', '413150', '881100', '281990', '270880', '594650', '489830', '686810', '1238840', '1252330', '311210', '703080', '239140', '255710', '284160', '552990', '1414850', '581320', '648800', '526870', '105600', '1091500', '1149620', '1517290', '594570']
['1293830', '601510', '218620', '620980', '242760', '1271700', '1190000', '1426450', '427520', '107410', '264710', '582010', '291550', '275850', '1056640', '1672970', '1549970', '394360', '221100', '1384160', '1167630', '677620', '


# Let's transform these two lists into a dictionary

In [41]:
# Pair each AppID with a title by position (AppID -> title).
# zip() stops at the shorter input, so a length mismatch would silently drop entries;
# report it instead of hiding it.
if len(list_ids) != len(list_games):
    print(f"Warning: {len(list_ids)} IDs but {len(list_games)} titles; unmatched entries are dropped")

# NOTE(review): equal lengths do not guarantee correct pairs, since both lists were
# scraped independently -- spot-check a few entries.
dict_games = dict(zip(list_ids, list_games))
print(dict_games)

{'924970': 'Back 4 Blood', '1085660': 'Destiny 2', '730': 'Counter-Strike: Global Offensive', '1063730': 'New World', '1172470': 'Apex Legends™', '1490890': 'Demon Slayer -Kimetsu no Yaiba- The Hinokami Chronicles', '39210': 'FINAL FANTASY XIV Online', '381210': 'Dead by Daylight', '976730': 'Halo: The Master Chief Collection', '271590': 'Grand Theft Auto V', '289070': 'Sid Meier’s Civilization® VI', '440': 'Team Fortress 2', '306130': 'The Elder Scrolls® Online', '359550': "Tom Clancy's Rainbow Six® Siege", '230410': 'Warframe', '780310': 'The Riftbreaker', '739630': 'Phasmophobia', '252490': 'Rust', '236390': 'War Thunder', '1174180': 'Red Dead Redemption 2', '377160': 'Fallout 4', '1172620': 'Sea of Thieves', '1519350': 'Madden NFL 22', '570': 'The Jackbox Party Pack 8', '1552350': 'Dota 2', '1222670': 'The Sims™ 4', '1086940': "Baldur's Gate 3", '397540': 'Borderlands 3', '960090': 'Bloons TD 6', '632470': 'NBA 2K22', '1644960': 'Disco Elysium - The Final Cut', '578080': 'Black Des


# Scrape data in SteamDB based on App IDs

For each ID, scrape data from https://steamdb.info/app/[APP_ID]/graphs/

In [54]:
# Here, we can search the AppID of any given game name

def searchID(game_search):
    """Return the AppID whose title equals ``game_search`` exactly, or None.

    The lookup scans the module-level ``dict_games`` mapping (AppID -> title)
    and returns the first matching key in the dict's iteration order.
    """
    matching_ids = (gid for gid, title in dict_games.items() if title == game_search)
    return next(matching_ids, None)
        
app_id = searchID("Counter-Strike: Global Offensive")
# searchID returns None for an unknown title; an f-string formats that safely,
# whereas "AppID: " + None would raise TypeError.
print(f"AppID: {app_id}")

AppID: 730


In [96]:
# Here, we try to fetch the SteamDB graphs page for the game and read its first table.
# list_monthly is meant to hold the peak number of players for each month; it is
# created here but not filled yet.

list_monthly = []

headers = { "User-Agent":"Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0" }

url = "http://steamdb.info/app/"+str(app_id)+"/graphs/"
# A timeout keeps the cell from hanging indefinitely if the server never answers.
src = requests.get(url, headers=headers, timeout=30)
bs = BeautifulSoup(src.content, "html.parser")

table = bs.find('table')
print(table)

if table is None:
    # Print the status code so a rejected request can be told apart from a page
    # that simply has no table in its static HTML.
    print(f"No <table> found (HTTP status {src.status_code})")
else:
    # Print the cell texts of each row.
    for tr in table.find_all('tr'):
        row = [cell.text for cell in tr.find_all('td')]
        print(row)

None


In [95]:
# Let's try using urllib module instead of requests module
# Reference: stackoverflow.com/questions/45448994/wait-page-to-load-before-getting-data-with-requests-get-in-python-3

import urllib.error

url = "http://steamdb.info/app/"+str(app_id)+"/graphs/"
req = urllib.request.Request(
    url,
    data=None,
    headers=headers
)

# Stays None when the request fails, so later code never sees a stale page.
html = None

try:
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

except urllib.error.HTTPError as e:
    if e.code == 404:
        print(f"{e.code} Error: URL not found")
    elif e.code == 503:
        print(f"{e.code} Error: Service not available")
    else:
        # Keep the status code and reason instead of a generic message.
        print(f"{e.code} Error: {e.reason}")
except urllib.error.URLError as e:
    # Raised when no HTTP response was received at all (DNS failure, refused connection, ...).
    print(f"URL Error: {e.reason}")

503 Error: Service not available


In [None]:
# 503 Error. Looks like we'll need to find another way to scrape data, one that can mimic a browser. Selenium?

# Using Selenium (Read First)

In [None]:
# You will need a webdriver.

# If the output does not show a webdriver being found (see the [WDM] log lines below),
# you may need to follow this guide and install everything on the da2 machine:
# https://stackoverflow.com/a/44039546

In [154]:
# Imports we need
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

# Command-line switches passed to Chrome when the driver starts it.
o = Options()
for _flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    o.add_argument(_flag)

# Let's test out the service: ChromeDriverManager().install() supplies the
# chromedriver location that Service is built from.
s = Service(ChromeDriverManager().install())

[WDM] - 

[WDM] - Current google-chrome version is 94.0.4606
[WDM] - Get LATEST driver version for 94.0.4606
[WDM] - Driver [/home/wjohns53/.wdm/drivers/chromedriver/linux64/94.0.4606.61/chromedriver] found in cache


In [None]:
# Now let's test the driver (we'll use Steam as an example)
import time

driver = webdriver.Chrome(service=s, options=o)

try:
    driver.get("http://store.steampowered.com")
    # Wait before reading the page so scripts have a moment to render content.
    time.sleep(2)
    html = driver.page_source
    print(html)
finally:
    # Always shut the browser down, even if loading fails. The original
    # `driver.close` lacked parentheses, so it never ran.
    driver.quit()