In [1]:
import pandas as pd
import numpy as np 
from bs4 import BeautifulSoup
import requests
import json
import urllib.request as request
import steamreviews
import re
import datetime
import time 
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

In [2]:
# http://api.steampowered.com/ISteamApps/GetAppList/v0001/?key=STEAMKEY&format=json
with open('AppListV1.json', 'r') as f:
    apps = json.load(f)
    
AppList = apps['applist']['apps']['app']
len(AppList)

94290

In [3]:
def retrySession(
    retries=5,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None):
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

In [4]:
def webCapture(url):
    headers = {
   # pretend I am a browser
   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36',
   }
    s = requests.Session()
    response = retrySession(session=s).get(url, headers = headers, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser') #parse the data
    return soup

In [None]:
AppID = []
AppName = []
Genre = []
Tags = []
Developer = []
Publisher = []
ReviewInfor = []
ReviewStatus = []
ReleaseDate = []
IsFree = []
Price = []
count = 0

#for i in range(0, len(AppList)):
for i in range(82242, 90000):
    ID = str(AppList[i]["appid"])
    Name = str(AppList[i]["name"])
    url = f"https://store.steampowered.com/app/{ID}/{Name}"
    # scrapy web page
    gameDetails = webCapture(url)
    
    # get game tags
    try: 
        tag_infor = gameDetails.select("div.glance_tags.popular_tags")[0].get_text()
        tags = re.sub(r'[\t\n\r+]', ' ', tag_infor)
        tags = list(' '.join(tags.split()).split(" "))   # remove white space and put tags in a list
        genre = tags[0] # the first tag is the main genre for the game

    # get developer and publisher
        developer = re.sub(r"[\n]", '', 
                       gameDetails.select("div.dev_row > div.summary.column")[0].get_text().strip())
    
        publisher = re.sub(r"[\n]", '', 
                       gameDetails.select("div.dev_row > div.summary.column")[1].get_text().strip())

    # get "All Reviews" information 
        review_infor = re.sub(r'[\t\n\r+]', '', 
               gameDetails.select('div.user_reviews_summary_row > div.summary.column')[0].get_text())
        review_status = True if review_infor != "No user reviews" else False
        
    # get release date
        date = gameDetails.select("div.release_date > div.date")[0].get_text()
    
    except: 
        genre = None
        tags = None
        developer = None
        publisher = None
        review_infor = None
        review_status = None
        date = None
    
    # get app price
    game_link = f"http://store.steampowered.com/api/appdetails?appids={ID}"
    s = requests.Session()
    source = retrySession(session=s).get(game_link, headers={'User-Agent':'Mozilla/5.0'}, verify=False)
    json_dict = json.loads(source.text)
    
    try: 
        if json_dict[ID]["success"] == True: 
            is_free = json_dict[ID]["data"]["is_free"]
            try: 
                price = json_dict[ID]["data"]["price_overview"] if is_free == False else None
            except: 
                price = None
        else: 
            is_free = None
            price = None
    except TypeError: 
        continue
    
    AppID.append(ID)
    AppName.append(Name)
    Genre.append(genre)
    Tags.append(tags)
    Developer.append(developer)
    Publisher.append(publisher)
    ReviewInfor.append(review_infor)
    ReviewStatus.append(review_status)
    ReleaseDate.append(date)
    IsFree.append(is_free)
    Price.append(price)
    count += 1

print(count)

In [58]:
df_games = pd.DataFrame(list(zip(AppID, AppName, Genre, Tags, Developer, Publisher, ReviewInfor,
                                         ReviewStatus, ReleaseDate, IsFree, Price)),
                       columns=["AppID", "AppName", "Genre", "Tags", "Developer", "Publisher", "ReviewInfor",
                                         "ReviewStatus", "ReleaseDate", "IsFree", "Price"])
df_games.head(3)

Unnamed: 0,AppID,AppName,Genre,Tags,Developer,Publisher,ReviewInfor,ReviewStatus,ReleaseDate,IsFree,Price
0,5702,BLUR Trailer 3,,,,,,,,,
1,5703,StargateResistance,,,,,,,,False,
2,5706,Guns Of Icarus 1,,,,,,,,False,


In [60]:
df_games.to_csv("steam_games.csv",index=False)

In [None]:
df_games = pd.read_csv("steam_games.csv")
df_games.head()

In [2]:
# https://store.steampowered.com/app/618980/Wanderland_Advanced_Adventurer_Pack/
# no review: https://store.steampowered.com/app/619610/Ultratank/
ids = ["523660", '618980']

request_params = dict()
request_params['language'] = 'english'
# Reference: https://partner.steamgames.com/doc/store/getreviews

game_reviews = steamreviews.download_reviews_for_app_id_batch(ids, chosen_request_params=request_params)
game_reviews

[appID = 523660] expected #reviews = 520


{'reviews': {'68187610': {'recommendationid': '68187610',
   'author': {'steamid': '76561198045740795',
    'num_games_owned': 26,
    'num_reviews': 1,
    'playtime_forever': 232,
    'playtime_last_two_weeks': 232,
    'last_played': 1588097810},
   'language': 'english',
   'review': 'Tremendous, exhilarating fun!\nThe attention to detail in this game is second to none, every factor of gameplay and level design has been intentionally made to evoke the feelings of playing with your action figures as a kid.\nIt\'s incredibly refreshing to see a game in this day and age which gets you to play the game to unlock new characters and armour pieces rather than relying on microtransactions up the ying yang. Extra points for this.\nThe variety in enemy types, weapon pickups and character designs took a lot of evident effort. I believe the creators should be truly commended for this. The first time bouncy balls showed up had me go "Oh S**t there\'s goddamn bouncy balls!" out loud... I never t

In [64]:
df_games.AppID.values

array(['5702', '5703', '5706', ..., '667290', '1197620', '1292000'],
      dtype=object)

In [66]:
import datetime

GameID = []
SteamID = []
NumGamesOwned = []
NumReviewsGiven = []
PlayTime = []
PlayTimeLastTwoWeeks = []
LastPlayTime = []
Language = []
Review = []
TimeCreated = []
VoteUp = []
VoteFunny = []
CommentCount = []
SteamPurchase = []

#ids = df_games.AppID.values
ids = ["523660", '618980', '667290']
request_params = dict()
request_params['language'] = 'english'
game_reviews = steamreviews.download_reviews_for_app_id_batch(ids, chosen_request_params=request_params)


for i in range(len(ids)):
    gameid = ids[i]
    with open(f'data/review_{gameid}.json', 'r') as f:
        review_dict = json.load(f)
    
    if len(review_dict["reviews"]) != 0:
        GameID.append(gameid)
        for j in range(len(review_dict["reviews"])):
            try: 
                steamids = list(review_dict["reviews"])
                doc = review_dict["reviews"][steamids[j]]
                author_profile = doc["author"]
                language = doc["language"]
                num_games_owned = author_profile["num_games_owned"]
                num_reviews = author_profile["num_reviews"]
                playtime_forever = author_profile["playtime_forever"]
                playtime_last_two_weeks = author_profile["playtime_last_two_weeks"]
                last_played = datetime.datetime.fromtimestamp(author_profile["last_played"])
                review = doc["review"]
                timestamp_created = datetime.datetime.fromtimestamp(doc["timestamp_created"])
                if doc["voted_up"] == True:
                    votes_up = doc["votes_up"]

                else:
                    votes_up = None
                votes_funny = doc["votes_funny"]
                comment_count = doc["comment_count"]
                steam_purchase = doc["steam_purchase"]
            except TypeError: 
                continue

            SteamID.append(steamids[j])
            NumGamesOwned.append(num_games_owned)
            NumReviewsGiven.append(num_reviews)
            PlayTime.append(playtime_forever)
            PlayTimeLastTwoWeeks.append(playtime_last_two_weeks)
            LastPlayTime.append(last_played)
            Language.append(language)
            Review.append(review)
            TimeCreated.append(timestamp_created)
            VoteUp.append(votes_up)
            VoteFunny.append(votes_funny)
            CommentCount.append(comment_count)
            SteamPurchase.append(steam_purchase)

Loading idprocessed_on_20200430.txt
Creating idprocessed_on_20200430.txt
Downloading reviews for appID = 523660
[appID = 523660] expected #reviews = 543
[appID = 523660] num_reviews = 531 (expected: 543)
Downloading reviews for appID = 618980
[appID = 618980] expected #reviews = 2
[appID = 618980] num_reviews = 2 (expected: 2)
Downloading reviews for appID = 667290
[appID = 667290] expected #reviews = 5
[appID = 667290] num_reviews = 5 (expected: 5)
Game records written: 3


In [67]:
df_reviews = pd.DataFrame(list(zip(SteamID, NumGamesOwned, NumReviewsGiven, PlayTime, PlayTimeLastTwoWeeks, 
                LastPlayTime, Language, Review, TimeCreated, VoteUp, VoteFunny, CommentCount, SteamPurchase)),
                columns=["SteamID", "NumGamesOwned", "NumReviewsGiven", "PlayTime", "PlayTimeLastTwoWeeks", 
                         "LastPlayTime", "Language", "Review", "TimeCreated", "VoteUp", "VoteFunny", 
                         "CommentCount", "SteamPurchase"])
df_reviews.head(3)

Unnamed: 0,SteamID,NumGamesOwned,NumReviewsGiven,PlayTime,PlayTimeLastTwoWeeks,LastPlayTime,Language,Review,TimeCreated,VoteUp,VoteFunny,CommentCount,SteamPurchase
0,68321027,152,10,495,495,1588221043,english,This game is such an addicting tower defense s...,2020-04-30 13:06:07,0.0,0,0,True
1,68320684,94,6,741,741,1588130567,english,Fresh take on environment and tower defense. W...,2020-04-30 12:58:56,0.0,0,0,True
2,68320459,184,9,1042,1042,1588121534,english,After viewing the steam page I was pleasantly ...,2020-04-30 12:54:17,0.0,0,0,True


In [42]:
review_dict["reviews"][list(review_dict["reviews"])[0]]

{'recommendationid': '68187610',
 'author': {'steamid': '76561198045740795',
  'num_games_owned': 26,
  'num_reviews': 1,
  'playtime_forever': 232,
  'playtime_last_two_weeks': 232,
  'last_played': 1588097810},
 'language': 'english',
 'review': 'Tremendous, exhilarating fun!\nThe attention to detail in this game is second to none, every factor of gameplay and level design has been intentionally made to evoke the feelings of playing with your action figures as a kid.\nIt\'s incredibly refreshing to see a game in this day and age which gets you to play the game to unlock new characters and armour pieces rather than relying on microtransactions up the ying yang. Extra points for this.\nThe variety in enemy types, weapon pickups and character designs took a lot of evident effort. I believe the creators should be truly commended for this. The first time bouncy balls showed up had me go "Oh S**t there\'s goddamn bouncy balls!" out loud... I never thought I\'d be afraid of bouncy balls!\n\