In [None]:
# SI 618 Final Project

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import re
import json
import requests

## Steam Store Reviews Data

Review data for each game in the Steam store can be scraped from the store's review endpoint: `https://store.steampowered.com/appreviews/<appid>?json=1`

The endpoint returns this data as JSON.

In [11]:
def get_steam_reviews(appid, params=None, timeout=30):
    """
    Get one page of Steam reviews for a specific game/app from the Steam
    store review endpoint.

    Parameters
    ----------
    appid : int
        Steam app id. It is appended to the endpoint URL.
    params : dict, optional
        Query parameters to send with the request. When omitted, a fresh
        ``{"json": 1, "filter": "all", "language": "english"}`` dict is
        used, so no default dict is shared between calls.
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get``. Defaults to 30 so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    if params is None:
        params = {
            "json": 1,
            "filter": "all",
            "language": "english",
        }

    base_url = "https://store.steampowered.com/appreviews/"

    # Send a browser-like User-Agent; the original implementation added this
    # header to avoid being timed out by Steam.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
    }

    response = requests.get(
        base_url + str(appid),
        headers=headers,
        params=params,
        timeout=timeout,
    )

    # Fail with a clear HTTP error instead of an opaque JSON decoding error
    # when the server returns an error page.
    response.raise_for_status()

    return response.json()

In [17]:
def get_n_reviews(appid, n=100):
    """
    Collect up to ``n`` reviews for one app by paging through the review endpoint.

    Function code originally written by Andrew Muller at
    https://andrew-muller.medium.com/scraping-steam-user-reviews-9a43f9e38c92

    At most 100 reviews can be returned per request, so this function keeps
    requesting pages of up to 100 reviews, passing the cursor from each
    response into the next request, until ``n`` reviews have been requested
    or a page comes back with fewer than 100 reviews.

    Parameters
    ----------
    appid : int
        The appid of the game to get reviews for
    n : int
        The number of reviews to return

    Returns
    -------
    reviews : list
        A list of dictionaries containing the reviews

    """
    query = {
        "json": 1,
        "filter": "all",
        "language": "english",
        "day_range": 9223372036854775807,
        "review_type": "all",
        "purchase_type": "all",
    }
    collected = []
    next_cursor = "*"  # "*" requests the first page
    remaining = n

    while remaining > 0:
        # The same dict is reused for every page; only these two keys change.
        query.update(
            cursor=next_cursor.encode(),
            num_per_page=min(100, remaining),
        )
        remaining -= 100

        page = get_steam_reviews(appid, query)
        next_cursor = page["cursor"]
        batch = page["reviews"]
        collected.extend(batch)

        # A page with fewer than 100 reviews is treated as the last one.
        if len(batch) < 100:
            break

    return collected

## Steam Store Game Data



In [8]:
def get_steam_game_data(params=None, timeout=30):
    """
    Uses the steamspy API to return data for games on the steam marketplace.

    The default params request the top 100 games by playtime forever; all
    games can be retrieved by changing the ``request`` param to ``all``.

    Parameters
    ----------
    params : dict, optional
        Query parameters to send to the API. When omitted, a fresh
        ``{"request": "top100forever", "format": "json"}`` dict is used, so
        no default dict is shared between calls.
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get``. Defaults to 30 so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    json
        Steam game data as json (the decoded response body)

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.

    """
    if params is None:
        params = {"request": "top100forever", "format": "json"}

    base_url = "https://steamspy.com/api.php"

    response = requests.get(base_url, params=params, timeout=timeout)

    # Surface HTTP errors directly rather than as a JSON decoding failure.
    response.raise_for_status()

    return response.json()

# (Temp) Data to dataframes

In [39]:
# Fetch once, then build the frame: keeping the raw payload in its own variable
# lets the DataFrame construction be re-run without calling the API again.
game_data_raw = get_steam_game_data()

# `from_dict` is a classmethod, so call it on the class instead of on a
# throwaway empty DataFrame instance. The payload's keys become the index.
game_df = pd.DataFrame.from_dict(game_data_raw, orient="index")

In [37]:
def _flatten_review(appid, review):
    """Flatten one raw review dict (with its nested "author" dict) into a flat record."""
    author = review["author"]
    return {
        "appid": appid,
        "recommendationid": review["recommendationid"],
        "author": author["steamid"],
        "author_playtime_forever": author["playtime_forever"],
        "author_playtime_last_two_weeks": author["playtime_last_two_weeks"],
        "author_last_played": author["last_played"],
        "author_num_reviews": author["num_reviews"],
        "votes_up": review["votes_up"],
        "votes_funny": review["votes_funny"],
        "weighted_vote_score": review["weighted_vote_score"],
        "comment_count": review["comment_count"],
        "timestamp_created": review["timestamp_created"],
        "timestamp_updated": review["timestamp_updated"],
        "steam_purchase": review["steam_purchase"],
        "received_for_free": review["received_for_free"],
        "written_during_early_access": review["written_during_early_access"],
        "review": review["review"],
    }


review_list = []
failed_appids = []  # app ids whose reviews could not be fetched or parsed

for appid in game_df.appid:
    # Catch `Exception` instead of using a bare `except:` so that interrupting
    # the kernel (KeyboardInterrupt) still stops this scraping loop.
    try:
        response = get_n_reviews(appid, 10)
        for review in response:
            review_dict = _flatten_review(appid, review)
            review_list.append(review_dict)
    except Exception as exc:
        # Best effort: skip the rest of this app, but report why it failed.
        failed_appids.append(appid)
        print(f"{appid}: {exc!r}")

# Build the frame once from the accumulated records.
review_df = pd.DataFrame(review_list)

In [38]:
# Preview the first five rows of the flattened review table.
review_df.head(n=5)

Unnamed: 0,appid,recommendationid,author,author_playtime_forever,author_playtime_last_two_weeks,author_last_played,author_num_reviews,votes_up,votes_funny,weighted_vote_score,comment_count,timestamp_created,timestamp_updated,steam_purchase,received_for_free,written_during_early_access,review
0,570,139894673,76561198088348690,624225,2643,1697992631,2,422,129,0.9612147212028505,0,1686495146,1686495146,False,False,False,This isn't a free game. Each match cost you a ...
1,570,136918609,76561198147839040,318322,0,1685701832,59,1481,971,0.96044921875,41,1681842720,1681842720,False,False,False,This community is so nice i got a lot of tips ...
2,570,114361523,76561198091204721,785092,0,1649172225,1,575,20,0.9603879451751708,44,1650750971,1650750971,False,False,False,If you told me when I started playing dota 2 a...
3,570,122685262,76561198364587737,6,0,1662282441,3,819,577,0.9582464694976806,0,1663940272,1663940272,False,False,False,This community is so nice i got a lot of tips ...
4,570,103119288,76561198870849422,200,0,1668097004,52,576,500,0.9543924927711486,14,1637398450,1637398450,False,False,False,This community is so nice i got a lot of tips ...


In [40]:
# Preview the first five rows of the game table.
game_df.head(n=5)

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,ccu
570,570,Dota 2,Valve,Valve,,1706564,371508,0,"200,000,000 .. 500,000,000",0,0,0,0,0,0,0,623349
730,730,Counter-Strike: Global Offensive,Valve,Valve,,6769882,922560,0,"100,000,000 .. 200,000,000",0,0,0,0,0,0,0,933496
1172470,1172470,Apex Legends,Respawn Entertainment,Electronic Arts,,584895,148317,0,"50,000,000 .. 100,000,000",0,0,0,0,0,0,0,236252
578080,578080,PUBG: BATTLEGROUNDS,"KRAFTON, Inc.","KRAFTON, Inc.",,1284031,949571,0,"50,000,000 .. 100,000,000",0,0,0,0,0,0,0,265608
1063730,1063730,New World,Amazon Games,Amazon Games,,186761,79224,0,"50,000,000 .. 100,000,000",0,0,0,0,3999,3999,0,53871
