# Retrieve No Man's Sky (NMS) review data from the Steam API

In [1]:
import numpy as np
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import urllib.parse
import time
from datetime import datetime
import math
import random
import copy
import os
from tqdm import tqdm

In [2]:
def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """
    Return a requests session whose 'http://' and 'https://' requests go through a retrying adapter.

    Follows the recipe from 'https://www.peterbe.com/plog/best-practice-with-retries-with-requests' by Peter Bengtsson,
    accessed on 23/06/2025

    retries          -- passed to Retry as the 'total', 'read' and 'connect' limits.
    backoff_factor   -- passed to Retry as 'backoff_factor'.
    status_forcelist -- passed to Retry as 'status_forcelist'.
    session          -- session to configure; a new requests.Session is created when this is falsy.

    Returns the configured session (the same object that was passed in, if one was given).
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    ## The same adapter instance is mounted for both URL schemes
    for scheme_prefix in ('http://', 'https://'):
        session.mount(scheme_prefix, retrying_adapter)

    return session

def requests_retry(
    url,
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    timeout=4,
    session=None):

    """
    Send a GET request to 'url' through a retrying session and return the response.

    Adapted from 'https://www.peterbe.com/plog/best-practice-with-retries-with-requests' by Peter Bengtsson,
    accessed on 23/06/2025

    url              -- address to request.
    retries, backoff_factor, status_forcelist
                     -- forwarded unchanged to 'requests_retry_session()'.
    timeout          -- passed to the session's 'get()' call as 'timeout'.
    session          -- optional existing session to send the request with. It is configured by
                        'requests_retry_session()' and left open, so the caller can reuse it.
                        When None, a temporary session is created and closed before returning.

    Returns the response object produced by the session's 'get()' call.
    """

    if session is not None:
        ## The caller owns this session: use it, but do not close it.
        return requests_retry_session(
            retries,
            backoff_factor,
            status_forcelist,
            session=session).get(url,timeout=timeout)

    ## No session supplied: create a short-lived one and close it on exit so that repeated calls
    ## (one per review page) do not accumulate open connection pools. The request is not made in
    ## streaming mode, so the response body has been read before the session is closed.
    with requests.Session() as s:
        return requests_retry_session(
            retries,
            backoff_factor,
            status_forcelist,
            session=s).get(url,timeout=timeout)

In [3]:
def steam_appID_review_request(appid,requestFilterType,nReviewsPerPage,cursor='*',DEBUG=False):
    """
    Request one page of reviews for a Steam app from the store's '/appreviews/:appid' endpoint.

    appid             -- Steam app id whose reviews are requested.
    requestFilterType -- value sent as the 'filter' query parameter.
    nReviewsPerPage   -- value sent as the 'num_per_page' query parameter.
    cursor            -- value sent as the 'cursor' query parameter. It is inserted into the URL
                         exactly as given, so it must already be URL-encoded.
    DEBUG             -- when True, print a message after each successful stage of the request.

    Returns the decoded json response (a dict) from the first attempt that has HTTP status 200,
    'success' == 1 and a non-null 'cursor'.

    Raises RuntimeError if none of the attempts produced such a response.
    """

    ## For as-of-yet unknown reasons, the Steam API for '/appreviews/:appid?' will sometimes return None data despite the http request returning success (status_code==200), and data returned from Steam API returning 'success==1'.
    ## So, here I have implemented a repeated attempt loop, that should retry the query for the current appid and review page (as defined by variable 'cursor'). On success, it returns immediately.
    ##
    ## TODO: the error that underlies this need for a repeated attempt loop has yet to be identified. I am uncertain if this issue is arising from 'requests', 'urllib', or the Steam API.

    ## Note the '&' between 'json=1' and 'filter=': without it the filter is never sent as its own parameter.
    url = (
        f"https://store.steampowered.com/appreviews/{appid}"
        f"?json=1&filter={requestFilterType}&language=all&cursor={cursor}"
        f"&date_range_type=all&l=english&review_type=all&purchase_type=all"
        f"&filter_offtopic_activity=0&num_per_page={nReviewsPerPage}"
    )

    maxAttempts = 3
    for attempt in range(maxAttempts):
        ## Make API request
        appreviewsReq = requests_retry(url)
        if appreviewsReq.status_code != 200:
            print(f"HTML 'Get' request failed with error code {appreviewsReq.status_code} on page cursor={cursor}.")
            continue ## Retry request
        if DEBUG: print(f"HTML 'Get' request successful.")

        ## A body that is not valid json is treated like any other failed attempt
        try:
            appreviews = appreviewsReq.json()
        except ValueError:
            print(f"AppID {appid} reviews request returned a body that is not valid json on page cursor={cursor}.")
            continue ## Retry request

        ## A null/non-dict payload or a missing 'success' key counts as a failed attempt rather than crashing
        if not isinstance(appreviews, dict) or appreviews.get('success') != 1:
            print(f"AppID {appid} reviews request failed on page cursor={cursor}.")
            continue ## Retry request
        if DEBUG: print(f"API request successful. Returned data converted to json format.")

        ## Check there is data returned from query, or if data is None type
        if appreviews.get('cursor') is None:
            print(f"AppID {appid} reviews request failed, returning null data on page cursor={cursor}.")
            continue ## Retry request

        return appreviews

    ## Every attempt failed; report the status code of the last response received
    raise RuntimeError(f"AppID {appid} reviews request failed on page cursor={cursor}."+"\n"+f"HTML 'Get' request returned code {appreviewsReq.status_code}.")

## Grab initial data regarding steam app

In [4]:
appid = 275850 # NMS app id on Steam
## Alternative app id for testing: 3140120 -- a newly released game with a low number of reviews,
## useful for testing the end of the iteration over review pages in the loop further below.
nReviewsPerPage = 20 # Number of reviews per page of Steam API get request
## Set requestFilterType.
## The string below is reference documentation only; it is not assigned to anything.
"""
See https://github.com/Revadike/InternalSteamWebAPI/wiki/Get-App-Reviews 

recent – sorted by creation time
updated – sorted by last updated time
all – sorted by helpfulness, with sliding windows based on day_range parameter, will always find results to return.
summary – (default) sorted by helpfulness, strictly returns 10 reviews without paging (ignores num_per_page), represents the summary score by including reviews based on the proportion of positive to negative votes (see the corresponding blog article)

If paging through the reviews with cursor then choose either the recent option or the updated option to eventually receive an empty response list.
"""
requestFilterType = 'updated' ## for debugging set to 'summary', otherwise set to 'updated'

In [5]:
## Fetch the first page of results (cursor='*'), as determined by the request filters set above,
## with debug messages switched on.
appreviews = steam_appID_review_request(
    appid,
    requestFilterType,
    nReviewsPerPage,
    cursor='*',
    DEBUG=True,
)

HTML 'Get' request successful.
API request successful. Returned data converted to json format.


In [6]:
## Keep an independent copy of the query summary so that later changes to 'appreviews'
## cannot alter the values stored in 'reviewSummary'.
reviewSummary = copy.deepcopy(appreviews['query_summary'])
print(reviewSummary)

{'num_reviews': 20, 'review_score': 8, 'review_score_desc': 'Very Positive', 'total_positive': 295192, 'total_negative': 59593, 'total_reviews': 354785}


In [7]:
## Number of pages needed to cover every review; a partially filled last page still counts as a page.
numReviews = reviewSummary['total_reviews']
numReviewPages = math.ceil(numReviews / nReviewsPerPage)
print(f"Number of review pages = {numReviewPages}")

Number of review pages = 17740


In [8]:
## Keys to extract from each review returned by the Steam API.
## A plain string names a key read directly from the review. A dictionary maps
## the name of a nested dictionary (here the review author's details) to the
## list of keys to read from inside that nested dictionary.
desiredDataFilters = [
    'recommendationid',
    {
        'author': [
            'num_games_owned',
            'num_reviews',
            'playtime_forever',
            'playtime_last_two_weeks',
            'playtime_at_review',
            'last_played',
        ],
    },
    'language',
    'review',
    'timestamp_created',
    'timestamp_updated',
    'voted_up',
    'votes_up',
    'votes_funny',
    'weighted_vote_score',
    'comment_count',
    'steam_purchase',
    'received_for_free',
    'written_during_early_access',
    'primarily_steam_deck',
]

## Begin gathering review data

In [None]:
## Collected review fields: one list per field name, with one entry per review in the order received.
reviewData = {}
cursor = '*'
## Loop over each review page
for page in tqdm(range(1,numReviewPages+1)):
    ## Reduce request frequency in less 'robotic' fashion to prevent sites blocking access
    time.sleep(random.uniform(0.0,2.0)/100) ## 0 -- 20 milli-seconds

    ## request current page number for reviews
    rawData = steam_appID_review_request(appid,requestFilterType,nReviewsPerPage,cursor)

    ## Double check that returned data has provided the required information to locate the next page of reviews.
    ## This *should* be caught by repeated attempts at the query, and other checks and balances internal to 'steam_appID_review_request()'
    if rawData['cursor'] is None:
        raise Exception(f"Function 'steam_appID_review_request()' returned data with 'cursor' == None."+
                        "\n"+f"Previous cursor {cursor} from iteration/page number {page} of {numReviewPages}.")
    ## Obtain url format of cursor for review page iteration
    cursor = urllib.parse.quote_plus(str(rawData['cursor']))

    ## An empty page marks the end of the available reviews, so stop rather than
    ## keep requesting the remaining (empty) pages.
    pageReviews = rawData.get('reviews') or []
    if not pageReviews:
        break

    ## Iterate through reviews on this page
    for review in pageReviews:
        ## For each review, collect data from 'desiredDataFilters' list of keys
        for item in desiredDataFilters:
            if isinstance(item, str):
                ## Default case: a simple key read directly from the review.
                ## Appending in place avoids re-copying the whole column for every review.
                ## A key missing from this review is recorded as None so that all columns stay the same length.
                reviewData.setdefault(item, []).append(review.get(item))
            elif isinstance(item, dict):
                ## Nested case: information about the review author is stored in a dictionary
                ## nested within the individual review's data.
                for key, innerKeys in item.items():
                    nestedData = review.get(key) or {}
                    for innerKey in innerKeys:
                        reviewData.setdefault(innerKey, []).append(nestedData.get(innerKey))

  5%|████▋                                                                                     | 915/17740 [11:44<5:54:11,  1.26s/it]

In [None]:
## Build a table with one column per collected field, then convert the two
## timestamp columns to the 'datetime64[s]' dtype.
df = pd.DataFrame(reviewData)
df = df.astype(dict.fromkeys(('timestamp_created', 'timestamp_updated'), "datetime64[s]"))
df

In [None]:
savePath = "./Data/"
saveFile = "SteamAPI_NMS_reviews.xlsx" ##   If needed for short number of reviews test... #"TEST--SteamAPI_RAND-GAME_reviews.xlsx"

## Create the output directory, including any missing parent directories.
## An already existing directory is fine; any other failure (e.g. missing
## permissions) is raised here instead of being silently ignored.
os.makedirs(savePath, exist_ok=True)

## Write the review table to the Excel file, replacing any existing file of the same name.
with pd.ExcelWriter(path=os.path.join(savePath, saveFile),mode="w") as writer:
    df.to_excel(writer)