# Retrieve No Man's Sky (NMS) review data from the Steam API

In [1]:
import numpy as np
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import urllib.parse
import time
from datetime import datetime
import math
import random
import copy
import os
from tqdm import tqdm

In [2]:
def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """
    Return a requests session whose 'http://' and 'https://' traffic goes through a retrying adapter.

    Adapted from 'https://www.peterbe.com/plog/best-practice-with-retries-with-requests' by Peter Bengtsson,
    accessed on 23/06/2025

    Parameters
    ----------
    retries : int
        Used as the ``total``, ``read`` and ``connect`` limits of the ``Retry`` policy.
    backoff_factor : float
        Forwarded to ``Retry`` as ``backoff_factor``.
    status_forcelist : tuple of int
        Forwarded to ``Retry`` as ``status_forcelist``.
    session : optional
        Existing session to mount the adapter on. A new ``requests.Session`` is
        created when this is falsy (e.g. ``None``).

    Returns
    -------
    The session (the one passed in, or the newly created one) with the
    retrying adapter mounted.
    """
    http = session if session else requests.Session()
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
    )
    ## The same adapter instance serves both URL schemes
    for prefix in ('http://', 'https://'):
        http.mount(prefix, retrying_adapter)
    return http

def requests_retry(
    url,
    params=None,
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    timeout=4,
    session=None):

    """
    Perform a GET request through a session configured by 'requests_retry_session()'.

    Adapted from 'https://www.peterbe.com/plog/best-practice-with-retries-with-requests' by Peter Bengtsson,
    accessed on 23/06/2025

    Parameters
    ----------
    url : str
        Address to request.
    params : optional
        Query parameters forwarded to ``Session.get``. Defaults to ``None``
        (no parameters) rather than a shared mutable ``[]``.
    retries, backoff_factor, status_forcelist
        Forwarded unchanged to 'requests_retry_session()'.
    timeout : number
        Forwarded to ``Session.get`` as ``timeout``.
    session : optional
        Session to send the request with. When supplied it is used as-is (the
        retry adapter gets mounted on it) and is left open for the caller.
        When ``None`` a temporary session is created and closed before returning.

    Returns
    -------
    The response object returned by ``Session.get``.
    """

    ## Only close sessions this function created itself; a caller-supplied session stays usable
    ownsSession = session is None
    if ownsSession:
        session = requests.Session()

    try:
        response = requests_retry_session(
            retries,
            backoff_factor,
            status_forcelist,
            session=session).get(url,params=params,timeout=timeout)
    finally:
        ## Release pooled connections even when the request raises
        if ownsSession:
            session.close()

    return response

In [3]:
def steam_appID_review_request(appid,requestFilterType,nReviewsPerPage,cursor='*',retries=3,DEBUG=False):
    """
    Request one page of reviews for a Steam app, retrying when the response is unusable.

    Parameters
    ----------
    appid
        Steam application id, interpolated into the '/appreviews/{appid}' URL.
    requestFilterType
        Value sent as the 'filter' query parameter.
    nReviewsPerPage
        Value sent as the 'num_per_page' query parameter.
    cursor : str
        Value sent as the 'cursor' query parameter; selects the page to retrieve.
    retries : int
        Number of attempts made by this function. It is also forwarded to
        'requests_retry()' as its own retry count.
    DEBUG : bool
        When true, print a message after each successful stage.

    Returns
    -------
    dict
        The decoded JSON payload of the first attempt that reported 'success' == 1
        and contained either a non-null 'cursor' or a non-empty 'reviews' list.

    Raises
    ------
    Exception
        When no attempt produced a usable payload (including when 'retries' is 0 or less).
    """

    ## For as-of-yet unknown reasons, the Steam API for '/appreviews/:appid?' will sometimes return None data despite the HTTP request returning success (status_code==200), and data returned from Steam API returning 'success==1'.
    ## So, here a repeated attempt loop retries the query for the current appid and review page (as defined by variable 'cursor'). On success, the payload is returned immediately
    ## and the rest of the program continues.
    ##
    ## TODO: the error that underlies this need for a repeated attempt loop has yet to be identified. It is uncertain whether this issue arises from 'requests', 'urllib', or the Steam API.

    # the params of the API
    params = {
            'json':1,
            'language': 'all',
            'cursor': cursor,                                  # set the cursor to retrieve reviews from a specific "page"
            'num_per_page': nReviewsPerPage,
            'filter': requestFilterType,
            'date_range_type':'all',
            'purchase_type':'all',
            'filter_offtopic_activity':0,
            'review_type':'all',
            'playtime_filter_max':0
        }
    url = f"https://store.steampowered.com/appreviews/{appid}"

    ## Tracked separately from the response object so the final error message
    ## can be built even when the loop body never ran (retries <= 0)
    lastStatusCode = None

    for attempt in range(1,retries+1):
        ## Make API request
        appreviewsReq = requests_retry(url,params,retries=retries)
        lastStatusCode = appreviewsReq.status_code

        if lastStatusCode != 200:
            print(f"HTML 'Get' request failed with error code {lastStatusCode} on page cursor={cursor}.")
            continue ## Retry request
        if DEBUG: print("HTML 'Get' request successful.")

        ## A 200 response with a body that is not valid JSON is treated like any other failed attempt
        try:
            appreviews = appreviewsReq.json()
        except ValueError:
            print(f"AppID {appid} reviews request failed on page cursor={cursor}.")
            continue ## Retry request

        if not isinstance(appreviews,dict) or appreviews.get('success') != 1:
            print(f"AppID {appid} reviews request failed on page cursor={cursor}.")
            continue ## Retry request
        if DEBUG: print("API request successful. Returned data converted to json format.")

        ## Check there is data returned from query, or if data is None type
        if appreviews.get('cursor') is not None:
            return appreviews
        if appreviews.get('reviews'):
            ## No next cursor but reviews present: treated as the last page
            print(f"AppID {appid} review has reached final page. Finishing loop iterating over review pages!")
            return appreviews

        print(f"AppID {appid} review failed, returning null data on page cursor={cursor} on attempt {attempt} of {retries}. Will attempt next retry of request.")

    raise Exception(f"AppID {appid} reviews request failed on page cursor={cursor}."+"\n"+f"HTML 'Get' request returned code {lastStatusCode}.")

## Grab initial data about the Steam app

In [4]:
## Steam app id of NMS.
## To test the end of the loop iterating over review pages (in the script below),
## app id 3140120 -- a newly released game with a low number of reviews -- can be used instead.
appid = 275850
nReviewsPerPage = 100 # Number of reviews per page of Steam API get request
## Set requestFilterType. The string below is reference documentation only; it is not assigned to anything.
"""
See https://github.com/Revadike/InternalSteamWebAPI/wiki/Get-App-Reviews 

recent – sorted by creation time
updated – sorted by last updated time
all – sorted by helpfulness, with sliding windows based on day_range parameter, will always find results to return.
summary – (default) sorted by helpfulness, strictly returns 10 reviews without paging (ignores num_per_page), represents the summary score by including reviews based on the proportion of positive to negative votes (see the corresponding blog article)

If paging through the reviews with cursor then choose either the recent option or the updated option to eventually receive an empty response list.
"""
requestFilterType = 'updated' ## for debugging set to 'summary', otherwise set to 'updated'

In [5]:
## Fetch the first page of reviews with debug messages enabled.
## cursor='*' returns a sample from the first page of results (as determined by the rest of the request filters).
appreviews = steam_appID_review_request(appid,requestFilterType,nReviewsPerPage,cursor='*',DEBUG=True)

HTML 'Get' request successful.
API request successful. Returned data converted to json format.


In [6]:
## Keep an independent copy of the 'query_summary' section of the first-page response
reviewSummary = copy.deepcopy(appreviews['query_summary'])
print(reviewSummary)

{'num_reviews': 100, 'review_score': 8, 'review_score_desc': 'Very Positive', 'total_positive': 295731, 'total_negative': 59610, 'total_reviews': 355341}


In [7]:
## Number of pages to request = total review count divided by the page size, rounded up
## so that a partially filled last page is still requested.
numReviews = reviewSummary['total_reviews']
numReviewPages = math.ceil(float(numReviews)/float(nReviewsPerPage))
print(f"Number of review pages = {numReviewPages}")

Number of review pages = 3554


In [8]:
## Desired info to pull out of the Steam API data returned.
## Plain strings name fields read directly from each review.
## Information about the review author is stored in a dictionary nested
## within the individual review's data, so it is described here by a
## one-entry dict: {nested key: [fields to read from the nested dictionary]}.
desiredDataFilters = [
    'recommendationid',
    {'author':
        ['num_games_owned',
         'num_reviews',
         'playtime_forever',
         'playtime_last_two_weeks',
         'playtime_at_review',
         'last_played']
    },
   'language',
   'review',
   'timestamp_created',
   'timestamp_updated',
   'voted_up',
   'votes_up',
   'votes_funny',
   'weighted_vote_score',
   'comment_count',
   'steam_purchase',
   'received_for_free',
   'written_during_early_access',
   'primarily_steam_deck',
]

## Begin gathering review data

Configuration parameters

In [9]:
## Directory the output files are written to (ends with '/' because it is concatenated directly with the file name)
savePath = "./Data/"
## Base output file name; a zero-padded file counter is inserted before the extension when saving.
## For a test run on a game with few reviews, "TEST--SteamAPI_RAND-GAME_reviews.xlsx" can be used instead.
saveFile = "SteamAPI_NMS_reviews.xlsx"
## Number of attempts per review page, passed to 'steam_appID_review_request()'
retries = 3
## Once at least this many rows have been collected, they are written to a file and cleared from memory
rowsPerOutputFile = 75000

In [10]:
def write_to_excel(df,savePath,saveFile):
    """
    Write a dataframe to an Excel file at 'savePath + saveFile' and print the resulting path.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to write.
    savePath : str
        Prefix of the output path. It is concatenated directly with 'saveFile',
        so a directory must end with '/'.
    saveFile : str
        Output file name.

    Notes
    -----
    The directory part of the resulting path is created if it does not exist.
    An already existing directory is not an error, but any other failure to
    create it (for example missing permissions) is raised instead of being
    silently ignored.
    """
    path = savePath+saveFile

    ## Create all missing parent directories in one call
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory,exist_ok=True)

    ## The engine is chosen by pd.ExcelWriter; an 'engine' argument given to
    ## 'to_excel' has no effect when an ExcelWriter instance is supplied.
    with pd.ExcelWriter(path=path,mode="w") as writer:
        df.to_excel(writer)
    print(f"{path}")
    return

In [11]:
def remove_illegal_characters(df):
    return df.map(lambda x: x.encode('unicode_escape').decode('utf-8') if isinstance(x, str) else x)

In [12]:
def _lookup_or_na(container, key):
    """Return container[key], or pd.NA when the value cannot be looked up.

    Substituting pd.NA keeps every column the same length, which pandas
    requires when the dataframe is built.
    """
    try:
        return container[key]
    except (KeyError, IndexError, TypeError):
        return pd.NA


def _collect_review_fields(entry, reviewData, dataFilters):
    """Append the fields of one review named by 'dataFilters' to the column lists in 'reviewData'.

    String filters are read directly from 'entry'. Dict filters of the form
    {nested key: [fields]} are read from the dictionary nested under that key,
    and each field becomes its own column.
    """
    for item in dataFilters:
        if isinstance(item, str):
            ## In-place append: building a new list per value would copy the whole column every time
            reviewData.setdefault(item, []).append(_lookup_or_na(entry, item))
        elif isinstance(item, dict):
            for key, innerKeys in item.items():
                nested = _lookup_or_na(entry, key)
                for innerKey in innerKeys:
                    reviewData.setdefault(innerKey, []).append(_lookup_or_na(nested, innerKey))


def _count_collected_rows(reviewData):
    """Return the number of rows collected so far (0 when nothing has been collected)."""
    ## All columns are extended together, so the first column's length is the row count
    return len(next(iter(reviewData.values()), ()))


def _save_review_chunk(reviewData, savePath, saveFile, fileNumber):
    """Convert the collected columns to a dataframe, clean it, write it to a numbered file and return it."""
    ## Create pandas dataframe and update to datetime format
    df = pd.DataFrame.from_dict(reviewData)
    df = df.astype({'timestamp_created':"datetime64[s]",'timestamp_updated':"datetime64[s]"})
    ## Clean illegal characters
    df = remove_illegal_characters(df)

    ## Proper filename for current savefile number.
    ## splitext only splits at the last dot, so names containing extra dots still work.
    name, fileExt = os.path.splitext(saveFile)
    currentSaveFile = name + str(fileNumber).zfill(3) + fileExt
    write_to_excel(df,savePath,currentSaveFile)
    return df


reviewData = {}
cursor = '*'
saveFileCounter = 0 # counter to track savefile number
## Loop over each review page
for page in tqdm(range(1,numReviewPages+1)):
    ## Reduce request frequency in less 'robotic' fashion to prevent sites blocking access
    time.sleep(random.uniform(0.0,2.0)/100) ## 0 -- 20 milli-seconds

    ## request current page number for reviews
    rawData = steam_appID_review_request(appid,requestFilterType,nReviewsPerPage,cursor,retries=retries)
    pageReviews = rawData.get('reviews') or []

    ## Double check that returned data has provided the required information to locate the next page of reviews.
    ## This *should* be caught by repeated attempts at the query, and other checks and balances internal to 'steam_appID_review_request()'
    reachedFinalPage = rawData.get('cursor') is None
    if reachedFinalPage:
        print(f"AppID {appid} review returned null data on page cursor={cursor}.")
        if not pageReviews:
            raise Exception(f"Function 'steam_appID_review_request()' returned data with 'cursor' == None."+
                "\n"+f"Previous cursor {cursor} from iteration/page number {page} of {numReviewPages}.")
        print(f"AppID {appid} review has reached final page. Finishing loop iterating over review pages!")
    else:
        ## Cursor identifying the next review page
        cursor = str(rawData['cursor'])

    ## Iterate through reviews on this page. This runs for the final page too,
    ## so that its reviews are collected before the loop is left.
    for entry in pageReviews:
        _collect_review_fields(entry, reviewData, desiredDataFilters)

    if reachedFinalPage:
        break

    nRows = _count_collected_rows(reviewData)
    if nRows>=rowsPerOutputFile:
        print(f"Current data size of {nRows} x Rows has exceeded requested rows per output file length of {rowsPerOutputFile}."
             +"\n"
             +"Writing current data to file before proceeding!")
        saveFileCounter += 1  ## Increase counter to track savefile number
        df = _save_review_chunk(reviewData,savePath,saveFile,saveFileCounter)

        ## Clear current data residing in RAM
        reviewData = {}

## Clear up final chunk of data by saving to file
nRows = _count_collected_rows(reviewData)
if nRows>=1 :
    saveFileCounter += 1  ## Increase counter to track savefile number
    df = _save_review_chunk(reviewData,savePath,saveFile,saveFileCounter)

 21%|███████████████████▏                                                                       | 750/3554 [14:13<1:21:43,  1.75s/it]

Current data size of 75095 x Rows has exceeded requested rows per output file length of 75000.
Writing current data to file before proceeding!


 21%|███████████████████▏                                                                       | 751/3554 [14:36<6:15:35,  8.04s/it]

./Data/SteamAPI_NMS_reviews001.xlsx


 42%|█████████████████████████████████████▉                                                    | 1500/3554 [28:02<1:01:11,  1.79s/it]

Current data size of 75000 x Rows has exceeded requested rows per output file length of 75000.
Writing current data to file before proceeding!


 42%|██████████████████████████████████████                                                    | 1501/3554 [28:23<4:25:31,  7.76s/it]

./Data/SteamAPI_NMS_reviews002.xlsx


 63%|██████████████████████████████████████████████████████████▏                                 | 2250/3554 [43:45<55:27,  2.55s/it]

Current data size of 75000 x Rows has exceeded requested rows per output file length of 75000.
Writing current data to file before proceeding!


 63%|█████████████████████████████████████████████████████████                                 | 2251/3554 [44:07<3:03:32,  8.45s/it]

./Data/SteamAPI_NMS_reviews003.xlsx


 66%|████████████████████████████████████████████████████████████▋                               | 2342/3554 [45:13<13:29,  1.50it/s]

AppID 275850 review failed, returning null data on page cursor=AoJ4jqKgr/ECfL6x/wE= on attempt 1 of 3. Will attempt next retry of request.


 66%|████████████████████████████████████████████████████████████▋                               | 2346/3554 [45:16<14:05,  1.43it/s]

AppID 275850 review failed, returning null data on page cursor=AoJwp7KarPECfoqL/wE= on attempt 1 of 3. Will attempt next retry of request.


 69%|███████████████████████████████████████████████████████████████▉                            | 2468/3554 [46:55<16:31,  1.10it/s]

AppID 275850 review failed, returning null data on page cursor=AoJ4joGD7+4Cfq7E4gE= on attempt 1 of 3. Will attempt next retry of request.


 84%|███████████████████████████████████████████████████████████████████████████▉              | 3001/3554 [1:01:05<24:13,  2.63s/it]

Current data size of 75099 x Rows has exceeded requested rows per output file length of 75000.
Writing current data to file before proceeding!


 84%|██████████████████████████████████████████████████████████████████████████▎             | 3002/3554 [1:01:33<1:34:35, 10.28s/it]

./Data/SteamAPI_NMS_reviews004.xlsx


 89%|███████████████████████████████████████████████████████████████████████████████▊          | 3154/3554 [1:03:20<04:30,  1.48it/s]

AppID 275850 review failed, returning null data on page cursor=AoJ4m+DvrdcCc6fmXg== on attempt 1 of 3. Will attempt next retry of request.


 99%|█████████████████████████████████████████████████████████████████████████████████████████▎| 3528/3554 [1:11:22<00:41,  1.58s/it]

AppID 275850 review failed, returning null data on page cursor=AoJwgqKYwNYCcfPmXg== on attempt 1 of 3. Will attempt next retry of request.


100%|██████████████████████████████████████████████████████████████████████████████████████████| 3554/3554 [1:12:04<00:00,  1.22s/it]


./Data/SteamAPI_NMS_reviews005.xlsx
