In [16]:
from datetime import datetime, timedelta
import requests
import pickle
from pathlib import Path
import pandas as pd
import time

# Exploration

In [17]:
## copied straight from article
def get_user_reviews(review_appid, params):

    user_review_url = f'https://store.steampowered.com/appreviews/{review_appid}'
    req_user_review = requests.get(
        user_review_url,
        params=params
    )

    if req_user_review.status_code != 200:
        print(f'Fail to get response. Status code: {req_user_review.status_code}')
        return {"success": 2}

    try:
        user_reviews = req_user_review.json()
    except:
        return {"success": 2}

    return user_reviews

In [56]:
review_appid = 2767030
params = {
    'json':1,
    'language': 'english',
    'cursor': '*',                                  # set the cursor to retrieve reviews from a specific "page"
    'num_per_page': 20,
    'filter': 'all',
    'day_range': 365,
    'review_type': 'all'
}

reviews_response = get_user_reviews(review_appid, params)
reviews_response

{'success': 1,
 'query_summary': {'num_reviews': 0,
  'review_score': 0,
  'review_score_desc': 'No user reviews',
  'total_positive': 0,
  'total_negative': 0,
  'total_reviews': 0},
 'reviews': [],
 'cursor': '*'}

In [40]:
reviews_response.keys()

dict_keys(['success', 'query_summary', 'reviews', 'cursor'])

# Write Reviews for One Game

In [22]:
## adapted from article
def get_reviews_for_gameID(appid, review_count_cutoff=1000, datetime_cutoff=datetime(2024, 1, 1, 0, 0, 0)):
    
    # the params of the API
    params = {
        'json':1,
        'language': 'english',
        'cursor': '*',                                  # set the cursor to retrieve reviews from a specific "page"
        'num_per_page': 100,
        'filter': 'recent'
    }
    
    # time_interval = timedelta(hours=24)                         # the time interval to get the reviews
    # end_time = datetime.fromtimestamp(1716718910)               # the timestamp in the return result are unix timestamp (GMT+0)
    end_time = datetime.now()
    # start_time = end_time - time_interval
    start_time = datetime_cutoff
    
    print(f"Start time: {start_time}")
    print(f"End time: {end_time}")
    print(start_time.timestamp(), end_time.timestamp())
    
    passed_start_time = False
    passed_end_time = False
    
    selected_reviews = []

    i = 0
    ## Run this loop until we get a certain number of reviews, or until timestamps are outside of our time bounds
    while (len(selected_reviews) < review_count_cutoff) and (not passed_start_time or not passed_end_time) and (i <= 20):
        i += 1
        reviews_response = get_user_reviews(appid, params)
    
        # not success?
        if reviews_response["success"] != 1:
            print("Not a success")
            print(reviews_response)
    
        if reviews_response["query_summary"]['num_reviews'] == 0:
            print("No reviews.")
            print(reviews_response)
    
        for review in reviews_response["reviews"]:
            recommendation_id = review['recommendationid']
    
            timestamp_created = review['timestamp_created']
            timestamp_updated = review['timestamp_updated']
    
            # skip the comments that beyond end_time
            if not passed_end_time:
                if timestamp_created > end_time.timestamp():
                    continue
                else:
                    passed_end_time = True
    
            # exit the loop once detected a comment that before start_time
            if not passed_start_time:
                if timestamp_created < start_time.timestamp():
                    passed_start_time = True
                    break
    
            # extract the useful (to me) data
            ## these attributes are taken right from the article
            ## we could see what other useful info there is
            
            author_steamid = review['author']['steamid']        # will automatically redirect to the profileURL if any
            playtime_forever = review['author']['playtime_forever']
            playtime_last_two_weeks = review['author']['playtime_last_two_weeks']
            playtime_at_review_minutes = review['author']['playtime_at_review']
            last_played = review['author']['last_played']
    
            review_text = review['review']
            voted_up = review['voted_up']
            votes_up = review['votes_up']
            votes_funny = review['votes_funny']
            weighted_vote_score = review['weighted_vote_score']
            steam_purchase = review['steam_purchase']
            received_for_free = review['received_for_free']
            written_during_early_access = review['written_during_early_access']
    
            my_review_dict = {
                'recommendationid': recommendation_id,
                'author_steamid': author_steamid,
                'playtime_at_review_minutes': playtime_at_review_minutes,
                'playtime_forever_minutes': playtime_forever,
                'playtime_last_two_weeks_minutes': playtime_last_two_weeks,
                'last_played': last_played,
    
                'review_text': review_text,
                'timestamp_created': timestamp_created,
                'timestamp_updated': timestamp_updated,
    
                'voted_up': voted_up,
                'votes_up': votes_up,
                'votes_funny': votes_funny,
                'weighted_vote_score': weighted_vote_score,
                'steam_purchase': steam_purchase,
                'received_for_free': received_for_free,
                'written_during_early_access': written_during_early_access,
            }

            ## store results as list of dicts, one dict per review
            selected_reviews.append(my_review_dict)
    
        # go to next page
        try:
            cursor = reviews_response['cursor']         # cursor field does not exist in the last page
        except Exception as e:
            cursor = ''
    
        # no next page
        # exit the loop
        if not cursor:
            print("Reached the end of all comments.")
            break
    
        # set the cursor object to move to next page to continue
        params['cursor'] = cursor
        print('To next page. Next page cursor:', cursor)
        time.sleep(1)
    return selected_reviews

In [39]:
# test get_reviews function
reviews = get_reviews_for_gameID(2767030)
len(reviews)

Start time: 2024-01-01 00:00:00
End time: 2025-12-05 20:59:36.063780
1704096000.0 1764997176.06378
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_nega

0

In [34]:
## my own doing
def review_to_csv(appname, appid, review_count_cutoff=1000, datetime_cutoff=datetime(2024, 1, 1, 0, 0, 0), save_to_csv=True):
    reviews = get_reviews_for_gameID(appid, review_count_cutoff, datetime_cutoff)
    review_df = pd.DataFrame(reviews)
    review_df["Appname"] = appname ## add columne for game name
    review_df = review_df.set_index("recommendationid") ## set rec ID to be the index
    ## should confirm what happens to this index when we stack multiple games

    ## If save to csv is on, write results to csv in its own folder
    if save_to_csv:
        appname_cleaned = appname.replace(" ", "_")
        foldername = f"data/{appid}_{appname_cleaned}"
        filename = f"{appid}_{appname_cleaned}_reviews.csv"
        output_path = Path(
            foldername, filename
        )
        if not output_path.parent.exists():
            output_path.parent.mkdir(parents=True)

        print(f"Writing results for {appname} to {output_path}")
        review_df.to_csv(output_path)

    ## whether or not we write results out, still return the DF object
    return review_df
    

In [27]:
## test the function
review_df = review_to_csv("Elden Ring", 1245620)
review_df

Start time: 2024-01-01 00:00:00
End time: 2025-12-05 20:37:41.774719
1704096000.0 1764995861.774719
To next page. Next page cursor: AoJ4o66e9ZoDde/sqgY=
To next page. Next page cursor: AoJwgMr375oDcLKmqgY=
To next page. Next page cursor: AoJ4xrTE65oDcJ+5qQY=
To next page. Next page cursor: AoJ40c2B55oDdLqJpwY=
To next page. Next page cursor: AoJ41tjq45oDduyCpgY=
To next page. Next page cursor: AoJw1/Pj4JoDccPipAY=
To next page. Next page cursor: AoJwyNT+25oDfO7poQY=
To next page. Next page cursor: AoJwn4id15oDc5DYoAY=
To next page. Next page cursor: AoJ4wpen05oDfIWYoAY=
To next page. Next page cursor: AoJwp6XrzJoDd6rEnwY=
To next page. Next page cursor: AoJ498btx5oDeoD7ngY=
Writing results for Elden Ring to data/1245620_Elden_Ring/1245620_Elden_Ring_reviews.csv


Unnamed: 0_level_0,author_steamid,playtime_at_review_minutes,playtime_forever_minutes,playtime_last_two_weeks_minutes,last_played,review_text,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,steam_purchase,received_for_free,written_during_early_access,Appname
recommendationid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
212664333,76561199239412470,11395,11395,0,1756610689,Was not worth my time. Absolute pain. Youtube ...,1764995078,1764995078,True,0,0,0.5,True,False,False,Elden Ring
212663127,76561198065373945,2471,2502,506,1764995363,banger,1764993502,1764993502,True,0,0,0.5,True,False,False,Elden Ring
212662189,76561199438633574,8165,8165,647,1764901036,pot guy makes it 10\10,1764992263,1764992263,True,0,0,0.5,True,False,False,Elden Ring
212661834,76561199012043687,5761,5761,1080,1764990383,"Bleed good, me like, curved swords fun.",1764991762,1764991762,True,0,0,0.5,True,False,False,Elden Ring
212660308,76561198152769897,10020,10063,43,1764992811,Yeah,1764989755,1764989755,True,0,0,0.5,True,False,False,Elden Ring
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209444577,76561198153870559,19394,19400,0,1763507699,Really weird and confounding adventure you nee...,1763350033,1763350033,True,0,0,0.5,True,False,False,Elden Ring
209444574,76561198120165046,14757,14757,0,1763349760,Could you wait a little bit? I just have to do...,1763350029,1763350048,True,0,0,0.5,True,False,False,Elden Ring
209444241,76561199380776321,508,2310,1236,1764644207,its really great and fun especially if u use a...,1763349568,1763349568,True,0,0,0.5,True,False,False,Elden Ring
209443129,76561197961101610,11898,11898,0,1720256774,Overall: 9/10 - Worth Completing Repeatedly\r\...,1763348101,1763348101,True,0,0,0.5,True,False,False,Elden Ring


In [29]:
## Read csv back in and confirm it looks okay
appname = "Elden_Ring"
appid = 1245620


review_path = f"data/{appid}_{appname}/{appid}_{appname}_reviews.csv"

review_df_csv = pd.read_csv(review_path, index_col=0)
review_df_csv

Unnamed: 0_level_0,author_steamid,playtime_at_review_minutes,playtime_forever_minutes,playtime_last_two_weeks_minutes,last_played,review_text,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,steam_purchase,received_for_free,written_during_early_access,Appname
recommendationid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
212664333,76561199239412470,11395,11395,0,1756610689,Was not worth my time. Absolute pain. Youtube ...,1764995078,1764995078,True,0,0,0.500000,True,False,False,Elden Ring
212663127,76561198065373945,2471,2502,506,1764995363,banger,1764993502,1764993502,True,0,0,0.500000,True,False,False,Elden Ring
212662189,76561199438633574,8165,8165,647,1764901036,pot guy makes it 10\10,1764992263,1764992263,True,0,0,0.500000,True,False,False,Elden Ring
212661834,76561199012043687,5761,5761,1080,1764990383,"Bleed good, me like, curved swords fun.",1764991762,1764991762,True,0,0,0.500000,True,False,False,Elden Ring
212660308,76561198152769897,10020,10063,43,1764992811,Yeah,1764989755,1764989755,True,0,0,0.500000,True,False,False,Elden Ring
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209444577,76561198153870559,19394,19400,0,1763507699,Really weird and confounding adventure you nee...,1763350033,1763350033,True,0,0,0.500000,True,False,False,Elden Ring
209444574,76561198120165046,14757,14757,0,1763349760,Could you wait a little bit? I just have to do...,1763350029,1763350048,True,0,0,0.500000,True,False,False,Elden Ring
209444241,76561199380776321,508,2310,1236,1764644207,its really great and fun especially if u use a...,1763349568,1763349568,True,0,0,0.500000,True,False,False,Elden Ring
209443129,76561197961101610,11898,11898,0,1720256774,Overall: 9/10 - Worth Completing Repeatedly\r\...,1763348101,1763348101,True,0,0,0.500000,True,False,False,Elden Ring


# Scrape Reviews for all games in sample

For next time, or one of you guys to pick up... we need to determine our sample of games somehow

In [30]:
app_df = pd.read_csv("data/app_ids.csv")
app_df.head(50)

Unnamed: 0,App ID,App Title
0,1675200,Steam Deck
1,1808500,ARC Raiders
2,2767030,Marvel Rivals
3,730,Counter-Strike 2
4,1142710,Total War: WARHAMMER III
5,1085660,Destiny 2
6,2622380,ELDEN RING NIGHTREIGN
7,3564740,Where Winds Meet
8,2807960,Battlefield™ 6
9,3531720,ELDEN RING NIGHTREIGN The Forsaken Hollows


In [31]:
id_list = app_df["App ID"].to_list()
print(id_list[0:10])

title_list = app_df["App Title"].to_list()
print(title_list[0:10])

[1675200, 1808500, 2767030, 730, 1142710, 1085660, 2622380, 3564740, 2807960, 3531720]
['Steam Deck', 'ARC Raiders', 'Marvel Rivals', 'Counter-Strike 2', 'Total War: WARHAMMER III', 'Destiny 2', 'ELDEN RING NIGHTREIGN', 'Where Winds Meet', 'Battlefield™ 6', 'ELDEN RING NIGHTREIGN The Forsaken Hollows']


In [49]:
def concat_reviews(id_list, title_list, output_path, review_count_cutoff=300):
    assert len(id_list) == len(title_list)
    num_failures = 0
    for i in range(len(id_list)):
        appname = title_list[i]
        appid = id_list[i]

        print(f"Gathering reviews for {appname}\n\n")
        try:
            review_df = review_to_csv(appname, appid, review_count_cutoff=review_count_cutoff, datetime_cutoff=datetime(2024, 1, 1, 0, 0, 0), save_to_csv=False)
            review_df.to_csv(output_path, mode='a')
        except:
            print(f"Couldn't gather reviews for {appname}")
            num_failures += 1
    print("Total games that failed:", num_failures)

In [50]:

test_id_list = id_list[0:50]
test_title_list = title_list[0:50]
output_path = "data/test_raw_reviews.csv"

concat_reviews(test_id_list, test_title_list, output_path)

Gathering reviews for Steam Deck


Start time: 2024-01-01 00:00:00
End time: 2025-12-05 21:14:42.396886
1704096000.0 1764998082.396886
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user review

In [51]:
review_df = pd.read_csv("data/test_raw_reviews.csv")
review_df.head(50)

Unnamed: 0,recommendationid,author_steamid,playtime_at_review_minutes,playtime_forever_minutes,playtime_last_two_weeks_minutes,last_played,review_text,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,steam_purchase,received_for_free,written_during_early_access,Appname
0,212664845,76561197979706900,3752,3752,3038,1764995809,Addictive. Stressful. Time waster.,1764995842,1764995842,True,0,0,0.5,True,False,False,ARC Raiders
1,212664820,76561198968397284,8612,8612,1183,1764995767,fuak arc\r\n,1764995808,1764995808,True,0,0,0.5,True,False,False,ARC Raiders
2,212664759,76561198839360554,8168,8168,1250,1764988288,W,1764995728,1764995728,True,0,0,0.5,True,False,False,ARC Raiders
3,212664729,76561198290379303,200,200,200,1764992791,awesome game!,1764995683,1764995683,True,0,0,0.5,True,False,False,ARC Raiders
4,212664705,76561197963884718,3420,3420,2724,1764993456,If the Steam comments section is like every ot...,1764995649,1764995649,False,0,0,0.5,True,False,False,ARC Raiders
5,212664692,76561197997691391,6367,6367,5900,1764995308,I like the gathering and sneaking around the A...,1764995625,1764995625,False,0,0,0.5,True,False,False,ARC Raiders
6,212664560,76561198072747481,3946,3946,1828,1764995392,"Very well made game, every time I hop on I exp...",1764995439,1764995439,True,0,0,0.5,True,False,False,ARC Raiders
7,212664471,76561197994426373,573,573,573,1764995176,I thought this would be too sweaty for me. Hon...,1764995309,1764995309,True,0,0,0.5,True,False,False,ARC Raiders
8,212664440,76561199812111269,1392,1392,1392,1764994364,vERY FUN AND happy to play,1764995262,1764995262,True,0,0,0.5,True,False,False,ARC Raiders
9,212664396,76561198014425024,1981,1981,376,1764995621,"Incredible graphics, fun emergent gameplay and...",1764995199,1764995199,True,0,0,0.5,True,False,False,ARC Raiders


In [57]:
# id_list = id_list
# title_list = title_list[0:50]
output_path = "data/raw_reviews.csv"

concat_reviews(id_list, title_list, output_path)

Gathering reviews for Steam Deck


Start time: 2024-01-01 00:00:00
End time: 2025-12-05 22:00:53.192015
1704096000.0 1765000853.192015
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user reviews', 'total_positive': 0, 'total_negative': 0, 'total_reviews': 0}, 'reviews': [], 'cursor': '*'}
To next page. Next page cursor: *
No reviews.
{'success': 1, 'query_summary': {'num_reviews': 0, 'review_score': 0, 'review_score_desc': 'No user review