In [1]:
import requests # import the requests library to request info from the api
import pandas as pd # import the pandas library to get data analysis and manipulation tools
import time

def get_reviews(appid, params=None, timeout=30):
    """Fetch one page of Steam store reviews for an app and return the decoded JSON.

    Parameters
    ----------
    appid : int or str
        Steam app id; it is interpolated into the appreviews URL.
    params : dict, optional
        Query-string parameters forwarded to the endpoint unchanged.
        When omitted, ``{'json': 1}`` is sent.
    timeout : float, optional
        Seconds to wait on the server before giving up (default 30), so a
        stalled connection cannot hang a long scrape indefinitely.

    Returns
    -------
    The decoded JSON body of the HTTP response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        For connection problems and timeouts.
    """
    # build the default per call instead of sharing one dict object between all calls
    if params is None:
        params = {'json': 1}

    # review endpoint of whichever game `appid` refers to
    url = f'https://store.steampowered.com/appreviews/{appid}'
    response = requests.get(
        url=url,
        params=params,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    # fail with a clear HTTP error rather than a JSON decode error on an error page
    response.raise_for_status()
    return response.json()

# in short: get_reviews requests one page of reviews for the given app id and returns the parsed JSON response.

In [2]:
def _minutes_to_hours(minutes):
    """Divide a playtime value by 60 and round to 2 decimals; None is passed through."""
    if minutes is None:
        return None
    return round(float(minutes / 60), 2)


def get_n_reviews(appid, n):
    """Collect up to ``n`` reviews for a Steam app by paging through ``get_reviews``.

    ``get_reviews`` is called once per page with a cursor taken from the
    previous response. Paging stops when ``n`` reviews have been collected,
    when a page comes back empty, or when the API hands back a cursor that was
    already used (which would only repeat data).

    Parameters
    ----------
    appid : int or str
        Steam app id of the game.
    n : int
        Maximum number of reviews to collect. ``n <= 0`` returns an empty list
        without making any request.

    Returns
    -------
    list of dict
        One dict per review with the keys ``recommendationid``, ``review``,
        ``voted_up``, ``weighted_vote_score`` (float), ``playtime_forever`` and
        ``playtime_at_review`` (raw value divided by 60, rounded to 2 decimals;
        presumably minutes -> hours), ``timestamp_created`` and
        ``timestamp_updated`` (pandas timestamps parsed from epoch seconds).

    Raises
    ------
    KeyError
        If a response lacks the ``reviews``/``cursor`` keys or a review lacks
        one of the required fields.
    """
    page_size = 100  # largest page requested per call
    reviews = []  # raw review dicts accumulated across pages
    cursor = '*'  # '*' asks the API for the first page
    seen_cursors = {cursor}
    params = {
        'json': 1,
        'filter': 'all',
        'language': 'english',
        # 2**63 - 1, the largest signed 64-bit integer; presumably lifts any day-range limit
        'day_range': 9223372036854775807,
        'review_type': 'all',
        'purchase_type': 'all',
    }

    remaining = n
    while remaining > 0:
        # cursor value is sent as bytes, unchanged from the previous behaviour
        params['cursor'] = cursor.encode()
        params['num_per_page'] = min(page_size, remaining)

        response = get_reviews(appid, params)
        batch = response['reviews']
        if not batch:
            # an empty page means there is nothing left to fetch
            break

        reviews += batch
        # count what was actually delivered: a short page is not treated as the last page
        remaining -= len(batch)

        cursor = response['cursor']
        if not cursor or cursor in seen_cursors:
            # a missing or repeated cursor would only loop over the same data
            break
        seen_cursors.add(cursor)

    return [
        {
            'recommendationid': review['recommendationid'],
            'review': review['review'],
            'voted_up': review['voted_up'],
            'weighted_vote_score': float(review['weighted_vote_score']),
            'playtime_forever': _minutes_to_hours(review['author']['playtime_forever']),
            # NOTE(review): assumes some reviews may lack playtime_at_review; they get None
            # instead of aborting the whole download with a KeyError - confirm against the API
            'playtime_at_review': _minutes_to_hours(review['author'].get('playtime_at_review')),
            'timestamp_created': pd.to_datetime(review['timestamp_created'], unit='s'),
            'timestamp_updated': pd.to_datetime(review['timestamp_updated'], unit='s'),
        }
        for review in reviews
    ]


------

In [3]:
# Lies of P: Steam app id and how many English reviews to request

lop_id = 1627720
lop_eng_reviews = 13_901 # total number of English reviews as of Jan 27, 2024

lies_of_p = get_n_reviews(appid=lop_id, n=lop_eng_reviews)

In [4]:
lop_df = pd.DataFrame(data=lies_of_p) # one row per cleaned review dict

In [5]:
# report the (rows, columns) shape, then preview the first five rows
print(lop_df.shape)
lop_df.head()

(13901, 8)


Unnamed: 0,recommendationid,review,voted_up,weighted_vote_score,playtime_forever,playtime_at_review,timestamp_created,timestamp_updated
0,149149878,the closest we're getting for a bloodborne gam...,True,0.961972,36.7,19.4,2023-10-29 19:22:32,2023-10-29 19:22:32
1,148293801,Experience the horror of being french,True,0.955329,23.25,23.25,2023-10-15 17:53:36,2023-10-15 17:53:36
2,154087316,all right then. keep your Bloodborne Sony.,True,0.953623,70.57,64.63,2023-12-21 21:39:15,2023-12-21 21:39:15
3,148236957,They really went fine i'll make Bloodborne on ...,True,0.950368,41.45,41.45,2023-10-14 21:39:41,2023-10-14 21:39:41
4,147355339,From the alternate reality where Miyazaki rea...,True,0.940474,76.17,54.02,2023-09-30 13:16:07,2023-09-30 13:16:07


In [18]:
# save the Lies of P reviews as CSV twice: under the bare file name (current working
# directory) and under the desktop data folder
# NOTE(review): the absolute path below is specific to one machine
file_name = 'lop_df.csv'
output_filepath = f'/Users/javier/Desktop/steam-review-data/{file_name}'

lop_df.to_csv(path_or_buf=file_name, index=False)
lop_df.to_csv(path_or_buf=output_filepath, index=False)

------

In [6]:
# Wo Long: Steam app id and how many English reviews to request

wo_long_id = 1448440
wo_long_eng_reviews = 3_859 # total number of English reviews as of Jan 27, 2024

wo_long = get_n_reviews(appid=wo_long_id, n=wo_long_eng_reviews)

In [7]:
wo_long_df = pd.DataFrame(data=wo_long) # one row per cleaned review dict

In [8]:
# report the (rows, columns) shape, then preview the first five rows
print(wo_long_df.shape)
wo_long_df.head()

(3859, 8)


Unnamed: 0,recommendationid,review,voted_up,weighted_vote_score,playtime_forever,playtime_at_review,timestamp_created,timestamp_updated
0,134018909,Saw all the reviews about performance and said...,False,0.928326,2.53,2.53,2023-03-04 03:58:51,2023-03-04 03:58:51
1,153417645,I.... really don't know how to review this. On...,False,0.891513,1.5,0.5,2023-12-12 03:01:00,2023-12-12 03:01:00
2,133962852,A good game buried deep beneath the biggest ca...,False,0.88831,85.58,3.27,2023-03-03 08:32:07,2023-03-03 15:48:12
3,153249994,Performance was never fixed,False,0.88687,25.02,25.02,2023-12-09 22:58:33,2023-12-09 22:58:33
4,134236783,After having completely finished this game and...,False,0.884483,44.73,40.95,2023-03-07 12:27:19,2023-03-10 20:01:43


In [19]:
# save the Wo Long reviews as CSV twice: under the bare file name (current working
# directory) and under the desktop data folder
# NOTE(review): the absolute path below is specific to one machine
file_name = 'wo_long_df.csv'
output_filepath = f'/Users/javier/Desktop/steam-review-data/{file_name}'

wo_long_df.to_csv(path_or_buf=file_name, index=False)
wo_long_df.to_csv(path_or_buf=output_filepath, index=False)

-----

In [9]:
# Star Wars Jedi: Survivor: Steam app id and how many English reviews to request

star_wars_id = 1774580
star_wars_eng_reviews = 32_379 # total number of English reviews as of Jan 27, 2024

star_wars = get_n_reviews(appid=star_wars_id, n=star_wars_eng_reviews)

In [10]:
star_wars_df = pd.DataFrame(data=star_wars) # one row per cleaned review dict

In [11]:
# report the (rows, columns) shape, then preview the first five rows
print(star_wars_df.shape)
star_wars_df.head()

(32379, 8)


Unnamed: 0,recommendationid,review,voted_up,weighted_vote_score,playtime_forever,playtime_at_review,timestamp_created,timestamp_updated
0,137493977,I'm not your Beta Tester EA.,False,0.962316,0.37,0.37,2023-04-29 16:46:50,2023-04-29 16:46:50
1,137509326,Wait for performance fixes.,False,0.961892,6.12,4.8,2023-04-29 20:44:27,2023-04-29 20:44:27
2,150979796,"Don't get me wrong, The PC release of this gam...",True,0.94878,46.7,46.7,2023-11-22 12:20:48,2023-11-22 12:20:48
3,145957830,"When this game first launched, I was smart, I ...",False,0.946475,1.65,1.65,2023-09-09 04:54:51,2023-09-09 04:54:51
4,148551958,[h1][b]May the FPS be With You[/b][/h1]\n\n[i]...,False,0.942032,67.68,67.68,2023-10-20 17:06:39,2023-11-23 12:35:20


In [20]:
# save the Jedi: Survivor reviews as CSV twice: under the bare file name (current working
# directory) and under the desktop data folder
# NOTE(review): the absolute path below is specific to one machine
file_name = 'star_wars_df.csv'
output_filepath = f'/Users/javier/Desktop/steam-review-data/{file_name}'

star_wars_df.to_csv(path_or_buf=file_name, index=False)
star_wars_df.to_csv(path_or_buf=output_filepath, index=False)

-----

In [12]:
# Remnant II: Steam app id and how many English reviews to request

remnant_id = 1282100
remnant_eng_reviews = 27_464 # total number of English reviews as of Jan 27, 2024


remnant = get_n_reviews(appid=remnant_id, n=remnant_eng_reviews)

In [13]:
remnant_df = pd.DataFrame(data=remnant) # one row per cleaned review dict

In [14]:
# report the (rows, columns) shape, then preview the first five rows
print(remnant_df.shape)
remnant_df.head()

(27451, 8)


Unnamed: 0,recommendationid,review,voted_up,weighted_vote_score,playtime_forever,playtime_at_review,timestamp_created,timestamp_updated
0,143137396,Great game. Please add a North marker to the m...,True,0.959391,29.95,24.27,2023-07-31 10:40:10,2023-07-31 10:40:10
1,142838045,"Just like the previous entry in the franchise,...",True,0.927445,119.3,118.68,2023-07-26 14:58:57,2023-11-21 18:58:03
2,149330673,Remnant 1 but everything more and is one of th...,True,0.910919,192.15,165.05,2023-11-01 19:52:15,2023-11-09 03:48:54
3,143149536,"Honestly, I'm regretting not playing the first...",True,0.90857,52.67,36.35,2023-07-31 15:10:01,2023-07-31 15:10:01
4,144174130,"Like for everyone else in these reviews, I alw...",True,0.906624,47.88,34.17,2023-08-14 12:41:43,2023-08-14 12:41:43


In [21]:
# save the Remnant II reviews as CSV twice: under the bare file name (current working
# directory) and under the desktop data folder
# NOTE(review): the absolute path below is specific to one machine
file_name = 'remnant_df.csv'
output_filepath = f'/Users/javier/Desktop/steam-review-data/{file_name}'

remnant_df.to_csv(path_or_buf=file_name, index=False)
remnant_df.to_csv(path_or_buf=output_filepath, index=False)

---

In [23]:
# Lords of the Fallen: Steam app id and how many English reviews to request

lords_id = 1501750
lords_eng_reviews = 14_509 # total number of English reviews as of Jan 27, 2024

lords = get_n_reviews(appid=lords_id, n=lords_eng_reviews)

In [24]:
lords_df = pd.DataFrame(data=lords) # one row per cleaned review dict

In [26]:
# report the (rows, columns) shape, then preview the first five rows
print(lords_df.shape)
lords_df.head()

(14509, 8)


Unnamed: 0,recommendationid,review,voted_up,weighted_vote_score,playtime_forever,playtime_at_review,timestamp_created,timestamp_updated
0,148136042,Having issues with performance? \n\nChange bot...,True,0.935533,35.53,0.62,2023-10-13 14:37:25,2023-10-13 14:37:25
1,148896243,"This is a 'Souls-Like' through and through, if...",True,0.928267,35.42,25.57,2023-10-26 08:01:26,2023-10-26 08:01:26
2,148457883,Giving this a positive review because it's gen...,True,0.903862,34.62,29.32,2023-10-18 21:41:53,2023-10-18 21:42:13
3,148328489,This game is honestly way better than I origin...,True,0.897246,66.13,17.85,2023-10-16 10:48:22,2023-10-16 10:48:22
4,148383765,This game has some of the most incredible vist...,True,0.888353,83.07,37.5,2023-10-17 12:44:23,2023-10-17 16:54:11


In [27]:
# save the Lords of the Fallen reviews as CSV twice: under the bare file name (current
# working directory) and under the desktop data folder
# NOTE(review): the absolute path below is specific to one machine
file_name = 'lords_df.csv'
output_filepath = f'/Users/javier/Desktop/steam-review-data/{file_name}'

lords_df.to_csv(path_or_buf=file_name, index=False)
lords_df.to_csv(path_or_buf=output_filepath, index=False)