# Steam Game Review Scraper

In [1]:
import requests as re
import pandas as pd
import urllib.parse

## Requesting reviews from Steam:

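For this project, we are going to collect the following information for each review:
- Review ID (`recommendationid`)
- Review text
- Review language
- Date posted and date last updated
- Whether the review was written during early access
- Whether the review recommends the game (`voted_up`)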

In [2]:
# Probe request (default page size) to confirm the endpoint responds.
# Query parameters are passed via `params` so they are joined with `&` and
# URL-encoded; the hand-written URL used a second `?`, which folded
# `filter=all` into the value of `json` instead of sending it as its own field.
init_request = re.get(
    'https://store.steampowered.com/appreviews/236430',
    params={'json': 1, 'filter': 'all', 'language': 'english'},
).json()

In [3]:
# Status flag from the response payload; the paging loop further down treats 1 as success.
init_request['success']

1

In [4]:
# Summary block of the response: number of reviews in this batch, the overall
# review score, and the positive / negative / total review counts.
init_request['query_summary']

{'num_reviews': 20,
 'review_score': 8,
 'review_score_desc': 'Very Positive',
 'total_positive': 12340,
 'total_negative': 2213,
 'total_reviews': 14553}

- Using cursors to get more batches of reviews:

In [5]:
# Paging token returned alongside the reviews; it is sent back with the next
# request to obtain the following batch.
cursor = init_request['cursor']
cursor 

'AoIFQFbAAAAAAAB17KbXBQ=='

In [6]:
# Fetch the next batch of up to 100 reviews using the cursor from the previous response.
# `params` joins the fields with `&` (the hand-written URL had a stray second `?`)
# and percent-encodes the cursor, which is base64-like and may contain `+`, `/` or `=`.
request = re.get(
    'https://store.steampowered.com/appreviews/236430',
    params={
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'num_per_page': 100,
        'cursor': cursor,  # pass the raw token; requests handles the encoding
    },
).json()

# Keep the raw (unencoded) cursor so the next request can encode it exactly once.
cursor = request['cursor']
cursor

'AoIFQGKPcsAAAAB0y8LUBQ=='

In [7]:
# Fetch one more batch to confirm that the cursor advances from page to page.
# `params` joins the fields with `&` (the hand-written URL had a stray second `?`)
# and percent-encodes the cursor, which is base64-like and may contain `+`, `/` or `=`.
request = re.get(
    'https://store.steampowered.com/appreviews/236430',
    params={
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'num_per_page': 100,
        'cursor': cursor,  # pass the raw token; requests handles the encoding
    },
).json()

# Keep the raw (unencoded) cursor so any later request can encode it exactly once.
cursor = request['cursor']
cursor

'AoIFQFzAAAAAAAB2mdbUBQ=='

- Convert to dataframe:

In [8]:
# Endpoint and the query parameters shared by every page request.
REVIEWS_URL = 'https://store.steampowered.com/appreviews/236430'
BASE_PARAMS = {
    'json': 1,
    # About the 'filter' parameter:
    # If paging through the reviews with cursor
    # then choose either the recent option or the updated option to eventually receive an empty response list
    'filter': 'recent',
    'language': 'all',
    'day_range': 365,
    'review_type': 'all',
    'purchase_type': 'all',
    'num_per_page': 100,
}

# DataFrame column name -> key in each review object of the response.
FIELD_MAP = {
    'recommendationid': 'recommendationid',
    'review': 'review',
    'language': 'language',
    'init_date': 'timestamp_created',
    'update_date': 'timestamp_updated',
    'in_early_access': 'written_during_early_access',
    'voted_up': 'voted_up',
}
columns = list(FIELD_MAP)


def fetch_review_page(cursor):
    """Request one page of reviews and return the decoded JSON payload.

    Parameters
    ----------
    cursor : str
        Raw (unencoded) paging token: '*' for the first page, afterwards the
        'cursor' value of the previous response. requests percent-encodes it.

    Returns
    -------
    dict
        The parsed JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = re.get(
        REVIEWS_URL,
        params={**BASE_PARAMS, 'cursor': cursor},
        timeout=30,  # avoid hanging the notebook forever on a stalled connection
    )
    response.raise_for_status()
    return response.json()


# Collect plain records first and build the DataFrame once at the end;
# concatenating a new frame on every page copies all previous rows each time.
records = []
seen_cursors = set()
cursor = '*'  # '*' requests the first page

# Stop if a cursor repeats: re-requesting the same page would never terminate.
while cursor not in seen_cursors:
    seen_cursors.add(cursor)
    page = fetch_review_page(cursor)

    # Keeping track if a request was successful (checked on *this* response,
    # not on the exploratory request made in an earlier cell).
    if page['success'] != 1:
        break

    # An empty batch means every review has been retrieved.
    if page['query_summary']['num_reviews'] == 0:
        break

    records.extend(
        {column: review[key] for column, key in FIELD_MAP.items()}
        for review in page['reviews']
    )

    # Used to get further batches of reviews:
    cursor = page['cursor']

# Creating the actual DataFrame:
reviews = pd.DataFrame(records, columns=columns)

reviews

Unnamed: 0,recommendationid,review,language,init_date,update_date,in_early_access,voted_up
0,192239535,This game is bad. The only good thing is the m...,english,1744051869,1744051869,False,False
1,192187029,I regret nothing.,english,1743989964,1743989964,False,True
2,192166329,Juegardo.,spanish,1743968248,1743968248,False,True
3,192099068,''All said opinions are my own based on my own...,czech,1743906311,1743906311,False,True
4,192078880,Just cause it has a 2 in the Name doesn't mean...,english,1743885562,1743885562,False,True
...,...,...,...,...,...,...,...
44026,9919693,OH YEAH ITS A+++\n\nThey did a much better job...,english,1398354249,1398365837,False,True
44027,9919677,追記\nフロムにメール送ってきいてみました。\n\nコピーライトと再起動後に日本語が当たるか...,japanese,1398354117,1398485850,False,True
44028,9919652,good :),japanese,1398353963,1398353963,False,True
44029,9919612,Japanese Language dokooooooooooooooooooooo,japanese,1398353706,1398353706,False,True


## Saving the reviews into a csv file:

In [9]:
# Persist the collected reviews next to the notebook.
# With pandas' default settings the DataFrame index is written as the first column.
csv_path = 'ds2-reviews.csv'
reviews.to_csv(csv_path)