# Steam Game Review Scraper

In [1]:
import requests as re
import pandas as pd
import urllib.parse

## Requesting reviews from Steam:

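For this project, we are going to collect the following information for each review:
- Recommendation ID (`recommendationid`)
- Review text
- Date posted and date last updated (Unix timestamps)
- Whether the review was written during early access
- Whether the review is a recommendation (`voted_up`)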

In [2]:
# First look at the endpoint: one default-sized batch of English reviews for app 335300.
# The query goes through `params` so that requests builds a well-formed query string
# (a single '?', then '&' between parameters) instead of relying on a hand-written URL.
init_request = re.get(
    'https://store.steampowered.com/appreviews/335300',
    params={'json': 1, 'filter': 'all', 'language': 'english'},
    timeout=30,  # do not let the cell hang forever if the server never answers
).json()

In [3]:
# Status flag of the response; the paging loop further down only continues while it equals 1.
init_request['success']

1

In [4]:
# Summary block of the response (counts and overall score); the paging loop
# further down stops once its `num_reviews` entry is 0.
init_request['query_summary']

{'num_reviews': 20,
 'review_score': 8,
 'review_score_desc': 'Very Positive',
 'total_positive': 22012,
 'total_negative': 4476,
 'total_reviews': 26488}

- Using cursors to get more batches of reviews:

In [5]:
# Cursor token returned with this batch; it is sent back in the next request
# to obtain the following batch of reviews.
cursor = init_request['cursor']
cursor 

'AoIIPwaKQHmMguQE'

In [6]:
# Next batch: send the cursor from the previous response back to Steam.
# Using `params` both separates the parameters correctly ('&', not a second '?')
# and URL-encodes the cursor, which must not be pasted raw into the query string.
request = re.get(
    'https://store.steampowered.com/appreviews/335300',
    params={
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'num_per_page': 100,
        'cursor': cursor,
    },
    timeout=30,  # do not let the cell hang forever if the server never answers
).json()
# Each response carries the cursor for the batch after it.
cursor = request['cursor']
cursor

'AoIIPwAAAHyKjuEE'

In [7]:
# Same request again with the updated cursor, to confirm that it keeps advancing.
# Using `params` both separates the parameters correctly ('&', not a second '?')
# and URL-encodes the cursor, which must not be pasted raw into the query string.
request = re.get(
    'https://store.steampowered.com/appreviews/335300',
    params={
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'num_per_page': 100,
        'cursor': cursor,
    },
    timeout=30,  # do not let the cell hang forever if the server never answers
).json()
# Each response carries the cursor for the batch after it.
cursor = request['cursor']
cursor

'AoIIPwYYan3L6NsE'

- Convert to dataframe:

In [8]:
# Endpoint and fixed query for the full crawl.
# About the 'filter' parameter: when paging through the reviews with a cursor,
# choose either the 'recent' or the 'updated' option so that an empty batch is
# eventually returned and the crawl can terminate.
REVIEWS_URL = 'https://store.steampowered.com/appreviews/335300'
BASE_PARAMS = {
    'json': 1,
    'filter': 'recent',
    'language': 'english',
    'day_range': 365,
    'review_type': 'all',
    'purchase_type': 'all',
    'num_per_page': 100,
}
REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled connection

# Column order of the resulting DataFrame.
columns = ['recommendationid', 'review', 'init_date', 'update_date', 'in_early_access', 'voted_up']


def fetch_review_batch(cursor):
    """Request one batch of reviews starting at `cursor` and return the decoded JSON.

    The query is passed through `params`, so the cursor is URL-encoded by the
    HTTP library rather than by hand.

    Raises an HTTP error (via `raise_for_status`) if the server answers with an
    error status, instead of failing later on a malformed body.
    """
    response = re.get(
        REVIEWS_URL,
        params={**BASE_PARAMS, 'cursor': cursor},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()


def review_to_row(review):
    """Map one raw review dict from the API onto the columns we keep."""
    return {
        'recommendationid': review['recommendationid'],
        'review': review['review'],
        'init_date': review['timestamp_created'],
        'update_date': review['timestamp_updated'],
        'in_early_access': review['written_during_early_access'],
        'voted_up': review['voted_up'],
    }


# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end; concatenating a DataFrame per batch would re-copy all earlier rows each time.
rows = []

# Cursors already requested: if the API ever hands back a cursor we have used,
# following it would loop forever, so the crawl stops instead.
seen_cursors = set()

cursor = '*'  # '*' requests the first batch
while cursor and cursor not in seen_cursors:
    seen_cursors.add(cursor)
    batch = fetch_review_batch(cursor)

    # Stop on a failed request (status taken from THIS response, not from an
    # earlier cell) or when there are no more reviews.
    if batch.get('success') != 1:
        break
    batch_reviews = batch.get('reviews', [])
    if not batch_reviews:
        break

    rows.extend(review_to_row(review) for review in batch_reviews)
    cursor = batch.get('cursor')

reviews = pd.DataFrame(rows, columns=columns)

reviews

Unnamed: 0,recommendationid,review,init_date,update_date,in_early_access,voted_up
0,161822555,I know Dark Souls 2 gets a lot of hate but per...,1711822731,1711822731,False,False
1,161819114,If you're expecting it to be like the original...,1711819586,1711819586,False,True
2,161818428,shit.,1711819000,1711819000,False,False
3,161816145,,1711816954,1711816954,False,True
4,161816097,genuinely bad,1711816908,1711816908,False,False
...,...,...,...,...,...,...
39773,15162268,Try tongue but hole,1427932431,1428081346,False,True
39774,15162220,"So far so good, played it for 20mins so far wi...",1427932153,1427932153,False,True
39775,15162161,Still haven't died!\n\nBonedrinker Rufus 109 -...,1427931845,1427931845,False,True
39776,15162057,Needs more cow Bell\n\n10/10,1427931196,1427931196,False,True


## Saving the reviews to a CSV file:

In [9]:
# Persist the scraped reviews to disk; with pandas' defaults the row index is
# written as the first column of the file.
reviews.to_csv(path_or_buf='reviews.csv')