# Steam Review Scraper

A scraper that scrape reviews within a fixed time interval

Using API: https://partner.steamgames.com/doc/store/getreviews

## Scrape Reviews

In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [13]:
from datetime import datetime, timedelta
import requests
import pickle
from pathlib import Path

In [14]:
import requests
import json

def get_user_reviews(review_appid, params):
    """
    Fetch user reviews from the Steam store API.

    Args:
        review_appid (str or int): The Steam app ID.
        params (dict): Parameters for filtering the reviews.

    Returns:
        dict: A dictionary containing user reviews if successful, or {"success": 2} if an error occurs.
    """
    user_review_url = f'https://store.steampowered.com/appreviews/{review_appid}'
    
    try:
        # Send the GET request with a timeout of 5 seconds
        req_user_review = requests.get(user_review_url, params=params, timeout=5)
        req_user_review.raise_for_status()  # Raise an error for HTTP errors (4xx, 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return {"success": 2}
    
    try:
        user_reviews = req_user_review.json()
    except json.JSONDecodeError:
        print("Error: Unable to parse JSON response")
        return {"success": 2}
    
    # Validate API response
    if "success" not in user_reviews or user_reviews.get("success") != 1:
        print("Invalid API response format")
        return {"success": 2}

    return user_reviews


In [None]:
def has_min_reviews(app_id,total_reviews, min_reviews=4):
     # Check the total number of reviews
    if total_reviews >= min_reviews:
        print(f"Added app ID {app_id}: {total_reviews} reviews found.")
        return True
    else:
        print(f"Skipped app ID {app_id}: {total_reviews} reviews found.")
        return False

In [None]:

# REad app_ids.txt and store them in an array:
app_ids = []
with open('app_ids.txt', 'r') as f:
    for line in f:
        app_ids.append(line.strip())
    # close the file
    f.close()



# the params of the API
params = {
    'json':1,
    'language': 'turkish',
    'cursor': '*',                                  # set the cursor to retrieve reviews from a specific "page"
    'num_per_page': 100,
    'filter': 'recent'
}

# the time interval to get the reviews
time_interval = timedelta(hours=24)

# the timestamp in the return result are unix timestamp (GMT+0)
end_time = datetime.now()

# start_time = end_time - time_interval
start_time = datetime(2024, 1, 1, 0, 0, 0)

print(f"Start time: {start_time}")     # printing local timezone for logging
print(f"End time: {end_time}")
print(start_time.timestamp(), end_time.timestamp())

passed_start_time = False
passed_end_time = False

filtered_app_ids = []

selected_reviews = []

# Save filtered app IDs to a file
output_path = Path("app_ids_with_reviews.txt")

# loop to get all reviews for each app_id
 
for app_id in app_ids:
    print(f"Getting reviews for app_id: {app_id}")
    while (not passed_start_time or not passed_end_time):

        reviews_response = get_user_reviews(app_id, params)

        # not success?
        if reviews_response["success"] != 1:
            print("Not a success")
            print(reviews_response)
            break

        # If the given app_id has no reviews, skip to the next app_id
        
        if reviews_response["query_summary"]['num_reviews'] == 0:
            print("No reviews.")
            print(reviews_response)
            break

        # Check the total number of reviews
        total_reviews =  reviews_response.get("query_summary", {}).get("num_reviews", 0)
        if has_min_reviews(app_id, total_reviews, min_reviews=4):
            filtered_app_ids.append(app_id)
            
            for review in reviews_response["reviews"]:
                recommendation_id = review['recommendationid']
                
                timestamp_created = review['timestamp_created']
                timestamp_updated = review['timestamp_updated']

                # skip the comments that beyond end_time
                if not passed_end_time:
                    if timestamp_created > end_time.timestamp():
                        continue
                    else:
                        passed_end_time = True
                        
                # exit the loop once detected a comment that before start_time
                if not passed_start_time:
                    if timestamp_created < start_time.timestamp():
                        passed_start_time = True
                        break

                # extract the useful (to me) data
                # Add APP ID to the review 
                review['app_id'] = app_id
                author_steamid = review['author']['steamid']        # will automatically redirect to the profileURL if any
                playtime_forever = review['author']['playtime_forever']
                playtime_last_two_weeks = review['author']['playtime_last_two_weeks']
                playtime_at_review_minutes = review['author']['playtime_at_review']
                last_played = review['author']['last_played']

                review_text = review['review']
                voted_up = review['voted_up']
                votes_up = review['votes_up']
                votes_funny = review['votes_funny']
                weighted_vote_score = review['weighted_vote_score']
                steam_purchase = review['steam_purchase']
                received_for_free = review['received_for_free']
                written_during_early_access = review['written_during_early_access']
                

                my_review_dict = {
                    'app_id': app_id,
                    'recommendationid': recommendation_id,
                    'author_steamid': author_steamid,
                    'playtime_at_review_minutes': playtime_at_review_minutes,
                    'playtime_forever_minutes': playtime_forever,
                    'playtime_last_two_weeks_minutes': playtime_last_two_weeks,
                    'last_played': last_played,

                    'review_text': review_text,
                    'timestamp_created': timestamp_created,
                    'timestamp_updated': timestamp_updated,

                    'voted_up': voted_up,
                    'votes_up': votes_up,
                    'votes_funny': votes_funny,
                    'weighted_vote_score': weighted_vote_score,
                    'steam_purchase': steam_purchase,
                    'received_for_free': received_for_free,
                    'written_during_early_access': written_during_early_access,
                }

                selected_reviews.append(my_review_dict)
                # Output filtered app ids
                with open(output_path, 'w') as f:
                    for app_id in filtered_app_ids:
                        f.write(f"{app_id}\n")

            # go to next page # 10 pages in total
            try:
                # cursor field does not exist in the last page
                cursor = reviews_response['cursor']   


            except Exception as e:
                cursor = ''

            # no next page
            # exit the loop
            if not cursor:
                print("Reached the end of all comments.")
                break
            
            # set the cursor object to move to next page to continue
            params['cursor'] = cursor
            print('To next page. Next page cursor:', cursor)

            # close the file
            f.close()

Start time: 2024-01-01 00:00:00
End time: 2025-03-27 23:27:48.662327
1704056400.0 1743107268.662327
Getting reviews for app_id: 1245620
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added app ID 1245620: 100 reviews found.
App IDs with at least 4 reviews saved to app_ids_with_reviews.txt
Added ap

In [None]:
#selected_reviews[:10]

In [None]:
len(selected_reviews)

4510

In [None]:
# save the selected reviews to a file
def save_to_pkl(foldername, pkl_filename, selected_reviews):
    foldername = "reviews"
    pkl_filename= f"reviews_{start_time.strftime('%Y%m%d-%H%M%S')}_{end_time.strftime('%Y%m%d-%H%M%S')}.pkl"
    output_path = Path(
        foldername, pkl_filename
    )
    if not output_path.parent.exists():
        output_path.parent.mkdir(parents=True)

    pickle.dump(selected_reviews, open(output_path, 'wb'))

## Read a review pickle object

In [None]:
import pandas as pd
from datetime import datetime  # Import datetime module
from pathlib import Path  # Import Path for file handling
import pickle  # Import pickle for loading data


def save_to_csv(data, pklfilename):

    # Define the time range
    start_time = datetime(2024, 1, 1, 0, 0, 0)
    end_time = datetime.now()

    # Define folder and file paths
    foldername = f"reviews"
    output_path = Path(foldername, pklfilename)

    # Check if the file exists
    if not output_path.exists():
        print("File not found.")
        exit()

    # Load the reviews from the pickle file
    selected_reviews = pickle.load(open(output_path, 'rb'))

    # Convert the reviews to a DataFrame
    reviews_df = pd.DataFrame(selected_reviews)

    # Save the DataFrame to a CSV file
    csv_filename = f"reviews_{start_time.strftime('%Y%m%d-%H%M%S')}_{end_time.strftime('%Y%m%d-%H%M%S')}.csv"
    csv_output_path = Path(foldername, csv_filename)

    # Ensure the folder exists
    Path(foldername).mkdir(parents=True, exist_ok=True)

    # Save to CSV
    reviews_df.to_csv(csv_output_path, index=False)

    print(f"Reviews saved to {csv_output_path}")

Reviews saved to reviews\reviews_20240101-000000_20250327-012613.csv


In [None]:
# Save reviews to a CSV file
import csv

csv_filename = f"reviews_{start_time.strftime('%Y%m%d-%H%M%S')}_{end_time.strftime('%Y%m%d-%H%M%S')}.csv"
csv_output_path = Path(
    foldername, csv_filename
)

with open(csv_output_path, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(selected_reviews[0].keys())
    for review in selected_reviews:
        csv_writer.writerow(review.values())
        

In [None]:
len(selected_reviews)

4510

In [None]:
selected_reviews[0]

{'app_id': '1245620',
 'recommendationid': '191251140',
 'author_steamid': '76561199168777335',
 'playtime_at_review_minutes': 721,
 'playtime_forever_minutes': 769,
 'playtime_last_two_weeks_minutes': 133,
 'last_played': 1743023933,
 'review_text': 'O EN BASTAKİ OROS?PU COCUGUNA VURMAYIN\r\nELİNDE GÜLLE DOVUO MK',
 'timestamp_created': 1743021111,
 'timestamp_updated': 1743021111,
 'voted_up': True,
 'votes_up': 0,
 'votes_funny': 0,
 'weighted_vote_score': 0.5,
 'steam_purchase': True,
 'received_for_free': False,
 'written_during_early_access': False}