In [10]:
"""
Steam Review Scraper - War Thunder

Name: Muhammad Faris Irfan Bin Mohmad Natar, Faiq Arifbillah bin Syahrir
Student ID: IS01083410, IS01083886

Steam API endpoint:
    https://store.steampowered.com/appreviews/<appid>?json=1
"""

import csv
import time
import requests
from datetime import datetime, timezone

In [11]:
# Configuration
# Every tunable value for the scraper lives in this cell.

# Steam App ID for War Thunder.
APP_ID: int = 236390

# How many pages (batches) the scraper requests.
MAX_PAGES: int = 5

# Reviews requested per page (the Steam API allows at most 100).
NUM_PER_PAGE: int = 20

# Destination CSV file for the scraped reviews.
OUTPUT_FILE: str = "warthunder_reviews.csv"

# Pause, in seconds, between consecutive requests.
DELAY: float = 1.5

# Review endpoint for the configured app.
STEAM_REVIEW_URL: str = f"https://store.steampowered.com/appreviews/{APP_ID}"

In [12]:
def build_params(cursor: str, num_per_page: int = NUM_PER_PAGE) -> dict:
    """Assemble the query-string parameters for one Steam review request.

    Args:
        cursor: Pagination cursor to send with the request.
        num_per_page: Number of reviews to request in this batch.

    Returns:
        A dict of query parameters ready to pass to ``requests.get``.
    """
    # Fixed options: JSON output, English reviews, newest first, and no
    # filtering by review sentiment or purchase source.
    query = dict(
        json=1,
        language="english",
        filter="recent",
        review_type="all",
        purchase_type="all",
    )

    # Per-request options.
    query["num_per_page"] = num_per_page
    query["cursor"] = cursor
    return query

def fetch_reviews_page(cursor: str) -> dict | None:
    """Fetch a single page (batch) of reviews from the Steam review endpoint.

    Args:
        cursor: Pagination cursor forwarded to ``build_params``.

    Returns:
        The decoded JSON payload when the request succeeds, the body is a
        JSON object, and it reports ``success == 1``; otherwise ``None``.
        Failures are reported with a printed message instead of being raised.
    """
    # Browser-style User-Agent sent with every request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }
    params = build_params(cursor)

    try:
        response = requests.get(
            STEAM_REVIEW_URL,
            params=params,
            headers=headers,
            timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as e:
        print(f"[ERROR] Request failed: {e}")
        return None
    except ValueError as e:
        # Older requests releases signal a non-JSON body with a plain
        # ValueError subclass that is not a RequestException.
        print(f"[ERROR] Response was not valid JSON: {e}")
        return None

    # Valid JSON is not necessarily an object; `.get` below needs a dict.
    if not isinstance(data, dict):
        print(f"[WARNING] Unexpected response payload type: {type(data).__name__}")
        return None

    # Any success value other than 1 is treated as a failed call. An empty
    # review list is not an error here; the caller checks for that itself.
    if data.get("success") != 1:
        print(f"[WARNING] Steam API returned success={data.get('success')}")
        return None

    return data

def parse_reviews(raw_reviews: list[dict]) -> list[dict]:
    """Reduce raw Steam review dicts to the three fields that get exported.

    Args:
        raw_reviews: Review dicts as found in the API response's "reviews" list.

    Returns:
        One dict per input review with the keys ``reviewer_name``,
        ``review_date`` (``YYYY-MM-DD``, UTC) and ``review_content``
        (whitespace-stripped). Fields that are missing or null fall back to
        ``"Unknown"``, ``"1970-01-01"`` and ``""`` respectively.
    """
    cleaned = []

    for review in raw_reviews:
        # Reviewer name
        # Only an ID is read from the nested 'author' object; no display-name
        # field is consulted. "steamid" is preferred, and the "steam_id"
        # spelling is kept as a fallback so existing behavior is unchanged.
        # `or {}` also covers an explicit null author.
        author = review.get("author") or {}
        reviewer_name = author.get("steamid", author.get("steam_id", "Unknown"))

        # Review date
        # `timestamp_created` is a Unix timestamp (seconds since epoch, UTC);
        # a missing or null value is treated as 0.
        timestamp = review.get("timestamp_created") or 0
        review_date = datetime.fromtimestamp(
            timestamp, tz=timezone.utc
        ).strftime("%Y-%m-%d")

        # Review content (a null body becomes an empty string)
        review_content = (review.get("review") or "").strip()

        cleaned.append({
            "reviewer_name":  reviewer_name,
            "review_date":    review_date,
            "review_content": review_content,
        })

    return cleaned

def save_to_csv(reviews: list[dict], filename: str) -> None:
    """Write the collected review records to a CSV file.

    Args:
        reviews: Review dicts keyed by ``reviewer_name``, ``review_date``
            and ``review_content``.
        filename: Path of the CSV file to create (opened in write mode, so
            an existing file is replaced).

    Nothing is written when ``reviews`` is empty; a warning is printed instead.
    """
    columns = ["reviewer_name", "review_date", "review_content"]

    if reviews:
        with open(filename, mode="w", encoding="utf-8", newline="") as csv_file:
            out = csv.DictWriter(csv_file, fieldnames=columns)
            out.writeheader()
            # One CSV row per review, in the order collected.
            for record in reviews:
                out.writerow(record)

        print(f"[INFO] Saved {len(reviews)} reviews → '{filename}'")
    else:
        print("[WARNING] No reviews to save.")

In [13]:
# Main

def scrape_war_thunder_reviews(max_pages: int = MAX_PAGES) -> list[dict]:
    """Drive the full scraping loop for War Thunder reviews.

    Iterates up to `max_pages` cursor pages, collecting and parsing reviews
    from each, then returns the combined dataset. The loop stops early when
    a page cannot be fetched, when a page contains no reviews, or when the
    response gives no new cursor to continue from.

    Args:
        max_pages: Maximum number of API pages to fetch (defaults to MAX_PAGES).

    Returns:
        Flat list of all cleaned review dicts.
    """
    all_reviews = []
    cursor      = "*"       # Steam uses '*' as the starting cursor

    for page_num in range(1, max_pages + 1):
        print(f"[INFO] Fetching page {page_num}/{max_pages}  (cursor: {cursor[:30]}...)")

        data = fetch_reviews_page(cursor)
        if data is None:
            print(f"[WARNING] Stopping early – failed to fetch page {page_num}.")
            break

        raw = data.get("reviews", [])
        if not raw:
            print("[INFO] No more reviews returned. Stopping.")
            break

        page_reviews = parse_reviews(raw)
        all_reviews.extend(page_reviews)
        print(f"  → {len(page_reviews)} reviews collected  (total: {len(all_reviews)})")

        # Last requested page: nothing more to fetch, so no cursor or delay needed
        if page_num == max_pages:
            break

        # Advance the cursor to the next page. A missing cursor must not fall
        # back to '*', and an unchanged cursor must not be reused: either one
        # would request a page that was already collected and add duplicates.
        next_cursor = data.get("cursor")
        if not next_cursor or next_cursor == cursor:
            print("[INFO] No further cursor returned. Stopping.")
            break
        cursor = next_cursor

        # Polite delay before the next request
        time.sleep(DELAY)

    return all_reviews

In [14]:
# Execute Main

if __name__ == "__main__":
    # Run banner: one print call, one line per argument.
    print(
        "=" * 60,
        "  War Thunder – Steam Review Scraper",
        f"  App ID : {APP_ID}",
        f"  Pages  : {MAX_PAGES}  ({NUM_PER_PAGE} reviews/page)",
        "=" * 60,
        sep="\n",
    )

    reviews = scrape_war_thunder_reviews(MAX_PAGES)

    # Only write the CSV when at least one review was collected.
    if not reviews:
        print("No reviews were scraped.")
    else:
        save_to_csv(reviews, OUTPUT_FILE)
        print(f"\nDone!  Total reviews scraped: {len(reviews)}")

  War Thunder – Steam Review Scraper
  App ID : 236390
  Pages  : 5  (20 reviews/page)
[INFO] Fetching page 1/5  (cursor: *...)
  → 20 reviews collected  (total so far: 20)
[INFO] Fetching page 2/5  (cursor: AoJwloHB1JwDd6K5xQY=...)
  → 20 reviews collected  (total so far: 40)
[INFO] Fetching page 3/5  (cursor: AoJ4ocOH1JwDct+xxQY=...)
  → 20 reviews collected  (total so far: 60)
[INFO] Fetching page 4/5  (cursor: AoJw/+fA05wDd9WoxQY=...)
  → 20 reviews collected  (total so far: 80)
[INFO] Fetching page 5/5  (cursor: AoJwupqO05wDe5ahxQY=...)
  → 20 reviews collected  (total so far: 100)
[INFO] Saved 100 reviews → 'warthunder_reviews.csv'

✅  Done!  Total reviews scraped: 100
