Import necessary packages.

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

The code below grabs data from Steam250, one year at a time, and parses each year's table for every game's rank, name, and Steam application ID. It's listed here for demonstration purposes; the data has already been saved locally to avoid making redundant requests.

In [None]:
years = [2018, 2019, 2020, 2021, 2022, 2023]

# Seconds to wait on Steam250 before abandoning a request; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


def _parse_row(row):
    """Extract the rank, name and Steam app ID from one game row.

    Args:
        row: A BeautifulSoup tag for one entry of the ranking list.

    Returns:
        A dict with the keys "rank", "name" and "appid", or None when the
        row's first direct-child div holds no bare text (no rank to read).

    Raises:
        ValueError: If the row lacks the expected div or titled link, or if
            the rank / app ID text is not an integer.
    """
    rank_div = row.find("div", recursive=False)
    if rank_div is None:
        raise ValueError("row has no direct child <div>")

    # Only direct text children are read, so rank-change stats such as "+2"
    # or "-1" (presumably nested in child tags) are ignored.
    texts = [
        t.strip()
        for t in rank_div.contents
        if isinstance(t, str) and t.strip()
    ]
    if not texts:
        return None
    rank = int(texts[0])

    name_tag = row.select_one("a[title]")
    if name_tag is None:
        raise ValueError("row has no <a title=...> link")
    name = name_tag.text.strip()

    app_link = name_tag.get("href")
    if not app_link:
        raise ValueError("game link has no href")
    # The app ID is the last path segment; tolerate a trailing slash.
    appid = int(app_link.rstrip("/").split("/")[-1])

    return {
        "rank": rank,
        "name": name,
        "appid": appid,
    }


for year in years:
    url = f"https://steam250.com/reviews/{year}"

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"{year}: request to {url} failed: {e}")
        continue

    html = response.text
    soup = BeautifulSoup(html, 'html.parser')

    section = soup.select_one("section.applist.compact")
    if section is None:
        print(f"{year}: no 'section.applist.compact' element at {url}; skipping")
        continue
    rows = section.find_all("div", id=True)

    records = []

    for row in rows:
        try:
            record = _parse_row(row)
        except ValueError as e:
            # One malformed row should not abort the whole year.
            print(f"{year}: skipping row {row.get('id')!r}: {e}")
            continue

        if record is not None:
            records.append(record)

    if not records:
        # Do not overwrite previously saved data with an empty file.
        print(f"{year}: no rows parsed from {url}; not writing a CSV")
        continue

    curr_df = pd.DataFrame(records)
    curr_file_name = f"{year}_top250_ids.csv"

    curr_df.to_csv(curr_file_name, index=False)