In [32]:
# Import required packages
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import json
import time

In [33]:
# Set up request
headers = {"User-Agent": "Mozilla/5.0"}
# Cookies sent with every store request (presumably to get past Steam's
# age gate -- confirm against the store's current behavior).
cookies = {
    "birthtime": "1004572800",  # Unix timestamp for 2001-11-01 00:00:00 UTC
    "lastagecheckage": "1-November-2001"  # the same date in text form
}

# Get soup from top sellers page
topsellers_search_url = "https://store.steampowered.com/search/?filter=topsellers"
# A timeout keeps a stalled connection from hanging the notebook forever;
# raise_for_status() stops an HTTP error page from being parsed as results.
topsellers_search_page = requests.get(
    topsellers_search_url, headers=headers, cookies=cookies, timeout=30
)
topsellers_search_page.raise_for_status()
topsellers_soup = BeautifulSoup(topsellers_search_page.text, "html.parser")

# Get soup from indie page
indie_search_url = "https://store.steampowered.com/search/?tags=492&supportedlang=english&filter=topsellers&ndl=1"
indie_search_page = requests.get(
    indie_search_url, headers=headers, cookies=cookies, timeout=30
)
indie_search_page.raise_for_status()
indie_soup = BeautifulSoup(indie_search_page.text, "html.parser")


In [34]:
# Pull the search-result anchor rows out of each parsed results page
topsellers_results, indie_results = (
    page_soup.select('a.search_result_row')
    for page_soup in (topsellers_soup, indie_soup)
)

def _final_price_from_text(price_text):
    """Return the last currency amount in ``price_text``, or a fallback string."""
    price_text = price_text.replace('Free to Play', '$0.00')
    # Allow thousands separators so "$1,299.00" is not cut down to "$1".
    prices = re.findall(r'[\$€£]\d+(?:,\d{3})*(?:\.\d{2})?', price_text)
    if prices:
        # With several amounts present (e.g. original and discounted price),
        # the last one listed is kept.
        return prices[-1]
    # No recognizable amount: keep whatever text there was, if any.
    return price_text.strip() or "Unknown"


def extract_game_data(soup_results):
    """Convert search-result rows into a list of plain dictionaries.

    Args:
        soup_results: Iterable of result elements. Each must support
            ``row['href']`` and ``row.select_one(css_selector)``, as the
            BeautifulSoup tags produced by ``soup.select(...)`` do.

    Returns:
        A list with one dict per row, holding:
          * ``"title"`` -- stripped text of the row's ``.title`` element,
            or ``"Unknown"`` when the row has no such element;
          * ``"url"``   -- the row's ``href`` with any query string removed;
          * ``"price"`` -- the last currency amount found in the price
            block, the block's raw text when no amount is recognized, or
            ``"Unknown"`` when the block is missing or empty.
    """
    game_data = []

    for result in soup_results:
        url = result['href'].split('?')[0]

        title_node = result.select_one('.title')
        # Guard against rows without a title element instead of crashing
        # on ``None.text``.
        title = title_node.text.strip() if title_node is not None else "Unknown"

        price_block = result.select_one('.search_price, .search_price_discount_combined')
        if price_block is not None:
            price = _final_price_from_text(
                price_block.get_text(separator=' ', strip=True)
            )
        else:
            price = "Unknown"

        game_data.append({
            "title": title,
            "url": url,
            "price": price
        })

    return game_data

In [47]:
# Convert both sets of result rows into lists of game records
topsellers_data, indie_data = map(
    extract_game_data, (topsellers_results, indie_results)
)


In [48]:
# For each game, extract the tags
def extract_tags_from_game(url):
    """Fetch a game's store page and return the tag names embedded in it.

    Each ``<script>`` element on the page is checked for a call to
    ``InitAppTagModal``. The JSON array passed as that call's second
    argument is decoded and the ``"name"`` of every entry is collected.

    Args:
        url: Address of the game's store page.

    Returns:
        A list of tag names. An empty list is returned when no matching
        script is found, or when the request, the HTTP status, or the
        parsing fails; in the failure case the error is printed first.
    """
    try:
        # The timeout stops one stalled request from blocking a whole
        # scraping loop.
        game_page = requests.get(url, headers=headers, cookies=cookies, timeout=30)
        # Report HTTP errors through the handler below rather than quietly
        # scanning an error page and returning no tags.
        game_page.raise_for_status()
        soup = BeautifulSoup(game_page.text, "html.parser")
        for script in soup.find_all("script"):
            script_text = script.text
            if "InitAppTagModal" not in script_text:
                continue
            # Capture the array literal that follows the numeric first argument.
            match = re.search(r'InitAppTagModal\(\s*\d+,\s*(\[\{.*?\}\])', script_text)
            if match:
                tag_data = json.loads(match.group(1))
                return [tag["name"] for tag in tag_data]
    except Exception as e:
        # Best effort: one bad page must not abort the caller's loop.
        print(f"Error processing {url}: {e}")
    return []

In [49]:
# Attach the scraped tag list to every top-seller record
for game in topsellers_data:
    print(f"Processing {game['title']} - {game['url']}")
    game["tags"] = extract_tags_from_game(game["url"])
    time.sleep(1)  # wait one second before the next request

Processing Schedule I - https://store.steampowered.com/app/3164500/Schedule_I/
Processing Counter-Strike 2 - https://store.steampowered.com/app/730/CounterStrike_2/
Processing Steam Deck - https://store.steampowered.com/app/1675200/Steam_Deck/
Processing Marvel Rivals - https://store.steampowered.com/app/2767030/Marvel_Rivals/
Processing R.E.P.O. - https://store.steampowered.com/app/3241660/REPO/
Processing Blue Prince - https://store.steampowered.com/app/1569580/Blue_Prince/
Processing Baldur's Gate 3 - https://store.steampowered.com/app/1086940/Baldurs_Gate_3/
Processing The Elder Scrolls® Online - https://store.steampowered.com/app/306130/The_Elder_Scrolls_Online/
Processing Ready or Not - https://store.steampowered.com/app/1144200/Ready_or_Not/
Processing Red Dead Redemption 2 - https://store.steampowered.com/app/1174180/Red_Dead_Redemption_2/
Processing Path of Exile 2 - https://store.steampowered.com/app/2694490/Path_of_Exile_2/
Processing The Last of Us™ Part II Remastered - htt

In [50]:
# Attach the scraped tag list to every indie record
for game in indie_data:
    print(f"Processing {game['title']} - {game['url']}")
    game["tags"] = extract_tags_from_game(game["url"])
    time.sleep(1)  # wait one second before the next request

Processing Blue Prince - https://store.steampowered.com/app/1569580/Blue_Prince/
Processing Rust - https://store.steampowered.com/app/252490/Rust/
Processing Last Epoch - https://store.steampowered.com/app/899770/Last_Epoch/
Processing Balatro - https://store.steampowered.com/app/2379780/Balatro/
Processing Drive Beyond Horizons - https://store.steampowered.com/app/2625420/Drive_Beyond_Horizons/
Processing Outer Wilds - https://store.steampowered.com/app/753640/Outer_Wilds/
Processing Enshrouded - https://store.steampowered.com/app/1203620/Enshrouded/
Processing Phasmophobia - https://store.steampowered.com/app/739630/Phasmophobia/
Processing Stardew Valley - https://store.steampowered.com/app/413150/Stardew_Valley/
Processing Terraria - https://store.steampowered.com/app/105600/Terraria/
Processing Factorio - https://store.steampowered.com/app/427520/Factorio/
Processing RimWorld - https://store.steampowered.com/app/294100/RimWorld/
Processing Stray - https://store.steampowered.com/ap

In [51]:
# Build one DataFrame per listing, then write each to its own CSV file
topsellers_df = pd.DataFrame(data=topsellers_data)
indie_df = pd.DataFrame(data=indie_data)

topsellers_df.to_csv(path_or_buf="steam_top_sellers_data.csv", index=False)
indie_df.to_csv(path_or_buf="steam_indie_data.csv", index=False)

In [52]:
# Analyze the data
# NOTE: this uses the in-memory topsellers_df. If the data is reloaded from
# the exported CSV instead, the 'tags' column comes back as strings and must
# be parsed into lists again before explode() will split it.
import plotly.express as px
import plotly.io as pio

# Show figures through plotly's "browser" renderer.
pio.renderers.default = "browser"

# One row per (game, tag) pair, then count how many games carry each tag.
exploded_tags = topsellers_df.explode('tags')
tag_counts = (
    exploded_tags['tags']
    .value_counts()
    .rename_axis('tag')
    .reset_index(name='count')
)

fig = px.bar(
    tag_counts,
    x='tag',
    y='count',
    title='Top Sellers Tags Distribution',
    # Keys must match the DataFrame column names ('tag', not 'tags'),
    # otherwise the axis label is not applied.
    labels={'tag': 'Tags', 'count': "Count"},
    color_discrete_sequence=px.colors.qualitative.Plotly)

fig.update_layout(xaxis_tickangle=45)
fig.show()