In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
import os
import random

# Site root. Not referenced by the code visible in this cell.
BASE_URL = "https://steamdb.info"
# Search results URL template; `{page_id}` is filled in with the page number.
SEARCH_URL = "https://steamdb.info/instantsearch/?page={page_id}"
# Output file: scraped rows are appended here, and its `page` column is read
# back on start-up to decide which pages still need to be processed.
CSV_FILE = 'games_name_url_price.csv'

def get_processed_pages(filename):
    """Return the set of page numbers already recorded in a results CSV.

    Args:
        filename: Path to a CSV file whose header contains a ``page`` column.

    Returns:
        A set of ints, one per distinct page number found. The set is empty
        when the file does not exist, is empty, or has no usable ``page``
        column.
    """
    if not os.path.exists(filename):
        return set()

    processed_pages = set()
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing newlines are parsed correctly.
    with open(filename, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            # A file with a missing/blank header or a truncated row yields no
            # 'page' value (None), and a damaged row may hold non-numeric
            # text; skip those rows instead of aborting the whole run.
            try:
                processed_pages.add(int(row.get('page')))
            except (TypeError, ValueError):
                continue
    return processed_pages

def get_game_details():
    """Scrape every not-yet-saved search page and append the results to CSV_FILE.

    Pages 1-99 are considered. Pages whose number already appears in CSV_FILE
    are skipped, so the function can be re-run to resume an interrupted crawl.
    The remaining pages are visited in random order, each in its own Edge
    browser session, and each page's rows are saved as soon as the page has
    been processed.

    A page that fails to load, or a game entry missing one of the expected
    elements, is reported on stdout and skipped; a page that produced no rows
    is not written, so it will be retried on the next run.

    Returns:
        None. Results are written to CSV_FILE as a side effect.
    """
    print("LOADED")

    processed_pages = get_processed_pages(CSV_FILE)
    unprocessed_pages = list(set(range(1, 100)) - processed_pages)
    print(unprocessed_pages)

    # Visit the remaining pages in random order.
    random.shuffle(unprocessed_pages)

    for page_index in unprocessed_pages:
        print(f"Processing page {page_index}")
        game_details = []

        options = webdriver.EdgeOptions()
        options.page_load_strategy = 'normal'
        driver = webdriver.Edge(options=options)
        try:
            try:
                driver.get(SEARCH_URL.format(page_id=page_index))

                # Random sleep before processing the page to avoid detection
                time.sleep(random.uniform(1, 3))

                # Wait until the hits list is present; `until` returns the
                # located element, so no second lookup is needed.
                hits_list = WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'ais-Hits-list'))
                )
                games = hits_list.find_elements(By.CLASS_NAME, 'ais-Hits-item')
            except Exception as e:
                # Best-effort crawl: report the page and move on.
                print(f"Error on page {page_index}: {e}")
                continue

            for game in games:
                try:
                    # find_element raises when an element is missing, so an
                    # incomplete entry is skipped as a whole.
                    price_element = game.find_element(By.CLASS_NAME, 's-hit--price')
                    link = game.find_element(By.TAG_NAME, 'a')
                    name_element = game.find_element(By.CLASS_NAME, 'ais-Highlight-nonHighlighted')
                    release_element = game.find_element(By.CLASS_NAME, 's-hit--release')

                    game_details.append({
                        'name': name_element.text,
                        'url': link.get_attribute('href'),
                        'price': price_element.text,
                        'release': release_element.text,
                        'page': page_index,
                    })
                except Exception as e:
                    print(f"Error extracting details for a game on page {page_index}: {e}")
                    continue
        finally:
            # Always close the browser, including on `continue` and on
            # unexpected errors, so no Edge process is left running.
            driver.quit()

        # Nothing scraped means nothing to record; skipping the write keeps
        # the CSV free of header-only/blank output for this page.
        if game_details:
            save_to_csv(game_details, CSV_FILE)
    

def save_to_csv(game_details, filename):
    """Append game records to a CSV file, writing the header row once.

    Args:
        game_details: List of dicts that all share the same keys; the keys of
            the first dict define the column order.
        filename: Path of the CSV file to append to. It is created if missing.

    Returns:
        None. When ``game_details`` is empty nothing is written and the file
        is not created.
    """
    # With no records there are no column names either; writing anyway would
    # emit a blank header line and leave the file without a usable header.
    if not game_details:
        return

    # A missing file and an existing-but-empty file both still need a header.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    fieldnames = list(game_details[0].keys())

    with open(filename, 'a', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        if needs_header:
            dict_writer.writeheader()
        dict_writer.writerows(game_details)

# Run the scraper when this cell/script executes, then report where the
# results were written.
get_game_details()
print(f"Finished processing. Data saved to {CSV_FILE}")


LOADED
[1]
Processing page 1
Finished processing. Data saved to games_name_url_price.csv
