In [29]:
#Selenium 4.9.0
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options as EdgeOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

import re
import time
from datetime import datetime
import pandas as pd

import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

import plotly.express as px
import matplotlib.pyplot as plt

# --- Target page ------------------------------------------------------------
# Steam app id whose community reviews are scraped; it is substituted into the
# "most recent, English" review listing URL below.
game_id = 291550
url_template = "https://steamcommunity.com/app/{}/reviews/?p=1&browsefilter=mostrecent&filterLanguage=english"
url = url_template.format(game_id)
print(url)

# --- Browser configuration ---------------------------------------------------
# Pass the UI language to Edge via the --lang command-line switch.
language = "en-US"
options = EdgeOptions()
options.add_argument(f"--lang={language}")

# --- Launch ------------------------------------------------------------------
# The Edge driver binary is read from the current working directory.
service = Service("./msedgedriver.exe")
driver = webdriver.Edge(service=service, options=options)
driver.maximize_window()
driver.get(url)

def get_current_scroll_position(driver):
    """Return the page's vertical scroll offset as reported by the browser.

    Evaluates ``window.pageYOffset`` through ``driver.execute_script`` and
    hands back whatever the driver returns.
    """
    offset_script = "return window.pageYOffset;"
    vertical_offset = driver.execute_script(offset_script)
    return vertical_offset

def scroll_to_bottom(driver, pause=1.0):
    """Scroll the window to the bottom of the document, then wait.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver).
        pause: Seconds to sleep after scrolling so the page has time to
            react. Defaults to 1 second, the delay this function has
            always used; pass 0 to skip the wait.
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    if pause > 0:
        time.sleep(pause)

def get_steam_id(card):
    """Return the identifier found at the end of the reviewer's profile URL.

    The profile link is the second ``<a>`` inside the card's
    ``apphub_friend_block``; the identifier is the last non-empty path
    segment of its ``href``.

    Args:
        card: Selenium element for one review card.

    Returns:
        The last path segment of the profile URL.

    Raises:
        NoSuchElementException: If the profile link is not in the card.
        ValueError: If the link has no ``href`` attribute.
    """
    profile_link = card.find_element(
        By.XPATH, './/div[@class="apphub_friend_block"]/div/a[2]'
    )
    profile_url = profile_link.get_attribute('href')
    if not profile_url:
        raise ValueError("review card profile link has no href attribute")

    # Drop any trailing slash first so the result is the same whether or not
    # the URL ends with "/". Indexing [-2] on a URL without a trailing slash
    # would return the fixed path component before the identifier instead.
    return profile_url.rstrip('/').rsplit('/', 1)[-1]

def _optional_text(card, by, locator):
    """Return the text of the first element matching *locator*, or "" if absent."""
    try:
        return card.find_element(by, locator).text
    except NoSuchElementException:
        return ""


def scrape_review_data(card):
    """Extract the fields of a single review card.

    Args:
        card: Selenium element for one review card.

    Returns:
        A tuple ``(early_access_review, review_content, thumb_text,
        review_length, play_hours, date_posted)`` where

        * ``early_access_review`` is the early-access label text, or "" when
          the card has none;
        * ``review_content`` is the card's text with the date, early-access
          and compensation labels removed and line breaks turned into
          single spaces;
        * ``thumb_text`` and ``play_hours`` are the texts of the second and
          third ``div`` under ``reviewInfo`` (presumably the recommendation
          label and the hours played; verify against the page markup);
        * ``review_length`` is the number of non-space characters in
          ``review_content``;
        * ``date_posted`` is the posting-date label text.

    Raises:
        NoSuchElementException: If a mandatory element (date, text content,
            review info) is missing from the card.
    """
    text_block_xpath = './/div[@class="apphub_CardTextContent"]'

    date_posted = card.find_element(
        By.XPATH, text_block_xpath + '/div[@class="date_posted"]'
    ).text.strip()
    early_access_review = _optional_text(
        card, By.XPATH, text_block_xpath + '/div[@class="early_access_review"]'
    ).strip()
    received_compensation = _optional_text(card, By.CLASS_NAME, "received_compensation")

    # The text block also contains the labels above; strip them out so only
    # the reviewer's own words remain.
    review_content = card.find_element(By.CLASS_NAME, "apphub_CardTextContent").text.strip()
    for label in (date_posted, early_access_review, received_compensation):
        if label:
            review_content = review_content.replace(label, "")

    # Turn line breaks into spaces (deleting them would glue the last word of
    # one line to the first word of the next) and collapse runs of spaces.
    # Only spaces and newlines are touched, so the non-space character count
    # below is unaffected.
    words = review_content.replace("\n", " ").split(" ")
    review_content = " ".join(word for word in words if word)

    review_length = len(review_content.replace(" ", ""))

    thumb_text = card.find_element(By.XPATH, './/div[@class="reviewInfo"]/div[2]').text
    play_hours = card.find_element(By.XPATH, './/div[@class="reviewInfo"]/div[3]').text

    return early_access_review, review_content, thumb_text, review_length, play_hours, date_posted

# Scraped rows, one tuple per review, in the order returned by
# scrape_review_data().
reviews = []
# Profile identifiers already scraped; used to skip cards seen on an earlier
# pass over the page.
steam_ids_set = set()
# How many consecutive scrolls may leave the scroll offset unchanged before
# the page is considered fully loaded.
max_scroll_attempts = 5

try:
    last_position = get_current_scroll_position(driver)
    running = True
    while running:
        cards = driver.find_elements(By.CLASS_NAME, 'apphub_Card')

        # Only the most recently loaded cards are inspected on each pass;
        # anything already recorded is skipped via steam_ids_set.
        for card in cards[-20:]:
            steam_id = get_steam_id(card)
            if steam_id in steam_ids_set:
                continue
            # Record the id so the same card is not scraped again next pass.
            steam_ids_set.add(steam_id)
            reviews.append(scrape_review_data(card))

        # Scroll until the offset changes (more content appeared) or the
        # retry budget is exhausted (nothing more to load).
        page_grew = False
        for _ in range(max_scroll_attempts):
            scroll_to_bottom(driver)
            curr_position = get_current_scroll_position(driver)
            if curr_position != last_position:
                last_position = curr_position
                page_grew = True
                break
            # Give the page extra time before retrying the scroll.
            time.sleep(3)
        running = page_grew

except Exception as e:
    # Best effort: report the failure and keep whatever was scraped so far.
    print(e)

finally:
    driver.quit()

df = pd.DataFrame(reviews, columns=['EarlyAccess', 'ReviewText', 'Review', 'ReviewLength', 'PlayHours', 'DatePosted'])

https://steamcommunity.com/app/291550/reviews/?p=1&browsefilter=mostrecent&filterLanguage=english
'NoneType' object has no attribute 'text'
