In [1]:
import math
import re
import pandas as pd
import time
import os
import traceback
import requests
import csv

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains

In [2]:
SCROLL_PAUSE_TIME = 2
# TIMEOUT = 4 * 60 * 60  # 4 hours in seconds
TIMEOUT = 10  # 1 minute in seconds

def load_all_reviews_in_popup(driver, popup_element):
    last_height = driver.execute_script("return arguments[0].scrollHeight", popup_element)
    start_time = time.time()
    while True:
        current_time = time.time()
        if current_time - start_time > TIMEOUT:
            print("Timeout reached, stopping the scroll.")
            break
        driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", popup_element)
        time.sleep(SCROLL_PAUSE_TIME)
        new_height = driver.execute_script("return arguments[0].scrollHeight", popup_element)
        if new_height == last_height:
            break
        last_height = new_height

def extract_reviews(popup_element, star_rating):
    reviews = popup_element.find_elements(By.CLASS_NAME, 'RHo1pe')
    review_list = []

    for review in reviews:
        try:
            username = review.find_element(By.CLASS_NAME, 'X5PpBb').text
            date = review.find_element(By.CLASS_NAME, 'bp9Aid').text
            
            rating_text = review.find_element(By.XPATH, './/div[@role="img"]').get_attribute("aria-label")
            rating = int(re.search(r'(\d+)개 만점에 (\d+)개', rating_text).group(2))
            
            content = review.find_element(By.CLASS_NAME, 'h3YV2d').text
            
            helpful_text = review.find_element(By.CLASS_NAME, 'AJTPZc').text if len(review.find_elements(By.CLASS_NAME, 'AJTPZc')) > 0 else '사용자 0명이 이 리뷰가 유용하다고 평가함'
            helpful = int(re.search(r'사용자 (\d+)명이 이 리뷰가 유용하다고 평가함', helpful_text).group(1))
            
            review_list.append({"username": username, "date": date, "rating": rating, "content": content, "helpful": helpful})
        except Exception as e:
            print(f"Error: {e}")
            continue

    review_df = pd.DataFrame(review_list)
    review_df.to_csv(f'./temu_reviews_star_{star_rating}.csv', index=False)

In [3]:
def setup_driver():
    options = Options()
    options.add_argument('--lang=ko')
    driver = webdriver.Chrome(options=options)
    driver.implicitly_wait(3)
    return driver

def load_reviews_for_star_rating(driver, star_rating):
    URL = 'https://play.google.com/store/apps/details?id=com.einnovation.temu&hl=ko'
    driver.get(URL)

    review_button_xpath = """//*[@id="yDmH0d"]/c-wiz[2]/div/div/div[1]/div/div[2]/div/div[1]/div[1]/c-wiz[4]/section/header/div/div[2]/button"""
    review_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, review_button_xpath)))
    review_button.click()

    star_button_xpath = """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div/div[1]/div/div/div/div[3]/div[2]/span[2]"""
    star_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, star_button_xpath)))
    star_button.click()

    star_xpath_map = {
        1: """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[2]/div[2]/div[2]""",
        2: """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[3]/div[2]/div[2]""",
        3: """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[4]/div[2]/div[2]""",
        4: """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[5]/div[2]/div[2]""",
        5: """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[6]/div[2]/div[2]""",
    }
    
    star_button_xpath = star_xpath_map[star_rating]
    star_element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, star_button_xpath)))
    star_element.click()

    popup_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#yDmH0d > div.VfPpkd-Sx9Kwc.cC1eCc.UDxLd.PzCPDd.HQdjr.VfPpkd-Sx9Kwc-OWXEXe-FNFY6c > div.VfPpkd-wzTsW > div > div > div > div > div.fysCi.Vk3ZVd'))
    )

    load_all_reviews_in_popup(driver, popup_element)
    extract_reviews(popup_element, star_rating)

In [12]:
options = Options()
options.add_argument('--lang=ko')

URL = 'https://play.google.com/store/apps/details?id=com.einnovation.temu&hl=ko'

driver = webdriver.Chrome(options=options) # 크롬드라이버 객체 생성(초기화)
driver.implicitly_wait(3) # 암묵적 대기(3초)
driver.get(URL)

review_button_xpath = """//*[@id="yDmH0d"]/c-wiz[2]/div/div/div[1]/div/div[2]/div/div[1]/div[1]/c-wiz[5]/section/header/div/div[2]/button/i""" 
review_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, review_button_xpath)))
review_button.click()

star_button_xpath = """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div/div[1]/div/div/div/div[3]/div[2]"""
star_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, star_button_xpath)))
star_button.click()

first_button_xpath = """//*[@id="yDmH0d"]/div[5]/div[2]/div/div/div/div/div[2]/div[2]/div/div/span[2]"""
first_element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, first_button_xpath)))
first_element.click()

In [13]:
# 별 1개 스크롤
popup_element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '#yDmH0d > div.VfPpkd-Sx9Kwc.cC1eCc.UDxLd.PzCPDd.HQdjr.VfPpkd-Sx9Kwc-OWXEXe-FNFY6c > div.VfPpkd-wzTsW > div > div > div > div > div.fysCi.Vk3ZVd'))
)

load_all_reviews_in_popup(driver, popup_element)

extract_reviews(popup_element, 1)
# driver.quit()

Timeout reached, stopping the scroll.
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
Error: 'NoneType' object has no attribute 'group'
