In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import pandas as pd
import time
import re

In [None]:
def setup_driver():
    """Create a Chrome WebDriver that starts maximized with notifications disabled.

    The chromedriver executable path is obtained from
    ``ChromeDriverManager().install()`` and handed to a ``Service``.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()
    for flag in ("--start-maximized", "--disable-notifications"):
        chrome_options.add_argument(flag)

    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

In [None]:
def _optional_text(container, by, selector, strip=False):
    """Return the text of the first child matching the locator, or None if none match."""
    matches = container.find_elements(by, selector)
    if not matches:
        return None
    text = matches[0].text
    return text.strip() if strip else text


def _extract_user_rank(container):
    """Return the rank name embedded in the `a.top` onclick attribute, or None."""
    links = container.find_elements(By.CSS_SELECTOR, 'a.top')
    if not links:
        return None
    # get_attribute returns None when the attribute is absent; treat that the
    # same as "no rank" instead of letting it discard the whole review.
    onclick = links[0].get_attribute('onclick') or ''
    found = re.search(r"t_review_rank_name: '([^']*)", onclick)
    return found.group(1) if found else None


def scrape_reviews(driver):
    """Scrape all reviews on the current page.

    Args:
        driver: Object exposing ``find_elements(by, selector)``; the review
            entries are looked up with the CSS selector
            ``.review_list_wrap .inner_list > li``.

    Returns:
        A list of dicts, one per successfully parsed entry, with the keys
        ``date``, ``score``, ``user_tags``, ``user_rank``, ``usage_duration``,
        ``item_option``, ``text_review`` and ``recommendation_score``, plus
        whatever keys ``extract_poll_data`` returns for that entry.
        Optional fields are ``None`` when their element is absent. Entries for
        which a required lookup (``date`` or ``txt_inner``) fails are skipped
        with a printed message.
    """
    reviews = []

    # Each matched <li> is treated as one review entry.
    review_containers = driver.find_elements(By.CSS_SELECTOR, ".review_list_wrap .inner_list > li")

    for container in review_containers:
        try:
            review = {
                # Required field: find_element raises if missing, which skips the entry.
                'date': container.find_element(By.CLASS_NAME, 'date').text,
                'score': extract_score(container),
                'user_tags': extract_user_tags(container),
                'user_rank': _extract_user_rank(container),
                'usage_duration': _optional_text(container, By.CLASS_NAME, 'point_flag'),
                'item_option': _optional_text(container, By.CLASS_NAME, 'item_option', strip=True),
                # Required field: an entry without review text is skipped.
                'text_review': container.find_element(By.CLASS_NAME, 'txt_inner').text.strip(),
                'recommendation_score': _optional_text(container, By.CSS_SELECTOR, '.recom_area .num'),
            }

            # Merge the poll characteristics into the same flat record.
            review.update(extract_poll_data(container))

            reviews.append(review)
        except Exception as e:
            # Best effort: one broken entry must not abort the whole page.
            print(f"Skipping a review due to error: {str(e)}")
            continue

    return reviews

In [None]:
def extract_score(container):
    """Extract the complete score text.

    Args:
        container: Element exposing ``find_element(by, selector)``.

    Returns:
        The stripped text of the first element matching
        ``.score_area .review_point .point``, or ``None`` if the lookup fails.
    """
    try:
        return container.find_element(By.CSS_SELECTOR, '.score_area .review_point .point').text.strip()
    except Exception:
        # Best-effort field: a missing score must not fail the review.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        return None

In [None]:
def extract_user_tags(container):
    """Extract user tags as a comma-separated string.

    Args:
        container: Element exposing ``find_elements(by, selector)``.

    Returns:
        The texts of all ``.tag span`` elements joined with ``', '``. This is
        an empty string when no such element exists, and ``None`` only when
        the lookup itself raises.
    """
    try:
        tags = [tag.text for tag in container.find_elements(By.CSS_SELECTOR, '.tag span')]
        return ', '.join(tags)
    except Exception:
        # Best-effort field. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return None

In [None]:
def extract_poll_data(container):
    """Extract all poll characteristics.

    Looks up the ``poll_sample`` element inside ``container`` and, for each
    ``poll_type1`` child, maps the stripped ``<dt>`` text to the stripped
    ``<dd>`` text.

    Args:
        container: Element exposing ``find_element(by, selector)``.

    Returns:
        A dict of ``{dt_text: dd_text}``. If any lookup fails, the pairs
        collected up to that point are returned (an empty dict when the poll
        section itself is missing).
    """
    poll_data = {}
    try:
        poll_sample = container.find_element(By.CLASS_NAME, 'poll_sample')
        items = poll_sample.find_elements(By.CLASS_NAME, 'poll_type1')
        for item in items:
            dt = item.find_element(By.TAG_NAME, 'dt').text.strip()
            dd = item.find_element(By.TAG_NAME, 'dd').text.strip()
            poll_data[dt] = dd
    except Exception:
        # The poll section is optional, so keep whatever was gathered.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        pass
    return poll_data

In [None]:
def handle_pagination(driver, wait_seconds=3):
    """Advance to the next page of reviews, including 'next 10 pages' navigation.

    The current page number is read from ``.pageing strong``. The first
    anchor whose ``data-page-no`` is greater than that number is clicked; if
    there is none, the ``.pageing a.next`` link is clicked instead.

    Args:
        driver: Object exposing ``find_element``, ``find_elements`` and
            ``execute_script``.
        wait_seconds: Seconds to sleep after the click so the new page can
            load. Defaults to 3.

    Returns:
        ``True`` if a link was clicked. ``False`` if there is nothing left to
        click, or if an error occurred (the error is printed).
    """
    try:
        current_page = driver.find_element(By.CSS_SELECTOR, ".pageing strong")
        # Coerce to int so only a plain number is ever interpolated into the XPath.
        current_no = int(current_page.text)
        next_pages = driver.find_elements(
            By.XPATH, f"//a[@data-page-no and number(@data-page-no) > {current_no}]")

        # Prefer the next numbered page within the current range.
        target = next_pages[0] if next_pages else None

        if target is None:
            # find_elements returns an empty list instead of raising, so running
            # out of pages is a normal `False`, not a reported error.
            next_10 = driver.find_elements(By.CSS_SELECTOR, ".pageing a.next")
            target = next_10[0] if next_10 else None

        if target is None:
            return False

        # Click through JavaScript, as the rest of this file does.
        driver.execute_script("arguments[0].click();", target)
        time.sleep(wait_seconds)
        return True
    except Exception as e:
        print(f"Pagination error: {str(e)}")
        return False

In [None]:
def _js_click_when_present(driver, locator, timeout):
    """Wait until `locator` is present in the DOM, then click it through JavaScript."""
    element = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located(locator))
    driver.execute_script("arguments[0].click();", element)
    return element


def apply_review_filters(driver):
    """Apply filters to show only 1-star and 2-star reviews.

    Opens the filter popup (``#filterBtn``), waits for ``#searchPoint``,
    clicks ``#point_type1_5`` and ``#point_type1_6`` (the 2-star and 1-star
    options according to the original author's notes), then clicks
    ``#btnFilterConfirm``. Each option is clicked unconditionally; its current
    checked state is not inspected.

    Args:
        driver: Selenium WebDriver positioned on the product page.

    Returns:
        ``True`` when every step completed. ``False`` if any step raised; in
        that case the error is printed and a best-effort attempt is made to
        close the popup via ``.btnClose``.
    """
    try:
        print("Attempting to apply review filters...")

        _js_click_when_present(driver, (By.ID, "filterBtn"), 15)
        print("Clicked filter button")
        time.sleep(1.5)  # Wait for popup animation

        # Make sure the filter options exist in the popup before clicking them.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#searchPoint")))

        _js_click_when_present(driver, (By.CSS_SELECTOR, "#point_type1_5"), 10)
        print("Clicked 2-star filter")
        time.sleep(0.5)

        _js_click_when_present(driver, (By.CSS_SELECTOR, "#point_type1_6"), 10)
        print("Clicked 1-star filter")
        time.sleep(0.5)

        _js_click_when_present(driver, (By.ID, "btnFilterConfirm"), 10)
        print("Clicked apply button")

        # Wait for filters to apply
        time.sleep(3)
        return True

    except Exception as e:
        print(f"Failed to apply filters: {str(e)}")
        # Try to close the popup if it's still open
        try:
            close_btn = driver.find_element(By.CSS_SELECTOR, ".btnClose")
            driver.execute_script("arguments[0].click();", close_btn)
            print("Closed filter popup after error")
        except Exception:
            # Nothing to close (or closing failed); the caller is told via False.
            pass
        return False

In [None]:
def main(url="https://www.oliveyoung.co.kr/store/goods/getGoodsDetail.do?goodsNo=A000000213153",
         output_path='olive_young_low_rating_reviews.csv',
         max_pages=100):
    """Scrape the filtered reviews of one product page and save them as CSV.

    Steps: open ``url``, activate the review tab (``#reviewInfo``) if it is
    not already active, apply the rating filters, scrape page after page
    until pagination is exhausted (or ``max_pages`` pages were scraped), then
    write the collected rows to ``output_path``.

    Args:
        url: Product detail page to scrape.
        output_path: Destination CSV file (written as UTF-8 with BOM).
        max_pages: Upper bound on the number of pages scraped; guards against
            a pagination loop that never ends.

    Returns:
        None. The browser is always closed, even when an error occurs.
    """
    driver = setup_driver()
    try:
        driver.get(url)

        # Click the review tab if needed
        try:
            review_tab = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "reviewInfo")))
            # Compare whole class tokens: a substring test would also match
            # classes such as "icon" or "button", and fail on a missing attribute.
            tab_classes = (review_tab.get_attribute("class") or "").split()
            if "on" not in tab_classes:
                review_tab.click()
                time.sleep(3)
        except Exception as e:
            print(f"Couldn't click review tab: {str(e)}")

        # Apply review filters (1-star and 2-star only)
        if not apply_review_filters(driver):
            print("Warning: Failed to apply review filters")

        all_reviews = []
        pages_scraped = 0

        # Bound the loop by pages visited (not by reviews collected) so that
        # pages yielding zero parsed reviews cannot keep it running forever.
        while pages_scraped < max_pages:
            print("\nScraping current page...")

            # Wait for reviews to load
            try:
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ".review_list_wrap .inner_list li")))
            except TimeoutException:
                print("Timeout waiting for reviews to load")
                break

            # Scrape current page
            page_reviews = scrape_reviews(driver)
            all_reviews.extend(page_reviews)
            pages_scraped += 1
            print(f"Found {len(page_reviews)} reviews on this page")

            # Get current page number for debugging
            try:
                current_page = driver.find_element(By.CSS_SELECTOR, ".pageing strong").text
                print(f"Current page: {current_page}")
            except Exception:
                # Purely informational; ignore if the pager is not there.
                pass

            # Try to go to next page
            if not handle_pagination(driver):
                print("No more pages available")
                break
        else:
            # Loop ended because the page budget was used up, not via `break`.
            print(f"Reached maximum page limit ({max_pages})")

        if not all_reviews:
            # An empty DataFrame has no columns, so the reordering below would
            # raise KeyError; report the situation instead.
            print("\nNo reviews were collected; nothing to save.")
            return

        # Save to CSV
        df = pd.DataFrame(all_reviews)

        # Reorder columns: fixed fields first, then the poll-derived columns.
        base_columns = ['date', 'score', 'user_tags', 'user_rank', 'usage_duration',
                        'item_option', 'text_review', 'recommendation_score']
        poll_columns = [col for col in df.columns if col not in base_columns]
        df = df[base_columns + poll_columns]

        df.to_csv(output_path, index=False, encoding='utf-8-sig')
        print(f"\nSuccess! Saved {len(df)} low-rating reviews to {output_path}")

    finally:
        # Always release the browser; no further driver is created afterwards.
        driver.quit()
    

# Entry point: run the scraper with its default arguments when this code is
# executed as the main module.
if __name__ == "__main__":
    main()