In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from pythainlp.util import isthai
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import re
import random

In [2]:
prefs = {"profile.default_content_setting_values.notifications": 1}
options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", prefs)
options.add_argument("--disable-blink-features=AutomationControlled")

In [3]:
def handle_error(driver):
    try:
        # Switch to the iframe where error handling is expected
        iframe = WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
        )
        print("Switched to iframe.")

        # Try locating the refresh button by XPath
        try:
            refresh_button = driver.find_element(By.XPATH, "/html/body/div/div[2]/div/div[1]/div[2]/center/div[1]/div/div/div")
            print("Found the refresh button for nc_1_refresh1.")
        except Exception as e:
            print(f"Could not find the refresh button for nc_1_refresh1: {e}")
            try:
                refresh_button = driver.find_element(By.XPATH, "/html/body/div/div[2]/div/div[1]/div[2]/center/div[1]/div/div/div/i")
                print("Found the refresh button for nc_1_refresh2.")
            except Exception as e:
                print(f"Could not find the refresh button for nc_1_refresh2: {e}")
                return False  # If neither is found, return False

        # Click the refresh button
        refresh_button.click()
        print("Clicked the refresh button.")

        # Wait for the page to refresh
        time.sleep(5)  # Adjust time as needed
        driver.switch_to.default_content()

        # Handling other issues if no error was detected or handled
        try:
            # Switch to the iframe again (make sure it's still valid)
            iframe = WebDriverWait(driver, 10).until(
                EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
            )
            print("Switched to iframe again for slider handling.")

            try:
                # Wait for the slider to appear
                slider = WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                )

                # Parameters for movement with wider random ranges
                step_size = random.choice([5, 10, 15, 20, 25])  # Random step size from a broader range
                initial_pause_time = random.uniform(0.01, 0.5)  # Random initial pause time between 0.01 and 0.5 seconds
                acceleration = random.uniform(1.2, 5)  # Random acceleration factor between 1.2 and 5

                print(f"Step size: {step_size}, Initial pause time: {initial_pause_time}, Acceleration: {acceleration}")

                # Create ActionChains to interact with the slider
                action = ActionChains(driver)
                action.click_and_hold(slider).perform()

                # Use JavaScript to get the width of the slider and its container
                slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                total_distance = container_width - slider_width

                current_position = driver.execute_script("return arguments[0].getBoundingClientRect().left;", slider)
                current_position -= driver.execute_script("return arguments[0].parentElement.getBoundingClientRect().left;", slider)

                if current_position < total_distance:
                    pause_time = initial_pause_time

                    while current_position < total_distance:
                        move = min(step_size, total_distance - current_position)
                        action.move_by_offset(move, 0).perform()
                        time.sleep(pause_time)
                        current_position += move
                        pause_time = max(pause_time / acceleration, 0.001)  # Ensuring pause time does not go below 0.001 seconds

                    action.release().perform()
                    print("Slider has been moved smoothly.")
                    driver.switch_to.default_content()
                    
                    # Optionally re-check for errors after moving the slider
                    if handle_error(driver):
                        return True  # Exit if error was successfully handled
                else:
                    print("Slider is already in the correct position.")

                time.sleep(5)

            except Exception as e:
                print(f"An error occurred while handling the slider: {e}")

            # Switch back to the main content
            driver.switch_to.default_content()

        except Exception as e:
            print(f"An error occurred while handling the iframe or slider: {e}")
            return False  # Indicate that the error handling failed

    except Exception as e:
        print(f"Error handling failed: {e}")
        driver.switch_to.default_content()
        return False  # Indicate that the error handling failed

    return False

In [4]:
def is_new_comment(comment_data, content):
    return not any(existing['Message'] == content for existing in comment_data)

In [5]:
def is_valid_comment(comment):
    thai_pattern = re.compile(r'[\u0E00-\u0E7F\s\d\W]+')
    return thai_pattern.fullmatch(comment)

In [6]:
def scrape_comments(url, num_comments, star_rating=None):
    driver = webdriver.Chrome(options=options)
    #driver.get("https://www.lazada.co.th/")
    #time.sleep(3)
    driver.get(url)
    time.sleep(3)

    move_slider(driver)

    # Check if the button exists
    button_exit = driver.find_elements(By.CLASS_NAME, "sfo__close")
    if button_exit:
        n = button_exit[0]
        n.click()

    time.sleep(3)
    SCROLL_PAUSE_TIME = 3
    scroll_increment = 400
    while True:
        # Scroll down by the specified increment
        driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
        time.sleep(SCROLL_PAUSE_TIME)
    
        # Check if the filter option is now visible
        try:
            filter_container = WebDriverWait(driver, 1).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "oper"))
            )
            filter_option = filter_container.find_element(By.XPATH, ".//span[@class='condition']")
            if filter_option.is_displayed():
                filter_option.click()  # Click the filter option
                break  # Exit the loop since the filter option is found and clicked
        except Exception as e:
            print(f"Filter not found yet, continue scrolling...")
    
        # Update the current scroll position and total height
        current_scroll_position = driver.execute_script("return window.scrollY;")
        total_height = driver.execute_script("return document.body.scrollHeight;")
    
        # Stop scrolling if the bottom of the page is reached
        if current_scroll_position + scroll_increment >= total_height:
            print("Reached the bottom of the page. Stopping scrolling.")
            break

    time.sleep(3)

    if star_rating is not None:
        try:
            # Wait for the star filter options to appear and click the desired star rating
            star_filter_options = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "next-menu-content"))
            )
            star_options = star_filter_options.find_elements(By.CLASS_NAME, "next-menu-item")
            star_options[6 - star_rating].click()
            time.sleep(5)
            
            # Switch to the iframe
            try:
                iframe = WebDriverWait(driver, 10).until(
                    EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                )
                print("Switched to iframe.")
        
                try:
                    # Wait for the slider to appear
                    slider = WebDriverWait(driver, 10).until(
                        EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                    )
                
                    # Create ActionChains to interact with the slider
                    action = ActionChains(driver)
                    action.click_and_hold(slider).perform()
                    
                    # Use JavaScript to get the width of the slider and its container
                    slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                    container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                    total_distance = container_width - slider_width
                    
                    step_size = random.uniform(3, 7)  # Random step size between 3 and 7 pixels
                    initial_pause_time = random.uniform(0.01, 0.05)  # Random initial pause time between 0.01 and 0.05 seconds
                    acceleration = random.uniform(5)  # Random acceleration factor between 1.5 and 3

                    # Get current position of the slider
                    current_position = driver.execute_script("return arguments[0].getBoundingClientRect().left;", slider)
                    
                    # Convert to pixels
                    current_position -= driver.execute_script("return arguments[0].parentElement.getBoundingClientRect().left;", slider)
                    
                    if current_position < total_distance:
                        pause_time = initial_pause_time
                        
                        # Move the slider incrementally, increasing speed
                        while current_position < total_distance:
                            move = min(step_size, total_distance - current_position)
                            action.move_by_offset(move, 0).perform()
                            time.sleep(pause_time)
                            current_position += move
                            pause_time = max(pause_time / acceleration, 0.01)  # Ensure minimum pause time
                        
                        # Release the slider
                        action.release().perform()
                        print("Slider has been moved smoothly.")
                        handle_error(driver)
                    else:
                        print("Slider is already in the correct position.")
                    time.sleep(5)
                
                except Exception as e:
                    print(f"An error occurred while handling the slider: {e}")
        
                # Switch back to the main content
                driver.switch_to.default_content()
            
            except Exception as e:
                print(f"An error occurred while handling the iframe: {e}")
        
        except Exception as e:
            print(f"An error occurred while managing star ratings or iframe: {e}")

    
    time.sleep(5)
    # Get the updated page source
    page_source = driver.page_source

    # Parse the updated HTML content with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')
  
    comments = soup.find_all('div', class_='item')
    comment_data = []
    while len(comment_data) < num_comments:
        for i, comment in enumerate(comments):
            if len(comment_data) >= num_comments:
                break
            user_element = comment.find('div', class_='middle').find('span')
            message_element = comment.find('div', class_='content')
    
            user = user_element.text.strip() if user_element else ""
            content = message_element.text.strip() if message_element else ""
    
            sentiment = "Positive" if star_rating in [4, 5] else "Neutral" if star_rating == 3 else "Negative" if star_rating in [0, 1, 2] else "-"
    
            if user and content and is_valid_comment(content):
                comment_data.append({'User': user, 'Message': content, 'Sentiment': sentiment})
                print(f"Comment {len(comment_data)}: {user} - {content} - {sentiment}")
    
        if len(comment_data) >= num_comments:
            break
    
        SCROLL_PAUSE_TIME = 3
        scroll_increment = 200
        driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
        time.sleep(SCROLL_PAUSE_TIME)
    
        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'next'))
            )
            if next_button.is_enabled():
                next_button.click()
                time.sleep(5)  # Wait for new content to load
                
                try:
                    iframe = WebDriverWait(driver, 10).until(
                        EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                    )
                    print("Switched to iframe.")
                    
                    try:
                        slider = WebDriverWait(driver, 10).until(
                            EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                        )
                        
                        action = ActionChains(driver)
                        action.click_and_hold(slider).perform()
                        
                        slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                        container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                        total_distance = container_width - slider_width
                        
                        step_size = random.uniform(3, 7)  # Random step size between 3 and 7 pixels
                        initial_pause_time = random.uniform(0.01, 0.05)  # Random initial pause time between 0.01 and 0.05 seconds
                        acceleration = random.uniform(1.5, 3)  # Random acceleration factor between 1.5 and 3
                        
                        current_position = 0
                        pause_time = initial_pause_time
                        
                        while current_position < total_distance:
                            move = min(step_size, total_distance - current_position)
                            action.move_by_offset(move, 0).perform()
                            time.sleep(pause_time)
                            current_position += move
                            pause_time = max(pause_time / acceleration, 0.01)
                        action.release().perform()
                        print("Slider has been moved smoothly.")
                        handle_error(driver)
                    except Exception as e:
                        print(f"An error occurred while handling the slider: {e}")
                    
                    driver.switch_to.default_content()
                    
                    page_source = driver.page_source
                    soup = BeautifulSoup(page_source, 'html.parser')
                    comments = soup.find_all('div', class_='item')
                    
                except Exception as e:
                    print(f"An error occurred while handling the iframe: {e}")
            
            else:
                print("Next button is not enabled or not found. Exiting loop.")
                break
        
        except Exception as e:
            print(f"An error occurred while managing the 'next' button or overall process: {e}")
            break
    
    print(f'All Comments: {len(comment_data)}')
    
    df = pd.DataFrame(comment_data)
    # Save the DataFrame to Excel file

    df
    # Check if there are comments to be saved to Excel
#    if not df.empty:
#        # Get the path to the Downloads folder
#        downloads_path = os.path.join(os.path.expanduser("~"), "Downloads")

#        # Save DataFrame to Excel file in the Downloads folder
#        excel_file_path = os.path.join(downloads_path, "comments.xlsx")
#        df.to_excel(excel_file_path, index=False, na_rep='-')
#        print("Comments saved to Excel.")

    return df

In [1226]:
url = "https://www.lazada.co.th/products/24h-to-sendyouli-10pcs-pampers-i2804412437-s10218505636.html"
num_comments = 10
star_rating = 3
scrape_comments(url, num_comments, star_rating)

An error occurred while handling the slider: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=127.0.6533.120)
Stacktrace:
0   chromedriver                        0x000000011017f8b8 chromedriver + 5179576
1   chromedriver                        0x00000001101772ea chromedriver + 5145322
2   chromedriver                        0x000000010fcee2b0 chromedriver + 389808
3   chromedriver                        0x000000010fcc5614 chromedriver + 222740
4   chromedriver                        0x000000010fd648ed chromedriver + 874733
5   chromedriver                        0x000000010fd794a9 chromedriver + 959657
6   chromedriver                        0x000000010fd5c553 chromedriver + 841043
7   chromedriver                        0x000000010fd2d7f6 chromedriver + 649206
8   chromedriver                        0x000000010fd2e05e chromedriver + 651358
9   chromedriver                        0x0000000110142b20 chromedriver + 49303

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=127.0.6533.120)
Stacktrace:
0   chromedriver                        0x000000011017f8b8 chromedriver + 5179576
1   chromedriver                        0x00000001101772ea chromedriver + 5145322
2   chromedriver                        0x000000010fcee2b0 chromedriver + 389808
3   chromedriver                        0x000000010fcc5614 chromedriver + 222740
4   chromedriver                        0x000000010fd648ed chromedriver + 874733
5   chromedriver                        0x000000010fd794a9 chromedriver + 959657
6   chromedriver                        0x000000010fd5c553 chromedriver + 841043
7   chromedriver                        0x000000010fd2d7f6 chromedriver + 649206
8   chromedriver                        0x000000010fd2e05e chromedriver + 651358
9   chromedriver                        0x0000000110142b20 chromedriver + 4930336
10  chromedriver                        0x0000000110147a36 chromedriver + 4950582
11  chromedriver                        0x0000000110148105 chromedriver + 4952325
12  chromedriver                        0x0000000110124ee9 chromedriver + 4808425
13  chromedriver                        0x00000001101483f9 chromedriver + 4953081
14  chromedriver                        0x0000000110116844 chromedriver + 4749380
15  chromedriver                        0x00000001101675c8 chromedriver + 5080520
16  chromedriver                        0x0000000110167787 chromedriver + 5080967
17  chromedriver                        0x0000000110176ece chromedriver + 5144270
18  libsystem_pthread.dylib             0x00007fff207118fc _pthread_start + 224
19  libsystem_pthread.dylib             0x00007fff2070d443 thread_start + 15


In [13]:
def scrape_comments(url, star_ratings, num_comments_list):
    all_comments = []
    
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    time.sleep(3)
    
    move_slider(driver)  # Call the function to handle the slider if necessary
    
    # Close any popups or unwanted dialogs
    button_exit = driver.find_elements(By.CLASS_NAME, "sfo__close")
    if button_exit:
        button_exit[0].click()

    time.sleep(3)

    # Get the updated page source
    page_source = driver.page_source

    # Parse the updated HTML content with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')

    h1_element = soup.find('div', class_='pdp-mod-product-badge-wrapper')
    thisitem = h1_element.text.strip() if h1_element else "Item not found"
    print("-------------------------------")
    print(f"Item : {thisitem}")

    h2_elements = soup.find_all('a', class_='pdp-link pdp-link_size_s pdp-link_theme_blue pdp-review-summary__link')
    # Extract text from the first matching element (if available)
    quantity = h2_elements[0].text.strip() if h2_elements else "Quantity not found"
    quantity = quantity.replace("คะแนน", "").strip()
    print("-------------------------------")
    print(f"Quantity: {quantity}")
    
    SCROLL_PAUSE_TIME = 3
    scroll_increment = 400
    
    for star_rating, num_comments in zip(star_ratings, num_comments_list):
        driver.get(url)  # Reload the page for each star rating
        time.sleep(3)
        
        move_slider(driver)  # Move slider if needed
        
        while True:
            driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
            time.sleep(SCROLL_PAUSE_TIME)
            
            try:
                filter_container = WebDriverWait(driver, 1).until(
                    EC.visibility_of_element_located((By.CLASS_NAME, "oper"))
                )
                filter_option = filter_container.find_element(By.XPATH, ".//span[@class='condition']")
                if filter_option.is_displayed():
                    filter_option.click()
                    break
            except Exception as e:
                print(f"Filter not found yet, continue scrolling... {e}")

            current_scroll_position = driver.execute_script("return window.scrollY;")
            total_height = driver.execute_script("return document.body.scrollHeight;")
            
            if current_scroll_position + scroll_increment >= total_height:
                print("Reached the bottom of the page. Stopping scrolling.")
                break
        
        time.sleep(3)

        if star_rating is not None:
            try:
                star_filter_options = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "next-menu-content"))
                )
                star_options = star_filter_options.find_elements(By.CLASS_NAME, "next-menu-item")
                if 1 <= star_rating <= len(star_options):
                    star_options[6 - star_rating].click()
                else:
                    print(f"Invalid star rating: {star_rating}")
                time.sleep(5)
                
                try:
                    iframe = WebDriverWait(driver, 10).until(
                        EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                    )
                    print("Switched to iframe.")
                    
                    try:
                        slider = WebDriverWait(driver, 10).until(
                            EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                        )
                        
                        action = ActionChains(driver)
                        action.click_and_hold(slider).perform()
                        
                        slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                        container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                        total_distance = container_width - slider_width
                        
                    # Parameters for movement with wider random ranges
                        step_size = random.choice([5, 10, 15, 20, 25])  # Random step size from a broader range
                        initial_pause_time = random.uniform(0.01, 0.5)  # Random initial pause time between 0.01 and 0.5 seconds
                        acceleration = random.uniform(1.2, 5)  # Random acceleration factor between 1.2 and 5
                        
                        current_position = 0
                        pause_time = initial_pause_time
                        
                        while current_position < total_distance:
                            move = min(step_size, total_distance - current_position)
                            action.move_by_offset(move, 0).perform()
                            time.sleep(pause_time)
                            current_position += move
                            pause_time = max(pause_time / acceleration, 0.01)
                        time.sleep(2)
                        action.release().perform()
                        print("Slider has been moved smoothly.")
                        driver.switch_to.default_content()
                        handle_error(driver)
                    except Exception as e:
                        print(f"An error occurred while handling the slider: {e}")
                
                except Exception as e:
                    print(f"An error occurred while handling the iframe: {e}")

            except Exception as e:
                print(f"An error occurred while managing star ratings or iframe: {e}")

        time.sleep(5)
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        comments = soup.find_all('div', class_='item')
        comment_data = []

        while len(comment_data) < num_comments:
            for comment in comments:
                if len(comment_data) >= num_comments:
                    break
                user_element = comment.find('div', class_='middle').find('span')
                message_element = comment.find('div', class_='content')
                
                user = user_element.text.strip() if user_element else ""
                content = message_element.text.strip() if message_element else ""
                
                sentiment = "Positive" if star_rating in [4, 5] else "Neutral" if star_rating == 3 else "Negative" if star_rating in [0, 1, 2] else "-"
                
                if user and content and is_valid_comment(content):
                    comment_data.append({'Item': thisitem,'Quantity': quantity,'User': user, 'Message': content, 'Sentiment': sentiment})
                    print(f"Comment {len(comment_data)}: {thisitem} - {quantity} - {user} - {content} - {sentiment}")

            if len(comment_data) >= num_comments:
                break
            
            SCROLL_PAUSE_TIME = 3
            scroll_increment = 200
            driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
            time.sleep(SCROLL_PAUSE_TIME)
            
            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'next'))
                )
                if next_button.is_enabled():
                    next_button.click()
                    time.sleep(5)
                    
                    try:
                        iframe = WebDriverWait(driver, 10).until(
                            EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                        )
                        print("Switched to iframe.")
                        
                        try:
                            slider = WebDriverWait(driver, 10).until(
                                EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                            )
                            
                            action = ActionChains(driver)
                            action.click_and_hold(slider).perform()
                            
                            slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                            container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                            total_distance = container_width - slider_width
                            
                        # Parameters for movement with wider random ranges
                            step_size = random.choice([5, 10, 15, 20, 25])  # Random step size from a broader range
                            initial_pause_time = random.uniform(0.01, 0.5)  # Random initial pause time between 0.01 and 0.5 seconds
                            acceleration = random.uniform(1.2, 5)  # Random acceleration factor between 1.2 and 5
                        
                            current_position = 0
                            pause_time = initial_pause_time
                            
                            while current_position < total_distance:
                                move = min(step_size, total_distance - current_position)
                                action.move_by_offset(move, 0).perform()
                                time.sleep(pause_time)
                                current_position += move
                                pause_time = max(pause_time / acceleration, 0.01)
                            time.sleep(2)
                            action.release().perform()
                            print("Slider has been moved smoothly.")
                            driver.switch_to.default_content()
                            handle_error(driver)
                        except Exception as e:
                            print(f"An error occurred while handling the slider: {e}")
                        
                        page_source = driver.page_source
                        soup = BeautifulSoup(page_source, 'html.parser')
                        comments = soup.find_all('div', class_='item')
                        
                    except Exception as e:
                        print(f"An error occurred while handling the iframe: {e}")

                else:
                    print("Next button is not enabled or not found. Exiting loop.")
                    break
            
            except Exception as e:
                print(f"An error occurred while managing the 'next' button or overall process: {e}")
                break
        
        all_comments.extend(comment_data)
    
    driver.quit()
    df = pd.DataFrame(all_comments)
    
    return df


In [15]:
# Define star ratings and corresponding number of comments to collect
star_ratings = [3, 4]  # List of star ratings you want to scrape
num_comments_list = [10, 12]  # Number of comments to collect for each star rating

# Call the scrape_comments function
df = scrape_comments(url, star_ratings, num_comments_list)

# Print the DataFrame to check the scraped comments
df

NameError: name 'url' is not defined

In [None]:
import undetected_chromedriver as uc 

def scrape_comments(urls, star_ratings, num_comments_list):
    all_comments = []
    #options = uc.ChromeOptions()
    
    Id_Item = 1  # Initialize the Id_Item
    
    for url, star_rating, num_comments in zip(urls, star_ratings, num_comments_list):
        if not isinstance(url, str):
            raise ValueError(f"URL must be a string, but got {type(url)}")
        
        driver = uc.Chrome(use_subprocess=False)
        driver.delete_all_cookies()
        driver.get(url)
        time.sleep(3)
        
        #move_slider(driver)  # Call the function to handle the slider if necessary
        
        # Close any popups or unwanted dialogs
        button_exit = driver.find_elements(By.CLASS_NAME, "sfo__close")
        if button_exit:
            button_exit[0].click()
        
        time.sleep(3)
        
        # Get the updated page source
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        h1_element = soup.find('div', class_='pdp-mod-product-badge-wrapper')
        thisitem = h1_element.text.strip() if h1_element else "Item not found"
        print("-------------------------------")
        print(f"Item : {thisitem}")
        
        h2_elements = soup.find_all('a', class_='pdp-link pdp-link_size_s pdp-link_theme_blue pdp-review-summary__link')
        quantity = h2_elements[0].text.strip() if h2_elements else "Quantity not found"
        quantity = quantity.replace("คะแนน", "").strip()
        print("-------------------------------")
        print(f"Quantity: {quantity}")

        SCROLL_PAUSE_TIME = 3
        scroll_increment = 400
        
        for star_rating, num_comments in zip(star_ratings, num_comments_list):
            driver.get(url)  # Reload the page for each star rating
            time.sleep(3)
            
            #move_slider(driver)  # Move slider if needed
            
            while True:
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    filter_container = WebDriverWait(driver, 1).until(
                        EC.visibility_of_element_located((By.CLASS_NAME, "oper"))
                    )
                    filter_option = filter_container.find_element(By.XPATH, ".//span[@class='condition']")
                    if filter_option.is_displayed():
                        filter_option.click()
                        break
                except Exception as e:
                    print(f"Filter not found yet, continue scrolling... {e}")

                current_scroll_position = driver.execute_script("return window.scrollY;")
                total_height = driver.execute_script("return document.body.scrollHeight;")
                
                if current_scroll_position + scroll_increment >= total_height:
                    print("Reached the bottom of the page. Stopping scrolling.")
                    break
            
            time.sleep(3)

            if star_rating is not None:
                try:
                    star_filter_options = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "next-menu-content"))
                    )
                    star_options = star_filter_options.find_elements(By.CLASS_NAME, "next-menu-item")
                    if 1 <= star_rating <= len(star_options):
                        star_options[6 - star_rating].click()
                    else:
                        print(f"Invalid star rating: {star_rating}")
                    time.sleep(5)
                    
                    try:
                        iframe = WebDriverWait(driver, 10).until(
                            EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                        )
                        print("Switched to iframe.")
                        
                        try:
                            slider = WebDriverWait(driver, 10).until(
                                EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                            )
                            
                            action = ActionChains(driver)
                            action.click_and_hold(slider).perform()
                            
                            slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                            container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                            total_distance = container_width - slider_width
                            
                            # Parameters for movement with random ranges
                            min_step_size = 5
                            max_step_size = 25
                            min_pause_time = 0.01
                            max_pause_time = 0.5
                            min_acceleration = 1.2
                            max_acceleration = 5
                            
                            current_position = 0
                            
                            while current_position < total_distance:
                                step_size = random.randint(min_step_size, max_step_size)
                                pause_time = random.uniform(min_pause_time, max_pause_time)
                                acceleration = random.uniform(min_acceleration, max_acceleration)
                                
                                move = min(step_size, total_distance - current_position)
                                action.move_by_offset(move, 0).perform()
                                time.sleep(pause_time)
                                
                                current_position += move
                                pause_time = max(pause_time / acceleration, min_pause_time)
                            
                            # Release the slider after moving
                            time.sleep(2)
                            action.release().perform()
                            print("Slider has been moved smoothly.")
                            driver.switch_to.default_content()
                            handle_error(driver)
                        except Exception as e:
                            print(f"An error occurred while handling the slider: {e}")
                    
                    except Exception as e:
                        print(f"An error occurred while handling the iframe: {e}")

                except Exception as e:
                    print(f"An error occurred while managing star ratings or iframe: {e}")

            time.sleep(5)
            page_source = driver.page_source
            soup = BeautifulSoup(page_source, 'html.parser')
            
            comments = soup.find_all('div', class_='item')
            comment_data = []

            while len(comment_data) < num_comments:
                for comment in comments:
                    if len(comment_data) >= num_comments:
                        break
                    user_element = comment.find('div', class_='middle').find('span')
                    message_element = comment.find('div', class_='content')
                    
                    user = user_element.text.strip() if user_element else ""
                    content = message_element.text.strip() if message_element else ""
                    
                    sentiment = "Positive" if star_rating in [4, 5] else "Neutral" if star_rating == 3 else "Negative" if star_rating in [0, 1, 2] else "-"
                    
                    if user and content and is_valid_comment(content):
                        comment_data.append({'id': len(comment_data) + 1,'Id_Item': Id_Item,'Item': thisitem, 'Quantity': quantity, 'User': user, 'Message': content, 'Sentiment': sentiment, 'Star Rating': star_rating})
                        print(f"Comment {len(comment_data)}: {thisitem} - {quantity} - {user} - {content} - {sentiment} - {star_rating}")

                if len(comment_data) >= num_comments:
                    break
                
                SCROLL_PAUSE_TIME = 3
                scroll_increment = 200
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    next_button = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, 'next'))
                    )
                    if next_button.is_enabled():
                        next_button.click()
                        time.sleep(5)
                        
                        try:
                            iframe = WebDriverWait(driver, 10).until(
                                EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                            )
                            print("Switched to iframe.")
                            
                            try:
                                slider = WebDriverWait(driver, 10).until(
                                    EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                                )
                                
                                action = ActionChains(driver)
                                action.click_and_hold(slider).perform()
                                
                                slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                                container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                                total_distance = container_width - slider_width
                                
                                # Parameters for movement with random ranges
                                min_step_size = 5
                                max_step_size = 25
                                min_pause_time = 0.01
                                max_pause_time = 0.5
                                min_acceleration = 1.2
                                max_acceleration = 5
                                
                                current_position = 0
                                
                                while current_position < total_distance:
                                    step_size = random.randint(min_step_size, max_step_size)
                                    pause_time = random.uniform(min_pause_time, max_pause_time)
                                    acceleration = random.uniform(min_acceleration, max_acceleration)
                                    
                                    move = min(step_size, total_distance - current_position)
                                    action.move_by_offset(move, 0).perform()
                                    time.sleep(pause_time)
                                    
                                    current_position += move
                                    pause_time = max(pause_time / acceleration, min_pause_time)
                                
                                # Release the slider after moving
                                time.sleep(2)
                                action.release().perform()
                                print("Slider has been moved smoothly.")
                                driver.switch_to.default_content()
                                handle_error(driver)
                            except Exception as e:
                                print(f"An error occurred while handling the slider: {e}")
                            
                            page_source = driver.page_source
                            soup = BeautifulSoup(page_source, 'html.parser')
                            comments = soup.find_all('div', class_='item')
                            
                        except Exception as e:
                            print(f"An error occurred while handling the iframe: {e}")

                    else:
                        print("Next button is not enabled or not found. Exiting loop.")
                        break
                
                except Exception as e:
                    print(f"An error occurred while managing the 'next' button or overall process: {e}")
                    break
            
            all_comments.extend(comment_data)
    
    driver.quit()
    df = pd.DataFrame(all_comments)
    
    return df

In [59]:
import undetected_chromedriver as uc 

def scrape_comments(urls, star_ratings, num_comments_list):
    all_comments = []
    #options = uc.ChromeOptions()
    
    Id_Item = 1  # Initialize the Id_Item
    
    for url, star_rating, num_comments in zip(urls, star_ratings, num_comments_list):
        if not isinstance(url, str):
            raise ValueError(f"URL must be a string, but got {type(url)}")
        
        driver = uc.Chrome(use_subprocess=False)
        driver.delete_all_cookies()
        driver.get(url)
        time.sleep(3)
        
        #move_slider(driver)  # Call the function to handle the slider if necessary
        
        # Close any popups or unwanted dialogs
        button_exit = driver.find_elements(By.CLASS_NAME, "sfo__close")
        if button_exit:
            button_exit[0].click()
        
        time.sleep(3)
        
        # Get the updated page source
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        h1_element = soup.find('div', class_='pdp-mod-product-badge-wrapper')
        thisitem = h1_element.text.strip() if h1_element else "Item not found"
        print("-------------------------------")
        print(f"Item : {thisitem}")
        
        h2_elements = soup.find_all('a', class_='pdp-link pdp-link_size_s pdp-link_theme_blue pdp-review-summary__link')
        quantity = h2_elements[0].text.strip() if h2_elements else "Quantity not found"
        quantity = quantity.replace("คะแนน", "").strip()
        print("-------------------------------")
        print(f"Quantity: {quantity}")

        SCROLL_PAUSE_TIME = 3
        scroll_increment = 400
        
        for star_rating, num_comments in zip(star_ratings, num_comments_list):
            driver.get(url)  # Reload the page for each star rating
            time.sleep(3)
            
            #move_slider(driver)  # Move slider if needed
            
            while True:
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    filter_container = WebDriverWait(driver, 1).until(
                        EC.visibility_of_element_located((By.CLASS_NAME, "oper"))
                    )
                    filter_option = filter_container.find_element(By.XPATH, ".//span[@class='condition']")
                    if filter_option.is_displayed():
                        filter_option.click()
                        break
                except Exception as e:
                    print(f"Filter not found yet, continue scrolling... {e}")

                current_scroll_position = driver.execute_script("return window.scrollY;")
                total_height = driver.execute_script("return document.body.scrollHeight;")
                
                if current_scroll_position + scroll_increment >= total_height:
                    print("Reached the bottom of the page. Stopping scrolling.")
                    break
            
            time.sleep(3)

            if star_rating is not None:
                try:
                    star_filter_options = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "next-menu-content"))
                    )
                    star_options = star_filter_options.find_elements(By.CLASS_NAME, "next-menu-item")
                    if 1 <= star_rating <= len(star_options):
                        star_options[6 - star_rating].click()
                    else:
                        print(f"Invalid star rating: {star_rating}")
                    time.sleep(5)
                    
                    try:
                        iframe = WebDriverWait(driver, 10).until(
                            EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                        )
                        print("Switched to iframe.")
                        
                        try:
                            slider = WebDriverWait(driver, 10).until(
                                EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                            )
                            
                            action = ActionChains(driver)
                            action.click_and_hold(slider).perform()
                            
                            slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                            container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                            total_distance = container_width - slider_width
                            
                            # Parameters for movement with wider random ranges
                            step_size = random.choice([5, 10, 15, 20, 25])  # Random step size from a broader range
                            initial_pause_time = random.uniform(0.01, 0.5)  # Random initial pause time between 0.01 and 0.5 seconds
                            acceleration = random.uniform(1.2, 5)  # Random acceleration factor between 1.2 and 5
                            
                            current_position = 0
                            pause_time = initial_pause_time
                            
                            while current_position < total_distance:
                                move = min(step_size, total_distance - current_position)
                                action.move_by_offset(move, 0).perform()
                                time.sleep(pause_time)
                                current_position += move
                                pause_time = max(pause_time / acceleration, 0.01)
                            time.sleep(2)
                            action.release().perform()
                            print("Slider has been moved smoothly.")
                            driver.switch_to.default_content()
                            handle_error(driver)
                        except Exception as e:
                            print(f"An error occurred while handling the slider: {e}")
                    
                    except Exception as e:
                        print(f"An error occurred while handling the iframe: {e}")

                except Exception as e:
                    print(f"An error occurred while managing star ratings or iframe: {e}")

            time.sleep(5)
            page_source = driver.page_source
            soup = BeautifulSoup(page_source, 'html.parser')
            
            comments = soup.find_all('div', class_='item')
            comment_data = []

            while len(comment_data) < num_comments:
                for comment in comments:
                    if len(comment_data) >= num_comments:
                        break
                    user_element = comment.find('div', class_='middle').find('span')
                    message_element = comment.find('div', class_='content')
                    
                    user = user_element.text.strip() if user_element else ""
                    content = message_element.text.strip() if message_element else ""
                    
                    sentiment = "Positive" if star_rating in [4, 5] else "Neutral" if star_rating == 3 else "Negative" if star_rating in [0, 1, 2] else "-"
                    
                    if user and content and is_valid_comment(content):
                        comment_data.append({'id': len(comment_data) + 1, 'Id_Item': Id_Item, 'Item': thisitem, 'Quantity': quantity, 'User': user, 'Message': content, 'Sentiment': sentiment})
                        print(f"Comment {len(comment_data)}: {Id_Item} - {thisitem} - {quantity} - {user} - {content} - {sentiment} - {star_rating}")

                if len(comment_data) >= num_comments:
                    break
                
                SCROLL_PAUSE_TIME = 3
                scroll_increment = 200
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    next_button = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, 'next-btn next-btn-normal next-btn-medium next-pagination-item next'))
                    )
                    if next_button.is_enabled():
                        next_button.click()
                        time.sleep(5)
                        
                        try:
                            iframe = WebDriverWait(driver, 10).until(
                                EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                            )
                            print("Switched to iframe.")
                            
                            try:
                                slider = WebDriverWait(driver, 10).until(
                                    EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                                )
                                
                                action = ActionChains(driver)
                                action.click_and_hold(slider).perform()
                                
                                slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                                container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                                total_distance = container_width - slider_width
                                
                                # Parameters for movement with wider random ranges
                                step_size = random.choice([5, 10, 15, 20, 25])  # Random step size from a broader range
                                initial_pause_time = random.uniform(0.01, 0.5)  # Random initial pause time between 0.01 and 0.5 seconds
                                acceleration = random.uniform(1.2, 5)  # Random acceleration factor between 1.2 and 5
                                
                                current_position = 0
                                pause_time = initial_pause_time
                                
                                while current_position < total_distance:
                                    move = min(step_size, total_distance - current_position)
                                    action.move_by_offset(move, 0).perform()
                                    time.sleep(pause_time)
                                    current_position += move
                                    pause_time = max(pause_time / acceleration, 0.01)
                                time.sleep(2)
                                action.release().perform()
                                print("Slider has been moved smoothly.")
                                driver.switch_to.default_content()
                                handle_error(driver)
                            except Exception as e:
                                print(f"An error occurred while handling the slider: {e}")
                            
                            page_source = driver.page_source
                            soup = BeautifulSoup(page_source, 'html.parser')
                            comments = soup.find_all('div', class_='item')
                            
                        except Exception as e:
                            print(f"An error occurred while handling the iframe: {e}")

                    else:
                        print("Next button is not enabled or not found. Exiting loop.")
                        break
                
                except Exception as e:
                    print(f"An error occurred while managing the 'next' button or overall process: {e}")
                    break
            
            all_comments.extend(comment_data)
            Id_Item += 1
            
    driver.quit()
    df = pd.DataFrame(all_comments)
    
    return df

In [73]:
def scrape_comments(urls, star_ratings, num_comments_list):
    all_comments = []
    comment_id = 1  # Initialize global comment ID

    for index, url in enumerate(urls):
        if not isinstance(url, str):
            raise ValueError(f"URL must be a string, but got {type(url)}")
        
        Id_Item = index + 1  # Use index to set Id_Item, starting from 1
        
        driver = uc.Chrome(use_subprocess=False)
        driver.delete_all_cookies()
        driver.get(url)
        time.sleep(3)
        
        # Close any popups or unwanted dialogs
        button_exit = driver.find_elements(By.CLASS_NAME, "sfo__close")
        if button_exit:
            button_exit[0].click()
        
        time.sleep(3)
        
        # Get the updated page source
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        h1_element = soup.find('div', class_='pdp-mod-product-badge-wrapper')
        thisitem = h1_element.text.strip() if h1_element else "Item not found"
        print("-------------------------------")
        print(f"Item : {thisitem}")
        
        h2_elements = soup.find_all('a', class_='pdp-link pdp-link_size_s pdp-link_theme_blue pdp-review-summary__link')
        quantity = h2_elements[0].text.strip() if h2_elements else "Quantity not found"
        quantity = quantity.replace("คะแนน", "").strip()
        print("-------------------------------")
        print(f"Quantity: {quantity}")

        for star_rating, num_comments in zip(star_ratings, num_comments_list):
            driver.get(url)  # Reload the page for each star rating
            time.sleep(3)
            
            # Scroll to reveal star filter options
            SCROLL_PAUSE_TIME = 3
            scroll_increment = 400
            
            while True:
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    filter_container = WebDriverWait(driver, 1).until(
                        EC.visibility_of_element_located((By.CLASS_NAME, "oper"))
                    )
                    filter_option = filter_container.find_element(By.XPATH, ".//span[@class='condition']")
                    if filter_option.is_displayed():
                        filter_option.click()
                        break
                except Exception as e:
                    print(f"Filter not found yet, continue scrolling... {e}")

                current_scroll_position = driver.execute_script("return window.scrollY;")
                total_height = driver.execute_script("return document.body.scrollHeight;")
                
                if current_scroll_position + scroll_increment >= total_height:
                    print("Reached the bottom of the page. Stopping scrolling.")
                    break
            
            time.sleep(3)

            if star_rating is not None:
                try:
                    star_filter_options = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "next-menu-content"))
                    )
                    star_options = star_filter_options.find_elements(By.CLASS_NAME, "next-menu-item")
                    if 1 <= star_rating <= len(star_options):
                        star_options[6 - star_rating].click()
                    else:
                        print(f"Invalid star rating: {star_rating}")
                    time.sleep(5)
                    
                    try:
                        iframe = WebDriverWait(driver, 10).until(
                            EC.frame_to_be_available_and_switch_to_it((By.ID, "baxia-dialog-content"))
                        )
                        print("Switched to iframe.")
                        
                        try:
                            slider = WebDriverWait(driver, 10).until(
                                EC.visibility_of_element_located((By.CSS_SELECTOR, "span#nc_1_n1z.btn_slide"))
                            )
                            
                            action = ActionChains(driver)
                            action.click_and_hold(slider).perform()
                            
                            slider_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').offsetWidth;")
                            container_width = driver.execute_script("return document.querySelector('span#nc_1_n1z.btn_slide').parentElement.offsetWidth;")
                            total_distance = container_width - slider_width
                            
                            # Parameters for movement with wider random ranges
                            step_size = random.choice([5, 10, 15, 20, 25])
                            initial_pause_time = random.uniform(0.01, 0.5)
                            acceleration = random.uniform(1.2, 5)
                            
                            current_position = 0
                            pause_time = initial_pause_time
                            
                            while current_position < total_distance:
                                move = min(step_size, total_distance - current_position)
                                action.move_by_offset(move, 0).perform()
                                time.sleep(pause_time)
                                current_position += move
                                pause_time = max(pause_time / acceleration, 0.01)
                            time.sleep(2)
                            action.release().perform()
                            print("Slider has been moved smoothly.")
                            driver.switch_to.default_content()
                            handle_error(driver)
                        except Exception as e:
                            print(f"An error occurred while handling the slider: {e}")
                    
                    except Exception as e:
                        print(f"An error occurred while handling the iframe: {e}")

                except Exception as e:
                    print(f"An error occurred while managing star ratings or iframe: {e}")

            time.sleep(5)
            page_source = driver.page_source
            soup = BeautifulSoup(page_source, 'html.parser')
            
            comments = soup.find_all('div', class_='item')
            comment_data = []

            while len(comment_data) < num_comments:
                for comment in comments:
                    if len(comment_data) >= num_comments:
                        break
                    user_element = comment.find('div', class_='middle').find('span')
                    message_element = comment.find('div', class_='content')
                    
                    user = user_element.text.strip() if user_element else ""
                    content = message_element.text.strip() if message_element else ""
                    
                    sentiment = "Positive" if star_rating in [4, 5] else "Neutral" if star_rating == 3 else "Negative" if star_rating in [0, 1, 2] else "-"
                    
                    if user and content and is_valid_comment(content):
                        comment_data.append({
                            'id': comment_id,  # Use global comment_id
                            'Id_Item': Id_Item,
                            'Item': thisitem,
                            'Quantity': quantity,
                            'User': user,
                            'Message': content,
                            'Sentiment': sentiment,
                            'Star_rating': star_rating
                        })
                        print(f"Comment {comment_id}: {Id_Item} - {thisitem} - {quantity} - {user} - {content} - {sentiment} - {star_rating}")
                        
                        comment_id += 1  # Increment the global comment_id

                if len(comment_data) >= num_comments:
                    break
                
                driver.execute_script(f"window.scrollBy(0, {scroll_increment});")
                time.sleep(SCROLL_PAUSE_TIME)
                
                try:
                    next_button = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, 'next-btn next-btn-normal next-btn-medium next-pagination-item next'))
                    )
                    if next_button.is_enabled():
                        next_button.click()
                        time.sleep(5)
                        
                        page_source = driver.page_source
                        soup = BeautifulSoup(page_source, 'html.parser')
                        comments = soup.find_all('div', class_='item')
                    else:
                        print("Next button is not enabled or not found. Exiting loop.")
                        break
                
                except Exception as e:
                    print(f"An error occurred while managing the 'next' button or overall process: {e}")
                    break
            
            all_comments.extend(comment_data)
            
    driver.quit()
    df = pd.DataFrame(all_comments)
    
    return df

In [75]:
# Example data
urls = [
    'https://www.lazada.co.th/products/the-whispercod-10pcs-mlxl-i5072137207-s21416264761.html',
    'https://www.lazada.co.th/products/uniqare-10-1-1000-mlxl-i4889182334-s20579202249.html'
]

# Corresponding star ratings for each URL (example: 4-star and 5-star filters)
star_ratings = [5,3]  # Each URL might need different star ratings

# Corresponding number of comments to scrape for each star rating
num_comments_list = [2,2]  # Adjust based on your needs

# Call the function
df = scrape_comments(urls, star_ratings, num_comments_list)

# Display the resulting DataFrame
print(df)

-------------------------------
Item : 【The whisper】COD ผ้าอ้อมผู้ใหญ่ 10PCS M/L/XL การดูดซับที่ดี รุ่นบางเบาระบายอากาศดี ใช้ได้ทั้งชายหญิง
-------------------------------
Quantity: 564
An error occurred while handling the iframe: Message: 
Stacktrace:
0   undetected_chromedriver             0x000000010c9e32a8 undetected_chromedriver + 5096104
1   undetected_chromedriver             0x000000010c9daada undetected_chromedriver + 5061338
2   undetected_chromedriver             0x000000010c565fd0 undetected_chromedriver + 389072
3   undetected_chromedriver             0x000000010c5b29ab undetected_chromedriver + 702891
4   undetected_chromedriver             0x000000010c5b2a81 undetected_chromedriver + 703105
5   undetected_chromedriver             0x000000010c5f6cd4 undetected_chromedriver + 982228
6   undetected_chromedriver             0x000000010c5d6f1d undetected_chromedriver + 851741
7   undetected_chromedriver             0x000000010c5f4250 undetected_chromedriver + 971344
8   undet

In [81]:
# Check if there are comments to be saved to Excel
if not df.empty:
    # Get the path to the Downloads folder
    downloads_path = os.path.join(os.path.expanduser("~"), "Downloads")
    # Save DataFrame to Excel file in the Downloads folder
    excel_file_path = os.path.join(downloads_path, "comments1.xlsx")
    df.to_excel(excel_file_path, index=False, na_rep='-')
    print("Comments saved to Excel.")

Comments saved to Excel.
