In [1]:
# Import necessary modules
from selenium import webdriver  # Controls the Chrome browser
from selenium.webdriver.common.by import By  # Helps locate elements on a webpage
import time  # Used to add delays in execution
import json  # Handles JSON data parsing
import csv  # Provides functionality for writing CSV files
import re  # Provides support for regular expressions

# Define the URL of the Lazada product page
url = "https://www.lazada.com.my/products/aiyishiswiss-original-genuine-automatic-mechanical-watch-mens-waterproof-luminous-calendar-imported-movement-business-steel-watch-i3115027473-s15622291423.html?pvid=c2637641-278d-487f-a76a-a48f0dbd34e0&search=jfy&scm=1007.45039.429976.0&priceCompare=skuId%3A15622291423%3Bsource%3Atpp-recommend-plugin-32104%3Bsn%3Ac2637641-278d-487f-a76a-a48f0dbd34e0%3BoriginPrice%3A2224%3BdisplayPrice%3A2224%3BsinglePromotionId%3A900000037192458%3BsingleToolCode%3ApromPrice%3BvoucherPricePlugin%3A0%3Btimestamp%3A1742043396081&spm=a2o4k.homepage.just4u.d_3115027473"


def extract_item_id(product_url):
    """Return the numeric item ID embedded in a Lazada product URL.

    The product URL above ends its path with ``-i<itemId>-s<skuId>.html``.
    That full tail is tried first so that a slug fragment such as
    ``wifi6-smart`` (which also contains "i<digits>-s") is not mistaken for
    the ID. If the tail is absent, the looser ``i<digits>-s`` pattern is
    used as a fallback.

    Args:
        product_url: Full product page URL.

    Returns:
        The item ID as a string of digits.

    Raises:
        ValueError: If no item ID can be found in ``product_url``.
    """
    match = (
        re.search(r"i(\d+)-s\d+\.html", product_url)
        or re.search(r"i(\d+)-s", product_url)
    )
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"Could not find a Lazada item ID in URL: {product_url!r}")
    return match.group(1)


# Extract the item ID from the product URL
item_id = extract_item_id(url)

# Review API endpoint for the extracted item; the page number is appended
# to this prefix for every request.
base_url = f'https://my.lazada.com.my/pdp/review/getReviewList?itemId={item_id}&pageSize=5&filter=0&sort=0&pageNo='

# Chrome launch flags: open maximized, and switch off the Blink
# "AutomationControlled" feature (intended to make the automated browser
# less likely to be detected by the website).
options = webdriver.ChromeOptions()
for chrome_flag in (
    "start-maximized",
    "--disable-blink-features=AutomationControlled",
):
    options.add_argument(chrome_flag)

# Start Chrome under Selenium's control
driver = webdriver.Chrome(options=options)

# Load review page 1 so that any CAPTCHA appears in the browser window,
# then block until the user confirms it has been solved by hand.
driver.get(f"{base_url}1")
input("Solve the CAPTCHA in the opened browser, then press Enter to continue...")

# Number of review pages to download
PAGES_TO_FETCH = 5


def review_rows(data):
    """Convert one parsed review-API response into CSV rows.

    Args:
        data: The decoded JSON object of a review page. Reviews are read
            from ``data["model"]["items"]``.

    Returns:
        A list of ``[buyer name, review text, review date]`` rows. Entries
        without a ``"reviewTime"`` key are skipped (the script only wants
        top-level reviews). A field that is missing or JSON ``null`` is
        written as ``"N/A"``.
    """
    # "model" or "items" may be absent or null in the response; treat both
    # cases as "no reviews" instead of failing on None.
    items = (data.get("model") or {}).get("items") or []

    rows = []
    for review in items:
        if "reviewTime" not in review:
            continue

        def field(key):
            # dict.get's default only covers a missing key, not a null value.
            value = review.get(key)
            return "N/A" if value is None else value

        rows.append([
            field("buyerName"),
            field("reviewContent").replace("\n", " "),  # keep each review on one line
            field("reviewTime"),
        ])
    return rows


try:
    # Open a CSV file to store the reviews
    with open("SwissWatchReview.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Buyer Name", "Review", "Date"])  # Write CSV header

        # Loop through the first PAGES_TO_FETCH pages of reviews
        for page in range(1, PAGES_TO_FETCH + 1):
            # Use a separate name so the product URL in `url` is not overwritten
            page_url = base_url + str(page)
            print(f"Fetching: {page_url}")  # Display the URL being fetched
            driver.get(page_url)  # Open the page in the browser
            time.sleep(3)  # Wait for the page to load

            # The browser shows the API's JSON response as the page body text
            page_source = driver.find_element(By.TAG_NAME, "body").text
            try:
                data = json.loads(page_source)
            except json.JSONDecodeError as err:
                # A non-JSON body usually means the request was blocked
                # (for example, a new CAPTCHA page was served).
                raise ValueError(
                    f"Page {page} did not return JSON (blocked or CAPTCHA?): {page_url}"
                ) from err

            writer.writerows(review_rows(data))

    print("Reviews saved to SwissWatchReview.csv")  # Confirm successful saving
finally:
    # Always close the browser, even if scraping failed part-way through
    driver.quit()

Solve the CAPTCHA in the opened browser, then press Enter to continue... 


Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=3115027473&pageSize=5&filter=0&sort=0&pageNo=1
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=3115027473&pageSize=5&filter=0&sort=0&pageNo=2
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=3115027473&pageSize=5&filter=0&sort=0&pageNo=3
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=3115027473&pageSize=5&filter=0&sort=0&pageNo=4
Fetching: https://my.lazada.com.my/pdp/review/getReviewList?itemId=3115027473&pageSize=5&filter=0&sort=0&pageNo=5
Reviews saved to SwissWatchReview.csv
