In [172]:
import csv
import random
import re
import time

import undetected_chromedriver as uc
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [173]:
# Search terms to scrape; the main loop writes one CSV file per term.
SEARCH_QUERIES = [
    "laptop",
    "smartphone",
    "tablet",
    "gaming mouse",
]
# Number of result pages visited for each search term.
NUM_PAGES = 5
# Per-page product limit handed to get_products_from_page().
MAX_PRODUCTS_PER_PAGE = 60

In [174]:
# User-agent string injected into the browser via the `user-agent=` launch argument.
# NOTE(review): this is a Firefox-style UA while the session is driven through Chrome - confirm intended.
USER_AGENT = "Mozilla/5.0 (X11; Linux; en-AU; rv:135.0) Gecko/20161700 Firefox/135.0"

In [175]:
def human_typing(element, text):
    """Send ``text`` to ``element`` one character at a time.

    After every keystroke the function sleeps for a random interval
    between 0.2 and 0.5 seconds.

    Args:
        element: Object exposing ``send_keys`` (e.g. a Selenium WebElement).
        text: The characters to type, in order.
    """
    for keystroke in text:
        element.send_keys(keystroke)
        pause_seconds = random.uniform(0.2, 0.5)
        time.sleep(pause_seconds)


In [176]:
def close_popup_if_present(driver):
    """Close the ``.pop-close-btn`` popup if one shows up within 5 seconds.

    This is a best-effort helper: WebDriver failures are reported on stdout
    and never propagated. Unlike a bare ``except``, ``KeyboardInterrupt`` and
    ``SystemExit`` are NOT swallowed, so the notebook can still be interrupted.

    Args:
        driver: The Selenium WebDriver instance.
    """
    try:
        # until() returns the located element, so no second lookup is needed.
        close_btn = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".pop-close-btn"))
        )
        close_btn.click()
    except TimeoutException:
        # Must come first: TimeoutException is a subclass of WebDriverException.
        print("[ ] No popup found")
        return
    except WebDriverException as exc:
        # The popup exists but could not be dismissed (or the session failed);
        # report it distinctly instead of claiming there was no popup.
        print(f"[!] Popup could not be closed: {exc}")
        return

    print("[x] Closed popup")
    time.sleep(1)

In [177]:
def human_scroll(driver, total_scrolls=6, scroll_delay_range=(1, 2.5), scroll_height_step=700):
    """Scroll the page down in several randomly sized, randomly paced steps.

    Args:
        driver: Object exposing ``execute_script`` (e.g. a Selenium WebDriver).
        total_scrolls: How many scroll steps to perform.
        scroll_delay_range: ``(low, high)`` bounds, in seconds, for the random
            pause after each step.
        scroll_height_step: Base scroll distance in pixels; each step is
            offset by a random amount between -100 and +100.
    """
    for _ in range(total_scrolls):
        jitter = random.randint(-100, 100)
        distance = scroll_height_step + jitter
        driver.execute_script(f"window.scrollBy(0, {distance});")
        print(f"Scrolled by {distance}px")
        pause_seconds = random.uniform(*scroll_delay_range)
        time.sleep(pause_seconds)

In [178]:
from datetime import datetime

def _parse_price(price_text):
    """Convert a displayed price string to a float, or None if it holds no usable number."""
    digits = re.sub(r"[^\d.]", "", price_text)
    if not digits:
        return None
    try:
        return float(digits)
    except ValueError:
        # e.g. "1.2.3" or a lone "." - not a parseable number.
        return None


def _first_text(parent, css_selector, default="N/A"):
    """Return the stripped text of the first CSS match under ``parent``, or ``default``."""
    matches = parent.find_elements(By.CSS_SELECTOR, css_selector)
    return matches[0].text.strip() if matches else default


def get_products_from_page(driver, max_products: int = 30) -> list[dict]:
    """
    Scrapes product information from the current AliExpress results page.

    Args:
        driver: The Selenium WebDriver instance.
        max_products: Maximum number of products to extract from the page.

    Returns:
        A list of dictionaries containing product data, with the keys
        ``name``, ``url``, ``original_price``, ``current_price``, ``rating``,
        ``orders``, ``thumbnail`` and ``scrape_time``. Missing text fields are
        ``"N/A"``; missing or unparseable prices are ``None``.
    """
    products = []
    product_cards = driver.find_elements(By.CSS_SELECTOR, ".hy_hz .hy_bn.search-item-card-wrapper-gallery")
    print(f"  [DEBUG] Found {len(product_cards)} products on current page")

    for card in product_cards:
        # Honour the caller-supplied limit (only successfully scraped cards count).
        if len(products) >= max_products:
            break
        try:
            # Product name
            name = _first_text(card, "h3.kt_ki")

            # Product link
            link_elem = card.find_elements(By.TAG_NAME, "a")
            link = link_elem[0].get_attribute("href") if link_elem else "N/A"

            # Current price: the value is split across several <span> children.
            # The text is reset for every card so a card without a price can
            # never inherit the previous card's value.
            current_price_text = ""
            current_price_container = card.find_elements(By.CSS_SELECTOR, ".kt_lg")
            if current_price_container:
                spans = current_price_container[0].find_elements(By.TAG_NAME, "span")
                current_price_text = "".join(span.text for span in spans).strip()
            current_price_value = _parse_price(current_price_text)

            # Original (pre-discount) price: text of the first <span>, if any.
            original_price_text = ""
            original_price_container = card.find_elements(By.CSS_SELECTOR, ".kt_lh")
            if original_price_container:
                original_spans = original_price_container[0].find_elements(By.TAG_NAME, "span")
                if original_spans:
                    original_price_text = original_spans[0].text.strip()
            original_price_value = _parse_price(original_price_text)

            # Rating
            rating = _first_text(card, ".kt_ll .kt_j1")

            # Orders
            orders_text = _first_text(card, ".kt_j7")

            # Thumbnail image
            image_elem = card.find_elements(By.CSS_SELECTOR, ".mv_mx img.mv_ei")
            image_url = image_elem[0].get_attribute("src") if image_elem else "N/A"

            # Timestamp
            timestamp = datetime.now().isoformat()

            # Add to product list
            products.append({
                "name": name,
                "url": link,
                "original_price": original_price_value,
                "current_price": current_price_value,
                "rating": rating,
                "orders": orders_text,
                "thumbnail": image_url,
                "scrape_time": timestamp
            })

            print(f"  [+] {name} | {current_price_value} | {link}")

        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(f"  [!] Skipped product due to error: {e}")
            continue

    return products


In [179]:
# Chrome launch options: every flag below is forwarded to the browser
# through add_argument(), in the order listed.
options = uc.ChromeOptions()
for chrome_flag in (
    f'user-agent={USER_AGENT}',
    "--disable-blink-features=AutomationControlled",
    "--mute-audio",
    "--start-maximized",
):
    options.add_argument(chrome_flag)

In [180]:
# Start the undetected-chromedriver Chrome session with the configured `options`.
driver = uc.Chrome(options=options)
# Shared explicit wait bound to this driver, with a 15-second timeout.
wait = WebDriverWait(driver, 15)


In [None]:
# SCRAPE EACH QUERY

# Column order of every results CSV; matches the keys of the product dicts.
CSV_FIELDNAMES = ["name","url","original_price","current_price","rating","orders","thumbnail","scrape_time"]


def _go_to_next_page(driver) -> bool:
    """Scroll to the pagination bar and click "next"; return True if the click was issued."""
    try:
        # Scroll to the bottom in steps so the pagination controls are rendered.
        for _ in range(8):
            driver.execute_script("window.scrollBy(0, 1000);")
            time.sleep(1)

        pagination_buttons = driver.find_elements(By.CSS_SELECTOR, ".comet-pagination-next .comet-pagination-item-link")
        if not pagination_buttons:
            print("No pagination buttons found.")
            return False

        next_button = pagination_buttons[0]
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_button)
        time.sleep(random.uniform(1.5, 3))
        next_button.click()
        print("Clicked Next Page")
        time.sleep(3)
        return True
    except Exception as e:
        print(f"Could not click next page: {e}")
        return False


def _scrape_query(driver, wait, query, num_pages, max_products_per_page) -> list[dict]:
    """Run one search and collect products from up to ``num_pages`` result pages.

    Errors are reported on stdout and whatever was collected up to that
    point is still returned, so partial results are never lost.
    """
    collected = []
    try:
        print(f"\n=== Starting search for '{query}' ===")
        driver.get("https://www.aliexpress.com/?lan=en&shipCountry=US")
        time.sleep(5)
        close_popup_if_present(driver)

        search_box = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".search--keyword--15P08Ji")))
        search_box.clear()
        human_typing(search_box, query)
        time.sleep(1)
        search_box.send_keys(Keys.RETURN)
        time.sleep(4)

        for page_num in range(1, num_pages + 1):
            print(f"Scraping Page {page_num} for '{query}'")

            human_scroll(driver)
            time.sleep(2)

            collected.extend(get_products_from_page(driver, max_products=max_products_per_page))

            # Go to next page; stop early when there is none or the click fails.
            if page_num < num_pages and not _go_to_next_page(driver):
                break

    except Exception as e:
        print(f"[!] Error during search for '{query}': {e}")

    return collected


def _save_products_csv(products, filename):
    """Write ``products`` (list of dicts) to ``filename`` as UTF-8 CSV with a header row."""
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=CSV_FIELDNAMES)
        writer.writeheader()
        writer.writerows(products)


try:
    for query in SEARCH_QUERIES:
        all_products = _scrape_query(driver, wait, query, NUM_PAGES, MAX_PRODUCTS_PER_PAGE)

        # Save results
        filename = f"{query}_aliexpress_products.csv"
        _save_products_csv(all_products, filename)

        print(f"Finished '{query}'. {len(all_products)} products saved to {filename}")
finally:
    # Always release the browser, even on Ctrl-C or a file-write error,
    # so no orphaned Chrome process is left behind.
    driver.quit()

print("\n All done!")