In [1]:
import time
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

In [None]:
# Step 1: launch Chrome with the window maximized; the chromedriver binary
# path comes from webdriver_manager.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Step 2: open the 1688 login page.
login_url = "https://login.1688.com/"
driver.get(login_url)

# Login is completed by hand (a CAPTCHA may appear); execution blocks here
# until the user presses Enter in the console.
input("🔹 Please log in manually and press Enter here when done...")

# Step 3: scrape configuration.
# The `{}` placeholder in the `page=` query parameter receives the page number;
# each page requests 30 orders (`page_size=30`).
base_url = "https://trade.1688.com/order/buyer_order_list.htm?spm=a360q.8274423.0.0.7c864c9aIL0Os1&scene_type=&source=&old_buyer_order_list=y&purchase_company_name=null&product_name=&start_date=&end_date=&seller_login_id=&trade_status=&trade_type_search=&biz_type_search=&order_id_search=&is_his=y&is_hidden_canceled_offer=&apt=&related_code=&order_settle_flag=&company_name=&keywords=&receiver_tel=&receiver_name=&buyer_name=&down_stream_order_id=&batch_number=&total_fee=&page={}&page_size=30"
num_pages = 68  # number of result pages to visit; adjust as needed

def _find(parent, *args, **kwargs):
    """Return ``parent.find(*args, **kwargs)``, or None when ``parent`` is None."""
    return parent.find(*args, **kwargs) if parent is not None else None


def _text(tag, default):
    """Return the stripped text of ``tag``, or ``default`` when the tag is missing."""
    return tag.text.strip() if tag is not None else default


def _attr(tag, name, default):
    """Return attribute ``name`` of ``tag``; ``default`` if the tag or attribute is missing."""
    return tag.get(name, default) if tag is not None else default


def _parse_item(item):
    """Extract [image, product_link, price, quantity] from one item ``<tr>``."""
    # Image: the original script prefixes the src with a local folder.
    # NOTE(review): presumably images are mirrored under D:/download - confirm.
    image_src = _attr(_find(item.find('td', class_='s1'), 'img'), 'src', None)
    image = 'D:/download' + image_src if image_src is not None else 'No image'

    # Product link
    product_link_tag = _find(item.find("td", class_="s2"), "a", class_="productName")
    product_link = _attr(product_link_tag, "href", "No Link")

    # Unit price
    price_tag = _find(item.find('td', class_='s3'), 'div', {"data-unit-price": "isbuyer"})
    price = _text(price_tag, 'No price')

    # Quantity
    quantity = _text(item.find('td', class_='s4'), 'No quantity')

    return [image, product_link, price, quantity]


def _parse_order(order):
    """Return one CSV row per line item of a single ``li.order-item`` element.

    Any missing element or attribute falls back to its placeholder string
    instead of raising, so one malformed order cannot abort the whole run.
    An order with no ``order-detail`` block yields no rows.
    """
    title = order.find('div', class_='order-title')

    # Order number, with the "订单号：" label stripped
    order_id_span = _find(title, "span", class_="order-id")
    if order_id_span is not None:
        order_number = order_id_span.text.replace("订单号：", "").strip()
    else:
        order_number = "No Order ID"

    # Order date
    date = _text(_find(title, 'span', class_='date'), 'No date')

    # Supplier name and link
    supplier_tag = _find(title, 'a', class_='bannerCorp')
    supplier_name = _attr(supplier_tag, 'data-copytitle', 'No name')
    supplier_link = _attr(supplier_tag, 'href', "No link")

    # Order totals (before / after discount)
    order_detail = order.find('div', class_='order-detail')
    total_td = _find(order_detail, 'td', class_='s6')
    total_no_discount = _text(
        _find(total_td, 'div', class_='sum-payment-original text-lesser'), 'No total'
    )
    total_w_discount = _text(_find(total_td, 'div', class_='total'), 'No total')

    # Item rows are the <tr> elements that carry a style attribute
    items = order_detail.find_all('tr', style=True) if order_detail is not None else []

    order_fields = [order_number, date, supplier_name, supplier_link]
    totals = [total_no_discount, total_w_discount]
    return [order_fields + _parse_item(item) + totals for item in items]


data = []
for page in range(1, num_pages + 1):
    print(f"Scraping page {page}...")

    # Open the orders page and give it a fixed time to finish loading
    driver.get(base_url.format(page))
    time.sleep(5)

    # Parse the rendered page source
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Orders are li.order-item elements under #orderList > #listBox
    order_list = soup.find("div", id="orderList")
    list_box = _find(order_list, "div", id="listBox")
    orders = list_box.find_all("li", class_="order-item") if list_box is not None else []

    if not orders:
        # Make an empty page visible; otherwise an expired session or a page
        # number beyond the last one passes silently.
        print(f"⚠️ No orders found on page {page}")

    for order in orders:
        data.extend(_parse_order(order))

# ✅ *Step 4: Save Data to CSV*
# ✅ *Step 5: Close Browser*
# The write is wrapped in try/finally so the browser is shut down even if the
# CSV cannot be opened or written (e.g. the file is locked by another program).
try:
    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding of the
    # Chinese text; newline="" is what the csv module expects.
    with open("all_orders_2022.csv", "w", newline="", encoding="utf-8-sig") as file:
        writer = csv.writer(file)
        writer.writerow(['order_id', 'date', 'supplier', 'supplier_link', 'image', 'product_link', 'price', 'quantity', 'total_before_discount', 'total_after_discount'])
        writer.writerows(data)

    print("✅ CSV file created successfully!")
finally:
    driver.quit()
