In [7]:
import time
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup


# Initialize the WebDriver
driver = webdriver.Chrome()

# Function to scrape a page and extract product data
def scrape_page(url, page_num):
    driver.get(url)
    time.sleep(3)  # Wait for the page to load
    
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    # Find all product divs in the list
    product_divs = soup.find_all('div', class_='product-info')
    
    if not product_divs:
        return None
    
    products = []
    for product_div in product_divs:
        try:
            # Extract product details
            product_name = product_div.find('h3', class_='product-title').get_text(strip=True)
            original_price = product_div.find('p', class_='product-market-price').get_text(strip=True)
            discounted_price = product_div.find('div', class_='product-price-main').get_text(strip=True)

            # Convert all fields to uppercase
            products.append({
                'Product Name': product_name.upper(),
                'Original Price': original_price.upper(),
                'Discounted Price': discounted_price.upper()
            })
        except Exception as e:
            print(f"Error extracting product info: {e}")
    
    return products

# Function to check if there's a next page
def has_next_page(page_num):
    try:
        next_button = driver.find_element(By.XPATH, f"//a[contains(@href, '?page={page_num + 1}')]")
        return next_button.is_displayed() and next_button.is_enabled()
    except Exception:
        return False

# Function to scrape a given category
def scrape_category(base_url, category_name):
    page_num = 1
    all_products = []

    print(f"Scraping category: {category_name}")

    while True:
        url = f"{base_url}?page={page_num}"
        print(f"Scraping page {page_num}...")
        
        products = scrape_page(url, page_num)
        if products is None:
            print(f"No products found on page {page_num}, stopping.")
            break
        
        all_products.extend(products)

        if not has_next_page(page_num):
            print("No more pages to scrape.")
            break
        
        page_num += 1

    return all_products

# Function to write products to a CSV file
def write_to_csv(products, filename):
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['Product Name', 'Original Price', 'Discounted Price'])
        writer.writeheader()
        for product in products:
            writer.writerow(product)

# Base URLs for scraping
vga_base_url = 'https://nguyencongpc.vn/vga-card-man-hinh'
cpu_base_url = 'https://nguyencongpc.vn/cpu-bo-vi-xu-ly'

# Scrape VGA (GPU) and CPU categories
vga_products = scrape_category(vga_base_url, "VGA")
cpu_products = scrape_category(cpu_base_url, "CPU")

# Write products to separate CSV files
write_to_csv(vga_products, 'NguyenCong_GPU_list.csv')
write_to_csv(cpu_products, 'NguyenCong_CPU_list.csv')

# Close the WebDriver
driver.quit()



Scraping category: VGA
Scraping page 1...
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Scraping page 2...
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extracting product info: 'NoneType' object has no attribute 'get_text'
Error extractin