In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import pandas as pd
import time
import re  # Import regular expressions module
import json

# Scrape an eBay category listing: walk the paginated results in a headless
# Chrome session, collect title / price / canonical link for every listing,
# then write the results to a CSV file plus TXT and JSON lists of item numbers.

# Set up Selenium
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Run browser in headless mode
service = Service(r"C:\Users\User\Downloads\chromedriver.exe")  # Path to ChromeDriver
driver = webdriver.Chrome(service=service, options=options)

# Define the base URL
base_url = 'https://www.ebay.com/b/Cell-Phones-Smartphones/9355/bn_320094'

# Seconds to wait after each navigation so the page has time to load
page_load_delay = 3

# Output file names (kept in one place so the final messages cannot drift
# from the files that are actually written)
csv_path = 'Link_ebay_scraped_data.csv'
txt_path = 'item_numbers1.txt'
json_path = 'item_numbers1.json'

# Compiled once: captures the numeric item id from an "/itm/<id>" listing URL
item_number_pattern = re.compile(r'/itm/(\d+)')

# Initialize the lists for storing data
data = []
item_numbers = []  # List to store item numbers

page_number = 1  # Start from the first page

# try/finally guarantees the browser process is shut down even when scraping
# fails part-way through (network error, unexpected markup, Ctrl-C, ...).
try:
    driver.get(base_url)

    # Allow the page to load
    time.sleep(page_load_delay)

    while True:
        print(f"Scraping page {page_number}...")

        # Get the page source and pass it to BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Scrape at most the first 200 items on the page
        items = soup.find_all('li', class_='s-item', limit=200)

        for item in items:
            title = item.find('h3', class_='s-item__title')
            price = item.find('span', class_='s-item__price')

            # Some list entries carry no listing link; skip them instead of
            # crashing on a None subscript.
            anchor = item.find('a', class_='s-item__link')
            link = anchor.get('href') if anchor else None
            if not link:
                continue

            # Extract the item number using a regular expression
            match = item_number_pattern.search(link)
            if not match:
                continue

            item_number = match.group(1)
            # Canonical listing URL without tracking/query parameters
            clean_link = f'https://www.ebay.com/itm/{item_number}'
            # The item number is recorded even when title/price are missing
            item_numbers.append(item_number)

            # Only emit a data row when both title and price were found
            if title and price:
                data.append({
                    'Title': title.text,
                    'Price': price.text,
                    'Link': clean_link
                })

        # Try to find the "Next" button on the page to go to the next page
        next_button = soup.find('a', class_='pagination__next')
        next_page_link = next_button.get('href') if next_button else None

        # No next button (or one without a target) means this is the last page
        if not next_page_link:
            break

        # Navigate to the next page
        driver.get(next_page_link)

        # Allow the next page to load
        time.sleep(page_load_delay)

        page_number += 1  # Increment the page number
finally:
    # Close the browser
    driver.quit()

# Save the data into a CSV file
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)
print(f"Scraping completed and data saved to {csv_path}")

# Save the item numbers to a .txt file (comma-separated, single line)
with open(txt_path, 'w', encoding='utf-8') as f:
    f.write(','.join(item_numbers))

# Save the item numbers to a .json file (JSON array of strings)
with open(json_path, 'w', encoding='utf-8') as f:
    json.dump(item_numbers, f)

print(f"{len(item_numbers)} item numbers have been extracted and saved to {txt_path} and {json_path}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping completed and data saved to Link_ebay_scraped_data.csv
182 item numbers have been extracted and saved to item_numbers.txt and item_numbers.json
