## Scrape Data from eBay

In [2]:
## Import the libraries

import os
import re
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException # raised when a WebDriverWait expires
from selenium.webdriver.common.by import By # to find tags or elements
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC # conditions to wait on (e.g. element clickable)
from selenium.webdriver.support.ui import WebDriverWait # a good way to wait

In [3]:
## Function to get product details
def get_product_details(driver, price_multiplier=1500):
    """Extract the title, price, features and specification from the loaded product page.

    Parameters
    ----------
    driver : selenium WebDriver
        Driver currently showing the product page; only ``driver.page_source`` is read.
    price_multiplier : int or float, default 1500
        Factor applied to the listed price before it is returned.
        NOTE(review): presumably a currency conversion rate - confirm and keep up to date.

    Returns
    -------
    dict
        ``product_name`` (str, '' when the title element is missing),
        ``price`` (int, or None when no price could be parsed),
        ``features`` (str, the first bullet list joined with '. ', '' when absent),
        ``specification`` (str, first paragraph of the description, '' when absent).
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # The description container is looked up once; many listings may not have it,
    # so every access below tolerates its absence instead of raising AttributeError.
    description = soup.find('div', class_='kdwhd-description')

    features = []
    specification = ''
    if description is not None:
        feature_lists = description.find_all('ul')
        if feature_lists:
            # Only the first bullet list is treated as the feature list.
            features = [item.get_text(strip=True) for item in feature_lists[0].find_all('li')]
        first_paragraph = description.find('p')
        if first_paragraph is not None:
            specification = first_paragraph.text.strip()

    title = soup.find('span', class_='ux-textspans ux-textspans--BOLD')

    price_box = soup.find('div', class_='x-price-primary')
    price_span = price_box.find('span') if price_box is not None else None
    price_text = price_span.text if price_span is not None else ''

    product_details = {
        'product_name': title.text.strip() if title is not None else '',
        # 'brand': soup.find('div', class_='-pvxs').find_all('a')[0].text.strip(),
        'price': _parse_listed_price(price_text, price_multiplier),
        # 'description': soup.find('div', class_='markup -mhm -pvl -oxa -sc').text.strip(),
        # An empty feature list yields '' rather than a lone '.'.
        'features': '. '.join(features) + '.' if features else '',
        'specification': specification
    }

    return product_details


def _parse_listed_price(price_text, multiplier):
    """Return the first amount found in ``price_text`` times ``multiplier`` as an int, or None."""
    # Keep the decimal point: stripping every non-digit would turn "12.99" into 1299
    # and inflate the result a hundredfold.
    match = re.search(r'\d[\d,]*(?:\.\d+)?', price_text)
    if match is None:
        return None
    amount = float(match.group(0).replace(',', ''))
    return round(amount * multiplier)

In [4]:
## Get Ebay product reviews
def get_ebay_reviews(driver):
    """Collect the feedback entries shown on the currently loaded feedback page.

    Parameters
    ----------
    driver : selenium WebDriver
        Driver currently showing a feedback page; only ``driver.page_source`` is read.

    Returns
    -------
    list of dict
        One dict per feedback card with the keys ``review_date``, ``reviewer_name``
        and ``reviewer_comment``. A field whose element is missing is ''.
        The list is empty when the page has no feedback list at all.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    feedback_container = soup.find('ul', class_="fdbk-detail-list__cards")
    if feedback_container is None:
        # No feedback list on this page: report "no reviews" instead of raising
        # AttributeError on None and aborting the whole scrape.
        return []

    ebay_reviews = []
    for card in feedback_container.find_all('li', class_="fdbk-container"):
        name_text = _text_or_blank(card, 'div', "fdbk-container__details__info__username")
        ebay_reviews.append({
            'review_date': _text_or_blank(card, 'span', "fdbk-container__details__info__divide__time"),
            # Keep only the part of the username text before the first '-'.
            'reviewer_name': name_text.split('-')[0],
            'reviewer_comment': _text_or_blank(card, 'div', 'fdbk-container__details__comment'),
        })

    return ebay_reviews


def _text_or_blank(parent, tag_name, css_class):
    """Return the text of the first matching child of ``parent``, or '' when there is none."""
    node = parent.find(tag_name, class_=css_class)
    return node.text if node is not None else ''

In [8]:
# Run the scrap function
def ebay_scrap(product_url):
    """Scrape every feedback page of an eBay listing and save the reviews to a CSV file.

    Opens ``product_url`` in a headless Edge browser, reads the product details,
    follows the "see all feedback" link, walks the pagination and writes the
    collected reviews to ``Reviews/Ebay/<product name>_reviews.csv`` (relative to
    the working directory). A summary line is printed when the file is written.

    Parameters
    ----------
    product_url : str
        URL of the eBay product page.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the "see all feedback" button does not become clickable within 10 seconds.
        The browser is closed before the exception propagates.
    """
    # Configure Selenium EdgeDriver options
    options = Options()
    options.use_chromium = True
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59") # code to prevent the website from detecting bot activity
    options.add_argument("--disable-blink-features=AutomationControlled") # Disable the automation line at the top of the browser
    options.add_argument('headless')
    options.add_argument('disable-gpu')
    service = Service(executable_path=r'Driver\msedgedriver.exe') # Always check that the Edge driver version matches the installed MS Edge browser version
    driver = webdriver.Edge(service=service, options=options) # Initialize the webdriver

    all_product_reviews = []
    # try/finally guarantees the browser process is closed even when a step fails.
    try:
        driver.get(product_url)
        time.sleep(8) # To load the product page

        # Get the product details; the name is cut at the first quote character
        product_details = get_product_details(driver)
        product_name = re.split(r"[\'\"]", product_details['product_name'])[0]

        # Get the 'see more reviews' button
        all_feedback_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, '//a[@class="fdbk-detail-list__tabbed-btn fake-btn fake-btn--large fake-btn--secondary"]')
        ))

        # Scroll down to where the button is
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", all_feedback_button)
        time.sleep(2)  # Wait for the scroll

        # Remember the current tab so a newly opened one can be identified reliably
        original_handle = driver.current_window_handle
        all_feedback_button.click()
        time.sleep(5)

        # The review page normally opens in another browser tab. Switch to it when it
        # exists; otherwise stay on the current tab instead of failing with IndexError.
        new_handles = [handle for handle in driver.window_handles if handle != original_handle]
        if new_handles:
            driver.switch_to.window(new_handles[-1])
        time.sleep(3)

        # Scrape all review pages until there is no clickable "next page" button
        while True:
            all_product_reviews.extend(get_ebay_reviews(driver))

            try:
                # Get the next page button
                next_page_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, '//a[@class="pagination__next icon-link" and @aria-label="Next page"]')
                ))

                # Scroll to the next page button area
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_page_button)
                time.sleep(2)  # Wait for the scroll

                next_page_button.click() # Clicks the next page button
                time.sleep(5)  # Wait for the next page to load

            except TimeoutException:
                print("No more pages to load")
                break
            except Exception as exc:
                # Best effort: keep what was collected so far, but say why paging stopped.
                print(f"Stopped paging early ({type(exc).__name__}): {exc}")
                break
    finally:
        driver.quit()

    # Save to CSV
    output_dir = os.path.join('Reviews', 'Ebay')
    file_name = f'{_safe_file_stem(product_name)}_reviews.csv'
    output_path = os.path.join(output_dir, file_name)
    os.makedirs(output_dir, exist_ok=True)

    product_df = pd.DataFrame(all_product_reviews)
    product_df.to_csv(output_path, index=False)
    print(f'{len(product_df)} {product_name} reviews successfully written to {output_path}! Nice work!!!')


def _safe_file_stem(name, fallback='product'):
    """Replace characters that are invalid in file names with '_'; use ``fallback`` for blank names."""
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name)
    return cleaned if cleaned.strip() else fallback


In [None]:
# URL of the eBay product page to scrape - replace it to scrape a different listing.
start_url = 'https://www.ebay.com/itm/254462710891?_skw=phone&epid=12019864188&itmmeta=01JMEAZEY3GMWKERYX7PYB1HCH&hash=item3b3f28d46b:g:tywAAOSwIpReYWMu&itmprp=enc%3AAQAKAAAA4FkggFvd1GGDu0w3yXCmi1cBdigaVfzLvfokvWKNvNFT%2B6qzkuicL0D%2FGnbsagx4u9M8pSJvwd7CZutUHbKvV2CfBHpjndCQ6zaEqqBLgGVmp%2BQBfyMXrJ3Qo6Xy3Bn3Z20g0GfJQM1ddiFL%2F6Kkv31Acl8weR%2FhErhCfvu0pW160YevJV2MhvHZIst%2FcqsS3Ybf4WgTJ5sb69yobsuhbh5JQsKF61w355FA5aLovsbkuqzH6XrMIaA0yI3HoDXf6wSB1vmOSlLsTi9l2%2BLL%2BZvuaLF%2BkV3r15PXKvWrE9hq%7Ctkp%3ABk9SR-jw_cqjZQ'
# Alternative listings kept for reference (not used by the call below):
# less review data - 'https://www.ebay.com/itm/385169573354?_skw=laptop+stand&itmmeta=01JKZQE3TG4DVJXYE7P5XDHPT0&hash=item59ade545ea:g:C-YAAOSw545i3usQ&itmprp=enc%3AAQAKAAAA0FkggFvd1GGDu0w3yXCmi1c%2Bw0OO6Zxtqq3No1Eqy4Ai8szJzA7faigTtPc%2FqhI8jKb9XsQo2NVfAOnfFOCDgRg9NcQ6qGjzopi0sinFhZHhsf79jAqtePIUbsTdXBQ7Q7ljUdbLStVWPyTq0uHuJp%2F3EUuNOggxP0rrpKW2dqgsneQ4RYNnCWrCO1iR2kEG20o82DNvspVMfUAZJkObB4wY0or%2FucpGjjDUwGyZCGdx3qy80gWdx3tNAyoPqOurVyNd3EEI6soQe7CE7b4GpcI%3D%7Ctkp%3ABk9SR8S9uPefZQ'
# more review data - 'https://www.ebay.com/itm/385305869021?_skw=laptop+stand&itmmeta=01JKZQE3TGP5HCCWH64GZV6QX7&hash=item59b604fadd:g:lhgAAOSwzTJjolF5&itmprp=enc%3AAQAKAAAA0FkggFvd1GGDu0w3yXCmi1fmooQExts6zjYVSHvCsMZ%2FzF%2F8B6kR6bxlLKiMyCSmW9Bu8dy1cam%2BioAU%2Fz1Dk3ToONgmkfJZu%2BJ6tdiSotvjMJWO9%2FcDzg531PVtEfZCwyNlkjGiZIaaFasJFLHv7jQr4UqzacL%2FSLx2Fy3TcoS8Nh%2B470zyhgfZrfpff%2FRAD0vBHpDPuSoHFpZctDfjncBgm%2BPuuI21GItBP0bP8MMc4pOchbuYPoyXebEHHIXBMVA5RczE5pCTQoP6wsp%2BZro%3D%7Ctkp%3ABk9SR8S9uPefZQ'
# product_name = 'Laptop Stand Alluminium'

# Runs the scrape for start_url: opens the browser, collects the reviews and writes the CSV file.
ebay_scrap(start_url)