## Scrape Data from eBay

In [48]:
## Import the libraries

from bs4 import BeautifulSoup
import requests as rq
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options

from selenium.webdriver.common.by import By # to find tags or elements
from selenium.webdriver.support.ui import WebDriverWait # a good way to wait
from selenium.webdriver.support import expected_conditions as EC # for scrolling

import time
import pandas as pd
import os, re
from datetime import datetime, timedelta

In [54]:
## Function to get product details
def get_product_details(driver):
    """Parse the item page currently loaded in ``driver`` into a details dict.

    Args:
        driver: Object exposing a ``page_source`` HTML string (a Selenium
            WebDriver in this notebook).

    Returns:
        dict with the keys ``product_name``, ``price``, ``features``,
        ``specification`` and ``brand``. Every key is always present; a value
        that cannot be found on the page keeps its default ('' or 0).
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # 'brand' is initialised too, so downstream code can rely on the key
    # even when the page has no brand row.
    product_details = {
        'product_name': '',
        'price': 0,
        'features': '',
        'specification': '',
        'brand': ''
    }

    def first_span_text(parent):
        """Return the stripped text of the first ux-textspans span, or ''."""
        if parent is None:
            return ''
        span = parent.find('span', class_='ux-textspans')
        return span.text.strip() if span is not None else ''

    # Product Name - Adjust this selector if needed
    product_name_elem = soup.find('span', class_='ux-textspans ux-textspans--BOLD')
    if product_name_elem:
        product_details['product_name'] = product_name_elem.text.strip()

    # Price - Adjust this selector if needed
    price_elem = soup.find('div', class_='x-price-primary')
    price_span = price_elem.find('span') if price_elem else None
    if price_span:
        price_text = price_span.text.strip()
        # Accept thousands separators ("1,299.99") and whole-number prices
        # ("250"); a plain \d+\.\d+ would read "1,299.99" as 299.99.
        price_match = re.search(r'\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?', price_text)
        if price_match:
            product_details['price'] = float(price_match.group().replace(',', ''))

    # Brand
    brand_dl = soup.find('dl', class_='ux-labels-values--brand')
    if brand_dl:
        brand_value = brand_dl.find('dd', class_='ux-labels-values__values')
        if brand_value:
            product_details['brand'] = first_span_text(brand_value)

    # Item specifics (label/value rows)
    specifics_section = soup.find('div', class_='ux-layout-section--features')
    if not specifics_section:
        print("ux-layout-section--features not found")
        # Without the section there is nothing more to parse; return what we have
        # instead of failing on a None lookup.
        return product_details

    spec_pairs = []
    for dl in specifics_section.find_all('dl', class_='ux-labels-values'):
        label = dl.find('dt', class_='ux-labels-values__labels')
        value = dl.find('dd', class_='ux-labels-values__values')
        if not (label and value):
            continue

        label_text = first_span_text(label)
        value_text = first_span_text(value)
        if not label_text:
            # Row without a readable label: nothing meaningful to record.
            continue

        if label_text.lower() == 'features':
            product_details['features'] = value_text + '.'
        else:
            spec_pairs.append(f"{label_text}: {value_text}")

    if spec_pairs:
        product_details['specification'] = ', '.join(spec_pairs)

    return product_details

In [55]:
## Get Ebay product reviews
def get_ebay_reviews(driver):
    """Extract the feedback entries shown on the page loaded in ``driver``.

    Args:
        driver: Object exposing a ``page_source`` HTML string (a Selenium
            WebDriver in this notebook).

    Returns:
        list of dicts with the keys ``review_date``, ``reviewer_name`` and
        ``reviewer_comment``. A field whose element is missing is ''. An empty
        list is returned when the page has no feedback list at all.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    feedback_container = soup.find('ul', class_="fdbk-detail-list__cards")
    if feedback_container is None:
        # No feedback list on this page: report "no reviews" rather than
        # raising AttributeError on the None container.
        return []

    def text_of(card, tag, css_class):
        """Return the raw text of the first matching element, or ''."""
        node = card.find(tag, class_=css_class)
        return node.text if node is not None else ''

    ebay_reviews = []
    for card in feedback_container.find_all('li', class_="fdbk-container"):
        name_text = text_of(card, 'div', "fdbk-container__details__info__username")
        ebay_reviews.append({
            'review_date': text_of(card, 'span', "fdbk-container__details__info__divide__time"),
            # Keep only the part of the username text before the first '-'.
            'reviewer_name': name_text.split('-')[0],
            'reviewer_comment': text_of(card, 'div', 'fdbk-container__details__comment')
        })

    return ebay_reviews

In [56]:
## Function to transform the DataFrame before saving
def transform_df(df, usd_rate=1500, current_date=None):
    """Reshape the scraped review rows into the final export layout.

    Args:
        df: DataFrame of scraped rows. The columns ``brand``, ``product_name``,
            ``price``, ``features``, ``specification``, ``review_date`` and
            ``reviewer_comment`` are read; any that are missing are treated as
            blank (0 for ``price``) instead of raising ``KeyError``.
        usd_rate: Multiplier applied to ``price`` (labelled "USD rate" in the
            original code). Defaults to 1500, the previously hard-coded value.
        current_date: ``datetime`` used as "now" when turning relative review
            dates into calendar dates. Defaults to ``datetime.now()``.

    Returns:
        A new DataFrame with the columns Store, Product Type, Brand,
        Product Name, Price, Features, Specification, Reviewer Name,
        Review Date, Rating and Reviewer Comment. ``df`` is not modified.
    """
    if current_date is None:
        current_date = datetime.now()

    source = df.copy()

    def column_or_default(name, default=''):
        """Return the named column, or a constant column when it is absent."""
        if name in source.columns:
            return source[name]
        return pd.Series(default, index=source.index)

    def transform_date(review_date):
        """Map a relative date label (e.g. 'Past month') to 'dd-mm-YYYY'."""
        review_date = review_date.lower().strip() if isinstance(review_date, str) else ''
        first_of_month = current_date.replace(day=1)
        if 'past month' in review_date:
            # First day of the previous month
            previous_month = first_of_month - timedelta(days=1)
            return previous_month.replace(day=1).strftime('%d-%m-%Y')
        if 'past 6 months' in review_date:
            # First day of the month six months back: any six consecutive months
            # span 181-184 days, so stepping back 180 days always lands inside it.
            six_months_back = first_of_month - timedelta(days=180)
            return six_months_back.replace(day=1).strftime('%d-%m-%Y')
        if 'year' in review_date:
            # First day of the current month, one year earlier
            # (also covers 'more than a year ago').
            return first_of_month.replace(year=current_date.year - 1).strftime('%d-%m-%Y')
        # Unrecognised label: fall back to the current date
        return current_date.strftime('%d-%m-%Y')

    new_df = pd.DataFrame(index=source.index)
    new_df['Brand'] = column_or_default('brand')
    new_df['Product Name'] = column_or_default('product_name')
    new_df['Price'] = column_or_default('price', 0) * usd_rate
    new_df['Features'] = column_or_default('features')
    new_df['Specification'] = column_or_default('specification')
    # Reviewer names are replaced by sequential placeholders.
    new_df['Reviewer Name'] = [f'Reviewer_{i+1}' for i in range(len(source))]
    new_df['Review Date'] = column_or_default('review_date').apply(transform_date)
    new_df['Rating'] = ''
    new_df['Reviewer Comment'] = column_or_default('reviewer_comment')
    new_df.insert(0, 'Store', 'Ebay')
    new_df.insert(1, 'Product Type', 'Phone')

    return new_df

In [57]:
# Scrape one eBay product page and save its reviews to CSV
def ebay_scrap(product_url):
    r"""Scrape one eBay item page plus all of its feedback pages and save a CSV.

    The function starts a headless Edge browser, reads the product details,
    opens the full feedback list, walks through its pages, and writes the
    transformed rows to ``Reviews\Ebay\<product name>_reviews.csv``.

    Args:
        product_url: URL of the eBay item page to scrape.

    Returns:
        None. The result is the CSV file and a printed summary line.

    Raises:
        selenium.common.exceptions.TimeoutException: if the button that opens
            the full feedback list does not become clickable within 10 seconds.
            The browser is closed before the exception propagates.
    """
    # Local import keeps this block self-contained; WebDriverWait.until raises it.
    from selenium.common.exceptions import TimeoutException

    # Configure Selenium EdgeDriver options
    options = Options()
    options.use_chromium = True
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59") # code to prevent the website from detecting bot activity
    options.add_argument("--disable-blink-features=AutomationControlled") # Disable the automation line at the top of the browser
    options.add_argument('headless')
    options.add_argument('disable-gpu')
    # Always check that the Edge driver version matches the installed MS Edge browser
    service = Service(executable_path=r'Driver\msedgedriver.exe')
    driver = webdriver.Edge(service=service, options=options) # Initialize the webdriver

    all_product_reviews = []
    try:
        driver.get(product_url)
        time.sleep(8) # To load the product page

        # Get the product details
        product_details = get_product_details(driver)
        # transform_df reads a 'brand' column; make sure it exists even when
        # the page has no brand row.
        product_details.setdefault('brand', '')
        # Keep only the part of the title before the first quote character
        product_name = re.split(r"[\'\"]", product_details['product_name'])[0]

        # Get the 'see more reviews' button
        all_feedback_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
            (By.XPATH, '//a[@class="fdbk-detail-list__tabbed-btn fake-btn fake-btn--large fake-btn--secondary"]')
        ))

        # Scroll down to where the button is, then click it
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", all_feedback_button)
        time.sleep(2)  # Wait for the scroll
        all_feedback_button.click()
        time.sleep(5)

        # The review list opens in another browser tab; point the driver at it.
        # If no new tab appeared, stay on the current one instead of raising IndexError.
        handles = driver.window_handles
        if len(handles) > 1:
            driver.switch_to.window(handles[-1])
        time.sleep(3)

        # Scrape every feedback page until there is no clickable "Next page" link
        while True:
            all_product_reviews.extend(get_ebay_reviews(driver))

            try:
                next_page_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
                    (By.XPATH, '//a[@class="pagination__next icon-link" and @aria-label="Next page"]')
                ))

                # Scroll to the next page button area
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_page_button)
                time.sleep(2)  # Wait for the scroll

                next_page_button.click() # Clicks the next page button
                time.sleep(5)  # Wait for the next page to load

            except TimeoutException:
                print("No more pages to load")
                break
            except Exception as e:
                # Best effort: keep what was collected, but say why paging stopped
                # instead of reporting every failure as "no more pages".
                print(f"Stopping pagination after unexpected error: {e!r}")
                break
    finally:
        # Always release the browser, even when scraping fails part-way.
        driver.quit()

    if not all_product_reviews:
        print(f'No reviews found for {product_name}; nothing was written.')
        return

    # Join the product details onto every review
    for review in all_product_reviews:
        review.update(product_details)

    product_df = pd.DataFrame(all_product_reviews)
    real_data_df = transform_df(product_df)

    # Save to CSV. Characters that are illegal in file names are replaced so a
    # title such as "A/B: phone" cannot break the path.
    safe_name = re.sub(r'[<>:"/\\|?*]', '_', product_name).strip(' .') or 'product'
    output_dir = r'Reviews\Ebay'
    file_name = f'{safe_name}_reviews.csv'
    output_path = os.path.join(output_dir, file_name)
    os.makedirs(output_dir, exist_ok=True)

    real_data_df.to_csv(output_path, index=False)
    print(f'{len(product_df)} {product_name} reviews successfully written to {output_path}! Nice work!!!')


In [67]:
# Product page to scrape: replace the URL below with the eBay item page you want,
# then run this cell. ebay_scrap prints a summary line once the CSV has been written.
start_url = 'https://www.ebay.com/itm/224785456700?_skw=iphone&epid=10073689362&itmmeta=01JMMZ2R6F9PZDBH3X28TCR1E3&hash=item345641de3c:g:uvoAAOSwTiplmuQs&itmprp=enc%3AAQAKAAAAwFkggFvd1GGDu0w3yXCmi1fdQ9EbAS46pKPqgYbIh43GjYtrhq3YYLnyKo6I%2FPSRKUSB1mCbT3nBbFuGD7voQjvYIi8OEa8TP5y5EFwtG%2BnlXgbizRKisSEAvEJjkqYtdVlUWT9Fhq3NpXRoReFQPoZF%2BA2O5imtg69xCO%2Byp9nOJRvK97%2BKtreilu%2FbbqP6LTeuQkO6VuI4g0PL1U7YFnw%2BCHUqFx8IrQ2QWRCZ%2Fec84%2Bv97fRRRuW6juHQNzfJdw%3D%3D%7Ctkp%3ABk9SR9iDi5-lZQ'

ebay_scrap(start_url)

# https://www.ebay.com/itm/354341990110?_skw=samsung+galaxy&epid=28043687656&itmmeta=01JMMWHYRASGGGT3M4ERH7G80X&hash=item52806daede:g:8fUAAOSwKQFi5E5W&itmprp=enc%3AAQAKAAAA8FkggFvd1GGDu0w3yXCmi1dz%2BFV%2FvqWGNcTqSJ2%2FLGr4pIq3FvuRRuR8nVzh5ywX%2BpyFFVAesdJaddNYuUUm0VTHBuSg8H7SrXdQrG8c7CTgFsoe7rSqgKiCN6AvDTtmHRVRJaliKGY3qzq1nKIQrpgpsZUMv%2FCpGBUN1Ds5PgajT1v89D2x6%2FC%2FxG0an4eEC6og65c4uZ2gJ%2BQBbHPZSdynVkSShOxm3QNQeUUbQewyo0%2F3ZnotMWWSCQum%2BP511TcuDNQ6ehXZZfyvlu0XIVpv%2FgOuEul23rXEQ8mTOow%2FKpfS047zsrcFEuyVrCxwRQ%3D%3D%7Ctkp%3ABFBMiO3HnKVl
# https://www.ebay.com/itm/256412927142?_skw=samsung+galaxy&epid=21065009430&itmmeta=01JMMWHYRAKWQMH56K57SWAT6A&hash=item3bb366c4a6:g:QKEAAOSwLalnIQma&itmprp=enc%3AAQAKAAAA0FkggFvd1GGDu0w3yXCmi1f1l23DhLEal8Rz5XWubyHX%2FzKTDC0QQgaKFE%2FEGpuaDDdt9ebU3jjBVO2XBnyFssP65V6HUFxFdIWNTjqVeKf%2FIuUbby2RSjpIG4hKwfzZdTt40bqU87%2BAb1KoY6U86O2xig0z1Awr5UjPs%2BCVcYPKKAbwlB1eicIYJ2g6QWLEHIgQXFJCXPp1hebnTBoSwzrqpyAQfKD1OJ9NduHxAfsya85gnj6roAV7esr8sNrdwpJQX1uYwIv%2BNrv%2FQzawRlM%3D%7Ctkp%3ABk9SR4jtx5ylZQ
# https://www.ebay.com/itm/256275686595?_skw=samsung+galaxy&epid=8055657110&itmmeta=01JMMWHYRABSB0G0Z8ATW06EA4&hash=item3bab38a4c3:g:oBoAAOSw0HxnKSCQ&itmprp=enc%3AAQAKAAAA8FkggFvd1GGDu0w3yXCmi1cyLrdaNwUqf%2F296kVOXkt3o2sxEyAScX1loXJ%2FAtQ%2FYS0ynwdMW9j4Arq2X80v9UCoFuvfrKR30D0HZp3YSfGoFdkhIJyM2AwI903ro8Qyb4RrRtaWB0NIO0pJGq18vIEfmElnprcT%2BOKF2RnGpSep2j%2FmXr%2FUe9EVtP8DiyNyxiJrP0SHjwx5GsMZymqbGE1V3UQTDD%2FJ3ZRrcz8D5a6vsJhx0wYeT6APl%2F43KJppbHLXp%2F91r3ME4CyNpoJe%2Fun5nwQe39hnFvhJDM91LuN5UFCrZA1EyQiATYN%2BbNJ4pw%3D%3D%7Ctkp%3ABFBMiu3HnKVl
# https://www.ebay.com/itm/256350256078?_skw=iphone&epid=12052764234&itmmeta=01JMMZ2R6FZGNMTVS3Q8T02CED&hash=item3bafaa7bce:g:fKcAAOSwnPlnK4Vr&itmprp=enc%3AAQAKAAAAwFkggFvd1GGDu0w3yXCmi1dC0UF9g35H3J9mdfMdTa6iQdkdqc2Tqs9PcwldAUOiW8BmExrEv9W4ieoSycYKbAWHhZQXzKPyfdiDYYAMWtL41eRRj7LY6H44pN%2Fd68rzM7FGHdN6HNgotUOJ%2F5jPA8TCCZuf3xscoq3FnQKGqXCKj1oQqA3rLWgKB05RFzxrZTxLqVSpM8MX7NLIxNLX1RPoNtqHT4Y9WykD%2FLP2zxLAHLJceRd0wuUeBlI5S6V6Mw%3D%3D%7Ctkp%3ABk9SR9SDi5-lZQ
# https://www.ebay.com/itm/256446109451?_skw=iphone&epid=28049285668&itmmeta=01JMMZ2R6F3PGERK1DBQ30EGA2&hash=item3bb561170b:g:iGoAAOSwe8pnK6HS&itmprp=enc%3AAQAKAAAA4FkggFvd1GGDu0w3yXCmi1ddmj4EyTyaZlGwWbfhi55Kxm40xx%2F0ZT7BLBBGuLUSjSNVfg%2BZAYx0k7XpcPXBnZ8G4AJehYM2Brl0jC8bhgBT6SJRlptO%2BSvemKy7sD8qSJ35HtIb3TvRygacEv7AGYMbizkj%2B2cBYTNl6AVr6y3XOPVQ6%2FEZQZIgQCqtJdu2F3YypqTSbQpCY6H529gIHjtIC87zxh7kfPv2G8tHJ9UGKHJXzvO%2Bo4rQwKHbqKs1Vf7WNoUA%2F9lEJeGlVKnUMr26eT%2FJ2EgrrXt4QDiYkJrr%7Ctkp%3ABFBM1IOLn6Vl
# https://www.ebay.com/itm/144195247770?_skw=iphone&epid=21034217912&itmmeta=01JMMZ2R6GSZAEFVC29XENQVNS&hash=item2192b4de9a:g:N0MAAOSw9JVhO7M9&itmprp=enc%3AAQAKAAAA4FkggFvd1GGDu0w3yXCmi1dC5M3B9EUyBZtixkNPzlTDIxA%2FfkwIyAru5W49FXOXaa1Hcq%2FWeAPR6N3HTD5a4%2F4wbeOJj5E2HcU49JtCsqw08MxL1wK4AQLlv4NelD2fe0nhzDy8DhIijw8YnpcFp6guUhtHbpNQPuDy5bYIwmdi%2BOVs7P%2B54iUBNe%2Fbc89VkTD3sfiIDx9a%2FOLxuXLuK1bcC4gshMLXHMQyS%2B%2BMJXD85mZpVu6jpMWl3zEfVTMiw5lZtFqWSt4Eoh0OZThRfu1PHyMRuTuAs2ieu%2Fr4aCvy%7Ctkp%3ABFBM2IOLn6Vl
# https://www.ebay.com/itm/226036692897?_skw=iphone&epid=6023706166&itmmeta=01JMMZ2R6GDYZ66YP4P5543E8H&hash=item34a0d637a1:g:eh8AAOSwYHNlMC~-&itmprp=enc%3AAQAKAAAAwFkggFvd1GGDu0w3yXCmi1cCO5DDTRO%2FNMQdDsi8n3yYpxeIse1Gprhe0AmK0kNJjKATQErcir1l89SrLft6EwY%2BljcmcZjBVBYlCIM8C%2BcQLgREYEgCoBKBb7i5lYR%2FMJt1lsKPF6BtHYPYfKh9GibkpfnYjNzv0Ed9anl1rgBYTAsnffE3S2t0qR7%2FcjfD3xdvKpS9Smnau9zHumQd8usC9gR0VT9HRWZAJfMU87vPvpS6osq9PNGU%2B7uyoCEhYQ%3D%3D%7Ctkp%3ABk9SR9iDi5-lZQ
# https://www.ebay.com/itm/186312828253?_skw=iphone&epid=8062766781&itmmeta=01JMMZ2R6G7HKM9JGH8EDXX1PB&hash=item2b611c255d:g:VJUAAOSwwWZl2PI8&itmprp=enc%3AAQAKAAAA4FkggFvd1GGDu0w3yXCmi1ev%2F0JSfkdmkqAviq5Xdswg2somhwtWu1%2BkIkBkliWXUFTQv7hyefymPAuHK313Pghp7bP6u1vwqCfu9wILZiaV%2FIdcEmtpGHezPqrE7H4t7UxCm5HgmDrDfezmxBZM9uT2E2mLwphfMFuW3mIh4tvjmfC1HwdIwItrJ%2BO056Kgw8ommnqXKIgvOVYbxlPj9gF%2Fx9WM2aBWwu4yEOv8jH%2BGB3ynbSPOM13SXyQhY3nEvx5TN4rren%2FdHpz3soS2vBF%2F0cV0bck2SgkPiNzZZJp4%7Ctkp%3ABFBM2IOLn6Vl
# https://www.ebay.com/itm/255020221191?_skw=samsung+galaxy&epid=22043687761&itmmeta=01JMMWHYRAJM2VPH15W5H1T668&hash=item3b6063c307:g:XsYAAOSw5ElnKotF&itmprp=enc%3AAQAKAAAA8FkggFvd1GGDu0w3yXCmi1cBzF6xMCcRN36iP1ik2uHD11pquKmU58whFLmiZ7D9qwRJpIpVUK31r8ikOYIJlumbakr66SbX84nFvXgoukvzDYxdC8Krw2bHpFM5CH%2B3vE7o13MwC8nCpmWCMN2ZN83GqcNtEwVrQd1mkR5mvoSmjiBZ4g8XJsVPqKX0JTDUrEOB4J%2B36w7oglRb4B9LgUH%2F1eKjNCndw7lApiSqW6a5k30dytq2%2B2vcnD7E1N5XbppS%2FhnnMahhtjiMw6N1OINLsFT0ABIQ46myCKA%2BaKV9EPKmMozlrhf%2BSDaGejLTFg%3D%3D%7Ctkp%3ABk9SR4rtx5ylZQ
# https://www.ebay.com/itm/224785456700?_skw=iphone&epid=10073689362&itmmeta=01JMMZ2R6F9PZDBH3X28TCR1E3&hash=item345641de3c:g:uvoAAOSwTiplmuQs&itmprp=enc%3AAQAKAAAAwFkggFvd1GGDu0w3yXCmi1fdQ9EbAS46pKPqgYbIh43GjYtrhq3YYLnyKo6I%2FPSRKUSB1mCbT3nBbFuGD7voQjvYIi8OEa8TP5y5EFwtG%2BnlXgbizRKisSEAvEJjkqYtdVlUWT9Fhq3NpXRoReFQPoZF%2BA2O5imtg69xCO%2Byp9nOJRvK97%2BKtreilu%2FbbqP6LTeuQkO6VuI4g0PL1U7YFnw%2BCHUqFx8IrQ2QWRCZ%2Fec84%2Bv97fRRRuW6juHQNzfJdw%3D%3D%7Ctkp%3ABk9SR9iDi5-lZQ

No more pages to load
647 Apple iPhone 11 64GB - Fully Unlocked - ALL CARRIERS  VERY GOOD Condition reviews successfully written to Reviews\Ebay\Apple iPhone 11 64GB - Fully Unlocked - ALL CARRIERS  VERY GOOD Condition_reviews.csv! Nice work!!!
