In [1]:
import pandas as pd
from bs4 import BeautifulSoup as BS
import httpx as hx
import parsel as pc
import nested_lookup
import tkinter as tk
from tkinter import ttk
from functools import partial

In [3]:
# Shared HTTP/2 client that follows redirects and sends browser-like headers.
session = hx.Client(
    http2=True,
    follow_redirects=True,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    },
)

In [4]:
from parsel import Selector
import httpx

def parse_product(response: httpx.Response) -> dict:
    """Parse an eBay product listing page for core product data.

    Args:
        response: HTTP response whose ``text`` is the HTML of a listing page.

    Returns:
        A dict with the keys ``url``, ``id``, ``price``, ``name``,
        ``seller_name``, ``seller_url``, ``photos`` (list of image URLs),
        ``description_url`` and ``features`` (label -> value mapping).
        A field whose selector matches nothing is left empty
        (``""``, ``[]`` or ``{}``) instead of raising.
    """
    sel = Selector(response.text)

    def first(query: str) -> str:
        """Return the first node matched by ``query``, stripped ("" if none)."""
        return sel.css(query).get("").strip()

    def joined(query: str) -> str:
        """Return all nodes matched by ``query`` concatenated, then stripped."""
        return "".join(sel.css(query).getall()).strip()

    item = {}
    item["url"] = first('link[rel="canonical"]::attr(href)')
    # The ID is the path segment after "/itm/" in the canonical URL. Pages
    # without such a link (e.g. an error or captcha page) yield an empty ID,
    # consistent with every other field, rather than an IndexError.
    url_parts = item["url"].split("/itm/")
    item["id"] = url_parts[1].split("?")[0] if len(url_parts) > 1 else ""
    item["price"] = first('.x-price-primary>span::text')
    item["name"] = joined("h1 span::text")
    item["seller_name"] = joined("[data-testid=str-title] a ::text")
    # drop the query string from the seller link
    item["seller_url"] = first("[data-testid=str-title] a::attr(href)").split("?")[0]
    item["photos"] = sel.css('.ux-image-filmstrip-carousel-item.image img::attr("src")').getall()  # carousel images
    item["photos"].extend(sel.css('.ux-image-carousel-item.image img::attr("src")').getall())  # main image
    # The description is an iframe (an independent page). We keep its URL so it
    # can be scraped later; the second selector is a fallback for the first.
    item["description_url"] = first("div.d-item-description iframe::attr(src)")
    if not item["description_url"]:
        item["description_url"] = first("div#desc_div iframe::attr(src)")
    # feature details from the description table:
    feature_table = sel.css("div.ux-layout-section--features")
    features = {}
    for ft_label in feature_table.css(".ux-labels-values__labels"):
        # each label's value lives in its first following sibling <div>
        label = "".join(ft_label.css(".ux-textspans::text").getall()).strip(":\n ")
        ft_value = ft_label.xpath("following-sibling::div[1]")
        value = "".join(ft_value.css(".ux-textspans::text").getall()).strip()
        features[label] = value
    item["features"] = features
    return item

# json is only needed to pretty-print the parsed item at the end of this cell
import json

# HTTP/2 client with browser-like headers; redirects are followed automatically
session = httpx.Client(
    http2=True,
    follow_redirects=True,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    },
)

# example use: fetch one listing, parse it, and show the result as indented JSON
response = session.get("https://www.ebay.com/itm/332562282948")
item = parse_product(response)
print(json.dumps(item, indent=2))

{
  "url": "https://www.ebay.com/itm/332562282948",
  "id": "332562282948",
  "price": "US $13.49",
  "name": "Sanei Kirby 5.5\" Plush Stuffed Doll (KP01) - Kirby Adventure All Star Collection",
  "seller_name": "ToysCollections",
  "seller_url": "https://www.ebay.com/str/huskylover228",
  "photos": [
    "https://i.ebayimg.com/thumbs/images/g/ITEAAOSw9p9ajK16/s-l500.jpg",
    "https://i.ebayimg.com/images/g/ITEAAOSw9p9ajK16/s-l1600.jpg"
  ],
  "description_url": "https://vi.vipr.ebaydesc.com/ws/eBayISAPI.dll?ViewItemDescV4&item=332562282948&t=1678153940000&category=69528&seller=the_northeshop&excSoj=1&excTrk=1&lsite=0&ittenable=true&domain=ebay.com&descgauge=1&cspheader=1&oneClk=2&secureDesc=1",
  "features": {
    "Condition": "New: A brand-new, unused, unopened, undamaged item (including handmade items). See the seller's ... Read moreNew: A brand-new, unused, unopened, undamaged item (including handmade items). See the seller's listing for full details. See all condition definitions

In [None]:


def search_ebay_by_title(search_query):
    """Search eBay for ``search_query`` and print each result's title and price.

    The request goes through the notebook's shared ``session`` client and the
    HTML is parsed with BeautifulSoup (imported in this notebook as ``BS``).
    The original called ``requests.get`` and ``BeautifulSoup``, neither of
    which is in scope here, so it raised ``NameError`` on first use.

    Args:
        search_query: Text to search for; sent as the ``_nkw`` query parameter.

    Returns:
        None. One ``Title: ..., Price: ...`` line is printed per result.
    """
    base_url = 'https://www.ebay.com/sch/i.html'
    params = {'_nkw': search_query}

    # Make a request to eBay search page
    response = session.get(base_url, params=params)
    soup = BS(response.text, 'html.parser')

    # Extract relevant information (adjust according to website changes).
    # Both classes use the BEM "s-item__<part>" form; the original title
    # selector ('.s-item-title') did not follow it and matched nothing.
    titles = soup.select('.s-item__title')
    prices = soup.select('.s-item__price')

    # zip() pairs titles and prices by position and stops at the shorter list
    for title, price in zip(titles, prices):
        print(f'Title: {title.text}, Price: {price.text}')

def on_search_button_click(entry):
    """Button callback: run an eBay search for the current text of ``entry``.

    Args:
        entry: Widget exposing ``get()``, which returns the query string.
    """
    search_ebay_by_title(entry.get())

# Top-level application window
root = tk.Tk()
root.title("eBay Scraper")

# Text box for the search query, in the left-hand grid cell
entry = ttk.Entry(root, width=30)
entry.grid(column=0, row=0, padx=10, pady=10)

# Search button in the right-hand grid cell; clicking it passes the entry
# widget to the click handler
search_button = ttk.Button(
    root,
    text="Search",
    command=partial(on_search_button_click, entry),
)
search_button.grid(column=1, row=0, padx=10, pady=10)

# Hand control to Tkinter; this call blocks until the window is closed
root.mainloop()