In [1]:
import requests
from bs4 import BeautifulSoup

In [5]:
import requests
from bs4 import BeautifulSoup

# Fetch the Wayfair sofas category page, keep a raw copy of the HTML on disk,
# and collect name / price / link for every product card that can be parsed.

# URL of the Wayfair sofas category page
url = "https://www.wayfair.com/furniture/sb0/sofas-c413892.html"

# Set headers to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send a GET request to fetch the page content.
# requests has no default timeout, so without one a stalled connection would
# block the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()  # Raise on any 4xx/5xx status

# Keep a raw copy of the page for offline inspection of its structure.
# NOTE(review): the file name still says "macys" although the page is Wayfair;
# left unchanged in case something else reads this path.
with open('macys_living_room_furniture.html', 'w', encoding='utf-8') as file:
    file.write(response.text)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the elements containing the furniture items.
# NOTE(review): these class names (productThumbnail / productDescLink / priceInfo)
# look like they were written for a different site; confirm against the saved
# Wayfair HTML, otherwise `items` will simply be empty.
items = soup.find_all('div', class_='productThumbnail')

# Extract information about each item, skipping cards that lack any field
furniture_data = []
for item in items:
    link_tag = item.find('a', class_='productDescLink')
    price_tag = item.find('div', class_='priceInfo')
    if link_tag is None or price_tag is None:
        continue

    # .get() returns None instead of raising KeyError when the anchor has no href
    link = link_tag.get('href')
    if link is None:
        continue

    furniture_data.append({
        'name': link_tag.get_text(strip=True),
        'price': price_tag.get_text(strip=True),
        'link': link
    })

2024-06-15 16:14:13 [urllib3.connectionpool] DEBUG: Starting new HTTPS connection (1): www.wayfair.com:443
2024-06-15 16:14:13 [urllib3.connectionpool] DEBUG: https://www.wayfair.com:443 "GET /furniture/sb0/sofas-c413892.html HTTP/1.1" 200 None


# Diffbot to scrape Wayfair

In [3]:
import requests
import json
from base64 import b64decode

# Call Diffbot's Analyze API on the Wayfair sofas page, save the JSON response
# to disk and print it.
import os

# Diffbot API token, read from the environment so the secret is never stored
# in the notebook.
# NOTE(review): a token was previously hardcoded in this cell; if that version
# was ever shared or committed, the token should be rotated.
API_TOKEN = os.environ.get('DIFFBOT_API_TOKEN')
if not API_TOKEN:
    raise RuntimeError(
        'Set the DIFFBOT_API_TOKEN environment variable before running this cell.'
    )

# The URL you want to analyze
url = 'https://www.wayfair.com/furniture/sb0/sofas-c413892.html'

# Diffbot's Analyze API endpoint
api_endpoint = 'https://api.diffbot.com/v3/analyze'

# Parameters for the API request
params = {
    'token': API_TOKEN,
    'url': url,
    'mode': 'product',
    # Sent as the lowercase string: requests would serialise Python False as
    # "False", while the API documents the flag as discussion=false.
    'discussion': 'false'
}

# Make the API request; the timeout keeps a stalled call from hanging the kernel
response = requests.get(api_endpoint, params=params, timeout=60)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()

    # Print and save the response
    with open('diffbot_wayfair_response.json', 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(json.dumps(data, indent=4))
else:
    print(f"Error: {response.status_code}")
    # Error bodies are not guaranteed to be JSON; fall back to the raw text
    try:
        print(response.json())
    except ValueError:
        print(response.text)


{
    "request": {
        "options": [
            "discussion=False",
            "mode=product"
        ],
        "pageUrl": "https://www.wayfair.com/furniture/sb0/sofas-c413892.html",
        "api": "analyze",
        "version": 3
    },
    "humanLanguage": "en",
    "objects": [],
    "type": "list",
    "title": "Sofas & Couches | Wayfair"
}


# Wayfair using Scrapy

In [7]:
import scrapy

class WayfairSpider(scrapy.Spider):
    """Spider that walks the Wayfair sofas listing and yields one dict per product card.

    Each yielded item has the keys ``title``, ``price``, ``image_url`` and
    ``link``. Pagination is followed through the ``a[rel="next"]`` link when
    one is present on the page.
    """

    name = 'wayfair'
    allowed_domains = ['wayfair.com']
    start_urls = ['https://www.wayfair.com/furniture/sb0/sofas-c413892.html']

    def parse(self, response):
        """Yield product dicts for every card on the page, then follow the next page.

        Fields whose selector matches nothing are emitted as an empty string
        (``title``, ``price``) or ``None`` (``image_url``, ``link``) so one
        incomplete card does not abort parsing of the rest of the page.
        """
        products = response.css('div.PlProductCard-content')  # Adjust the selector based on the HTML structure

        for product in products:
            # .get() returns None on no match; a default avoids calling
            # .strip() on None.
            title = product.css('span.PlProductCard-title::text').get(default='').strip()
            price = product.css('div.PlProductCard-price::text').get(default='').strip()
            image_url = product.css('img::attr(src)').get()
            link = product.css('a::attr(href)').get()

            yield {
                'title': title,
                'price': price,
                'image_url': image_url,
                # urljoin() on a missing href would return the listing URL
                # itself, which would look like a valid product link.
                'link': response.urljoin(link) if link else None
            }

        # Follow pagination links (if any)
        next_page = response.css('a[rel="next"]::attr(href)').get()
        if next_page is not None:
            yield response.follow(next_page, self.parse)


# eBay API

## Getting key using OAuth

In [9]:
import os
from dotenv import load_dotenv
import requests
import base64
import json

# Obtain an eBay application access token with the OAuth client-credentials
# grant. On success the token is left in `access_token` for later cells.

# Load environment variables from .env file
load_dotenv()

# Get the environment variables
CLIENT_ID = os.getenv('EBAY_APP_ID')
CLIENT_SECRET = os.getenv('EBAY_CERT_ID')
OAUTH_URL = 'https://api.ebay.com/identity/v1/oauth2/token'

# os.getenv returns None for unset variables; without this check the request
# would be sent with the literal credentials "None:None".
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'EBAY_APP_ID and EBAY_CERT_ID must be set (for example in .env) '
        'before requesting a token.'
    )

# Encode the client ID and client secret for the HTTP Basic Authorization header
credentials = base64.b64encode(f'{CLIENT_ID}:{CLIENT_SECRET}'.encode('utf-8')).decode('utf-8')

headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Authorization': f'Basic {credentials}'
}

data = {
    'grant_type': 'client_credentials',
    'scope': 'https://api.ebay.com/oauth/api_scope'
}

# The timeout keeps a stalled connection from hanging the kernel
response = requests.post(OAUTH_URL, headers=headers, data=data, timeout=30)
if response.status_code == 200:
    access_token = response.json()['access_token']
    # The token is deliberately not printed so it does not end up in saved output
else:
    print(f'Error: {response.status_code}')
    # Error bodies are not guaranteed to be JSON; fall back to the raw text
    try:
        print(response.json())
    except ValueError:
        print(response.text)


## Using item_summary/search

In [10]:
import requests
import json

# Query the eBay Browse API (item_summary/search), save the returned item
# summaries to disk and print title, price and URL for each one.

# Bearer token obtained in the OAuth step
ACCESS_TOKEN = access_token
endpoint = 'https://api.ebay.com/buy/browse/v1/item_summary/search'

# Request headers carrying the bearer token
headers = {'Authorization': f'Bearer {ACCESS_TOKEN}', 'Content-Type': 'application/json'}

# Query parameters: search keyword and number of results per page
params = {'q': 'sofa pink', 'limit': 3}

# Make the API request
response = requests.get(endpoint, headers=headers, params=params)

if response.status_code != 200:
    # Report the failure and show the error payload
    print(f'Error: {response.status_code}')
    print(response.json())
else:
    data = response.json()
    items = data.get('itemSummaries', [])

    # Save the item summaries
    with open('ebay_browse_search_results.json', 'w') as file:
        json.dump(items, file, indent=4)

    # Print the results, substituting placeholders for absent fields
    for item in items:
        title = item.get('title', 'No title')
        price = item.get('price', {}).get('value', 'No price')
        currency = item.get('price', {}).get('currency', 'No currency')
        item_url = item.get('itemWebUrl', 'No URL')
        for line in (
            f'Title: {title}',
            f'Price: {price} {currency}',
            f'Item URL: {item_url}',
            '\n',
        ):
            print(line)


Title: 2 Seater Velvet Sofa Modern Small Loveseat Couch for Bedroom Living Room Studio
Price: 139.99 USD
Item URL: https://www.ebay.com/itm/196217192737?var=496063550789&hash=item2daf74c121:g:fbcAAOSwL6pluGCf&amdata=enc%3AAQAJAAAA4PEaUTtyjRomc%2FfZSCEH8KizbMoUo%2FEgtt7RvBI5iveetYdLhg41zuttrVtMTz0dl6dz8jVZPkbhgIL8sgBG2EufumXDvLBUinE7Vnh9UNzS7On4DooXzYGf9SvAyhDDVH%2Fs07a4q54sGy2whO7QI5l7qL5Zlea0HUoHgva4uSAdNWYgrzf2kdWAoMzfgESZ9%2BjC1pEgUck6gGYrkD2Oj3gNzFkalnwuEmYRqehJ0fCik%2F4VaK8w9gv2lFntqn9l18K3wgUPENhfTxlE20UoP8IC3hgipdoiHWfuZOKeu5pf


Title: FREE SHIPPING!! futon sofa couch bed sleeper In PINK
Price: 225.00 USD
Item URL: https://www.ebay.com/itm/375458391698?hash=item576b106692:g:dcgAAOSwuFxmWIml&amdata=enc%3AAQAJAAAA4NiAsWLxb55ZmGnJAUngvU8zE1mGmH30kv9I6GMaUM%2BMhG9bDg5j8oSp%2B%2BX6kbJkSqvnxpzcXcRNKCGw6%2FhBbYs2xeutAg9pTBniEcloAZImF6h5Zm1PHlsxhNrv5qM9K5iKXCmWyF5NlZtZzf%2BlcIXA5hDX1OOjNRfIgBFeq%2BOypjNuBq4shaE4stnR9J1ZW3gcs0OlrdZiEl005K0bQ6PLki8GqKf8XBgS5BtoSBOEVJyc%2BXqPV6VXHCIJxh7xa