In [2]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime
from math import ceil
import pandas as pd

In [1]:
def sellerURL(url=None,bs=None):
    """Return the href of the first link inside the page's ``div.si-pd-a`` box.

    Per the original note this is meant to be the URL of a seller's items
    (presumably the "seller's other items" link -- verify against the live markup).

    Parameters
    ----------
    url : str, optional
        Page address. It is only downloaded (with ``'&_pgn=1'`` appended)
        when no usable ``bs`` is supplied.
    bs : optional
        An already parsed page; takes precedence over ``url``.

    Returns
    -------
    The ``href`` attribute of the first ``<a href=...>`` found in the box.

    Raises
    ------
    AssertionError
        If neither ``url`` nor ``bs`` is given.
    """
    assert url or bs, 'no inputs'

    if not bs and url:
        response = requests.get(url + '&_pgn=1')
        bs = BeautifulSoup(response.text, 'html.parser')

    seller_box = bs.find('div', class_='si-pd-a')
    first_link = seller_box.find('a', href=True)
    return first_link['href']

def getResults(url=None,bs=None):
    """Return the number of results reported by an eBay results page.

    Two page layouts are handled: a ``<span class="rcnt">`` holding just the
    count, and otherwise an ``<h2 class="srp-controls__count-heading">`` whose
    second-to-last whitespace-separated word is the count.

    Parameters
    ----------
    url : str, optional
        Page address. It is only downloaded (with ``'&_pgn=1'`` appended)
        when no usable ``bs`` is supplied.
    bs : optional
        An already parsed page; takes precedence over ``url``.

    Returns
    -------
    int
        The result count, with thousands separators removed.

    Raises
    ------
    AssertionError
        If neither ``url`` nor ``bs`` is given.
    """
    assert url or bs, 'no inputs'

    if not bs and url:
        response = requests.get(url + '&_pgn=1')
        bs = BeautifulSoup(response.text, 'html.parser')

    def as_count(text):
        # counts are rendered with thousands separators, e.g. '1,234'
        return int(text.replace(',', ''))

    counter = bs.body.find('span', class_='rcnt')
    if not counter:
        # newer layout: the count is the word just before the trailing label
        heading = bs.body.find('h2', class_='srp-controls__count-heading').string
        words = heading.split()
        return as_count(words[-2])

    return as_count(counter.string)

def getResultsPerPage(url=None,bs=None):
    """Count the listing elements present on one results page.

    Parameters
    ----------
    url : str, optional
        Page address. It is only downloaded (with ``'&_pgn=1'`` appended)
        when no usable ``bs`` is supplied.
    bs : optional
        An already parsed page; takes precedence over ``url``.

    Returns
    -------
    int
        Number of elements with class ``'s-item isKebab'`` inside the first
        element with class ``'container'`` of the page body.

    Raises
    ------
    AssertionError
        If neither ``url`` nor ``bs`` is given.
    """
    assert url or bs, 'no inputs'

    if not bs and url:
        response = requests.get(url + '&_pgn=1')
        bs = BeautifulSoup(response.text, 'html.parser')

    container = bs.body.find(class_='container')
    listings = container.find_all(class_='s-item isKebab')
    return len(listings)

In [44]:
# Layouts tried, in this order, for the hidden text of an item's end time.
_END_TIME_FORMATS = (
    'Ending %d %b at %H:%M EDT',
    '%d %b at %H:%M',
    'Ending %d %b at %H:%M EST',
)


def _parse_end_time(text):
    """Parse an item's end-time text into a datetime in the current year.

    The page text carries no year, so the current year is prepended before
    parsing. Parsing a year-less date would default to 1900, which makes
    "29 Feb" unparseable even in a leap year.

    Raises AssertionError (carrying the raw text) when no known layout
    matches. The exception type is the one the original inline ``assert``
    produced, but it is raised explicitly so it survives ``python -O``.
    """
    year = datetime.now().year
    for fmt in _END_TIME_FORMATS:
        try:
            return datetime.strptime('{} {}'.format(year, text), '%Y ' + fmt)
        except ValueError:
            continue
    raise AssertionError(text)


def _item_title(item):
    """Return the title text of one listing, trying three markup variants."""
    title = item.find(class_="s-item__title").string
    if title:
        return title

    # .string is None when the title element has several children; in that
    # case the text is taken from the node following a highlight badge ...
    badge = item.find(class_="LIGHT_HIGHLIGHT")
    if badge:
        return badge.next_sibling.string

    # ... or from the node following the first <div> of a tagged title.
    tagged = item.find(class_="s-item__title s-item__title--has-tags")
    return tagged.find('div').next_sibling.string


def getEbayData(url):
    """Scrape every page of an eBay results URL into a DataFrame.

    The first page is downloaded once to read the total result count and the
    number of listings per page; every page from 1 through the last one is
    then parsed. For each listing two further pages are requested (via
    ``sellerURL`` and ``getResults``) to count the seller's items.

    Parameters
    ----------
    url : str
        Results-page address. ``'&_pgn=<page>'`` is appended to it, so it is
        expected to already contain a query string.

    Returns
    -------
    pandas.DataFrame
        One row per listing with columns ``title``, ``price``, ``shipping``,
        ``end``, ``seller_items`` and ``url``. ``price``, ``shipping`` and
        ``end`` are left missing when the listing has no such element.

    Raises
    ------
    AssertionError
        If a listing's end-time text matches none of the known layouts.
    """
    html = requests.get(url + '&_pgn=1').text
    first_page = BeautifulSoup(html,'html.parser')

    nResults = getResults(bs=first_page)
    resultsPerPage = getResultsPerPage(bs=first_page)
    # An empty first page means there is nothing to paginate over; the guard
    # also avoids dividing by zero.
    npages = ceil(nResults/resultsPerPage) if resultsPerPage else 0
    data = []

    # Pages are numbered from 1 and the last page must be included.
    for page in range(1, npages + 1):
        if page == 1:
            # already downloaded above; do not request it a second time
            sub = first_page
        else:
            html = requests.get(url + '&_pgn=' + str(page)).text
            sub = BeautifulSoup(html,'html.parser')
        info = sub.body.find(class_='container')

        for item in info.find_all(class_='s-item isKebab'):
            dic = {}

            price = item.find(class_='s-item__price')
            if price:
                # drop the 3-character prefix (presumably a currency marker
                # such as 'C $' -- confirm) and any thousands separators
                dic['price'] = float(price.string[3:].replace(',', ''))

            dic['title'] = _item_title(item)

            shipping_tag = item.find(class_='s-item__shipping s-item__logisticsCost')
            if shipping_tag and shipping_tag.string:
                # strip a 3-character prefix and a 9-character suffix
                # (presumably ' shipping'); 'Free shipping' reduces to 'e'
                shipping = shipping_tag.string[3:-9]
                if shipping == 'e':
                    dic['shipping'] = 0
                else:
                    dic['shipping'] = float(shipping.replace(',', ''))

            time_item = item.find(class_='s-item__time')
            if time_item:
                bid_time = time_item.find(class_='clipped').string
                dic['end'] = _parse_end_time(bid_time)

            dic['url'] = item.find(class_='s-item__link')['href']
            sURL = sellerURL(dic['url'])
            dic['seller_items'] = getResults(url=sURL)

            data.append(dic)

    df = pd.DataFrame(data,columns=['title','price','shipping','end','seller_items','url'])
    return df

In [None]:
# Results page to scrape. The query string asks for LH_Auction=1, _ipg=200 and
# LH_Complete=1 (presumably: auctions only, 200 per page, completed listings).
url = 'https://www.ebay.ca/b/NVIDIA-GeForce-GTX-1070-Graphics-Video-Cards/27386/bn_110679507?rt=nc&LH_Auction=1&_ipg=200&LH_Complete=1'
# Destination CSV, written relative to the notebook's working directory.
filename = 'gtx1070 history.csv'

# Scrape all pages, then persist the table (comma is to_csv's default separator).
df = getEbayData(url)
df.to_csv(filename)