In [2]:
import os
from datetime import datetime
from math import ceil

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [79]:
def sellerURL(url=None,bs=None):
    """Return the href of the first link inside the page's 'si-pd-a' div.

    Either ``url`` or an already parsed ``bs`` document must be supplied.
    When only ``url`` is given, the page at ``url + '&_pgn=1'`` is downloaded
    and parsed with ``html.parser``.

    NOTE(review): 'si-pd-a' is presumably the seller panel of an item page,
    and the link presumably leads to the seller's other items -- confirm
    against the current eBay markup.

    Args:
        url: Address of the page to download (used only when ``bs`` is falsy).
        bs: Parsed document to search instead of downloading.

    Returns:
        The value of the ``href`` attribute of the first ``<a href=...>``
        found inside the first ``<div class="si-pd-a">``.

    Raises:
        AssertionError: If neither ``url`` nor ``bs`` is provided.
    """
    assert url or bs, 'no inputs'

    page = bs
    if url and not page:
        response = requests.get(url + '&_pgn=1')
        page = BeautifulSoup(response.text, 'html.parser')

    seller_box = page.find('div', class_='si-pd-a')
    items_link = seller_box.find('a', href=True)
    return items_link['href']

def getResults(url=None,bs=None):
    """Return the number of results reported by an eBay results page.

    Either ``url`` or an already parsed ``bs`` document must be supplied.
    When only ``url`` is given, the page at ``url + '&_pgn=1'`` is downloaded
    and parsed with ``html.parser``.

    Two page layouts are tried in order:

    1. ``<span class="rcnt">`` whose text is the count itself.
    2. ``<h2 class="srp-controls__count-heading">`` where the count is the
       second-to-last whitespace-separated token of the heading text.

    Args:
        url: Address of the results page (used only when ``bs`` is falsy).
        bs: Parsed document to inspect instead of downloading.

    Returns:
        The result count as an ``int`` (thousands separators removed), or
        ``0`` when neither element is present on the page.

    Raises:
        AssertionError: If neither ``url`` nor ``bs`` is provided.
    """
    assert url or bs, 'no inputs'
    if url and not bs:
        html = requests.get(url + '&_pgn=1').text
        bs = BeautifulSoup(html,'html.parser')

    count_span = bs.body.find('span',class_='rcnt')
    if count_span is not None:
        return int(count_span.string.replace(',',''))

    heading = bs.body.find('h2',class_='srp-controls__count-heading')
    if heading is None:
        # Neither layout matched. The original author observed this for the
        # page "One or more of the seller User IDs you entered was not found."
        # Only this specific situation maps to 0; any other failure now
        # propagates instead of being silently swallowed.
        return 0
    return int(heading.string.split()[-2].replace(',',''))

def getResultsPerPage(url=None,bs=None):
    """Count the listing entries present on one eBay results page.

    Either ``url`` or an already parsed ``bs`` document must be supplied.
    When only ``url`` is given, the page at ``url + '&_pgn=1'`` is downloaded
    and parsed with ``html.parser``.

    Args:
        url: Address of the results page (used only when ``bs`` is falsy).
        bs: Parsed document to inspect instead of downloading.

    Returns:
        The number of elements with class ``'s-item isKebab'`` found inside
        the first element with class ``'container'`` in the document body.

    Raises:
        AssertionError: If neither ``url`` nor ``bs`` is provided.
    """
    assert url or bs, 'no inputs'

    page = bs
    if url and not page:
        response = requests.get(url + '&_pgn=1')
        page = BeautifulSoup(response.text, 'html.parser')

    listing_area = page.body.find(class_='container')
    listings = listing_area.find_all(class_='s-item isKebab')
    return len(listings)

In [75]:
# strptime patterns for end times that name an explicit day and month.
# The time-zone abbreviation is matched literally and otherwise ignored.
_DATED_END_FORMATS = (
    'Ending %d %b at %H:%M EDT',
    '%d %b at %H:%M',
    'Ending %d %b at %H:%M EST',
)

# strptime patterns for end times expressed relative to the current day.
_TODAY_END_FORMATS = (
    'Today at %H:%M EDT',
    'Today at %H:%M EST',
    'Ending Today at %H:%M EDT',
    'Ending Today at %H:%M EST',
)

# Column order of the frame returned by getEbayData.
_EBAY_COLUMNS = ['title','price','shipping','end','seller_items','url']


def _parse_end_time(text, now=None):
    """Turn an eBay end-time label into a naive datetime in the current year.

    Args:
        text: Label such as ``'Ending 5 Mar at 14:30 EDT'`` or
            ``'Today at 09:15 EST'``.
        now: Reference moment supplying the year (and, for "Today" labels,
            the month and day). Defaults to ``datetime.now()``.

    Returns:
        A ``datetime`` without tzinfo.

    Raises:
        ValueError: If ``text`` matches none of the known label formats; the
            message contains the offending text.
    """
    now = now or datetime.now()
    # The labels carry no year. Appending the current one before parsing
    # avoids strptime's default year 1900, which rejects 29 February.
    stamped = '{} {}'.format(text, now.year)

    for fmt in _DATED_END_FORMATS:
        try:
            return datetime.strptime(stamped, fmt + ' %Y')
        except ValueError:
            continue

    for fmt in _TODAY_END_FORMATS:
        try:
            parsed = datetime.strptime(stamped, fmt + ' %Y')
        except ValueError:
            continue
        # datetime objects are immutable: replace() returns a new value that
        # has to be kept, otherwise the date silently stays at 1 January.
        return parsed.replace(month=now.month, day=now.day)

    raise ValueError('unrecognised end time: {!r}'.format(text))


def _extract_title(item):
    """Return the listing title, trying the fallbacks for decorated titles."""
    title = item.find(class_="s-item__title").string
    if title:
        return title
    # .string is None when the title element has more than one child; the
    # text is then taken from the node following the decoration element.
    highlight = item.find(class_="LIGHT_HIGHLIGHT")
    if highlight:
        return highlight.next_sibling.string
    tagged = item.find(class_="s-item__title s-item__title--has-tags")
    return tagged.find('div').next_sibling.string


def _parse_item(item):
    """Build the record dict for one listing element of a results page."""
    dic = {}

    # Drops the first three characters of the price text before converting;
    # the original author notes this does not work with USD prices.
    price = item.find(class_='s-item__price')
    if price:
        dic['price'] = float(price.string[3:].replace(',',''))

    dic['title'] = _extract_title(item)

    # Best effort: a missing element, missing text or non-numeric text
    # (NOTE(review): presumably e.g. free shipping) is recorded as 0.
    try:
        shipping = item.find(class_='s-item__shipping s-item__logisticsCost').string[3:-9]
        dic['shipping'] = float(shipping.replace(',',''))
    except (AttributeError, TypeError, ValueError):
        dic['shipping'] = 0

    time_item = item.find(class_='s-item__time')
    if time_item:
        dic['end'] = _parse_end_time(time_item.find(class_='clipped').string)

    dic['url'] = item.find(class_='s-item__link')['href']
    # Two extra requests per listing: the item page, then the page that
    # sellerURL extracts from it, whose result count is recorded.
    dic['seller_items'] = getResults(url=sellerURL(dic['url']))
    return dic


def getEbayData(url):
    """Scrape every page of an eBay results listing into a DataFrame.

    The first page is downloaded once to learn the total number of results
    and the number of listings per page; all pages ``1..npages`` (inclusive)
    are then parsed, reusing the already downloaded first page.

    Args:
        url: Results-page URL. ``'&_pgn=<page>'`` is appended to it, so it
            must already contain a query string.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``price``,
        ``shipping``, ``end``, ``seller_items`` and ``url``; one row per
        listing. The frame is empty when the page reports no results.

    Raises:
        ValueError: If the page reports results but no listing element could
            be found on it, or if a listing's end time has an unknown format.
    """
    html = requests.get(url + '&_pgn=1').text
    first_page = BeautifulSoup(html,'html.parser')

    nResults = getResults(bs=first_page)
    if nResults == 0:
        return pd.DataFrame([],columns=_EBAY_COLUMNS)

    resultsPerPage = getResultsPerPage(bs=first_page)
    if resultsPerPage == 0:
        raise ValueError(
            'page reports {} results but no listing elements were found'.format(nResults)
        )

    npages = ceil(nResults/resultsPerPage)
    data = []

    # eBay pages are numbered from 1, so the upper bound must be inclusive.
    for page in range(1,npages + 1):
        if page == 1:
            sub = first_page
        else:
            html = requests.get(url + '&_pgn=' + str(page)).text
            sub = BeautifulSoup(html,'html.parser')

        info = sub.body.find(class_='container')
        for item in info.find_all(class_='s-item isKebab'):
            data.append(_parse_item(item))

    return pd.DataFrame(data,columns=_EBAY_COLUMNS)

In [80]:
# Completed auctions for GTX 1070 cards on ebay.ca, 200 listings per page.
url = 'https://www.ebay.ca/b/NVIDIA-GeForce-GTX-1070-Graphics-Video-Cards/27386/bn_110679507?rt=nc&LH_Auction=1&_ipg=200&LH_Complete=1'

# Write the CSV to the location the analysis cell reads it from, creating the
# folder first so a fresh Restart & Run All does not fail on a missing path.
filename = 'ebay data/gtx1070 history.csv'
os.makedirs(os.path.dirname(filename), exist_ok=True)

df = getEbayData(url)
df.to_csv(filename, sep=',')

In [92]:
# Reload the scraped history and rank listings by what the buyer actually
# paid (item price plus shipping). Built as one chain so that re-running the
# cell always starts from the file instead of mutating a previous `df`.
df = (
    pd.read_csv('ebay data/gtx1070 history.csv',index_col=0)
    .assign(total_price=lambda frame: frame['price'] + frame['shipping'])
    .sort_values(by=['total_price'])
)

# Show full URLs. None means "no truncation"; the former value -1 is
# deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)
df[['total_price','seller_items','url']].head(50)

Unnamed: 0,total_price,seller_items,url
179,76.36,3935,https://www.ebay.ca/itm/NVIDIA-GeForce-GTX-SLI-HB-Bridge-3-Slot/123542651943?epid=2255448227&hash=item1cc3b74427:g:s80AAOSwhfRcEn7D
634,82.69,322,https://www.ebay.ca/itm/SLI-HB-Bridge-3-SLOT-NVIDIA-Geforce-GTX/283406534555?epid=1478803093&hash=item41fc58879b:g:mRgAAOSw8U9cLZFj
641,109.38,1,https://www.ebay.ca/itm/NVIDIA-Geforce-GTX-SLI-HB-Bridge-3-SLOT-for-Titan-X-Xp-GTX-1080-Ti-1080-1070/233162416066?hash=item3649901fc2:g:7dIAAOSw1UdchUh-
676,112.03,137,https://www.ebay.ca/itm/MSI-GEFORCE-GTX1070-DONOR/292990219939?hash=item443793f6a3:g:7nEAAOSwya1ce7Z~
659,156.38,0,https://www.ebay.ca/itm/Gigabyte-GeForce-GTX-1070-Mini-ITX-8G-Graphics-Card/173798379137?epid=19028028575&hash=item287730ce81:g:aKQAAOSwR0dcR~5K
650,156.5,137,https://www.ebay.ca/itm/EVGA-GTX-1070-FTW-8GB-GDDR5-Graphics-Card-nVIDIA-NO-POWER-READ-DESC/273676732458?epid=2008756634&hash=item3fb867882a:g:Jd0AAOSwCYJcT61u
651,158.65,136,https://www.ebay.ca/itm/PNY-GTX-1070-OC-8GB-For-Parts-or-not-working/323731228135?hash=item4b5fe285e7:g:KC8AAOSwBsxchcdD
666,160.05,0,https://www.ebay.ca/itm/Galax-KFA2-Nvidia-GTX-1070-8Gb-FAULTY-Please-read-carefully/264127864740?hash=item3d7f3f5fa4:g:K3MAAOSw-gFbo7dI
727,168.38,0,https://www.ebay.ca/itm/Gigabyte-GeForce-GTX-1070-Mini-ITX-8G-Graphics-Card/173787084378?epid=19028028575&hash=item287684765a:g:aKQAAOSwR0dcR~5K
722,177.06,267,https://www.ebay.ca/itm/PALiT-NVIDIA-GeForce-GTX-1070-Dual-8GB-Gaming-Graphics-Card-READ-DESCRIPTION/123676171856?epid=2254417182&hash=item1ccbac9e50:g:PDUAAOSw0d1cARlp
