In [1]:
import requests #pip install requests
from bs4 import BeautifulSoup #pip install bs4
import os
import time
import json
import re
import pandas as pd

APIs used:
- Beautiful Soup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
- Requests: https://requests.readthedocs.io/en/master/

Further work:
- Database: https://medium.com/analytics-vidhya/tutorial-amazon-price-tracker-using-python-and-mongodb-part-1-aece6347ec63
- Proxy servers, exceptions, crawlers: https://blog.hartleybrody.com/scrape-amazon/

In [2]:
# Goodreads book page used as the example target by the cells below.
url="https://www.goodreads.com/book/show/10399742-private-london?ac=1&from_search=true&qid=MkvcCoAwzb&rank=1"

In [3]:
# HTTP headers sent with every request made by ParseHTML. The User-Agent is a
# desktop Chrome string (presumably so the site serves the normal page - TODO confirm).
headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"}


In [4]:
def ParseHTML(url, printHTML = False, timeout = 10):
    """Download a page and parse it into a BeautifulSoup document.

    The request is sent with the module-level ``headers`` dictionary.

    Args:
        url: Address of the page to fetch.
        printHTML: When true, print the parsed document.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The parsed ``BeautifulSoup`` object, or ``None`` when the server
        answers with a status code other than 200.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...). These are deliberately not swallowed.
    """
    r = requests.get(url, headers=headers, timeout=timeout)

    if r.status_code != 200:
        print("Page %s could not be retrieved (it may have been blocked) as the status code was %d"%(url,r.status_code))
        return None

    soup = BeautifulSoup(r.content, features="lxml")
    # Report success only once the document has actually been parsed.
    print("Successfully parsed page")

    if printHTML:
        print(soup)

    return soup

In [5]:
# Fetch and parse the example page once; the following cells reuse `html`.
# Note: ParseHTML returns None when the response status is not 200.
html = ParseHTML(url, False)

Successfully parsed page


In [6]:
def GetBookName_goodreads(html):
    """Return the book title from a parsed book page.

    The title is the text of the element whose id is ``bookTitle``, with
    surrounding whitespace removed.
    """
    return html.find(id='bookTitle').text.strip()

In [7]:
# Show the title extracted from the page parsed above.
GetBookName_goodreads(html)

'Private London'

In [8]:
def GetBookRating_goodreads(html):
    """Return the book rating from a parsed book page as a float.

    The rating is read from the element carrying ``itemprop="ratingValue"``;
    its text is stripped of whitespace before conversion.
    """
    ratingText = html.find(itemprop="ratingValue").text.strip()
    return float(ratingText)
    

In [9]:
# Show the rating extracted from the page parsed above.
GetBookRating_goodreads(html)

3.79

In [10]:
def GetSimiliarBooks_goodreads(html):
    """Collect the suggested books listed on a parsed book page.

    Every ``<li class="cover">`` element is inspected: the link target comes
    from its first ``<a>`` tag and the title from the ``alt`` text of its
    first ``<img>`` tag, cut at the first ``' |'`` separator.

    Items missing the link, the image, or either attribute are skipped so a
    single malformed entry does not abort the whole extraction.

    Args:
        html: Parsed page exposing ``find_all``.

    Returns:
        dict mapping suggestion title to its URL. If two suggestions share a
        title, the later one wins.
    """
    suggestionsDict = {}

    for suggestion in html.find_all("li", attrs={"class": "cover"}):
        linkTag = suggestion.find("a")
        imageTag = suggestion.find("img")
        if linkTag is None or imageTag is None:
            continue

        # .get() yields None for a missing attribute instead of raising KeyError.
        bookUrl = linkTag.get("href")
        altText = imageTag.get("alt")
        if bookUrl is None or altText is None:
            continue

        title = altText.split(' |')[0]
        suggestionsDict[title] = bookUrl

    return suggestionsDict


In [11]:
# Show the title -> URL mapping of suggested books found on the page parsed above.
GetSimiliarBooks_goodreads(html)


{'Private Down Under: by James Patterson & Michael White': 'https://www.goodreads.com/book/show/24168606-private-down-under',
 'Private L.A.: by James Patterson and Mark Sullivan -- Review': 'https://www.goodreads.com/book/show/20938684-private-l-a',
 'Firing Point (Jack Ryan Universe,  #29)': 'https://www.goodreads.com/book/show/48733083-firing-point',
 'Private (Private, #1)': 'https://www.goodreads.com/book/show/7134202-private',
 'Private #1 Suspect (Private, #2)': 'https://www.goodreads.com/book/show/10808006-private-1-suspect',
 'Private Games (Private, #3)': 'https://www.goodreads.com/book/show/11343348-private-games',
 'Private Berlin (Private, #5)': 'https://www.goodreads.com/book/show/14781219-private-berlin',
 'Private L.A. (Private, #6)': 'https://www.goodreads.com/book/show/17724749-private-l-a',
 'Private: Oz (Private, #7)': 'https://www.goodreads.com/book/show/16009141-private',
 'Private India (Private, #8)': 'https://www.goodreads.com/book/show/19186402-private-india',

In [12]:
def MonitorRating(url, minimumRating = 4, maximumTime = 10, pollInterval = 3):
    """Poll a book page until its rating reaches a threshold or time runs out.

    Args:
        url: Address of the book page to monitor.
        minimumRating: Rating the book must reach (inclusive) to be recommended.
        maximumTime: Maximum number of seconds to keep polling.
        pollInterval: Seconds to wait between two consecutive checks.

    Returns:
        " GO READ" if the first fetch already meets the threshold,
        "GO READ!!!" if the threshold is met while polling, and
        "Sorry" otherwise (including when the page cannot be loaded at all).
        Before returning "Sorry" after polling, the suggested books found on
        the last successfully parsed page are printed.
    """
    # Parse the html to obtain the book name and current rating
    html = ParseHTML(url)
    if html is None:
        # ParseHTML already reported the status code; nothing to monitor.
        print("Could not load the book page, so the rating cannot be monitored. \n")
        return "Sorry"

    bookRating = GetBookRating_goodreads(html)
    bookName = GetBookName_goodreads(html)

    # Sanity check. Uses >= so a rating exactly equal to the minimum counts as
    # a match; with a strict > it would skip the loop below and end as "Sorry".
    if bookRating >= minimumRating:
        return " GO READ"

    print("Monitoring book: " + bookName + " \n")
    startTime = time.time()

    # Two conditions to keep checking:
        # 1. The minimum rating is higher than the current book rating
        # 2. The time we have spent running the bot is less than the maximumTime
    while bookRating < minimumRating and time.time() - startTime < maximumTime:

        latestHtml = ParseHTML(url)

        # A failed fetch returns None: keep the last good page and try again
        # after the pause instead of crashing on None.
        if latestHtml is not None:
            html = latestHtml
            bookRating = GetBookRating_goodreads(html)

            print(" Time: " + str(time.ctime()) )
            print(" Rating: " + str(bookRating) + " \n")

            if bookRating >= minimumRating:
                return "GO READ!!!"

        # Wait before checking again
        time.sleep(pollInterval)

    similiarBooks = GetSimiliarBooks_goodreads(html)
    print("Ratings did not match required value in given time frame. \n")
    print("Some similiar books:")

    for book, bookUrl in similiarBooks.items():
        print(book)
        print(bookUrl)
        print()

    return "Sorry"

In [13]:
# Monitor the example book with minimumRating=4 for at most maximumTime=10 seconds.
MonitorRating(url, 4, 10)

Successfully parsed page
Monitoring book: Private London 

Successfully parsed page
 Time: Sat Oct  3 03:32:25 2020
 Rating: 3.79 

Successfully parsed page
 Time: Sat Oct  3 03:32:32 2020
 Rating: 3.79 

Ratings did not match required value in given time frame. 

Some similiar books:
Private Down Under: by James Patterson & Michael White
https://www.goodreads.com/book/show/24168606-private-down-under

Private L.A.: by James Patterson and Mark Sullivan -- Review
https://www.goodreads.com/book/show/20938684-private-l-a

Firing Point (Jack Ryan Universe,  #29)
https://www.goodreads.com/book/show/48733083-firing-point

Private (Private, #1)
https://www.goodreads.com/book/show/7134202-private

Private #1 Suspect (Private, #2)
https://www.goodreads.com/book/show/10808006-private-1-suspect

Private Games (Private, #3)
https://www.goodreads.com/book/show/11343348-private-games

Private Berlin (Private, #5)
https://www.goodreads.com/book/show/14781219-private-berlin

Private L.A. (Private, #6)

'Sorry'