from __future__ import print_function
import ConfigParser
from goodreads import client
from bs4 import BeautifulSoup
import sys
import csv
# Path of the saved HTML page that main() hands to get_notes_urls().
# NOTE(review): the original comment stopped at "Downloaded HTML for" -- the
# page/URL it named is missing from this copy; confirm where the file comes from.
EXAMPLE_NOTES_HTML_FILE = "annotatedBookItem__knhLink.html"
def get_notes_urls(filename):
    """Collect the link targets of the notes anchors in a saved HTML file.

    Args:
        filename: path of the HTML file to parse.

    Returns:
        A list with the ``href`` value of every ``<a>`` tag whose class is
        ``annotatedBookItem__knhLink``, in document order. An anchor without
        an ``href`` attribute contributes ``None``.
    """
    with open(filename) as page:
        parsed = BeautifulSoup(page, 'html.parser')

    hrefs = []
    for anchor in parsed.find_all('a', class_="annotatedBookItem__knhLink"):
        hrefs.append(anchor.get('href'))
    return hrefs
def get_api_creds():
    """Return the API credentials as an ``(api_key, api_secret)`` tuple.

    The values come from the ``key`` and ``secret`` options of the
    ``[developer]`` section of ``api-key.ini``, which is looked up relative
    to the current working directory.

    Raises:
        ConfigParser.NoSectionError: if ``api-key.ini`` is missing or has no
            ``[developer]`` section.
        ConfigParser.NoOptionError: if ``key`` or ``secret`` is absent.
    """
    config = ConfigParser.RawConfigParser()
    # read() silently skips files it cannot open, so a missing file only
    # surfaces as NoSectionError from the get() calls below.
    config.read('api-key.ini')
    return (config.get('developer', 'key'), config.get('developer', 'secret'))
def url_to_book_dict(url, gc):
# Take a notes URL and a Goodreads client and return the book object's dict,
# extended with two extra keys: 'highlight_url' (the URL passed in) and
# 'book_num' (the integer parsed from that URL).
# NOTE(review): the example URL that followed "ex:" is missing from this copy.
# The book number is the text after the first len(PREFIX) characters of `url`,
# up to (not including) the first "-".
# NOTE(review): PREFIX is not defined in the visible part of the file --
# confirm it is declared elsewhere before running.
book_num = int(url[len(PREFIX):].split("-")[0])
# NOTE(review): the right-hand side of this assignment is missing, so the line
# is a syntax error as it stands. Presumably it fetched the book for `book_num`
# through `gc` (otherwise unused here) -- restore from the original source.
book_dict =
book_dict['highlight_url'] = url
book_dict['book_num'] = book_num
return book_dict
def main():
    """Write one CSV row per book found in the example notes page to stdout.

    Builds a Goodreads client from the stored credentials, converts every
    notes URL found in EXAMPLE_NOTES_HTML_FILE into a book dict, and emits
    the selected columns as CSV (header line first) on standard output.
    """
    gc = client.GoodreadsClient(*get_api_creds())

    limit = None  # set to 1 for faster debugging
    notes_urls = get_notes_urls(EXAMPLE_NOTES_HTML_FILE)[0:limit]
    books = [url_to_book_dict(url, gc) for url in notes_urls]

    fields = "title book_num num_pages average_rating link highlight_url ratings_count rating_dist".split()
    # extrasaction='ignore' drops any book-dict keys that are not listed in
    # `fields` instead of raising ValueError.
    writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
    # The writer was previously created but never used, so nothing was output.
    writer.writeheader()
    writer.writerows(books)
if __name__ == "__main__":