## Inspect Data

You can perform a volumes search by sending an HTTP GET request to the following URI:


`https://www.googleapis.com/books/v1/volumes?q=search+terms`

This request has a single required parameter:

`q` - Search for volumes that contain this text string. You can add special keywords to the search terms to restrict the search to particular fields:
- intitle: Returns results where the text following this keyword is found in the title.
- inauthor: Returns results where the text following this keyword is found in the author.
- inpublisher: Returns results where the text following this keyword is found in the publisher.
- subject: Returns results where the text following this keyword is listed in the category list of the volume.
- isbn: Returns results where the text following this keyword is the ISBN number.
- lccn: Returns results where the text following this keyword is the Library of Congress Control Number.
- oclc: Returns results where the text following this keyword is the Online Computer Library Center number.

### Request
Here is an example of searching for Daniel Keyes' "Flowers for Algernon":


`GET https://www.googleapis.com/books/v1/volumes?q=flowers+inauthor:keyes&key=yourAPIKey`

## Get Authors

In [None]:
import requests

# Harvest single-author names for a subject from the Open Library search API
# and store the de-duplicated names, one per line, in a text file.

# Define the search parameters
category = 'fiction'
start_index = 0
max_results = 100000

# API URL (only the author_name field of each book is requested)
url = f"https://openlibrary.org/search.json?q=subject:{category}&start={start_index}&limit={max_results}&fields=author_name"

# Send the GET request
response = requests.get(url)
data = response.json()
authors = []

# Check if 'docs' is in the response
if 'docs' in data:
    # Print the total number of books found; fall back to the number of
    # returned documents if the response carries no 'num_found' entry
    print(f"Total books found: {data.get('num_found', len(data['docs']))}")

    # Keep only books credited to exactly one author
    for book in data['docs']:
        # Default to an empty list so books without an author are skipped;
        # a one-element placeholder would be stored as if it were a real author
        author = book.get('author_name', [])
        if len(author) == 1:
            authors.append(author[0])
else:
    print("No books found or 'docs' key is missing in the response.")

# Filter out duplicate authors
authors_filtered = set(authors)

# Store the filtered authors in a text file
data_path = "../data/authors_filtered.txt"
with open(data_path, 'w') as f:
    # Sets have no defined order; sort so the file is identical between runs
    for author in sorted(authors_filtered):
        f.write(f"{author}\n")

## Scraping Google Books

In [174]:
import os

import requests

# Read the Google Books API key from the environment instead of hard-coding it:
# a key stored in the notebook is exposed to everyone who can read the file.
# NOTE(review): the key that was previously committed here should be treated
# as compromised and revoked/rotated in the Google Cloud console.
# https://developers.google.com/books/docs/v1/using#APIKey
API_KEY = os.environ.get("GOOGLE_BOOKS_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_BOOKS_API_KEY environment variable to your Google Books API key."
    )

# Load the author names (one per line), skipping blank lines so that no empty
# `inauthor:` query is sent later on
authors_path = "../data/authors_filtered.txt"
with open(authors_path, 'r') as f:
    authors = [line.strip() for line in f if line.strip()]

In [None]:

def setting_api_params(author, start_index=0, language='en', country='US', max_results=40):
    """Build the query parameters for a Google Books volumes search by author.

    Args:
        author: Author name placed after the ``inauthor:`` search keyword.
        start_index: Value sent as ``startIndex``.
        language: Value sent as ``langRestrict``.
        country: Value sent as ``country``.
        max_results: Value sent as ``maxResults``.

    Returns:
        A dict of request parameters, including a ``fields`` mask that limits
        the response to the listed volume, sale and image attributes, and the
        module-level ``API_KEY`` under ``key``.
    """
    # The field mask is assembled from adjacent literals so that it contains
    # no whitespace or newlines.
    volume_info_fields = (
        'title,subtitle,authors,publisher,publishedDate,description,'
        'pageCount,printType,mainCategory,categories,averageRating,'
        'ratingsCount,language,previewLink,infoLink,canonicalVolumeLink,'
        'imageLinks(smallThumbnail,thumbnail)'
    )
    sale_info_fields = (
        'country,saleability,'
        'listPrice(amount,currencyCode),'
        'retailPrice(amount,currencyCode),'
        'buyLink'
    )
    fields_mask = (
        'totalItems,items('
        'kind,id,etag,selfLink,'
        f'volumeInfo({volume_info_fields}),'
        f'saleInfo({sale_info_fields})'
        ')'
    )

    return {
        'q': f'inauthor:{author}',
        'startIndex': start_index,
        'langRestrict': language,
        'country': country,
        'maxResults': max_results,
        'printType': 'books',
        'fields': fields_mask,
        'key': API_KEY,
    }

def get_books(author, start_index=0, timeout=10):
    """Fetch one page of Google Books volumes for an author.

    Args:
        author: Author name passed to ``setting_api_params``.
        start_index: Index of the first result to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The list stored under ``items`` in the JSON response, or an empty
        list when the response has no ``items`` key.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    url = "https://www.googleapis.com/books/v1/volumes"
    params = setting_api_params(author, start_index)
    response = requests.get(url, params=params, timeout=timeout)
    return response.json().get('items', [])

In [265]:
# Request "Stephen King" volumes starting at result index 81.
# NOTE(review): get_books as defined in this file returns only the list of
# items, so this two-value unpacking presumably dates from an earlier version
# that also returned totalItems -- confirm before re-running this cell.
books, total_items = get_books("Stephen King", 81)
total_items

1000000

In [240]:
# Display the current contents of `books`
books

[{'kind': 'books#volume',
  'id': 'i1CXzQEACAAJ',
  'etag': '8gQdulofmb8',
  'selfLink': 'https://www.googleapis.com/books/v1/volumes/i1CXzQEACAAJ',
  'volumeInfo': {'title': 'The Lost Work of Stephen King',
   'authors': ['Stephen Spignesi', 'Stephen King'],
   'publishedDate': '2020-10-31',
   'printType': 'BOOK',
   'language': 'en',
   'previewLink': 'http://books.google.com/books?id=i1CXzQEACAAJ&dq=inauthor:Stephen+King&hl=&as_pt=BOOKS&cd=82&source=gbs_api',
   'infoLink': 'http://books.google.com/books?id=i1CXzQEACAAJ&dq=inauthor:Stephen+King&hl=&as_pt=BOOKS&source=gbs_api',
   'canonicalVolumeLink': 'https://books.google.com/books/about/The_Lost_Work_of_Stephen_King.html?hl=&id=i1CXzQEACAAJ'},
  'saleInfo': {'country': 'US', 'saleability': 'NOT_FOR_SALE'}},
 {'kind': 'books#volume',
  'id': 'TumyoQEACAAJ',
  'etag': 'a8hruWcqESk',
  'selfLink': 'https://www.googleapis.com/books/v1/volumes/TumyoQEACAAJ',
  'volumeInfo': {'title': 'Stephen King Omnibus',
   'authors': ['Stephen Ki

In [215]:
# Collect the first page of results for each of the first three authors
# into a single flat list.
books = []
for author in authors[:3]:
    print(f"Fetching books for author: {author}")
    books_by_author = get_books(author)
    books += books_by_author


Fetching books for author: Estelle Laure
Fetching books for author: Diana Velez
Fetching books for author: Brod, Max


In [None]:

def filter_books(author, url, params, max_pages=3):
    """Collect the IDs of volumes whose only credited author is ``author``.

    Args:
        author: Author name to search for; a volume is kept only when its
            ``authors`` list equals ``[author]`` exactly.
        url: Unused; kept so existing callers keep working.
        params: Unused; kept so existing callers keep working.
        max_pages: Maximum number of result pages to request.

    Returns:
        A list of volume IDs, in the order the API returned them.
    """
    # totalItems is unreliable, so use a heuristic of a fixed number of pages
    # to gather more results per author.
    page_size = 40  # get_books relies on setting_api_params' default max_results
    ids = []
    for page in range(max_pages):
        books = get_books(author, start_index=page * page_size)
        if not books:
            # An empty page means there is nothing left to fetch.
            break

        # Build the filtered ID list directly instead of popping from `books`
        # while iterating it, which removed the wrong entries and skipped
        # others.
        ids.extend(
            book['id']
            for book in books
            if book.get('volumeInfo', {}).get('authors', []) == [author]
        )
    return ids


[{'kind': 'books#volume',
  'id': 'vMLRYgEACAAJ',
  'etag': 'd8tlTIWeoDo',
  'selfLink': 'https://www.googleapis.com/books/v1/volumes/vMLRYgEACAAJ',
  'volumeInfo': {'title': 'Franz Kafka',
   'subtitle': 'By Max Brod',
   'authors': ['MAX. BROD'],
   'publishedDate': '1960',
   'printType': 'BOOK',
   'language': 'en',
   'previewLink': 'http://books.google.com/books?id=vMLRYgEACAAJ&dq=inauthor:Brod,+Max&hl=&as_pt=BOOKS&cd=1&source=gbs_api',
   'infoLink': 'http://books.google.com/books?id=vMLRYgEACAAJ&dq=inauthor:Brod,+Max&hl=&as_pt=BOOKS&source=gbs_api',
   'canonicalVolumeLink': 'https://books.google.com/books/about/Franz_Kafka.html?hl=&id=vMLRYgEACAAJ'},
  'saleInfo': {'country': 'US', 'saleability': 'NOT_FOR_SALE'}},
 {'kind': 'books#volume',
  'id': '9B0ioAEACAAJ',
  'etag': 'I9gUKTung5s',
  'selfLink': 'https://www.googleapis.com/books/v1/volumes/9B0ioAEACAAJ',
  'volumeInfo': {'title': 'Letters. Max Brod (manuscript)',
   'subtitle': '',
   'authors': ['Max Brod', 'Marc Lavry'

In [244]:

# Remove the first volume from `books` and display it
books.pop(0)

{'kind': 'books#volume',
 'id': 'i1CXzQEACAAJ',
 'etag': '8gQdulofmb8',
 'selfLink': 'https://www.googleapis.com/books/v1/volumes/i1CXzQEACAAJ',
 'volumeInfo': {'title': 'The Lost Work of Stephen King',
  'authors': ['Stephen Spignesi', 'Stephen King'],
  'publishedDate': '2020-10-31',
  'printType': 'BOOK',
  'language': 'en',
  'previewLink': 'http://books.google.com/books?id=i1CXzQEACAAJ&dq=inauthor:Stephen+King&hl=&as_pt=BOOKS&cd=82&source=gbs_api',
  'infoLink': 'http://books.google.com/books?id=i1CXzQEACAAJ&dq=inauthor:Stephen+King&hl=&as_pt=BOOKS&source=gbs_api',
  'canonicalVolumeLink': 'https://books.google.com/books/about/The_Lost_Work_of_Stephen_King.html?hl=&id=i1CXzQEACAAJ'},
 'saleInfo': {'country': 'US', 'saleability': 'NOT_FOR_SALE'}}

In [None]:
# Filter `books` in place (slice assignment keeps the same list object),
# keeping only volumes whose authors list is exactly ["Stephen King"].
books[:] = [
    volume
    for volume in books
    if volume['volumeInfo'].get('authors', []) == ["Stephen King"]
]


In [264]:
# Display the current contents of `books`
books

[]

In [266]:
# Look up the authors of whatever `book` currently refers to.
# NOTE(review): `book` is not assigned in this cell; presumably it is a
# leftover loop variable from an earlier cell whose entries have no
# 'volumeInfo' key -- confirm which object it holds before relying on this.
book['volumeInfo'].get('authors', [])

KeyError: 'volumeInfo'