# CSV + API

In this reboot, we are going to use:

- The [Goodreads books](https://www.kaggle.com/jealousleopard/goodreadsbooks) dataset from Kaggle.
- The [Open Library Books API](https://openlibrary.org/dev/docs/api/books)

The goal of this livecode is to load the data from a CSV file, then loop over its rows to enrich each one with information from the API, such as:

- List of subjects (Science, Humor, Travel, etc.)
- The cover URL of the book
- Other information you'd find useful in the JSON API

In [66]:
!curl -L https://gist.githubusercontent.com/ssaunier/351b17f5a7a009808b60aeacd1f4a036/raw/books.csv > data/books.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1509k  100 1509k    0     0  1558k      0 --:--:-- --:--:-- --:--:-- 1563k


In [69]:
import pandas as pd
df = pd.read_csv('data/books.csv')

In [84]:
# Work on 5 books only, keeping just the columns needed to query the API.
# A fixed random_state makes the sample (and every output below) reproducible
# when the notebook is restarted and run from the top.
df = df[['title', 'isbn13']].sample(5, random_state=42)
df

Unnamed: 0,title,isbn13
4578,Cromartie High School Vol. 06,9781413902624
2540,Saint Joan/Major Barbara/Androcles and the Lion,9780394604800
5237,Strangers,9780425181119
12429,Paris: After the Liberation 1944-1949,9780142437926
6826,Sudden Fiction: American Short-Short Stories,9780879052652


In [85]:
import requests
# Call the Open Library Books API by hand for one hard-coded ISBN.
# The decoded JSON is a dict keyed by the requested bibkey ('ISBN:<number>').
url = 'https://openlibrary.org/api/books?bibkeys=ISBN%3A9780141182674&format=json&jscmd=data'
book_metadata = requests.get(url).json()

In [86]:
book_metadata['ISBN:9780141182674']

{'url': 'https://openlibrary.org/books/OL22464055M/On_the_Road',
 'key': '/books/OL22464055M',
 'title': 'On the Road',
 'authors': [{'url': 'https://openlibrary.org/authors/OL21491A/Jack_Kerouac',
   'name': 'Jack Kerouac'}],
 'number_of_pages': 280,
 'pagination': 'xxxii, 280 p. ;',
 'by_statement': 'Jack Kerouac ; introduction by Ann Charters.',
 'identifiers': {'librarything': ['3207'],
  'goodreads': ['2552'],
  'isbn_10': ['0141182679'],
  'isbn_13': ['9780141182674'],
  'openlibrary': ['OL22464055M']},
 'classifications': {'lc_classifications': ['PS3521.E735 .O5 1991'],
  'dewey_decimal_class': ['813/.54']},
 'publishers': [{'name': 'Penguin Books'}],
 'publish_places': [{'name': 'London'}, {'name': 'New York'}],
 'publish_date': '2000',
 'subjects': [{'name': 'Beat generation',
   'url': 'https://openlibrary.org/subjects/beat_generation'},
  {'name': 'Fiction', 'url': 'https://openlibrary.org/subjects/fiction'},
  {'name': 'Autobiographical fiction',
   'url': 'https://openlibr

In [87]:
import json
# Write the raw API response to book.json in the current working directory.
with open('book.json', 'w') as f:
    json.dump(book_metadata, f)

In [88]:
book_metadata["ISBN:9780141182674"].keys()

dict_keys(['url', 'key', 'title', 'authors', 'number_of_pages', 'pagination', 'by_statement', 'identifiers', 'classifications', 'publishers', 'publish_places', 'publish_date', 'subjects', 'subject_places', 'subject_people', 'subject_times', 'excerpts', 'notes', 'table_of_contents', 'links', 'cover'])

In [89]:
book_metadata["ISBN:9780141182674"]['cover']['medium']

'https://covers.openlibrary.org/b/id/13133935-M.jpg'

# API request

In [90]:
def fetch_book(isbn13, timeout=10):
    """Fetch the Open Library metadata of a single book.

    Args:
        isbn13: ISBN-13 of the book (int or string of digits).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests` may block forever on an unresponsive server.

    Returns:
        The decoded JSON response: a dict keyed by ``'ISBN:<isbn13>'`` whose
        value holds the book metadata (presumably an empty dict when
        Open Library does not know the ISBN -- confirm against the API docs).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    base_url = 'https://openlibrary.org/api/books'
    params = {
        'bibkeys': f'ISBN:{isbn13}',
        'format': 'json',
        'jscmd': 'data'
    }
    response = requests.get(url=base_url, params=params, timeout=timeout)
    # Surface HTTP errors explicitly rather than failing later while decoding
    # an error page as JSON.
    response.raise_for_status()
    return response.json()

In [91]:
fetch_book(9780141182674)

{'ISBN:9780141182674': {'url': 'https://openlibrary.org/books/OL22464055M/On_the_Road',
  'key': '/books/OL22464055M',
  'title': 'On the Road',
  'authors': [{'url': 'https://openlibrary.org/authors/OL21491A/Jack_Kerouac',
    'name': 'Jack Kerouac'}],
  'number_of_pages': 280,
  'pagination': 'xxxii, 280 p. ;',
  'by_statement': 'Jack Kerouac ; introduction by Ann Charters.',
  'identifiers': {'librarything': ['3207'],
   'goodreads': ['2552'],
   'isbn_10': ['0141182679'],
   'isbn_13': ['9780141182674'],
   'openlibrary': ['OL22464055M']},
  'classifications': {'lc_classifications': ['PS3521.E735 .O5 1991'],
   'dewey_decimal_class': ['813/.54']},
  'publishers': [{'name': 'Penguin Books'}],
  'publish_places': [{'name': 'London'}, {'name': 'New York'}],
  'publish_date': '2000',
  'subjects': [{'name': 'Beat generation',
    'url': 'https://openlibrary.org/subjects/beat_generation'},
   {'name': 'Fiction', 'url': 'https://openlibrary.org/subjects/fiction'},
   {'name': 'Autobiogra

In [92]:
def find_cover(book_json):
    """Extract the medium-sized cover URL from a Books API response.

    Args:
        book_json: Decoded API response for one book, i.e. a dict whose
            first key (``'ISBN:...'``) maps to the book metadata.

    Returns:
        - the cover URL (str) when the metadata has a ``cover.medium`` entry;
        - ``'no book'`` when the response is an empty dict;
        - ``None`` when the book has no medium cover, or when ``book_json``
          is not a dict.
    """
    if not isinstance(book_json, dict):
        return None
    if not book_json:
        return 'no book'

    # Only the first entry is inspected, like a single-ISBN response.
    metadata = next(iter(book_json.values()))
    # `or {}` also covers entries that are present but set to None.
    cover = (metadata or {}).get('cover') or {}
    # Return None (not an empty dict) when there is no medium cover, so the
    # result is always either a URL string or a falsy "missing" marker.
    return cover.get('medium')

In [63]:
find_cover(fetch_book(9780141182674))

'https://covers.openlibrary.org/b/id/13133935-M.jpg'

In [127]:
from IPython.display import clear_output

# Enrich the sample one book at a time: one API request per row.
df['cover'] = ''
for index, row in df.iterrows():
    print(f'📖fetching book: {row["title"]}')
    book_json = fetch_book(row['isbn13'])
    cover_url = find_cover(book_json)
    print(f'found cover with url {cover_url}')

    # Any falsy result from find_cover is stored as 'not found'.
    df.loc[index, 'cover'] = cover_url if cover_url else 'not found'
    # wait=True postpones the clearing until new output arrives, so only the
    # messages of the latest book stay visible.
    clear_output(wait=True)


📖fetching book: Sudden Fiction: American Short-Short Stories
found cover with url https://covers.openlibrary.org/b/id/11723301-M.jpg


In [115]:
df['cover'] = df['isbn13'].map(fetch_book).map(find_cover)

In [125]:
index

12429

In [128]:
df

Unnamed: 0,title,isbn13,cover
4578,Cromartie High School Vol. 06,9781413902624,https://covers.openlibrary.org/b/id/758686-M.jpg
2540,Saint Joan/Major Barbara/Androcles and the Lion,9780394604800,https://covers.openlibrary.org/b/id/10516535-M...
5237,Strangers,9780425181119,https://covers.openlibrary.org/b/id/12884728-M...
12429,Paris: After the Liberation 1944-1949,9780142437926,not found
6826,Sudden Fiction: American Short-Short Stories,9780879052652,https://covers.openlibrary.org/b/id/11723301-M...


In [101]:
from IPython.display import Image
Image(url='https://covers.openlibrary.org/b/id/13133935-M.jpg')

# Multiple ISBNs at a time

In [77]:
url = 'https://openlibrary.org/api/books?bibkeys=ISBN:9780439785969,ISBN:9780439554893,ISBN:9780976540601&format=json&jscmd=data'
len(requests.get(url).json())

3

In [142]:
list(df['isbn13'])

[9781413902624, 9780394604800, 9780425181119, 9780142437926, 9780879052652]

In [140]:
(',').join(f'ISBN:{isbn}' for isbn in list(df['isbn13']))

'ISBN:9781413902624,ISBN:9780394604800,ISBN:9780425181119,ISBN:9780142437926,ISBN:9780879052652'

In [141]:
def fetch_multiple_books(isbn13_list, timeout=10):
    """Fetch the Open Library metadata of several books in one request.

    Args:
        isbn13_list: Iterable of ISBN-13 values (ints or strings of digits).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests` may block forever on an unresponsive server.

    Returns:
        The decoded JSON response: a dict keyed by ``'ISBN:<isbn13>'``, one
        entry per book returned by the API. An empty input returns ``{}``
        without calling the API.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    # The API takes all the keys as one comma-separated `bibkeys` value.
    bibkeys = ','.join(f'ISBN:{isbn}' for isbn in isbn13_list)
    # Checking the joined string (rather than the input) works for any
    # iterable, including generators and pandas Series.
    if not bibkeys:
        return {}

    base_url = 'https://openlibrary.org/api/books'
    params = {
        'bibkeys': bibkeys,
        'format': 'json',
        'jscmd': 'data'
    }
    response = requests.get(url=base_url, params=params, timeout=timeout)
    # Surface HTTP errors explicitly rather than failing later while decoding
    # an error page as JSON.
    response.raise_for_status()
    return response.json()

In [144]:
books = fetch_multiple_books(list(df['isbn13']))

In [146]:
len(books)

5

In [163]:
import numpy as np

# Number of API requests the books are spread over.
n_batches = 2

# Split row *positions* instead of the DataFrame itself: np.array_split on a
# DataFrame goes through DataFrame.swapaxes, which recent pandas versions
# deprecate. The slices are built up front, before `df` gains a new column.
batch_positions = np.array_split(np.arange(len(df)), n_batches)
df_slices = [df.iloc[positions] for positions in batch_positions]

for df_slice in df_slices:
    # One request per batch; the response is keyed by 'ISBN:<isbn13>'.
    books = fetch_multiple_books(list(df_slice['isbn13']))

    for book in books:
        # None (rather than an empty dict) marks a book without a medium cover.
        image_url = ((books[book] or {}).get('cover') or {}).get('medium')
        # Remove the 'ISBN:' prefix explicitly: str.strip('ISBN:') strips a
        # *set of characters* from both ends, not a prefix.
        book_isbn13 = int(book.partition(':')[2])
        df.loc[df['isbn13'] == book_isbn13, 'cover_url'] = image_url

In [154]:
df_slice

Unnamed: 0,title,isbn13,cover
12429,Paris: After the Liberation 1944-1949,9780142437926,not found
6826,Sudden Fiction: American Short-Short Stories,9780879052652,https://covers.openlibrary.org/b/id/11723301-M...


In [156]:
len(books)

2

In [164]:
df

Unnamed: 0,title,isbn13,cover,cover_url
4578,Cromartie High School Vol. 06,9781413902624,https://covers.openlibrary.org/b/id/758686-M.jpg,https://covers.openlibrary.org/b/id/758686-M.jpg
2540,Saint Joan/Major Barbara/Androcles and the Lion,9780394604800,https://covers.openlibrary.org/b/id/10516535-M...,https://covers.openlibrary.org/b/id/10516535-M...
5237,Strangers,9780425181119,https://covers.openlibrary.org/b/id/12884728-M...,https://covers.openlibrary.org/b/id/12884728-M...
12429,Paris: After the Liberation 1944-1949,9780142437926,not found,
6826,Sudden Fiction: American Short-Short Stories,9780879052652,https://covers.openlibrary.org/b/id/11723301-M...,https://covers.openlibrary.org/b/id/11723301-M...
