# Virtual_library

## imports


In [1]:
import requests
import pandas as pd
import numpy as np

## Load books from CSV

In [2]:
books_df = pd.read_csv('data/books.csv', on_bad_lines='skip')
books_df = books_df.drop(columns=['bookID', 'isbn', 'average_rating', 'language_code', 'ratings_count', 'text_reviews_count'])
display(books_df.head())
books_df.info()

Unnamed: 0,title,authors,isbn13,num_pages,publication_date,publisher
0,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,9780439785969,652,9/16/2006,Scholastic Inc.
1,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,9780439358071,870,9/1/2004,Scholastic Inc.
2,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,9780439554893,352,11/1/2003,Scholastic
3,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,9780439655484,435,5/1/2004,Scholastic Inc.
4,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,9780439682589,2690,9/13/2004,Scholastic


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11123 entries, 0 to 11122
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   title             11123 non-null  object
 1   authors           11123 non-null  object
 2   isbn13            11123 non-null  int64 
 3     num_pages       11123 non-null  int64 
 4   publication_date  11123 non-null  object
 5   publisher         11123 non-null  object
dtypes: int64(2), object(4)
memory usage: 521.5+ KB


## Changing publication_date column to datetime format

In [3]:
books_df["publication_date"] = pd.to_datetime(
                                                books_df["publication_date"],
                                                format="%f/%M/%Y"
                                                )
books_df.dtypes

title                       object
authors                     object
isbn13                       int64
  num_pages                  int64
publication_date    datetime64[ns]
publisher                   object
dtype: object

## Creating a URL column

In [4]:
books_df['cover_url'] = None
books_df.head()

Unnamed: 0,title,authors,isbn13,num_pages,publication_date,publisher,cover_url
0,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,9780439785969,652,2006-01-01 00:16:00.900,Scholastic Inc.,
1,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,9780439358071,870,2004-01-01 00:01:00.900,Scholastic Inc.,
2,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,9780439554893,352,2003-01-01 00:01:00.110,Scholastic,
3,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,9780439655484,435,2004-01-01 00:01:00.500,Scholastic Inc.,
4,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,9780439682589,2690,2004-01-01 00:13:00.900,Scholastic,


## API - Open Library

In [5]:
def fetch_book(isbn):
    url = 'https://openlibrary.org/api/books'
    
    params = {
            'bibkeys': f'ISBN:{isbn}',
            'format': 'json',
            'jscmd': 'data'
            }
    
    response = requests.get(url, params=params).json()
    
    # Check whether ISBN is in the response
    if f'ISBN:{isbn}' in response:
        return response[f'ISBN:{isbn}']
    else:
        return ''
    
# fetch_book(isbn="9780439655484")

In [6]:
%%time

for index, row in books_df.head(20).iterrows():
    if row['cover_url'] is None:
        isbn = row['isbn13']
        print(f"Fetching cover for {row['title']}")      
        book = fetch_book(isbn)
        if book:
            cover_url = book.get('cover', {}).get('large', '')
            books_df.loc[index, 'cover_url'] = cover_url
        else:
            books_df.loc[index, 'cover_url'] = ''

Fetching cover for Harry Potter and the Half-Blood Prince (Harry Potter  #6)
Fetching cover for Harry Potter and the Order of the Phoenix (Harry Potter  #5)
Fetching cover for Harry Potter and the Chamber of Secrets (Harry Potter  #2)
Fetching cover for Harry Potter and the Prisoner of Azkaban (Harry Potter  #3)
Fetching cover for Harry Potter Boxed Set  Books 1-5 (Harry Potter  #1-5)
Fetching cover for Unauthorized Harry Potter Book Seven News: "Half-Blood Prince" Analysis and Speculation
Fetching cover for Harry Potter Collection (Harry Potter  #1-6)
Fetching cover for The Ultimate Hitchhiker's Guide: Five Complete Novels and One Story (Hitchhiker's Guide to the Galaxy  #1-5)
Fetching cover for The Ultimate Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy  #1-5)
Fetching cover for The Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy  #1)
Fetching cover for The Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy  #1)
Fetching cover fo

In [7]:
books_df.columns

Index(['title', 'authors', 'isbn13', '  num_pages', 'publication_date',
       'publisher', 'cover_url'],
      dtype='object')

In [8]:
books_df.index += 1
books_df

Unnamed: 0,title,authors,isbn13,num_pages,publication_date,publisher,cover_url
1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,9780439785969,652,2006-01-01 00:16:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/9326654-L.jpg
2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,9780439358071,870,2004-01-01 00:01:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/12025650-L...
3,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,9780439554893,352,2003-01-01 00:01:00.110,Scholastic,https://covers.openlibrary.org/b/id/10301720-L...
4,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,9780439655484,435,2004-01-01 00:01:00.500,Scholastic Inc.,https://covers.openlibrary.org/b/id/8778528-L.jpg
5,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,9780439682589,2690,2004-01-01 00:13:00.900,Scholastic,https://covers.openlibrary.org/b/id/278981-L.jpg
...,...,...,...,...,...,...,...
11119,Expelled from Eden: A William T. Vollmann Reader,William T. Vollmann/Larry McCaffery/Michael He...,9781560254416,512,2004-01-01 00:21:00.120,Da Capo Press,
11120,You Bright and Risen Angels,William T. Vollmann,9780140110876,635,1988-01-01 00:01:00.120,Penguin Books,
11121,The Ice-Shirt (Seven Dreams #1),William T. Vollmann,9780140131963,415,1993-01-01 00:01:00.800,Penguin Books,
11122,Poor People,William T. Vollmann,9780060878825,434,2007-01-01 00:27:00.200,Ecco,


## Calling the API with multiple ISBNs at a time


In [9]:
isbns = [9780439785969, 9780439358071, 9780439554930]
[f"ISBN:{isbn}" for isbn in isbns]

['ISBN:9780439785969', 'ISBN:9780439358071', 'ISBN:9780439554930']

In [10]:
",".join([f"ISBN:{isbn}" for isbn in isbns])

'ISBN:9780439785969,ISBN:9780439358071,ISBN:9780439554930'

In [11]:
def fetch_books(isbns):
    url = "https://openlibrary.org/api/books"
    bibkeys = ",".join([f"ISBN:{isbn}" for isbn in isbns])
    
    params = {
        'bibkeys': bibkeys,
        'format': 'json',
        'jscmd': 'data'
    }
    
    response = requests.get(url, params=params).json()
    
    return response

fetch_books(isbns='ISBN:9780439785969,ISBN:9780439358071,ISBN:9780439554930')

{'ISBN:9': {'url': 'https://openlibrary.org/books/OL31367271M/Novel_Districts',
  'key': '/books/OL31367271M',
  'title': 'Novel Districts',
  'subtitle': 'Critical Readings of Monika Fagerholm',
  'authors': [{'url': 'https://openlibrary.org/authors/OL8908091A/Mia_Österlund',
    'name': 'Mia Österlund'}],
  'number_of_pages': 262,
  'pagination': '1 electronic resource (262 p.)',
  'identifiers': {'isbn_10': ['9'],
   'isbn_13': ['9789522227942', '9789522227959'],
   'openlibrary': ['OL31367271M']},
  'publishers': [{'name': 'Finnish Literature Society / SKS'}],
  'publish_places': [{'name': 'Helsinki'}],
  'publish_date': '2016',
  'subjects': [{'name': 'Scandinavia Northern Europe',
    'url': 'https://openlibrary.org/subjects/scandinavia_northern_europe'},
   {'name': 'Finland', 'url': 'https://openlibrary.org/subjects/finland'},
   {'name': 'Literary studies: fiction, novelists & prose writers',
    'url': 'https://openlibrary.org/subjects/literary_studies:_fiction,_novelists_&_p

## Setting ISBN column as index

In [12]:
display(books_df.head(5))
books_df.columns

Unnamed: 0,title,authors,isbn13,num_pages,publication_date,publisher,cover_url
1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,9780439785969,652,2006-01-01 00:16:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/9326654-L.jpg
2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,9780439358071,870,2004-01-01 00:01:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/12025650-L...
3,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,9780439554893,352,2003-01-01 00:01:00.110,Scholastic,https://covers.openlibrary.org/b/id/10301720-L...
4,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,9780439655484,435,2004-01-01 00:01:00.500,Scholastic Inc.,https://covers.openlibrary.org/b/id/8778528-L.jpg
5,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,9780439682589,2690,2004-01-01 00:13:00.900,Scholastic,https://covers.openlibrary.org/b/id/278981-L.jpg


Index(['title', 'authors', 'isbn13', '  num_pages', 'publication_date',
       'publisher', 'cover_url'],
      dtype='object')

In [14]:
books_df.set_index("isbn13", inplace=True)

In [15]:
books_df

Unnamed: 0_level_0,title,authors,num_pages,publication_date,publisher,cover_url
isbn13,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9780439785969,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,652,2006-01-01 00:16:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/9326654-L.jpg
9780439358071,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,870,2004-01-01 00:01:00.900,Scholastic Inc.,https://covers.openlibrary.org/b/id/12025650-L...
9780439554893,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,352,2003-01-01 00:01:00.110,Scholastic,https://covers.openlibrary.org/b/id/10301720-L...
9780439655484,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,435,2004-01-01 00:01:00.500,Scholastic Inc.,https://covers.openlibrary.org/b/id/8778528-L.jpg
9780439682589,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,2690,2004-01-01 00:13:00.900,Scholastic,https://covers.openlibrary.org/b/id/278981-L.jpg
...,...,...,...,...,...,...
9781560254416,Expelled from Eden: A William T. Vollmann Reader,William T. Vollmann/Larry McCaffery/Michael He...,512,2004-01-01 00:21:00.120,Da Capo Press,
9780140110876,You Bright and Risen Angels,William T. Vollmann,635,1988-01-01 00:01:00.120,Penguin Books,
9780140131963,The Ice-Shirt (Seven Dreams #1),William T. Vollmann,415,1993-01-01 00:01:00.800,Penguin Books,
9780060878825,Poor People,William T. Vollmann,434,2007-01-01 00:27:00.200,Ecco,


## Playing around with tqdm library

In [16]:
!pip install tqdm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [17]:
%%time

from tqdm import tqdm

for group in tqdm(np.array_split(books_df.head(100), 5)): # 5 groups of 20 books
    books = fetch_books(list(group.index))
    
    for isbn_code, book in books.items():
        isbn = int(isbn_code.strip("ISBN:"))
        books_df.loc[isbn, "cover_url"] = book.get("cover", {}).get("large", "")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:11<00:00,  2.34s/it]

CPU times: user 1.89 s, sys: 70.8 ms, total: 1.96 s
Wall time: 11.9 s



