In [1]:
!pip install python-dotenv



In [2]:
# Import the libraries and modules required for the project
import os
import logging 
import requests
import time
import psycopg2
# import dotenv
# from dotenv import load_dotenv
import pandas as pd 

# Set up root logging: INFO and above, each record prefixed with time and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# # Load the environment variables 
# load_dotenv()

In [3]:
# Project requirements: the inclusive date window (YYYY-MM-DD strings) and the
# change kinds to pull from the API.
api_start_date, api_end_date = "2023-12-01", "2023-12-03"
extract_set = [
    "add-cover",
    "add-book",
    "edit-book",
    "merge-authors",
]

# API paging values passed to extract_data: start at the first record and
# request one record per call.
limit = 1
offset = 0


def extract_data(specific_date, kind, offset_value, limit_value, timeout=30):
    """
    Fetch one page of recent changes of a given kind for a given day from
    the Open Library recentchanges endpoint.

    specific_date(str): date in YYYY/MM/DD, inserted into the URL path
    kind(str): change kind in the set (add-cover, add-book, edit-book, merge-authors)
    offset_value(int): number of records to skip, sent as the "offset" query parameter
    limit_value(int): maximum number of records, sent as the "limit" query parameter
    timeout(float): seconds to wait for the server before giving up

    return a list of the records extracted (a single JSON object is wrapped
    in a one-element list, an empty/null body gives an empty list), or None
    if the request failed or the body was not valid JSON
    """
    url = f"http://openlibrary.org/recentchanges/{specific_date}/{kind}.json"
    # Both paging values go into the query string so offset_value is honoured
    params = {"limit": limit_value, "offset": offset_value}

    logging.info(f"Connecting to API to extract data from {specific_date} of kind {kind} and current offset is {offset_value}")

    try:
        # Without a timeout a stalled connection would block the notebook forever
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass
        logging.error(f"Error fetching data from {url}: {str(e)}")
        return None

    if isinstance(data, list):
        return data
    # Wrap a lone object so callers always receive a list of records
    return [data] if data else []

    

#generate dates for data extraction using pandas
def generate_date(start_date, end_date):
    """
    Build the sequence of dates running from start_date through end_date
    by delegating to pandas.date_range with its default frequency.

    start_date(string) in YYYY-MM-DD
    end_date(string) in YYYY-MM-DD

    return a pandas DatetimeIndex
    """
    return pd.date_range(start=start_date, end=end_date)


In [4]:
 # One entry per day in the configured extraction window
 dates = generate_date(start_date=api_start_date, end_date=api_end_date)


In [5]:
extracted_data_frames = []  # One DataFrame per (date, kind) request that returned records

for single_date in dates:
    # The API path expects the date as YYYY/MM/DD
    formatted_date = single_date.strftime("%Y/%m/%d")
    for single_kind in extract_set:
        extracted_data = extract_data(formatted_date, single_kind, offset, limit)
        # None marks a failed request and [] an empty page; neither adds rows
        if not extracted_data:
            continue
        # Convert extracted data into a DataFrame
        df = pd.DataFrame(extracted_data)
        # Tag every record with the date and kind it was requested under
        df['date'] = formatted_date
        df['kind'] = single_kind
        extracted_data_frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when every request failed or came back empty
if extracted_data_frames:
    final_df = pd.concat(extracted_data_frames, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=['date', 'kind'])

2024-04-06 21:33:04,990 - INFO - Connecting to API to extract data from 2023/12/01 of kind add-cover and current offset is 0
2024-04-06 21:33:12,933 - INFO - Connecting to API to extract data from 2023/12/01 of kind add-book and current offset is 0
2024-04-06 21:33:14,468 - ERROR - Error fetching data from http://openlibrary.org/recentchanges/2023/12/01/add-book.json: Expecting value: line 1 column 1 (char 0)
2024-04-06 21:33:14,470 - INFO - Connecting to API to extract data from 2023/12/01 of kind edit-book and current offset is 0
2024-04-06 21:33:16,189 - ERROR - Error fetching data from http://openlibrary.org/recentchanges/2023/12/01/edit-book.json: Expecting value: line 1 column 1 (char 0)
2024-04-06 21:33:16,191 - INFO - Connecting to API to extract data from 2023/12/01 of kind merge-authors and current offset is 0
2024-04-06 21:33:18,707 - ERROR - Error fetching data from http://openlibrary.org/recentchanges/2023/12/01/merge-authors.json: Expecting value: line 1 column 1 (char 0)

In [6]:
final_df.tail(5)

Unnamed: 0,id,kind,timestamp,comment,changes,author,ip,data,date
2,126924717,merge-authors,2023-12-02T22:22:45.469162,merge authors,"[{'key': '/authors/OL3170992A', 'revision': 2}...",{'key': '/people/mheiman9541'},,"{'master': '/authors/OL4356353A', 'duplicates'...",2023/12/02
3,126937209,add-cover,2023-12-03T23:52:59.189725,//covers.openlibrary.org/b/id/14550834-S.jpg,"[{'key': '/books/OL19529488M', 'revision': 7}]",{'key': '/people/suzannetf'},,{'url': ''},2023/12/03
4,126937177,add-book,2023-12-03T23:48:04.196078,Added new book.,"[{'key': '/works/OL37250406W', 'revision': 1},...",{'key': '/people/freso'},,{},2023/12/03
5,126936907,edit-book,2023-12-03T22:57:47.769029,Fixed title; added coauthor,"[{'key': '/works/OL7730196W', 'revision': 3}, ...",{'key': '/people/marjole'},,{},2023/12/03
6,126937230,merge-authors,2023-12-03T23:59:13.661195,merge authors,"[{'key': '/authors/OL8076429A', 'revision': 2}...",{'key': '/people/tfmorris'},,"{'master': '/authors/OL282451A', 'duplicates':...",2023/12/03


In [7]:
pd.set_option('display.max_colwidth', None)

In [8]:
final_df.tail(1)["changes"]

6    [{'key': '/authors/OL8076429A', 'revision': 2}, {'key': '/authors/OL9080556A', 'revision': 2}, {'key': '/authors/OL9080558A', 'revision': 2}, {'key': '/authors/OL9152667A', 'revision': 2}, {'key': '/authors/OL9216291A', 'revision': 2}, {'key': '/works/OL36475468W', 'revision': 2}, {'key': '/books/OL43465526M', 'revision': 2}, {'key': '/books/OL31844041M', 'revision': 2}, {'key': '/works/OL24140605W', 'revision': 2}, {'key': '/works/OL21574076W', 'revision': 2}, {'key': '/books/OL37834890M', 'revision': 4}, {'key': '/books/OL28982323M', 'revision': 2}, {'key': '/works/OL24534128W', 'revision': 3}, {'key': '/books/OL32202998M', 'revision': 2}, {'key': '/books/OL37841619M', 'revision': 3}, {'key': '/works/OL33901125W', 'revision': 2}, {'key': '/works/OL24331160W', 'revision': 2}, {'key': '/books/OL32423482M', 'revision': 2}, {'key': '/works/OL21886769W', 'revision': 3}, {'key': '/works/OL26399408W', 'revision': 2}, {'key': '/books/OL35498588M', 'revision': 2}, {'key': '/books/OL34013

In [9]:
final_df.tail(1)["data"]

6    {'master': '/authors/OL282451A', 'duplicates': ['/authors/OL8076429A', '/authors/OL9080556A', '/authors/OL9080558A', '/authors/OL9152667A', '/authors/OL9216291A']}
Name: data, dtype: object

In [10]:
def filter_books(changes_list):
    """
    Filter the 'changes' column to only include entries containing '/books/'

    changes_list: List of dictionaries containing changes; a non-iterable
        value such as None or NaN is treated as having no changes
    return: List of dictionaries whose 'key' is a string containing '/books/'
        (empty list when there are none)
    """
    try:
        candidates = iter(changes_list)
    except TypeError:
        # Missing cells (None / NaN) cannot be iterated; they hold no changes
        return []

    books_changes = []
    for change in candidates:
        if not isinstance(change, dict):
            continue
        key = change.get('key')
        # Guard against a null or non-string key before the substring test
        if isinstance(key, str) and '/books/' in key:
            books_changes.append(change)
    return books_changes

In [11]:
# Work on a separate frame so that replacing columns on omo_df leaves final_df as extracted
omo_df = final_df.copy(deep=True)

In [12]:
omo_df

Unnamed: 0,id,kind,timestamp,comment,changes,author,ip,data,date
0,126912344,add-cover,2023-12-01T23:57:58.884516,//covers.openlibrary.org/b/id/14550162-S.jpg,"[{'key': '/books/OL49633366M', 'revision': 2}]",{'key': '/people/lephemere'},,{'url': 'https://m.media-amazon.com/images/I/71naEmEcpqL._SL1331_.jpg'},2023/12/01
1,126925272,edit-book,2023-12-02T23:53:27.021123,,"[{'key': '/works/OL37249080W', 'revision': 2}, {'key': '/books/OL50211745M', 'revision': 2}]",{'key': '/people/buddhistuniversity'},,{},2023/12/02
2,126924717,merge-authors,2023-12-02T22:22:45.469162,merge authors,"[{'key': '/authors/OL3170992A', 'revision': 2}, {'key': '/authors/OL3915304A', 'revision': 2}, {'key': '/authors/OL7503541A', 'revision': 2}, {'key': '/authors/OL11469886A', 'revision': 2}, {'key': '/works/OL35431245W', 'revision': 2}, {'key': '/works/OL9063443W', 'revision': 3}, {'key': '/books/OL9032300M', 'revision': 5}, {'key': '/books/OL43703900M', 'revision': 2}, {'key': '/books/OL43695637M', 'revision': 2}, {'key': '/books/OL45735786M', 'revision': 2}, {'key': '/works/OL31524220W', 'revision': 2}, {'key': '/books/OL9032302M', 'revision': 4}, {'key': '/books/OL12699318M', 'revision': 7}, {'key': '/works/OL19631642W', 'revision': 2}, {'key': '/works/OL35316904W', 'revision': 2}, {'key': '/books/OL26852589M', 'revision': 2}, {'key': '/works/OL9063445W', 'revision': 3}, {'key': '/works/OL31483360W', 'revision': 2}, {'key': '/works/OL9063446W', 'revision': 2}, {'key': '/books/OL47730158M', 'revision': 2}, {'key': '/works/OL31978893W', 'revision': 2}, {'key': '/books/OL12699239M', 'revision': 7}, {'key': '/books/OL43144306M', 'revision': 2}, {'key': '/works/OL31987750W', 'revision': 2}, {'key': '/works/OL33765644W', 'revision': 2}, {'key': '/works/OL9063444W', 'revision': 3}, {'key': '/books/OL47855655M', 'revision': 2}, {'key': '/books/OL12461818M', 'revision': 7}, {'key': '/books/OL43189362M', 'revision': 2}, {'key': '/authors/OL4356353A', 'revision': 16}]",{'key': '/people/mheiman9541'},,"{'master': '/authors/OL4356353A', 'duplicates': ['/authors/OL3170992A', '/authors/OL3915304A', '/authors/OL7503541A', '/authors/OL11469886A']}",2023/12/02
3,126937209,add-cover,2023-12-03T23:52:59.189725,//covers.openlibrary.org/b/id/14550834-S.jpg,"[{'key': '/books/OL19529488M', 'revision': 7}]",{'key': '/people/suzannetf'},,{'url': ''},2023/12/03
4,126937177,add-book,2023-12-03T23:48:04.196078,Added new book.,"[{'key': '/works/OL37250406W', 'revision': 1}, {'key': '/books/OL50213129M', 'revision': 1}]",{'key': '/people/freso'},,{},2023/12/03
5,126936907,edit-book,2023-12-03T22:57:47.769029,Fixed title; added coauthor,"[{'key': '/works/OL7730196W', 'revision': 3}, {'key': '/books/OL44746730M', 'revision': 3}]",{'key': '/people/marjole'},,{},2023/12/03
6,126937230,merge-authors,2023-12-03T23:59:13.661195,merge authors,"[{'key': '/authors/OL8076429A', 'revision': 2}, {'key': '/authors/OL9080556A', 'revision': 2}, {'key': '/authors/OL9080558A', 'revision': 2}, {'key': '/authors/OL9152667A', 'revision': 2}, {'key': '/authors/OL9216291A', 'revision': 2}, {'key': '/works/OL36475468W', 'revision': 2}, {'key': '/books/OL43465526M', 'revision': 2}, {'key': '/books/OL31844041M', 'revision': 2}, {'key': '/works/OL24140605W', 'revision': 2}, {'key': '/works/OL21574076W', 'revision': 2}, {'key': '/books/OL37834890M', 'revision': 4}, {'key': '/books/OL28982323M', 'revision': 2}, {'key': '/works/OL24534128W', 'revision': 3}, {'key': '/books/OL32202998M', 'revision': 2}, {'key': '/books/OL37841619M', 'revision': 3}, {'key': '/works/OL33901125W', 'revision': 2}, {'key': '/works/OL24331160W', 'revision': 2}, {'key': '/books/OL32423482M', 'revision': 2}, {'key': '/works/OL21886769W', 'revision': 3}, {'key': '/works/OL26399408W', 'revision': 2}, {'key': '/books/OL35498588M', 'revision': 2}, {'key': '/books/OL34013552M', 'revision': 2}, {'key': '/books/OL29469786M', 'revision': 4}, {'key': '/books/OL32518662M', 'revision': 2}, {'key': '/books/OL29848024M', 'revision': 2}, {'key': '/works/OL24475029W', 'revision': 2}, {'key': '/works/OL24140606W', 'revision': 2}, {'key': '/books/OL33499513M', 'revision': 2}, {'key': '/books/OL29443202M', 'revision': 5}, {'key': '/works/OL25163650W', 'revision': 3}, {'key': '/works/OL21685327W', 'revision': 2}, {'key': '/works/OL21558717W', 'revision': 3}, {'key': '/books/OL47145797M', 'revision': 2}, {'key': '/books/OL28906127M', 'revision': 3}, {'key': '/works/OL27723947W', 'revision': 2}, {'key': '/books/OL35640842M', 'revision': 3}, {'key': '/works/OL21406671W', 'revision': 2}, {'key': '/works/OL31771555W', 'revision': 2}, {'key': '/books/OL29714504M', 'revision': 2}, {'key': '/books/OL45886495M', 'revision': 2}, {'key': '/works/OL27729496W', 'revision': 3}, {'key': '/books/OL35501364M', 
'revision': 2}, {'key': '/books/OL49295438M', 'revision': 2}, {'key': '/books/OL31844040M', 'revision': 2}, {'key': '/books/OL44888284M', 'revision': 2}, {'key': '/books/OL28565950M', 'revision': 3}, {'key': '/books/OL34010136M', 'revision': 2}, {'key': '/books/OL34587123M', 'revision': 2}, {'key': '/books/OL39528371M', 'revision': 2}, {'key': '/works/OL21341691W', 'revision': 2}, {'key': '/works/OL25450920W', 'revision': 2}, {'key': '/works/OL21103235W', 'revision': 3}, {'key': '/books/OL35495535M', 'revision': 2}, {'key': '/works/OL34793736W', 'revision': 2}, {'key': '/books/OL29499665M', 'revision': 2}, {'key': '/books/OL29250585M', 'revision': 4}, {'key': '/works/OL21653360W', 'revision': 5}, {'key': '/works/OL28792745W', 'revision': 2}, {'key': '/books/OL29280580M', 'revision': 2}, {'key': '/works/OL33027436W', 'revision': 2}, {'key': '/books/OL34127783M', 'revision': 2}, {'key': '/authors/OL282451A', 'revision': 9}]",{'key': '/people/tfmorris'},,"{'master': '/authors/OL282451A', 'duplicates': ['/authors/OL8076429A', '/authors/OL9080556A', '/authors/OL9080558A', '/authors/OL9152667A', '/authors/OL9216291A']}",2023/12/03


In [13]:
# Reduce each row's change list to its '/books/' entries
omo_df['changes'] = omo_df['changes'].map(filter_books)

In [14]:
omo_df.tail(5)

Unnamed: 0,id,kind,timestamp,comment,changes,author,ip,data,date
2,126924717,merge-authors,2023-12-02T22:22:45.469162,merge authors,"[{'key': '/books/OL9032300M', 'revision': 5}, {'key': '/books/OL43703900M', 'revision': 2}, {'key': '/books/OL43695637M', 'revision': 2}, {'key': '/books/OL45735786M', 'revision': 2}, {'key': '/books/OL9032302M', 'revision': 4}, {'key': '/books/OL12699318M', 'revision': 7}, {'key': '/books/OL26852589M', 'revision': 2}, {'key': '/books/OL47730158M', 'revision': 2}, {'key': '/books/OL12699239M', 'revision': 7}, {'key': '/books/OL43144306M', 'revision': 2}, {'key': '/books/OL47855655M', 'revision': 2}, {'key': '/books/OL12461818M', 'revision': 7}, {'key': '/books/OL43189362M', 'revision': 2}]",{'key': '/people/mheiman9541'},,"{'master': '/authors/OL4356353A', 'duplicates': ['/authors/OL3170992A', '/authors/OL3915304A', '/authors/OL7503541A', '/authors/OL11469886A']}",2023/12/02
3,126937209,add-cover,2023-12-03T23:52:59.189725,//covers.openlibrary.org/b/id/14550834-S.jpg,"[{'key': '/books/OL19529488M', 'revision': 7}]",{'key': '/people/suzannetf'},,{'url': ''},2023/12/03
4,126937177,add-book,2023-12-03T23:48:04.196078,Added new book.,"[{'key': '/books/OL50213129M', 'revision': 1}]",{'key': '/people/freso'},,{},2023/12/03
5,126936907,edit-book,2023-12-03T22:57:47.769029,Fixed title; added coauthor,"[{'key': '/books/OL44746730M', 'revision': 3}]",{'key': '/people/marjole'},,{},2023/12/03
6,126937230,merge-authors,2023-12-03T23:59:13.661195,merge authors,"[{'key': '/books/OL43465526M', 'revision': 2}, {'key': '/books/OL31844041M', 'revision': 2}, {'key': '/books/OL37834890M', 'revision': 4}, {'key': '/books/OL28982323M', 'revision': 2}, {'key': '/books/OL32202998M', 'revision': 2}, {'key': '/books/OL37841619M', 'revision': 3}, {'key': '/books/OL32423482M', 'revision': 2}, {'key': '/books/OL35498588M', 'revision': 2}, {'key': '/books/OL34013552M', 'revision': 2}, {'key': '/books/OL29469786M', 'revision': 4}, {'key': '/books/OL32518662M', 'revision': 2}, {'key': '/books/OL29848024M', 'revision': 2}, {'key': '/books/OL33499513M', 'revision': 2}, {'key': '/books/OL29443202M', 'revision': 5}, {'key': '/books/OL47145797M', 'revision': 2}, {'key': '/books/OL28906127M', 'revision': 3}, {'key': '/books/OL35640842M', 'revision': 3}, {'key': '/books/OL29714504M', 'revision': 2}, {'key': '/books/OL45886495M', 'revision': 2}, {'key': '/books/OL35501364M', 'revision': 2}, {'key': '/books/OL49295438M', 'revision': 2}, {'key': '/books/OL31844040M', 'revision': 2}, {'key': '/books/OL44888284M', 'revision': 2}, {'key': '/books/OL28565950M', 'revision': 3}, {'key': '/books/OL34010136M', 'revision': 2}, {'key': '/books/OL34587123M', 'revision': 2}, {'key': '/books/OL39528371M', 'revision': 2}, {'key': '/books/OL35495535M', 'revision': 2}, {'key': '/books/OL29499665M', 'revision': 2}, {'key': '/books/OL29250585M', 'revision': 4}, {'key': '/books/OL29280580M', 'revision': 2}, {'key': '/books/OL34127783M', 'revision': 2}]",{'key': '/people/tfmorris'},,"{'master': '/authors/OL282451A', 'duplicates': ['/authors/OL8076429A', '/authors/OL9080556A', '/authors/OL9080558A', '/authors/OL9152667A', '/authors/OL9216291A']}",2023/12/03


In [15]:
# Keep only the rows that still have at least one entry in 'changes'
has_book_changes = omo_df['changes'].apply(lambda x: len(x) > 0)
final_df_filtered = omo_df[has_book_changes]

In [16]:
final_df_filtered

Unnamed: 0,id,kind,timestamp,comment,changes,author,ip,data,date
0,126912344,add-cover,2023-12-01T23:57:58.884516,//covers.openlibrary.org/b/id/14550162-S.jpg,"[{'key': '/books/OL49633366M', 'revision': 2}]",{'key': '/people/lephemere'},,{'url': 'https://m.media-amazon.com/images/I/71naEmEcpqL._SL1331_.jpg'},2023/12/01
1,126925272,edit-book,2023-12-02T23:53:27.021123,,"[{'key': '/books/OL50211745M', 'revision': 2}]",{'key': '/people/buddhistuniversity'},,{},2023/12/02
2,126924717,merge-authors,2023-12-02T22:22:45.469162,merge authors,"[{'key': '/books/OL9032300M', 'revision': 5}, {'key': '/books/OL43703900M', 'revision': 2}, {'key': '/books/OL43695637M', 'revision': 2}, {'key': '/books/OL45735786M', 'revision': 2}, {'key': '/books/OL9032302M', 'revision': 4}, {'key': '/books/OL12699318M', 'revision': 7}, {'key': '/books/OL26852589M', 'revision': 2}, {'key': '/books/OL47730158M', 'revision': 2}, {'key': '/books/OL12699239M', 'revision': 7}, {'key': '/books/OL43144306M', 'revision': 2}, {'key': '/books/OL47855655M', 'revision': 2}, {'key': '/books/OL12461818M', 'revision': 7}, {'key': '/books/OL43189362M', 'revision': 2}]",{'key': '/people/mheiman9541'},,"{'master': '/authors/OL4356353A', 'duplicates': ['/authors/OL3170992A', '/authors/OL3915304A', '/authors/OL7503541A', '/authors/OL11469886A']}",2023/12/02
3,126937209,add-cover,2023-12-03T23:52:59.189725,//covers.openlibrary.org/b/id/14550834-S.jpg,"[{'key': '/books/OL19529488M', 'revision': 7}]",{'key': '/people/suzannetf'},,{'url': ''},2023/12/03
4,126937177,add-book,2023-12-03T23:48:04.196078,Added new book.,"[{'key': '/books/OL50213129M', 'revision': 1}]",{'key': '/people/freso'},,{},2023/12/03
5,126936907,edit-book,2023-12-03T22:57:47.769029,Fixed title; added coauthor,"[{'key': '/books/OL44746730M', 'revision': 3}]",{'key': '/people/marjole'},,{},2023/12/03
6,126937230,merge-authors,2023-12-03T23:59:13.661195,merge authors,"[{'key': '/books/OL43465526M', 'revision': 2}, {'key': '/books/OL31844041M', 'revision': 2}, {'key': '/books/OL37834890M', 'revision': 4}, {'key': '/books/OL28982323M', 'revision': 2}, {'key': '/books/OL32202998M', 'revision': 2}, {'key': '/books/OL37841619M', 'revision': 3}, {'key': '/books/OL32423482M', 'revision': 2}, {'key': '/books/OL35498588M', 'revision': 2}, {'key': '/books/OL34013552M', 'revision': 2}, {'key': '/books/OL29469786M', 'revision': 4}, {'key': '/books/OL32518662M', 'revision': 2}, {'key': '/books/OL29848024M', 'revision': 2}, {'key': '/books/OL33499513M', 'revision': 2}, {'key': '/books/OL29443202M', 'revision': 5}, {'key': '/books/OL47145797M', 'revision': 2}, {'key': '/books/OL28906127M', 'revision': 3}, {'key': '/books/OL35640842M', 'revision': 3}, {'key': '/books/OL29714504M', 'revision': 2}, {'key': '/books/OL45886495M', 'revision': 2}, {'key': '/books/OL35501364M', 'revision': 2}, {'key': '/books/OL49295438M', 'revision': 2}, {'key': '/books/OL31844040M', 'revision': 2}, {'key': '/books/OL44888284M', 'revision': 2}, {'key': '/books/OL28565950M', 'revision': 3}, {'key': '/books/OL34010136M', 'revision': 2}, {'key': '/books/OL34587123M', 'revision': 2}, {'key': '/books/OL39528371M', 'revision': 2}, {'key': '/books/OL35495535M', 'revision': 2}, {'key': '/books/OL29499665M', 'revision': 2}, {'key': '/books/OL29250585M', 'revision': 4}, {'key': '/books/OL29280580M', 'revision': 2}, {'key': '/books/OL34127783M', 'revision': 2}]",{'key': '/people/tfmorris'},,"{'master': '/authors/OL282451A', 'duplicates': ['/authors/OL8076429A', '/authors/OL9080556A', '/authors/OL9080558A', '/authors/OL9152667A', '/authors/OL9216291A']}",2023/12/03


In [17]:
# Function to extract the book OLID from the 'changes' column
def extract_book(changes_list):
    """
    Extract the OLID of the first '/books/' entry in the 'changes' column

    changes_list: List of dictionaries containing changes; a non-iterable
        value such as None or NaN is treated as empty
    return: the identifier that follows '/books/' in the entry's key
        (e.g. 'OL50213129M'), or None when no entry provides one
    """
    try:
        candidates = iter(changes_list)
    except TypeError:
        # Missing cells (None / NaN) cannot be iterated; there is no book to report
        return None

    for change in candidates:
        if not isinstance(change, dict):
            continue
        key = change.get('key')
        if isinstance(key, str) and '/books/' in key:
            # Take the path segment directly after '/books/'
            olid = key.split('/books/', 1)[1].split('/', 1)[0]
            if olid:
                return olid
    return None

In [18]:
# Take a separate frame of the filtered rows so a column can be added to it
final_df_filtered_copy = final_df_filtered.copy(deep=True)

In [19]:
# Add a 'book' column holding the value extract_book returns for each row's changes
final_df_filtered_copy['book'] = final_df_filtered['changes'].apply(extract_book)

In [20]:
final_df_filtered_copy

Unnamed: 0,id,kind,timestamp,comment,changes,author,ip,data,date,book
0,126912344,add-cover,2023-12-01T23:57:58.884516,//covers.openlibrary.org/b/id/14550162-S.jpg,"[{'key': '/books/OL49633366M', 'revision': 2}]",{'key': '/people/lephemere'},,{'url': 'https://m.media-amazon.com/images/I/71naEmEcpqL._SL1331_.jpg'},2023/12/01,OL49633366M
1,126925272,edit-book,2023-12-02T23:53:27.021123,,"[{'key': '/books/OL50211745M', 'revision': 2}]",{'key': '/people/buddhistuniversity'},,{},2023/12/02,OL50211745M
2,126924717,merge-authors,2023-12-02T22:22:45.469162,merge authors,"[{'key': '/books/OL9032300M', 'revision': 5}, {'key': '/books/OL43703900M', 'revision': 2}, {'key': '/books/OL43695637M', 'revision': 2}, {'key': '/books/OL45735786M', 'revision': 2}, {'key': '/books/OL9032302M', 'revision': 4}, {'key': '/books/OL12699318M', 'revision': 7}, {'key': '/books/OL26852589M', 'revision': 2}, {'key': '/books/OL47730158M', 'revision': 2}, {'key': '/books/OL12699239M', 'revision': 7}, {'key': '/books/OL43144306M', 'revision': 2}, {'key': '/books/OL47855655M', 'revision': 2}, {'key': '/books/OL12461818M', 'revision': 7}, {'key': '/books/OL43189362M', 'revision': 2}]",{'key': '/people/mheiman9541'},,"{'master': '/authors/OL4356353A', 'duplicates': ['/authors/OL3170992A', '/authors/OL3915304A', '/authors/OL7503541A', '/authors/OL11469886A']}",2023/12/02,OL9032300M
3,126937209,add-cover,2023-12-03T23:52:59.189725,//covers.openlibrary.org/b/id/14550834-S.jpg,"[{'key': '/books/OL19529488M', 'revision': 7}]",{'key': '/people/suzannetf'},,{'url': ''},2023/12/03,OL19529488M
4,126937177,add-book,2023-12-03T23:48:04.196078,Added new book.,"[{'key': '/books/OL50213129M', 'revision': 1}]",{'key': '/people/freso'},,{},2023/12/03,OL50213129M
5,126936907,edit-book,2023-12-03T22:57:47.769029,Fixed title; added coauthor,"[{'key': '/books/OL44746730M', 'revision': 3}]",{'key': '/people/marjole'},,{},2023/12/03,OL44746730M
6,126937230,merge-authors,2023-12-03T23:59:13.661195,merge authors,"[{'key': '/books/OL43465526M', 'revision': 2}, {'key': '/books/OL31844041M', 'revision': 2}, {'key': '/books/OL37834890M', 'revision': 4}, {'key': '/books/OL28982323M', 'revision': 2}, {'key': '/books/OL32202998M', 'revision': 2}, {'key': '/books/OL37841619M', 'revision': 3}, {'key': '/books/OL32423482M', 'revision': 2}, {'key': '/books/OL35498588M', 'revision': 2}, {'key': '/books/OL34013552M', 'revision': 2}, {'key': '/books/OL29469786M', 'revision': 4}, {'key': '/books/OL32518662M', 'revision': 2}, {'key': '/books/OL29848024M', 'revision': 2}, {'key': '/books/OL33499513M', 'revision': 2}, {'key': '/books/OL29443202M', 'revision': 5}, {'key': '/books/OL47145797M', 'revision': 2}, {'key': '/books/OL28906127M', 'revision': 3}, {'key': '/books/OL35640842M', 'revision': 3}, {'key': '/books/OL29714504M', 'revision': 2}, {'key': '/books/OL45886495M', 'revision': 2}, {'key': '/books/OL35501364M', 'revision': 2}, {'key': '/books/OL49295438M', 'revision': 2}, {'key': '/books/OL31844040M', 'revision': 2}, {'key': '/books/OL44888284M', 'revision': 2}, {'key': '/books/OL28565950M', 'revision': 3}, {'key': '/books/OL34010136M', 'revision': 2}, {'key': '/books/OL34587123M', 'revision': 2}, {'key': '/books/OL39528371M', 'revision': 2}, {'key': '/books/OL35495535M', 'revision': 2}, {'key': '/books/OL29499665M', 'revision': 2}, {'key': '/books/OL29250585M', 'revision': 4}, {'key': '/books/OL29280580M', 'revision': 2}, {'key': '/books/OL34127783M', 'revision': 2}]",{'key': '/people/tfmorris'},,"{'master': '/authors/OL282451A', 'duplicates': ['/authors/OL8076429A', '/authors/OL9080556A', '/authors/OL9080558A', '/authors/OL9152667A', '/authors/OL9216291A']}",2023/12/03,OL43465526M


In [21]:
def extract_book_data(book_id, timeout=30):
    """
    Fetch the JSON record of a single book from the Open Library books
    endpoint.

    book_id(str) : book olid extracted
    timeout(float): seconds to wait for the server before giving up

    return a list holding the one record fetched, or an empty list when
    book_id is missing, the request fails, or the body is not valid JSON
    """
    # A missing id would otherwise be requested literally as "/books/None.json"
    if book_id is None or book_id == "":
        return []

    logging.info(f"Connecting to API to extract book data {book_id}")
    url = f"http://openlibrary.org/books/{book_id}.json"

    try:
        # Without a timeout a stalled connection would block the notebook forever
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        return [response.json()]
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass
        logging.error(f"Error fetching data from {url}: {str(e)}")
        return []

In [22]:
# Accumulator for the records returned for every book
extracted_data2 = []

# Walk the 'book' column of final_df_filtered_copy, one API lookup per id
for book_id in final_df_filtered_copy['book']:
    book_data = extract_book_data(book_id)
    if book_data is None:
        continue
    # Stamp each record with the id it was fetched for, then collect the batch
    for data in book_data:
        data['book'] = book_id
    extracted_data2 += book_data


second_final_df = pd.DataFrame(extracted_data2)

2024-04-06 21:33:30,138 - INFO - Connecting to API to extract book data OL49633366M
2024-04-06 21:33:30,315 - INFO - Connecting to API to extract book data OL50211745M
2024-04-06 21:33:30,755 - INFO - Connecting to API to extract book data OL9032300M
2024-04-06 21:33:30,934 - INFO - Connecting to API to extract book data OL19529488M
2024-04-06 21:33:31,110 - INFO - Connecting to API to extract book data OL50213129M
2024-04-06 21:33:31,295 - INFO - Connecting to API to extract book data OL44746730M
2024-04-06 21:33:31,470 - INFO - Connecting to API to extract book data OL43465526M


In [26]:
# Keep the first row seen for each value in the 'book' column
df_without_duplicates = second_final_df.drop_duplicates(subset='book', keep='first')

In [27]:
second_final_df[["title","book"]]


Unnamed: 0,title,book
0,L'Élève Ducobu - Tome 11,OL49633366M
1,The Bhikkhu’s Rules: A Guide for Laypeople,OL50211745M
2,Eine Richtige Ehe,OL9032300M
3,Antoine et Alfred,OL19529488M
4,Resurrections,OL50213129M
5,The modern hospital,OL44746730M
6,FATALE (HORS SERIE LITTERATURE),OL43465526M
