In [6]:
import requests

# Google Books API URL
BASE_URL = "https://www.googleapis.com/books/v1/volumes"

# Function to fetch books with pagination and filter industryIdentifiers
def fetch_books(query, max_results=1000, results_per_page=40, api_key = None):
    all_books = []
    start_index = 0  # Initial index to start fetching
    
    while start_index < max_results:
        # Send GET request with the query and pagination parameters
        response = requests.get(BASE_URL, params={
            'q': query,
            'startIndex': start_index,
            'maxResults': results_per_page
        })
        
        # If the response is successful
        if response.status_code == 200:
            data = response.json()
            books = data.get('items', [])
            
            # Filter industryIdentifiers to include only "ISBN_13" and "ISBN_10"
            for book in books:
                volume_info = book.get('volumeInfo', {})
                
                # Safely get 'industryIdentifiers', default to empty list if not found
                industry_identifiers = volume_info.get('industryIdentifiers', [])
                
                # Filter the industryIdentifiers by type (ISBN_13 and ISBN_10)
                filtered_identifiers = [identifier for identifier in industry_identifiers if identifier.get('type') in ['ISBN_13', 'ISBN_10']]
                
                # If there are any valid identifiers, concatenate them into a single string
                if filtered_identifiers:
                    concatenated_identifiers = ' | '.join([f"{identifier['type']}: {identifier['identifier']}" for identifier in filtered_identifiers])
                    volume_info['industryIdentifiers'] = concatenated_identifiers  # Store as a single string
                    all_books.append(book)
            
            # If we fetched less than requested, we are done
            if len(books) < results_per_page:
                break
            
            # Increment startIndex to fetch the next page of results
            start_index += results_per_page
        else:
            print(f"Error fetching data: {response.status_code}")
            break
    
    return all_books

# Example usage
query = "python programming"  # Search query (e.g., books related to Python programming)
api_key = "AIzaSyBa_59ZupTnQEOzwpiXh2w9sQuayhveCcY" 
books = fetch_books(query, max_results=1000, results_per_page=40, api_key=api_key)

# Print out the titles and concatenated industryIdentifiers of the fetched books
for idx, book in enumerate(books, start=1):
    title = book['volumeInfo'].get('title', 'No Title')
    concatenated_identifiers = book['volumeInfo'].get('industryIdentifiers', 'No Identifiers')
    
    # Display the book title and the concatenated industryIdentifiers
    print(f"{idx}. {title}")
    print(f"   - Identifiers: {concatenated_identifiers}")


1. Learning Python
   - Identifiers: ISBN_13: 9781449355715 | ISBN_10: 1449355714
2. Python Tutorial 3.11.3
   - Identifiers: ISBN_10: 1312571659 | ISBN_13: 9781312571655
3. Introduction to Python Programming
   - Identifiers: ISBN_13: 9781351013215 | ISBN_10: 1351013211
4. Python Programming
   - Identifiers: ISBN_13: 9781887902991 | ISBN_10: 1887902996
5. Ultimate Python Programming
   - Identifiers: ISBN_13: 9789355516558 | ISBN_10: 935551655X
6. Taming PYTHON By Programming
   - Identifiers: ISBN_13: 9789386173348 | ISBN_10: 9386173344
7. Introduction to Computation and Programming Using Python, revised and expanded edition
   - Identifiers: ISBN_13: 9780262316668 | ISBN_10: 0262316668
8. Python for Kids
   - Identifiers: ISBN_13: 9781593274948 | ISBN_10: 1593274947
9. Modern Python Programming using ChatGPT
   - Identifiers: ISBN_13: 9789365894318 | ISBN_10: 936589431X
10. LEARN PYTHON WITH 200 PROGRAMS
   - Identifiers: ISBN_13: 9781649510747 | ISBN_10: 1649510748
11. Python Cras

In [25]:
import requests

# Set up your API key and base URL
API_KEY = 'AIzaSyBa_59ZupTnQEOzwpiXh2w9sQuayhveCcY'
BASE_URL = 'https://www.googleapis.com/books/v1/volumes'

# Function to fetch books based on query and handle pagination
def fetch_books(query, max_results=40, total_books=1000):
    books = []
    start_index = 0
    while len(books) < total_books:
        # Build the URL for the API request
        url = f"{BASE_URL}?q={query}&startIndex={start_index}&maxResults={max_results}&key={API_KEY}"
        response = requests.get(url)
        data = response.json()
        
        # Check if the response contains items
        if data:
            for index, item in enumerate(data.get("items", [])):
                volume_info = item.get('volumeInfo', {})
                sale_info = item.get('saleInfo', {})
                industry_Identifiers = volume_info.get('industryIdentifiers', [])
                book_data = {
                    'book_id' : item.get('id', 'N/A'),  # Book ID
                    'search_key' : query,  # Search key (selfLink is unique)
                    'book_title' : volume_info.get('title', 'N/A'),  # Book title
                    'book_subtitle' : volume_info.get('subtitle', 'N/A'),  # Book subtitle
                    'book_authors' : ', '.join(volume_info.get('authors', ['N/A'])),  # Authors
                    'book_description' : volume_info.get('description', 'No description available.'),  # Description
            
                    # Concatenate all types in industryIdentifiers directly
                    'industryIdentifiers' : ''.join([identifier.get('type', 'N/A') for identifier in industry_Identifiers]),  # Concatenate types
            
                    'text_readingModes' : volume_info.get('readingModes', {}).get('text', 'False'),  # Text reading mode
                    'image_readingModes' : volume_info.get('readingModes', {}).get('image', 'False') , # Image reading mode
                    'pageCount' : volume_info.get('pageCount', '0'),  # Page count
                    'categories' : ', '.join(volume_info.get('categories', ['N/A'])),  # Categories
                    'language' : volume_info.get('language', 'N/A'),  # Language
                    'imageLinks' : volume_info.get('imageLinks', {}).get('thumbnail', 'No thumbnail available.'),  # Image links
                    'ratingsCount' : volume_info.get('ratingsCount', '0'),  # Ratings count
                    'averageRating' : volume_info.get('averageRating', '0'),  # Average rating
                    'country' : volume_info.get('country', 'N/A'),  # Country (publication)
                    'saleability' : sale_info.get('saleability', 'N/A'),  # Saleability
                    'isEbook' : sale_info.get('isEbook', 'False'),  # Is ebook
                    'amount_listPrice' : sale_info.get('listPrice', {}).get('amount', '0.0'),  # Amount listPrice
                    'currencyCode_listPrice' : sale_info.get('listPrice', {}).get('currencyCode', 'N/A'),  # Currency code listPrice
                    'amount_retailPrice' : sale_info.get('retailPrice', {}).get('amount', '0.0'),  # Amount retailPrice
                    'currencyCode_retailPrice' : sale_info.get('retailPrice', {}).get('currencyCode', 'N/A'),  # Currency code retailPrice
                    'buyLink' : volume_info.get('infoLink', 'NA'),  # Buy link
                    'year' : volume_info.get('publishedDate', '0000')[:4],  # Year (from publishedDate)
                    'publisher' : volume_info.get('publisher', 'Not available'),  # Publisher
                }
                books.append(book_data)
        
        # Increment the startIndex for the next page of results
        start_index += max_results
        
        # If no more items, break the loop
        if 'items' not in data or len(data['items']) == 0:
            break
    
    return books

# Example usage
query = 'python'
books = fetch_books(query)

# Display the first few books
for book in books[:5]:
    print(book)


{'book_id': 'pVP8AQAAQBAJ', 'search_key': 'python', 'book_title': 'Introduction to Computation and Programming Using Python', 'book_subtitle': 'N/A', 'book_authors': 'John V. Guttag, John Guttag', 'book_description': 'An introductory text that teaches students the art of computational problem solving, covering topics that range from simple algorithms to information visualization.', 'industryIdentifiers': 'ISBN_13ISBN_10', 'text_readingModes': False, 'image_readingModes': True, 'pageCount': 315, 'categories': 'Computers', 'language': 'en', 'imageLinks': 'http://books.google.com/books/content?id=pVP8AQAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api', 'ratingsCount': '0', 'averageRating': '0', 'country': 'N/A', 'saleability': 'NOT_FOR_SALE', 'isEbook': False, 'amount_listPrice': '0.0', 'currencyCode_listPrice': 'N/A', 'amount_retailPrice': '0.0', 'currencyCode_retailPrice': 'N/A', 'buyLink': 'http://books.google.co.in/books?id=pVP8AQAAQBAJ&dq=python&hl=&source=gbs_api', '

In [26]:
import pandas as pd
df1 = pd.DataFrame(books)
df1

Unnamed: 0,book_id,search_key,book_title,book_subtitle,book_authors,book_description,industryIdentifiers,text_readingModes,image_readingModes,pageCount,...,country,saleability,isEbook,amount_listPrice,currencyCode_listPrice,amount_retailPrice,currencyCode_retailPrice,buyLink,year,publisher
0,pVP8AQAAQBAJ,python,Introduction to Computation and Programming Us...,,"John V. Guttag, John Guttag",An introductory text that teaches students the...,ISBN_13ISBN_10,False,True,315,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=pVP8AQAAQBA...,2013,MIT Press
1,HFeGvgEACAAJ,python,The Quick Python Book,,"Vernon L. Ceder, Naomi R. Ceder","Introduces the programming language's syntax, ...",ISBN_10ISBN_13,False,False,336,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=HFeGvgEACAA...,2010,Manning Publications Company
2,nEJ-jcYF2fMC,python,Learning Python,,Mark Lutz,"Portable, powerful, and a breeze to use, Pytho...",ISBN_13ISBN_10,True,True,749,...,,FOR_SALE,True,3235.55,INR,3235.55,INR,https://play.google.com/store/books/details?id...,2007,"""O'Reilly Media, Inc."""
3,4pgQfXQvekcC,python,Learning Python,Powerful Object-Oriented Programming,Mark Lutz,"Get a comprehensive, in-depth introduction to ...",ISBN_13ISBN_10,True,True,1740,...,,FOR_SALE,True,3540,INR,3540,INR,https://play.google.com/store/books/details?id...,2013,"""O'Reilly Media, Inc."""
4,bIZHCgAAQBAJ,python,Fluent Python,"Clear, Concise, and Effective Programming",Luciano Ramalho,Python’s simplicity lets you become productive...,ISBN_13ISBN_10,True,True,755,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=bIZHCgAAQBA...,2015,"""O'Reilly Media, Inc."""
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,WJy1zQEACAAJ,python,"JAVASCRIPT in 8 Hours: for Beginners, Learn Co...",,Ray Yao,"About this book: Absolutely for Beginners ""Jav...",ISBN_13,False,False,200,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=WJy1zQEACAA...,2020,Not available
703,GDia0Uy3D3cC,python,Totemism and Exogamy: -3. An ethnographical su...,,James George Frazer,No description available.,OTHER,False,False,674,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=GDia0Uy3D3c...,1910,Not available
704,BE0EAAAAMBAJ,python,LIFE,,,LIFE Magazine is the treasured photographic ma...,,False,True,152,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=BE0EAAAAMBA...,1950,Not available
705,kDgDAAAAMBAJ,python,Jet,,,The weekly source of African American politica...,,False,True,64,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=kDgDAAAAMBA...,1996,Not available


In [27]:
import requests

# Set up your API key and base URL
API_KEY = 'AIzaSyBa_59ZupTnQEOzwpiXh2w9sQuayhveCcY'
BASE_URL = 'https://www.googleapis.com/books/v1/volumes'

# Function to fetch books based on query and handle pagination
def fetch_books(query, max_results=40, total_books=1000):
    books = []
    start_index = 0
    while len(books) < total_books:
        # Build the URL for the API request
        url = f"{BASE_URL}?q={query}&startIndex={start_index}&maxResults={max_results}&key={API_KEY}"
        response = requests.get(url)
        data = response.json()
        
        # Check if the response contains items
        if data:
            for index, item in enumerate(data.get("items", [])):
                volume_info = item.get('volumeInfo', {})
                sale_info = item.get('saleInfo', {})
                industry_Identifiers = volume_info.get('industryIdentifiers', [])
                book_data = {
                    'book_id' : item.get('id', 'N/A'),  # Book ID
                    'search_key' : query,  # Search key (selfLink is unique)
                    'book_title' : volume_info.get('title', 'N/A'),  # Book title
                    'book_subtitle' : volume_info.get('subtitle', 'N/A'),  # Book subtitle
                    'book_authors' : ', '.join(volume_info.get('authors', ['N/A'])),  # Authors
                    'book_description' : volume_info.get('description', 'No description available.'),  # Description
            
                    # Concatenate all types in industryIdentifiers directly
                    'industryIdentifiers' : ''.join([identifier.get('type', 'N/A') for identifier in industry_Identifiers]),  # Concatenate types
            
                    'text_readingModes' : volume_info.get('readingModes', {}).get('text', 'False'),  # Text reading mode
                    'image_readingModes' : volume_info.get('readingModes', {}).get('image', 'False') , # Image reading mode
                    'pageCount' : volume_info.get('pageCount', '0'),  # Page count
                    'categories' : ', '.join(volume_info.get('categories', ['N/A'])),  # Categories
                    'language' : volume_info.get('language', 'N/A'),  # Language
                    'imageLinks' : volume_info.get('imageLinks', {}).get('thumbnail', 'No thumbnail available.'),  # Image links
                    'ratingsCount' : volume_info.get('ratingsCount', '0'),  # Ratings count
                    'averageRating' : volume_info.get('averageRating', '0'),  # Average rating
                    'country' : volume_info.get('country', 'N/A'),  # Country (publication)
                    'saleability' : sale_info.get('saleability', 'N/A'),  # Saleability
                    'isEbook' : sale_info.get('isEbook', 'False'),  # Is ebook
                    'amount_listPrice' : sale_info.get('listPrice', {}).get('amount', '0.0'),  # Amount listPrice
                    'currencyCode_listPrice' : sale_info.get('listPrice', {}).get('currencyCode', 'N/A'),  # Currency code listPrice
                    'amount_retailPrice' : sale_info.get('retailPrice', {}).get('amount', '0.0'),  # Amount retailPrice
                    'currencyCode_retailPrice' : sale_info.get('retailPrice', {}).get('currencyCode', 'N/A'),  # Currency code retailPrice
                    'buyLink' : volume_info.get('infoLink', 'NA'),  # Buy link
                    'year' : volume_info.get('publishedDate', '0000')[:4],  # Year (from publishedDate)
                    'publisher' : volume_info.get('publisher', 'Not available'),  # Publisher
                }
                books.append(book_data)
        
        # Increment the startIndex for the next page of results
        start_index += max_results
        
        # If no more items, break the loop
        if 'items' not in data or len(data['items']) == 0:
            break
    
    return books

# Example usage
query = 'sql'
books = fetch_books(query)

# Display the first few books
for book in books[:5]:
    print(book)


{'book_id': 'y3_9EAAAQBAJ', 'search_key': 'sql', 'book_title': 'SQL All-in-One For Dummies', 'book_subtitle': 'N/A', 'book_authors': 'Allen G. Taylor, Richard Blum', 'book_description': 'The most thorough SQL reference, now updated for SQL:2023 SQL All-in-One For Dummies has everything you need to get started with the SQL programming language, and then to level up your skill with advanced applications. This relational database coding language is one of the most used languages in professional software development. And, as it becomes ever more important to take control of data, there’s no end in sight to the need for SQL know-how. You can take your career to the next level with this guide to creating databases, accessing and editing data, protecting data from corruption, and integrating SQL with other languages in a programming environment. Become a SQL guru and turn the page on the next chapter of your coding career. Get 7 mini-books in one, covering basic SQL, database development, and

In [28]:
import pandas as pd
df2 = pd.DataFrame(books)
df2

Unnamed: 0,book_id,search_key,book_title,book_subtitle,book_authors,book_description,industryIdentifiers,text_readingModes,image_readingModes,pageCount,...,country,saleability,isEbook,amount_listPrice,currencyCode_listPrice,amount_retailPrice,currencyCode_retailPrice,buyLink,year,publisher
0,y3_9EAAAQBAJ,sql,SQL All-in-One For Dummies,,"Allen G. Taylor, Richard Blum","The most thorough SQL reference, now updated f...",ISBN_13ISBN_10,True,True,807,...,,FOR_SALE,True,706.82,INR,353.41,INR,https://play.google.com/store/books/details?id...,2024,John Wiley & Sons
1,pjhCEAAAQBAJ,sql,SQL for Data Analysis,,Cathy Tanimura,"With the explosion of data, computing power, a...",ISBN_13ISBN_10,False,True,359,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=pjhCEAAAQBA...,2021,"""O'Reilly Media, Inc."""
2,BA2CDwAAQBAJ,sql,1000 SQL Interview Questions and Answers,1000 most important and frequently asked struc...,Vamsee Puligadda,"Get that job, you aspire for! Want to switch t...",,False,True,189,...,,FOR_SALE,True,206.5,INR,144.55,INR,https://play.google.com/store/books/details?id...,0000,Vamsee Puligadda
3,RHOOCwAAQBAJ,sql,Getting Started with SQL,,Thomas Nield,Businesses are gathering data today at exponen...,ISBN_13ISBN_10,True,True,144,...,,FOR_SALE,True,2135.8,INR,2135.8,INR,https://play.google.com/store/books/details?id...,2016,"""O'Reilly Media, Inc."""
4,FwKHDwAAQBAJ,sql,Learning SQL,Master SQL Fundamentals,Alan Beaulieu,Updated for the latest database management sys...,ISBN_13ISBN_10,False,True,338,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=FwKHDwAAQBA...,2009,O'Reilly Media
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670,Fms3AQAAMAAJ,sql,De Ingenieur,,,No description available.,OTHER,False,False,1018,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=Fms3AQAAMAA...,1917,Not available
671,-1gfAQAAMAAJ,sql,Marine Engineering,,,No description available.,OTHER,False,False,1146,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=-1gfAQAAMAA...,1921,Not available
672,WStqa-OykF0C,sql,Muret-Sanders Enzyklopädisches englisch-deutsc...,,Eduard Muret,No description available.,OTHER,False,False,1172,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=WStqa-OykF0...,1910,Not available
673,E1EEAAAAMBAJ,sql,InfoWorld,,,InfoWorld is targeted to Senior IT professiona...,,False,True,156,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=E1EEAAAAMBA...,1998,Not available


In [29]:
df = pd.concat([df1, df2], axis=0, ignore_index=True)
df

Unnamed: 0,book_id,search_key,book_title,book_subtitle,book_authors,book_description,industryIdentifiers,text_readingModes,image_readingModes,pageCount,...,country,saleability,isEbook,amount_listPrice,currencyCode_listPrice,amount_retailPrice,currencyCode_retailPrice,buyLink,year,publisher
0,pVP8AQAAQBAJ,python,Introduction to Computation and Programming Us...,,"John V. Guttag, John Guttag",An introductory text that teaches students the...,ISBN_13ISBN_10,False,True,315,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=pVP8AQAAQBA...,2013,MIT Press
1,HFeGvgEACAAJ,python,The Quick Python Book,,"Vernon L. Ceder, Naomi R. Ceder","Introduces the programming language's syntax, ...",ISBN_10ISBN_13,False,False,336,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=HFeGvgEACAA...,2010,Manning Publications Company
2,nEJ-jcYF2fMC,python,Learning Python,,Mark Lutz,"Portable, powerful, and a breeze to use, Pytho...",ISBN_13ISBN_10,True,True,749,...,,FOR_SALE,True,3235.55,INR,3235.55,INR,https://play.google.com/store/books/details?id...,2007,"""O'Reilly Media, Inc."""
3,4pgQfXQvekcC,python,Learning Python,Powerful Object-Oriented Programming,Mark Lutz,"Get a comprehensive, in-depth introduction to ...",ISBN_13ISBN_10,True,True,1740,...,,FOR_SALE,True,3540,INR,3540,INR,https://play.google.com/store/books/details?id...,2013,"""O'Reilly Media, Inc."""
4,bIZHCgAAQBAJ,python,Fluent Python,"Clear, Concise, and Effective Programming",Luciano Ramalho,Python’s simplicity lets you become productive...,ISBN_13ISBN_10,True,True,755,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=bIZHCgAAQBA...,2015,"""O'Reilly Media, Inc."""
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1377,Fms3AQAAMAAJ,sql,De Ingenieur,,,No description available.,OTHER,False,False,1018,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=Fms3AQAAMAA...,1917,Not available
1378,-1gfAQAAMAAJ,sql,Marine Engineering,,,No description available.,OTHER,False,False,1146,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=-1gfAQAAMAA...,1921,Not available
1379,WStqa-OykF0C,sql,Muret-Sanders Enzyklopädisches englisch-deutsc...,,Eduard Muret,No description available.,OTHER,False,False,1172,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=WStqa-OykF0...,1910,Not available
1380,E1EEAAAAMBAJ,sql,InfoWorld,,,InfoWorld is targeted to Senior IT professiona...,,False,True,156,...,,NOT_FOR_SALE,False,0.0,,0.0,,http://books.google.co.in/books?id=E1EEAAAAMBA...,1998,Not available


In [30]:
df.isnull().sum()

book_id                     0
search_key                  0
book_title                  0
book_subtitle               0
book_authors                0
book_description            0
industryIdentifiers         0
text_readingModes           0
image_readingModes          0
pageCount                   0
categories                  0
language                    0
imageLinks                  0
ratingsCount                0
averageRating               0
country                     0
saleability                 0
isEbook                     0
amount_listPrice            0
currencyCode_listPrice      0
amount_retailPrice          0
currencyCode_retailPrice    0
buyLink                     0
year                        0
publisher                   0
dtype: int64

In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1382 entries, 0 to 1381
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   book_id                   1382 non-null   object
 1   search_key                1382 non-null   object
 2   book_title                1382 non-null   object
 3   book_subtitle             1382 non-null   object
 4   book_authors              1382 non-null   object
 5   book_description          1382 non-null   object
 6   industryIdentifiers       1382 non-null   object
 7   text_readingModes         1382 non-null   bool  
 8   image_readingModes        1382 non-null   bool  
 9   pageCount                 1382 non-null   object
 10  categories                1382 non-null   object
 11  language                  1382 non-null   object
 12  imageLinks                1382 non-null   object
 13  ratingsCount              1382 non-null   object
 14  averageRating           

In [32]:
df['ratingsCount'] = pd.to_numeric(df['ratingsCount'], errors='coerce').fillna(0).astype(int)
df['pageCount'] = pd.to_numeric(df['pageCount'], errors='coerce').fillna(0).astype(int)
df['averageRating'] = pd.to_numeric(df['averageRating'], errors='coerce').fillna(0).astype(float)
df['amount_listPrice'] = pd.to_numeric(df['amount_listPrice'], errors='coerce').fillna(0).astype(float)
df['amount_retailPrice'] = pd.to_numeric(df['amount_retailPrice'], errors='coerce').fillna(0).astype(float)
df['book_id'] = df['book_id'].astype(str)
df['search_key'] = df['search_key'].astype(str)
df['book_title'] = df['book_title'].astype(str)
df['language'] = df['language'].astype(str)
df['saleability'] = df['saleability'].astype(str)
df['currencyCode_listPrice'] = df['currencyCode_listPrice'].astype(str)
df['currencyCode_retailPrice'] = df['currencyCode_retailPrice'].astype(str)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1382 entries, 0 to 1381
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   book_id                   1382 non-null   object 
 1   search_key                1382 non-null   object 
 2   book_title                1382 non-null   object 
 3   book_subtitle             1382 non-null   object 
 4   book_authors              1382 non-null   object 
 5   book_description          1382 non-null   object 
 6   industryIdentifiers       1382 non-null   object 
 7   text_readingModes         1382 non-null   bool   
 8   image_readingModes        1382 non-null   bool   
 9   pageCount                 1382 non-null   int32  
 10  categories                1382 non-null   object 
 11  language                  1382 non-null   object 
 12  imageLinks                1382 non-null   object 
 13  ratingsCount              1382 non-null   int32  
 14  averageR

In [33]:
df.to_csv('Books_details_1.csv', index=False)

In [22]:
dup_df = df[df.duplicated()]
dup_df

Unnamed: 0,book_id,search_key,book_title,book_subtitle,book_authors,book_description,industryIdentifiers,text_reading_modes,image_reading_modes,page_count,...,country,saleability,isEbook,list_price,currencycode_listprice,retail_price,currencycode_retailprice,buy_link,year,publisher
120,On9dEAAAQBAJ,python,Python Concurrency with Asyncio,,Matthew Fowler,Learn how to speed up slow Python code with co...,ISBN_13ISBN_10,False,False,374,...,,NOT_FOR_SALE,False,0,,0,,http://books.google.co.in/books?id=On9dEAAAQBA...,2022,Simon and Schuster
121,dNdaEAAAQBAJ,python,A Journey to Core Python,"Experience the Applications of Tuples, Diction...","Girish Kumar , Dr. Ajay Shriram Kushwaha, Raji...",Come and join hands together to learn Python f...,ISBN_13ISBN_10,True,True,308,...,,FOR_SALE,True,566.4,INR,283.2,INR,https://play.google.com/store/books/details?id...,286,BPB Publications
122,dJ6FEAAAQBAJ,python,Time Series Forecasting in Python,,Marco Peixeiro,Build predictive models from time-based patter...,ISBN_13ISBN_10,True,False,454,...,,FOR_SALE,True,2960.62,INR,2960.62,INR,https://play.google.com/store/books/details?id...,2022,Simon and Schuster
123,yYKGDwAAQBAJ,python,Python Network Programming,Conquer all your networking challenges with th...,"Abhishek Ratan, Eric Chou, Pradeeban Kathirave...",Power up your network applications with Python...,ISBN_13ISBN_10,True,True,763,...,,FOR_SALE,True,1108.01,INR,554,INR,https://play.google.com/store/books/details?id...,2019,Packt Publishing Ltd
124,VQDgDAAAQBAJ,python,Mastering Python,,Rick van Hattem,Master the art of writing beautiful and powerf...,ISBN_13ISBN_10,True,True,486,...,,FOR_SALE,True,3163.57,INR,3163.57,INR,https://play.google.com/store/books/details?id...,2016,Packt Publishing Ltd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335,JrYzAQAAMAAJ,sql,Materialy po i︠a︡feticheskomu i︠a︡zykoznanii︠u︡,,,No description available.,OTHER,False,True,876,...,,FREE,True,0,,0,,https://play.google.com/store/books/details?id...,1910,
1336,71BzZGHj-XMC,sql,Königliches Hof- und Nationaltheater,1879,,No description available.,OTHER,False,True,784,...,,FREE,True,0,,0,,https://play.google.com/store/books/details?id...,1879,
1337,RhRQAAAAYAAJ,sql,Abhandlungen und Berichte des Königl. Zoologis...,,,No description available.,OTHER,False,True,574,...,,FREE,True,0,,0,,https://play.google.com/store/books/details?id...,1903,
1338,ieksAAAAYAAJ,sql,Le réveil de la musique,"ou, étude rendue d'une facilité extrême et ses...",Louis A. Ivon,No description available.,OTHER,False,True,432,...,,FREE,True,0,,0,,https://play.google.com/store/books/details?id...,1877,


In [26]:
import pymysql
from sqlalchemy import create_engine

# MySQL connection
engine = create_engine('mysql+pymysql://root:mysql@localhost/Book_details')

df.to_sql('Book_details', engine, if_exists='replace', index=False)

  df.to_sql('Book_details', engine, if_exists='replace', index=False)


1400

In [25]:
df['saleability'].unique()

array(['FOR_SALE', 'NOT_FOR_SALE', 'FREE'], dtype=object)

In [38]:
import mysql.connector

mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  password="mysql",
  database="book_details"
)

mycursor = mydb.cursor()

mycursor.execute("""CREATE TABLE IF NOT EXISTS books_details1 (
book_id VARCHAR(255) PRIMARY KEY,
search_key VARCHAR(255),
book_title VARCHAR(255), 
book_subtitle TEXT,
book_authors TEXT,
book_description TEXT,
industryIdentifiers TEXT,
text_readingModes BOOLEAN,
image_readingModes BOOLEAN, 
pageCount INT,
categories TEXT, 
language VARCHAR(50), 
imageLinks TEXT,
ratingsCount INT,
averageRating dec(3,2),
country VARCHAR(50),
saleability VARCHAR(50),
isEbook BOOLEAN, 
amount_listPrice dec(10,2),
currencyCode_listPrice VARCHAR(50),
amount_retailPrice dec(10,2),
currencycode_retailprice VARCHAR(50),
buyLink TEXT,
year TEXT,
publisher TEXT
);""")

In [35]:
# Import libraries
import pandas as pd
from sqlalchemy import create_engine
import mysql.connector

# MySQL connection details
user = 'root'
password = 'mysql'
host = 'localhost'
database = 'book_details'

# Create SQLAlchemy engine
connection_string = f'mysql+mysqlconnector://{user}:{password}@{host}/{database}'
engine = create_engine(connection_string)

# Sample DataFrame
print(df.head())

# Insert DataFrame into MySQL table
df.to_sql(name='books_details', con=engine, if_exists='replace', index=False)

# Verify the insertion
conn = mysql.connector.connect(user=user, password=password, host=host, database=database)
cursor = conn.cursor()
cursor.execute("SELECT * FROM books_details")
result = cursor.fetchall()
for row in result:
    print(row)

# Close connection
cursor.close()
conn.close()


        book_id search_key                                         book_title  \
0  pVP8AQAAQBAJ     python  Introduction to Computation and Programming Us...   
1  HFeGvgEACAAJ     python                              The Quick Python Book   
2  nEJ-jcYF2fMC     python                                    Learning Python   
3  4pgQfXQvekcC     python                                    Learning Python   
4  bIZHCgAAQBAJ     python                                      Fluent Python   

                               book_subtitle                     book_authors  \
0                                        N/A      John V. Guttag, John Guttag   
1                                        N/A  Vernon L. Ceder, Naomi R. Ceder   
2                                        N/A                        Mark Lutz   
3       Powerful Object-Oriented Programming                        Mark Lutz   
4  Clear, Concise, and Effective Programming                  Luciano Ramalho   

                          