In [33]:
import requests as rq
from bs4 import BeautifulSoup as bs
import pandas as pd

In [34]:
def get_pageno(pageno):
    pageno= str(pageno)
    # Construct the URL
    books_pageno_url = 'https://www.audible.in/search?node=21881795031&pageSize=50&sort=&page=' + pageno
    
    # Get the HTML page content using requests
    response = rq.get(books_pageno_url)
    
    # Ensure that the reponse is valid
    if response.status_code != 200:
        print('Status code:', response.status_code)
        raise Exception('Failed to fetch web page ' + books_pageno_url)
    
    # Construct a beautiful soup document
    doc = bs(response.text)
    
    return doc

In [35]:
pageno_1= get_pageno(1)

In [36]:
book_contents= pageno_1.find_all('li', class_='bc-list-item productListItem')

#### Extracting Name of Books

In [37]:
def get_book_names(book_contents):
    book_names= []
    for tag in book_contents:
            a_tag_name= tag.h3.find_all('a', recursive=False)
            book_name= a_tag_name[0].text.strip()
            book_names.append(book_name)
    return book_names

#### Extracting Audio Book Length

In [38]:
def get_book_length(book_contents):
    book_length=[]
    for tag in book_contents:
        try:
            len_tag= tag.find('li', class_='bc-list-item runtimeLabel')
            length_tag = len_tag.find('span')
            length = length_tag.text.strip()
            book_length.append(length)
        except AttributeError:
            book_length.append(None)
    return book_length

#### Extracting Name of Author

In [39]:
def get_written_by(book_contents):
    written_by=[]
    for tag in book_contents:
        author_tag= tag.find('li', class_='bc-list-item authorLabel')
        try:
            auth_tag = author_tag.find('a')
            author = auth_tag.text.strip()
            written_by.append(author)
        except AttributeError:
            written_by.append(None)
    return written_by

#### Extracting Release Date

In [40]:
def get_release_date(book_contents):
    release_date=[]
    for tag in book_contents:
        released_tag= tag.find('li', class_='bc-list-item releaseDateLabel')
        try:
            release_tag = released_tag.find('span').text.strip()
            release_date.append(release_tag)
        except AttributeError:
            release_date.append(None)
    return release_date

#### Extracting Language of Books

In [41]:
def get_language(book_contents):
    language=[]
    for tag in book_contents:
        lang_tag= tag.find('li', class_='bc-list-item languageLabel')
        try:
            language_tag = lang_tag.find('span').text.split()
            language.append(language_tag)
        except AttributeError:
            language.append(None)
    return language

#### Extracting Score (Rating) of the Book

In [42]:
def get_rating(book_contents):
    rating=[]
    for tag in book_contents:
        star_tag= tag.find('li', class_='bc-list-item ratingsLabel')
        try:
            rating_tag = star_tag.find('span', class_='bc-text bc-pub-offscreen').text.strip()
            rating.append(rating_tag)
        except AttributeError:
            rating.append(None)
    return rating

#### Extracting Number of Rating/Reviews

In [43]:
def get_no_of_ratings(book_contents):
    no_of_ratings=[]
    for tag in book_contents:
        star_tag= tag.find('li', class_='bc-list-item ratingsLabel')
        try:
            rating_tag = star_tag.find('span', class_='bc-text bc-size-small bc-color-secondary').text.strip()
            no_of_ratings.append(rating_tag)
        except AttributeError:
            no_of_ratings.append(None)
    return no_of_ratings

#### Extracting Cost of Audio Books

In [44]:
def get_regular_price(book_contents):
    regular_price=[]
    for tag in book_contents:
        buy_tag= tag.find('p', class_='bc-text buybox-regular-price bc-spacing-none bc-spacing-top-none')
        try:
            price_tag = buy_tag.find_all('span', class_='bc-text bc-size-base bc-color-base')
            price= price_tag[1].text.strip()
            regular_price.append(price)
        except AttributeError:
            regular_price.append(None)
    return regular_price

#### Extracting Narrator of Audio Books

In [45]:
def get_narrated_by(book_contents):
    narrated_by=[]
    for tag in book_contents:
        narrator_tag= tag.find('li', class_='bc-list-item narratorLabel')
        try:
            narrate_tag = narrator_tag.find('a')
            narrator = narrate_tag.text.strip()
            narrated_by.append(narrator)
        except AttributeError:
            narrated_by.append(None)
    return narrated_by

#### Combining all columns into a Dataframe

In [46]:
def parse_all_pages(end_page):
    all_page_contents = {
            'Book_Name':[],
            'Author':[],
            'Release_Date':[],
            'Narrator':[],
            'Rating':[],
            'No_of_Ratings':[],
            'Regular_Price':[],
            'Language':[],
            'Book_Audio_Length':[],
            'Release_Date':[],
            'Narrator':[]
            }    
    for page in range (0,end_page):
        pageno_x = get_pageno(page)
        book_contents = pageno_x.find_all('li', class_='bc-list-item productListItem')
        all_page_contents['Book_Name'] += get_book_names(book_contents)
        all_page_contents['Author'] += get_written_by(book_contents)
        all_page_contents['Release_Date'] += get_release_date(book_contents)
        all_page_contents['Narrator'] += get_narrated_by(book_contents)
        all_page_contents['Rating'] += get_rating(book_contents)
        all_page_contents['No_of_Ratings'] += get_no_of_ratings(book_contents)
        all_page_contents['Regular_Price'] += get_regular_price(book_contents)
        all_page_contents['Language'] += get_language(book_contents)
        all_page_contents['Book_Audio_Length'] += get_book_length(book_contents)
        page = page + 1
    return all_page_contents

In [47]:
all_pages_scraped= pd.DataFrame(parse_all_pages(24))
all_pages_scraped

Unnamed: 0,Book_Name,Author,Release_Date,Narrator,Rating,No_of_Ratings,Regular_Price,Language,Book_Audio_Length
0,The Immortals of Meluha,Amish Tripathi,Release Date:\n ...,Manish Dongardive,5 out of 5 stars,13 ratings,₹836.00,"[Language:, English]",Length: 11 hrs and 39 mins
1,The Seven Moons of Maali Almeida,Shehan Karunatilaka,Release Date:\n ...,Shivantha Wijesinha,4.5 out of 5 stars,2 ratings,₹664.00,"[Language:, English]",Length: 15 hrs and 8 mins
2,Scion of Ikshvaku,Amish,Release Date:\n ...,Sagar Arya,4.5 out of 5 stars,"1,423 ratings",₹234.00,"[Language:, English]",Length: 11 hrs and 35 mins
3,Novelist as a Vocation,Haruki Murakami,Release Date:\n ...,Kotaro Watanabe,,Not rated yet,₹519.00,"[Language:, English]",Length: 7 hrs and 35 mins
4,Meluha Ke Mritunjay [The Immortals of Meluha],Amish Tripathi,Release Date:\n ...,Swetanshu Bora,5 out of 5 stars,1 rating,₹836.00,"[Language:, Hindi]",Length: 14 hrs and 45 mins
...,...,...,...,...,...,...,...,...,...
1194,"Hitched, Vol. 2",Kendall Ryan,Release Date:\n ...,Alexander Cendese,3.5 out of 5 stars,12 ratings,₹585.00,"[Language:, English]",Length: 3 hrs and 28 mins
1195,Madame Bovary,Gustave Flaubert,Release Date:\n ...,Ronald Pickup,4.5 out of 5 stars,3 ratings,₹873.00,"[Language:, English]",Length: 15 hrs and 22 mins
1196,"EMPEROR: The Field of Swords, Book 3 (Unabridged)",Conn Iggulden,Release Date:\n ...,Paul Blake,5 out of 5 stars,2 ratings,₹797.00,"[Language:, English]",Length: 17 hrs and 35 mins
1197,Persepolis Rising,James S. A. Corey,Release Date:\n ...,Jefferson Mays,5 out of 5 stars,14 ratings,₹932.00,"[Language:, English]",Length: 20 hrs and 34 mins


#### Exporting Scraped Data into a csv file

In [48]:
all_pages_scraped.to_csv('Literature_&_Fiction_Books_2022.csv',index=None)