## Importing Libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

#### Web Page Scraping Setup

In [3]:
# Catalogue root; the relative "next" links found on listing pages are appended to it.
base_url_page = "https://books.toscrape.com/catalogue/"
current_page = base_url_page + "page-1.html"
all_page_url = [current_page]

# A timeout (in seconds) keeps a stalled connection from hanging the kernel forever.
res_page = requests.get(current_page, timeout=30)

# Web Page Scraping Loop
# Follows the "next" button from page to page, recording each listing-page URL,
# and stops when a page has no "next" button or a request does not return HTTP 200.

while res_page.status_code == 200:
    data_page = BeautifulSoup(res_page.text, "html.parser")

    # Look the pager element up once and reuse it for both the check and the link.
    next_button = data_page.find(class_="next")
    if next_button is None:
        break

    page_url = base_url_page + next_button.a["href"]
    current_page = page_url
    res_page = requests.get(current_page, timeout=30)
    all_page_url.append(page_url)

In [4]:
# Work with only the first five listing pages as a sample.
# A slice is used instead of indexing positions 0..4 so the cell still runs
# (with a shorter list) when fewer than five pages were discovered.

page_urls = all_page_url[:5]
print(page_urls)


['https://books.toscrape.com/catalogue/page-1.html', 'https://books.toscrape.com/catalogue/page-2.html', 'https://books.toscrape.com/catalogue/page-3.html', 'https://books.toscrape.com/catalogue/page-4.html', 'https://books.toscrape.com/catalogue/page-5.html']


#### Scraping Book URLs from Pages

In [5]:
# Visits each selected listing page and collects the absolute URL of every book
# shown on it. Results accumulate in book_links, in page order.

book_links = []
for page_url in page_urls:
    # A timeout (in seconds) keeps one stalled connection from blocking the whole run.
    res_book = requests.get(page_url, timeout=30)
    data_book = BeautifulSoup(res_book.text, "html.parser")

    # Each element with class "product_pod" holds one book; its <h3><a href> is a
    # link relative to the catalogue root, so prefix it with base_url_page.
    book_half_urls = data_book.find_all(class_="product_pod")
    book_links.extend(
        base_url_page + book_url.h3.a["href"] for book_url in book_half_urls
    )

#### Scraping Book Details from Detail Pages

In [6]:
# Visits every book's detail page and collects one [title, link, price, stock] row
# per book. Rows accumulate in the book_detail list so the DataFrame can be built
# from it in a single step afterwards.

book_detail = []
for book_details_link in book_links:
    # A timeout (in seconds) keeps one stalled connection from blocking the whole run.
    book_details = requests.get(book_details_link, timeout=30)
    data_book_details = BeautifulSoup(book_details.text, "html.parser")

    # Convert to a plain str: a NavigableString stored in the results would keep a
    # reference to the whole parse tree of the page it came from.
    title = str(data_book_details.find("h1").string)
    link = book_details_link

    # Keep only the numeric part of the price text, skipping whatever precedes the
    # first digit (e.g. a currency symbol). Raw string so "\d" is a regex escape,
    # not an invalid Python string escape.
    price_text = data_book_details.find("p", class_="price_color").string
    price = float(re.search(r"\d+(?:\.\d+)?", price_text).group())

    # The first run of digits in the availability text is taken as the stock count.
    stock_text = data_book_details.find("p", class_="instock availability").text.strip()
    stock = int(re.search(r"\d+", stock_text).group())

    book_detail.append([title, link, price, stock])

In [7]:
# Build the DataFrame in one step from the collected rows; each inner list
# supplies the values for the four named columns, in order.

column_names = ["Title", "Link", "Price", "Stock Available"]
df = pd.DataFrame(data=book_detail, columns=column_names)

In [8]:
# Summarize the DataFrame: row count, column dtypes, non-null counts, and memory usage.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Title            100 non-null    object 
 1   Link             100 non-null    object 
 2   Price            100 non-null    float64
 3   Stock Available  100 non-null    int64  
dtypes: float64(1), int64(1), object(2)
memory usage: 3.2+ KB


In [10]:
# Preview the first rows of the DataFrame to sanity-check the scraped values.

df.head()

Unnamed: 0,Title,Link,Price,Stock Available
0,A Light in the Attic,https://books.toscrape.com/catalogue/a-light-i...,51.77,22
1,Tipping the Velvet,https://books.toscrape.com/catalogue/tipping-t...,53.74,20
2,Soumission,https://books.toscrape.com/catalogue/soumissio...,50.1,20
3,Sharp Objects,https://books.toscrape.com/catalogue/sharp-obj...,47.82,20
4,Sapiens: A Brief History of Humankind,https://books.toscrape.com/catalogue/sapiens-a...,54.23,20


In [None]:
# Write the scraped table to a CSV file in the working directory.
# index=False leaves the positional row index out of the file.

output_csv = "Books_details_project_5.csv"
df.to_csv(output_csv, index=False)

**Conclusion:**
This web scraping project used the Python libraries requests, BeautifulSoup, and pandas to extract book details from the "Books to Scrape" website. The process involved following the pagination links to collect the page URLs, gathering the book URLs from the first five catalogue pages (100 books), scraping each book's title, link, price, and stock availability, and organizing the data into a structured DataFrame. The final step saved the DataFrame as a CSV file. The project highlights the significance of web scraping for data acquisition and manipulation, showcases its practical applications in data analysis, and provides hands-on experience in these essential skills.