In [2]:
# Task no :5

import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd  # Import Pandas

class BookScraper:
    def __init__(self):
        self.base_url = 'https://books.toscrape.com/catalogue/category/books/'

    def scrape_book_details(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        book_details = []

        for book in soup.find_all('article', class_='product_pod'):
            title = book.h3.a.get('title')
            price = book.find('p', class_='price_color').get_text()
            availability = book.find('p', class_='instock availability').get_text().strip()
            image_url = book.img.get('src')
            rating = book.find('p', class_='star-rating')['class'][1]
            product_page_url = book.h3.a.get('href')

            book_info = {
                'Title': title,
                'Price': price,
                'Availability': availability,
                'Image URL': image_url,
                'Rating': rating,
                'Product Page URL': f'https://books.toscrape.com/catalogue/{product_page_url}'
            }
            book_details.append(book_info)

        return book_details

    def write_to_csv(self, data, filename):
        keys = data[0].keys() if data else []
        with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)

    def convert_to_dataframe(self, filename):
        df = pd.read_csv(filename)  # Read CSV file using Pandas
        return df

    def scrape_books_for_category(self, category):
        url = f'{self.base_url}{category}/index.html'
        print(f"URL for '{category}' category: {url}")

        book_data = []
        page_number = 1
        total_columns = 0

        while total_columns < 100:
            page_url = f"{url}?page={page_number}"
            print(f"Scraping URL: {page_url}")  # Debugging line
            page_data = self.scrape_book_details(page_url)
            book_data.extend(page_data)
            total_columns += len(page_data)
            page_number += 1

        if total_columns > 100:
            excess = total_columns - 100
            book_data = book_data[:-excess]

        filename = f'{category}_books.csv'
        self.write_to_csv(book_data, filename)
        print(f"Books data for '{category}' category has been saved in '{filename}'.")

        dataframe = self.convert_to_dataframe(filename)
        print(f"Converted '{filename}' to Pandas DataFrame:")
        print(dataframe)

scraper = BookScraper()
categories = ['fiction_10', 'science-fiction_16', 'historical-fiction_4']
for category in categories:
    scraper.scrape_books_for_category(category)


URL for 'fiction_10' category: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html
Scraping URL: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html?page=1
Scraping URL: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html?page=2
Scraping URL: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html?page=3
Scraping URL: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html?page=4
Scraping URL: https://books.toscrape.com/catalogue/category/books/fiction_10/index.html?page=5
Books data for 'fiction_10' category has been saved in 'fiction_10_books.csv'.
Converted 'fiction_10_books.csv' to Pandas DataFrame:
                                                Title   Price Availability  \
0                                          Soumission  £50.10     In stock   
1                         Private Paris (Private #10)  £47.61     In stock   
2                        We Love You, Charlie Freeman