In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv

_NOT_AVAILABLE = "Not available"


def _span_text(container, css_class):
    """Return the stripped text of the first <span> with *css_class*, or None."""
    node = container.find("span", class_=css_class)
    return node.text.strip() if node is not None else None


def _extract_price(book):
    """Build a "whole.fraction" price string from one search-result container."""
    whole = _span_text(book, "a-price-whole")
    if whole is None:
        return _NOT_AVAILABLE
    # NOTE(review): the whole-price span appears to often carry the decimal
    # separator itself (e.g. "499."); drop it so the result is not "499..00".
    whole = whole.rstrip(".")
    fraction = _span_text(book, "a-price-fraction")
    # A missing fraction should not discard a price that is otherwise present.
    return f"{whole}.{fraction}" if fraction else whole


def _parse_book(book):
    """Convert one search-result container into a row dict.

    Returns None when the container has no <h2> heading, since without a
    title there is nothing useful to record.
    """
    heading = book.h2
    if heading is None:
        return None

    link = heading.find("a", href=True)
    url = "https://www.amazon.in" + link["href"] if link is not None else _NOT_AVAILABLE

    return {
        "Title": heading.text.strip(),
        "Author": _span_text(book, "a-size-base-plus") or _NOT_AVAILABLE,
        "Price": _extract_price(book),
        "Rating": _span_text(book, "a-icon-alt") or _NOT_AVAILABLE,
        "URL": url,
    }


def scrape_amazon_books(query, num_pages=1, output_path="amazon_books.csv"):
    """Scrape Amazon India book search results and save them to a CSV file.

    Args:
        query: Search keywords, sent as the "k" query parameter.
        num_pages: Number of result pages to fetch, starting from page 1.
        output_path: Destination CSV file. Defaults to "amazon_books.csv".

    Raises:
        requests.RequestException: If a page request fails, times out, or
            returns an HTTP error status. Nothing is written in that case.
    """
    base_url = "https://www.amazon.in/s"
    search_params = {"k": query, "i": "stripbooks"}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    columns = ["Title", "Author", "Price", "Rating", "URL"]

    rows = []
    for page in range(1, num_pages + 1):
        response = requests.get(
            base_url,
            params={**search_params, "page": page},
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        # Fail loudly on a blocked/error page instead of silently writing an
        # empty CSV and reporting success.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        for book in soup.find_all("div", {"data-component-type": "s-search-result"}):
            row = _parse_book(book)
            if row is not None:
                rows.append(row)

    # Passing columns explicitly keeps the header stable even with zero rows.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(output_path, index=False)
    print(f"Books information successfully scraped and saved to {output_path}!")

# Example usage: guarded so that importing this file does not trigger network
# requests; it still runs when executed as a script or in a notebook cell.
if __name__ == "__main__":
    scrape_amazon_books("data science", num_pages=2)


Books information successfully scraped and saved to amazon_books.csv!
