In [65]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define a base URL pattern
base_url = 'https://books.toscrape.com/catalogue/page-{}'

# Get user input for the number of pages to scrape
print("Enter the number of pages you want to scrape: ")
num_pages = int(input())  # Convert input to an integer

# Create empty lists to store data
titles = []
prices = []
star_ratings = []

# Iterate over pages
for page_number in range(1, num_pages + 1):
    # Construct the URL for the current page
    current_url = base_url.format(page_number) + '.html'  # Append '.html' to the URL

    # Send a GET request to the URL
    response = requests.get(current_url)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract book details including title, price, and star rating
        for book in soup.find_all('article', class_='product_pod'):
            title = book.h3.a['title']
            price = book.select_one('p.price_color').text
            star_rating = book.select_one('p.star-rating')['class'][1]

            # Append data to lists
            titles.append(title)
            prices.append(price)
            star_ratings.append(star_rating)
    else:
        print(f"Failed to fetch data from {current_url}")

# Create a DataFrame from the lists
data = {'Title': titles, 'Price': prices, 'Star Rating': star_ratings}
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# Prompt user to confirm downloading as a CSV file
download_confirmation = input("\n\n\n\n\n Do you want to download the DataFrame as a CSV file? (Enter 'CONFIRM' or 'confirm' to proceed): ")

if download_confirmation.lower() == 'confirm':
    # Save DataFrame to CSV file
    df.to_csv('book_data.csv', index=False, chunksize=3000)
    print("CSV file saved successfully.")
else:
    print("CSV file download canceled.")


Enter the number of pages you want to scrape: 


 20


                                                 Title    Price Star Rating
0                                 A Light in the Attic  Â£51.77       Three
1                                   Tipping the Velvet  Â£53.74         One
2                                           Soumission  Â£50.10         One
3                                        Sharp Objects  Â£47.82        Four
4                Sapiens: A Brief History of Humankind  Â£54.23        Five
..                                                 ...      ...         ...
395        Take Me Home Tonight (Rock Star Romance #3)  Â£53.98       Three
396                  Sleeping Giants (Themis Files #1)  Â£48.74         One
397  Setting the World on Fire: The Brief, Astonish...  Â£21.15         Two
398                                  Playing with Fire  Â£13.71       Three
399              Off the Hook (Fishing for Trouble #1)  Â£47.67       Three

[400 rows x 3 columns]







 Do you want to download the DataFrame as a CSV file? (Enter 'CONFIRM' or 'confirm' to proceed):  confirm


CSV file saved successfully.
