In [3]:
import requests
from bs4 import BeautifulSoup
import csv

# Function to fetch the HTML content of a given URL
def fetch_html(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        print(f"Failed to retrieve content. Status code: {response.status_code}")
        return None

# Function to parse the HTML content and extract book data
def parse_html(html):
    soup = BeautifulSoup(html, 'html.parser')
    books = []

    # Find all book items
    book_items = soup.find_all('article', class_='product_pod')

    for book in book_items:
        # Extract the book title
        title = book.h3.a['title']

        # Extract the book price
        price = book.find('p', class_='price_color').get_text()

        # Extract stock availability
        availability = book.find('p', class_='instock availability').get_text(strip=True)

        # Extract rating
        rating = book.p['class'][1]  # 'star-rating Three', 'star-rating Five', etc.

        # Append the extracted data to the books list
        books.append({
            'title': title,
            'price': price,
            'availability': availability,
            'rating': rating
        })

    return books

# Function to save the extracted data to a CSV file
def save_to_csv(data, filename):
    keys = data[0].keys()
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

# Function to display the contents of the CSV file
def display_csv(filename):
    with open(filename, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            print(f"Title: {row['title']}")
            print(f"Price: {row['price']}")
            print(f"Availability: {row['availability']}")
            print(f"Rating: {row['rating']}")
            print('-' * 40)

# Main function to control the flow of the script
def main():
    base_url = 'http://books.toscrape.com/catalogue/page-{}.html'
    all_books = []

    # Loop through the first 5 pages
    for page_num in range(1, 6):
        url = base_url.format(page_num)
        html = fetch_html(url)
        if html:
            books = parse_html(html)
            all_books.extend(books)

    # Save the extracted data to a CSV file
    save_to_csv(all_books, 'books.csv')
    print("Data has been successfully saved to books.csv")

    # Display the contents of the CSV file
    display_csv('books.csv')

# Entry point of the script
if __name__ == "__main__":
    main()



Data has been successfully saved to books.csv
Title: A Light in the Attic
Price: Â£51.77
Availability: In stock
Rating: Three
----------------------------------------
Title: Tipping the Velvet
Price: Â£53.74
Availability: In stock
Rating: One
----------------------------------------
Title: Soumission
Price: Â£50.10
Availability: In stock
Rating: One
----------------------------------------
Title: Sharp Objects
Price: Â£47.82
Availability: In stock
Rating: Four
----------------------------------------
Title: Sapiens: A Brief History of Humankind
Price: Â£54.23
Availability: In stock
Rating: Five
----------------------------------------
Title: The Requiem Red
Price: Â£22.65
Availability: In stock
Rating: One
----------------------------------------
Title: The Dirty Little Secrets of Getting Your Dream Job
Price: Â£33.34
Availability: In stock
Rating: Four
----------------------------------------
Title: The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhul