In [1]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


In [2]:
# Function to scrape a page for product info
def scrape_page(url, timeout=10):
    """Fetch one listing page and return the products found on it.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys 'title', 'price' and 'link', one per
        ``<article class="product_pod">`` element on the page. 'link' is the
        product href resolved against the URL of the fetched page. An empty
        list is returned when the page cannot be retrieved.
    """
    # Send a GET request to the page. Without a timeout a stalled server
    # would block the notebook cell indefinitely.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat network failures like a bad status code: report and move on.
        print(f"Failed to retrieve page: {url} ({exc})")
        return []

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page: {url}")
        return []

    # Parse the raw bytes rather than response.text: requests guesses the
    # text encoding from the HTTP headers alone, which can garble non-ASCII
    # characters (such as the currency sign in prices) when the header
    # declares no charset. Given bytes, BeautifulSoup detects the encoding
    # from the document itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    product_list = []

    # Extract relevant data from each product element on the page
    for item in soup.find_all('article', class_='product_pod'):
        anchor = item.h3.a  # the <a> tag inside <h3> carries title and href

        product_list.append({
            'title': anchor['title'],
            'price': item.find('p', class_='price_color').text,
            # Resolve the (possibly relative) href against the page that was
            # actually fetched, instead of assuming a fixed directory.
            'link': urljoin(response.url, anchor['href']),
        })

    return product_list

In [3]:
# Function to scrape multiple pages
def scrape_website(base_url, num_pages):
    """Scrape catalogue pages 1..num_pages and return all products found.

    Args:
        base_url: Root URL of the site; a trailing slash is tolerated.
        num_pages: Number of catalogue pages to visit, starting at page 1.

    Returns:
        A single list containing whatever scrape_page() returned for each
        page, in page order. Pages that yield nothing are reported on stdout
        and skipped.
    """
    # Drop trailing slashes so "http://host/" does not turn into
    # "http://host//catalogue/..." when the path is appended below.
    site_root = base_url.rstrip('/')

    all_products = []

    for page_num in range(1, num_pages + 1):
        # Construct the URL for each page
        url = f"{site_root}/catalogue/page-{page_num}.html"
        print(f"Scraping page: {url}")

        # Scrape the current page
        products_on_page = scrape_page(url)

        if products_on_page:
            all_products.extend(products_on_page)
        else:
            print(f"No products found on page: {page_num}")

    return all_products

In [4]:
# Function to save data into a CSV file
def save_to_csv(products, filename):
    """Write product dicts to ``filename`` as UTF-8 CSV with a header row.

    Args:
        products: Iterable of dicts keyed by 'title', 'price' and 'link'.
        filename: Path of the CSV file to create (an existing file is
            overwritten).
    """
    header = ['title', 'price', 'link']

    # newline='' hands line-ending control to the csv module.
    with open(filename, mode='w', newline='', encoding='utf-8') as out_file:
        dict_writer = csv.DictWriter(out_file, fieldnames=header)
        dict_writer.writeheader()
        # One CSV row per product, in the order given.
        dict_writer.writerows(products)

    print(f"Data saved to {filename}")
In [5]:
# Example usage: root URL of the site to scrape. scrape_website() appends
# "/catalogue/page-N.html" to this value, so it is given without a trailing slash.
base_url = 'http://books.toscrape.com'

In [6]:
# Catalogue pages 1 through this number (inclusive) are visited by scrape_website()
num_pages_to_scrape = 4  # Number of pages to scrape

In [7]:
# Scrape the e-commerce site: each catalogue page is fetched in turn and the
# per-product dicts (keys 'title', 'price', 'link') are collected into one list
products = scrape_website(base_url, num_pages_to_scrape)

Scraping page: http://books.toscrape.com/catalogue/page-1.html
Scraping page: http://books.toscrape.com/catalogue/page-2.html
Scraping page: http://books.toscrape.com/catalogue/page-3.html
Scraping page: http://books.toscrape.com/catalogue/page-4.html


In [8]:
# Save the scraped data to a CSV file. The name is a relative path, and the file
# is opened in write mode, so an existing file of the same name is overwritten.
csv_filename = 'scraped_products.csv'
save_to_csv(products, csv_filename)

Data saved to scraped_products.csv
