In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Base URL of the website to scrape

In [34]:
base_url = 'http://books.toscrape.com/catalogue/page-{}.html'


## Lists to store scraped data



In [35]:
titles = []
prices = []
availabilities = []

## Loop through the first few pages of the website (modify range as needed)

In [36]:
for page in range(1, 51):  # Scrape the first 2 pages, change range(1, n) to scrape more pages
    # Generate the URL for each page
    url = base_url.format(page)
    
    # Send a GET request to the page
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the page content
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find all the books on the page
        books = soup.find_all('article', class_='product_pod')
        
        # Loop through each book and extract data
        for book in books:
            # Extract title
            title = book.h3.a['title']
            titles.append(title)
            
            # Extract price
            price = book.find('p', class_='price_color').text
            prices.append(price)
            
            # Extract availability
            availability = book.find('p', class_='instock availability').text.strip()
            availabilities.append(availability)
    else:
        print(f"Failed to retrieve page {page}. Status code: {response.status_code}")

## Create a DataFrame to store the scraped data

In [37]:
books_df = pd.DataFrame({
    'Title': titles,
    'Price': prices,
    'Availability': availabilities
})

## Save the DataFrame to a CSV file

In [39]:
books_df

Unnamed: 0,Title,Price,Availability
0,A Light in the Attic,Â£51.77,In stock
1,Tipping the Velvet,Â£53.74,In stock
2,Soumission,Â£50.10,In stock
3,Sharp Objects,Â£47.82,In stock
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock
...,...,...,...
995,Alice in Wonderland (Alice's Adventures in Won...,Â£55.53,In stock
996,"Ajin: Demi-Human, Volume 1 (Ajin: Demi-Human #1)",Â£57.06,In stock
997,A Spy's Devotion (The Regency Spies of London #1),Â£16.97,In stock
998,1st to Die (Women's Murder Club #1),Â£53.98,In stock


In [40]:
books_df.to_csv('books.csv', index=False)

print("Scraping completed. Data saved to books.csv")


Scraping completed. Data saved to books.csv


# converting to excel

In [11]:
books_df.to_excel('books.xlsx',sheet_name="Books")
print("Scraping completed. Data saved to books.excel")

Scraping completed. Data saved to books.excel
