In [1]:
#import libraries
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import matplotlib.pyplot as plt
import time
import csv
from datetime import datetime

In [2]:
url = "https://books.toscrape.com/"

In [3]:
header = ["Title", "Price", "Availability", "Rating"]
data = []

def connect_and_get_data(start, end):
  for i in range(start, end + 1):
        url = f"https://books.toscrape.com/catalogue/page-{i}.html"
        response = requests.get(url)

        if response.status_code == 200:
            soup = bs(response.content, "html.parser")
            books = soup.find_all("article", class_="product_pod")

            if not books:
                print(f"No books found on page {i}")
                continue

            for book in books:
                row_data = []
                title = book.h3.a["title"]
                price = book.find("p", class_="price_color").text.strip()
                availability = book.find("p", class_="instock availability").text.strip()
                rating = book.p["class"][1]

                row_data.extend([title, price, availability, rating])
                data.append(row_data)
        else:
            print(f"Error fetching page {i}")

  return header, data


In [4]:
#Getting the data from the web page
connect_and_get_data(1, 50)

(['Title', 'Price', 'Availability', 'Rating'],
 [['A Light in the Attic', '£51.77', 'In stock', 'Three'],
  ['Tipping the Velvet', '£53.74', 'In stock', 'One'],
  ['Soumission', '£50.10', 'In stock', 'One'],
  ['Sharp Objects', '£47.82', 'In stock', 'Four'],
  ['Sapiens: A Brief History of Humankind', '£54.23', 'In stock', 'Five'],
  ['The Requiem Red', '£22.65', 'In stock', 'One'],
  ['The Dirty Little Secrets of Getting Your Dream Job',
   '£33.34',
   'In stock',
   'Four'],
  ['The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull',
   '£17.93',
   'In stock',
   'Three'],
  ['The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics',
   '£22.60',
   'In stock',
   'Four'],
  ['The Black Maria', '£52.15', 'In stock', 'One'],
  ['Starving Hearts (Triangular Trade Trilogy, #1)',
   '£13.99',
   'In stock',
   'Two'],
  ["Shakespeare's Sonnets", '£20.66', 'In stock', 'Four'],
  ['Set Me Free', '£17.46', 'In stock'

In [5]:
#Create a DataFrame
df = pd.DataFrame(data, columns=header)

In [6]:
#Clean column names
#df.columns = [col.replace(' ', '_').strip() for col in df.columns]
#df.columns = [col.replace('%', 'Percentage').strip() for col in df.columns]

In [7]:
df.head()

Unnamed: 0,Title,Price,Availability,Rating
0,A Light in the Attic,£51.77,In stock,Three
1,Tipping the Velvet,£53.74,In stock,One
2,Soumission,£50.10,In stock,One
3,Sharp Objects,£47.82,In stock,Four
4,Sapiens: A Brief History of Humankind,£54.23,In stock,Five


In [8]:
df.shape

(1000, 4)

In [9]:
def save_to_csv(df, filename=None):
    """
    Save the DataFrame to a CSV file
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if filename is None:
        filename = f"WebScrap_{timestamp}.csv"

    try:
        filename = f"{filename}_{timestamp}.csv"
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"Data saved to {filename}")
        return True
    except Exception as e:
        print(f"Error saving to CSV: {e}")
        return False

def save_to_excel(df, filename=None):
    """
    Save the DataFrame to an Excel file
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if filename is None:
        filename = f"WebScrap_{timestamp}.xlsx"

    try:
        filename = f"{filename}_{timestamp}.xlsx"
        df.to_excel(filename, index=False)
        print(f"Data saved to {filename}")
        return True
    except Exception as e:
        print(f"Error saving to Excel: {e}")
        return False

In [10]:
#Exporting the data in csv and excel format
save_to_csv(df, "books_to_scrape")
save_to_excel(df, "books_to_scrape")

Data saved to books_to_scrape_20260209_173950.csv
Data saved to books_to_scrape_20260209_173950.xlsx


True