In [27]:
import requests
from bs4 import BeautifulSoup
import logging
import pandas as pd

In [None]:
logging.basicConfig(level=logging.INFO)

headers = {"User-Agent": "Mozilla/5.0"}

def get_page(url):
    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return BeautifulSoup(response.text, 'lxml')
        else:
            logging.error(f"Couldn't get the page. Status code: {response.status_code}")
            return None
    except Exception as e:
        logging.error(f"Error while getting the page: {e}")
        return None

In [36]:
def scrape_book_depository(query):
    url = f"https://www.bookdepository.com/search?searchTerm={query.replace(' ', '+')}"
    soup = get_page(url)
    if soup is None: return []

    books = soup.find_all('div', class_='book-item')
    results = []

    for book in books:
        title = book.find('h3', class_='title')
        price = book.find('p', class_='price')
        availability = book.find('p', class_='availability')

        results.append({
            "title": title.text.strip() if title else None,
            "price": price.text.strip() if price else None,
            "availability": availability.text.strip() if availability else None,
            "source": "Book Depository"
        })

    return results

In [37]:
def scrape_abebooks(query):
    url = f"https://www.abebooks.com/servlet/SearchResults?kn={query.replace(' ', '+')}"
    soup = get_page(url)
    
    if not soup:
        return []

    books = soup.find_all('div', class_='cf result')
    results = []

    for book in books:
        title = book.find('meta', itemprop='name')
        price = book.find('p', class_='item-price')

        results.append({
            "title": title['content'] if title else None,
            "price": price.text.strip() if price else None,
            "source": "AbeBooks"
        })

    return results


In [None]:
def scrape_books_to_scrape():
    url = "https://books.toscrape.com/"
    soup = get_page(url)
    
    if not soup:
        return []

    books = soup.find_all('article', class_='product_pod')
    results = []

    for book in books:
        title = book.h3.a['title']
        price = book.find('p', class_='price_color').text.strip()
        availability = book.find('p', class_='instock availability').text.strip()

        results.append({
            "title": title,
            "price": price,
            "availability": availability,
            "source": "Books to Scrape"
        })

    return results

In [None]:
def save_books_to_csv(book_lists, filename="books_data.csv"):
    all_books = []
    for site_books in book_lists:
        all_books.extend(site_books)

    df = pd.DataFrame(all_books)
    df.to_csv(filename, index=False)
    print(f" Data saved to {filename}")

if __name__ == "__main__":
    query = "python programming"
    books1 = scrape_book_depository(query)
    books2 = scrape_abebooks(query)
    books3 = scrape_books_to_scrape()

    save_books_to_csv([books1, books2, books3])


✅ Data saved to books_data.csv


In [43]:
df = pd.read_csv("books_data.csv", encoding='utf-8-sig')
print(df.head())


                                   title    price availability  \
0                   A Light in the Attic  Â£51.77     In stock   
1                     Tipping the Velvet  Â£53.74     In stock   
2                             Soumission  Â£50.10     In stock   
3                          Sharp Objects  Â£47.82     In stock   
4  Sapiens: A Brief History of Humankind  Â£54.23     In stock   

            source  
0  Books to Scrape  
1  Books to Scrape  
2  Books to Scrape  
3  Books to Scrape  
4  Books to Scrape  


In [44]:
print(df.isnull().sum)

<bound method DataFrame.sum of     title  price  availability  source
0   False  False         False   False
1   False  False         False   False
2   False  False         False   False
3   False  False         False   False
4   False  False         False   False
5   False  False         False   False
6   False  False         False   False
7   False  False         False   False
8   False  False         False   False
9   False  False         False   False
10  False  False         False   False
11  False  False         False   False
12  False  False         False   False
13  False  False         False   False
14  False  False         False   False
15  False  False         False   False
16  False  False         False   False
17  False  False         False   False
18  False  False         False   False
19  False  False         False   False>
