<a href="https://colab.research.google.com/github/AdrianRamos956/Test-Web-Scraper/blob/main/Web_Scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup

# Function to scrape book data from a website
def scrape_books(url):
    """Scrape the book listing found at ``url``.

    Args:
        url: Address of a listing page whose books are marked up as
            ``<article class="product_pod">`` elements.

    Returns:
        A list of dicts, one per book in page order, with the keys
        ``number`` (1-based position on the page), ``title``, ``price``,
        ``rating``, ``availability`` (``'In stock'`` or ``'Out of stock'``)
        and ``detail_url`` (the link's ``href`` exactly as written in the
        page, so it may be relative).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the interactive menu from hanging forever on a dead
    # connection; raise_for_status avoids parsing an error page as a listing.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes so it detects the encoding from the
    # document itself.  ``response.text`` falls back to ISO-8859-1 for text
    # responses that declare no charset, which turns '£' into 'Â£'.
    soup = BeautifulSoup(response.content, 'html.parser')

    book_list = []
    articles = soup.find_all('article', class_='product_pod')
    for index, book in enumerate(articles, start=1):
        link = book.h3.a
        availability_tag = book.find('p', class_='availability')
        in_stock = (
            availability_tag is not None
            and 'In stock' in availability_tag.text
        )

        book_list.append({
            'number': index,  # 1-based position, used by the menu to pick a book
            'title': link['title'],
            'price': book.find('p', class_='price_color').text,
            # The rating word is taken from the second CSS class of the
            # article's first <p> element.
            'rating': book.p['class'][1],
            'availability': 'In stock' if in_stock else 'Out of stock',
            'detail_url': link['href'],
        })

    return book_list


# Function to scrape more details about a book from its detail page
def scrape_book_details(url):
    """Scrape the extra details shown on a single book's page.

    Args:
        url: Absolute address of the book's detail page.

    Returns:
        A dict with the keys ``description``, ``upc``, ``product_type``,
        ``price_excl_tax``, ``price_incl_tax``, ``tax``,
        ``availability_detail`` and ``number_of_reviews``.  The last seven
        are the first seven ``<td>`` cells of the ``table-striped`` table,
        in that order.  ``description`` is an empty string when the page has
        no description paragraph.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the product table has fewer than seven cells.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Parse the raw bytes so BeautifulSoup detects the encoding from the
    # document; ``response.text`` can mis-decode '£' as 'Â£' when the
    # response declares no charset.
    soup = BeautifulSoup(response.content, 'html.parser')

    # The description is the first <p> that is a *direct* child of the
    # product article; tolerate pages that do not have one.
    description_tag = soup.find('article', class_='product_page').find('p', recursive=False)
    description = description_tag.text.strip() if description_tag is not None else ''

    # Look the table up once instead of once per field.
    table = soup.find('table', class_='table-striped')
    cells = [cell.text.strip() for cell in table.find_all('td')]

    field_names = (
        'upc',
        'product_type',
        'price_excl_tax',
        'price_incl_tax',
        'tax',
        'availability_detail',
        'number_of_reviews',
    )

    book_details = {'description': description}
    for position, field_name in enumerate(field_names):
        # Plain indexing on purpose: a short table raises IndexError.
        book_details[field_name] = cells[position]

    return book_details


# Function to display a list of books
def display_books(books):
    """Print a short summary of every book, with a blank line after each.

    Args:
        books: Iterable of dicts providing the keys ``number``, ``title``,
            ``price``, ``rating`` and ``availability``.
    """
    summary_fields = (
        ("Number:", 'number'),
        ("Title:", 'title'),
        ("Price:", 'price'),
        ("Rating:", 'rating'),
        ("Availability:", 'availability'),
    )
    for entry in books:
        for label, key in summary_fields:
            print(label, entry[key])
        print()


# Function to display additional information about a specific book
def display_book_details(book):
    """Fetch a book's detail page and print its summary plus the extra details.

    Args:
        book: Dict with the keys ``title``, ``price``, ``rating``,
            ``availability`` and ``detail_url``.  ``detail_url`` may be an
            absolute URL, a path such as ``/slug/index.html``, or a relative
            link such as ``../../../slug/index.html``.

    Raises:
        KeyError: If ``book`` lacks one of the expected keys.
        Exception: Whatever ``scrape_book_details`` raises for the resolved
            URL is propagated unchanged.
    """
    detail_url = book['detail_url']
    if detail_url.startswith(('http://', 'https://')):
        url = detail_url
    else:
        # Plain concatenation of '.../catalogue' with a relative href like
        # '../../../slug/index.html' yields '.../catalogue../../../slug/...',
        # which is not the book page.  Drop the leading '../' hops (and a
        # leading 'catalogue/' if present) and anchor the rest under
        # /catalogue/.
        # NOTE(review): assumes every book page lives directly under
        # /catalogue/ on books.toscrape.com - confirm if other sites are added.
        path = detail_url
        while path.startswith('../'):
            path = path[len('../'):]
        path = path.lstrip('/')
        if path.startswith('catalogue/'):
            path = path[len('catalogue/'):]
        url = 'http://books.toscrape.com/catalogue/' + path

    book_details = scrape_book_details(url)

    # Fields already known from the listing page.
    for label, key in (
        ("Title:", 'title'),
        ("Price:", 'price'),
        ("Rating:", 'rating'),
        ("Availability:", 'availability'),
    ):
        print(label, book[key])

    # Fields that only exist on the detail page.
    for label, key in (
        ("Description:", 'description'),
        ("UPC:", 'upc'),
        ("Product Type:", 'product_type'),
        ("Price (excl. tax):", 'price_excl_tax'),
        ("Price (incl. tax):", 'price_incl_tax'),
        ("Tax:", 'tax'),
        ("Availability Detail:", 'availability_detail'),
        ("Number of Reviews:", 'number_of_reviews'),
    ):
        print(label, book_details[key])
    print()


# Main program loop
#
# Interactive menu over the books of one category page.  The listing is
# scraped when option 1 is chosen, and on demand the first time option 2 or 3
# is chosen, so those options no longer fail with NameError when picked first.
import random
import re

SCIENCE_BOOKS_URL = 'http://books.toscrape.com/catalogue/category/books/science_22/index.html'

book_list = None  # None means "not scraped yet"; a list (maybe empty) otherwise


def _price_value(book):
    """Return the numeric part of ``book['price']`` as a float.

    Everything except digits and the decimal point is dropped, so the result
    does not depend on which currency symbol precedes the amount or on how
    that symbol was decoded.
    """
    return float(re.sub(r'[^0-9.]', '', book['price']))


while True:
    print("Menu:")
    print("1. Display all books")
    print("2. Sort books by price")
    print("3. Get a random book")
    print("4. Exit")
    choice = input("Enter your choice: ")

    if choice == '1':
        book_list = scrape_books(SCIENCE_BOOKS_URL)
        display_books(book_list)
        more_info_choice = input("Enter the book number for more information (or '0' to continue): ")
        if more_info_choice != '0':
            try:
                book_number = int(more_info_choice)
            except ValueError:
                book_number = 0  # not a number: handled as out of range below
            # Explicit range check: a bare index would accept negative
            # numbers and silently pick a book from the end of the list.
            if 1 <= book_number <= len(book_list):
                display_book_details(book_list[book_number - 1])
            else:
                print("Invalid book number. Please try again.")
        print()
    elif choice in ('2', '3'):
        if book_list is None:
            book_list = scrape_books(SCIENCE_BOOKS_URL)

        if not book_list:
            # random.choice() raises IndexError on an empty list.
            print("No books available.")
            print()
        elif choice == '2':
            display_books(sorted(book_list, key=_price_value))
        else:
            print("Random Book:")
            # display_books prints the same five fields plus a blank line.
            display_books([random.choice(book_list)])
    elif choice == '4':
        break
    else:
        print("Invalid choice. Please try again.")
        print()
