# 1. Import Your Libraries

In [None]:
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd

# 2. Get URL and Send GET Request

In [None]:
# Landing page of the site being scraped; the page-1 cell prefixes book links with it.
url = "http://books.toscrape.com/"

# requests applies no timeout unless one is passed, so without it this call
# could block forever on an unresponsive server. With the timeout, a stalled
# connection raises requests.exceptions.Timeout instead.
response = requests.get(url, timeout=10)

# Report whether the server answered with HTTP 200.
if response.status_code == 200:
    print("request successful")
else:
    print("request failed")

# 3. Parse the HTML Content

In [None]:
# Build the soup object used to navigate the landing page's HTML.
# The raw bytes (response.content) are parsed rather than response.text, the
# same way the other cells in this notebook parse their pages. response.text
# is decoded using requests' guess from the HTTP headers (ISO-8859-1 when a
# text response declares no charset), which can garble non-ASCII characters;
# given bytes, BeautifulSoup works out the encoding from the document itself.
soup = BeautifulSoup(response.content, "html.parser")

# 4. Extract Book Details for Page 1

In [None]:
# Find every <h3> on page 1; each one holds the link to a book's detail page
# (the page is expected to list 20 books).
books = soup.find_all("h3")

# Visit each book's detail page, extract its fields and print them.
for book in books:
    # The href is relative, so it is appended to the base `url` below.
    book_url = book.find('a')['href']
    # Without a timeout a single unresponsive request would stall the loop forever.
    book_response = requests.get(url + book_url, timeout=10)
    book_soup = BeautifulSoup(book_response.content, 'html.parser')

    title = book_soup.find('h1').text
    # The third breadcrumb link is taken as the category.
    category = book_soup.find('ul', class_='breadcrumb').find_all('a')[2].text.strip()
    # The second CSS class on the star-rating paragraph is used as the rating
    # (presumably a word such as "Three" -- verify against the page markup).
    rating = book_soup.find('p', class_='star-rating')['class'][1]
    price = book_soup.find('p', class_='price_color').text.strip()
    availability = book_soup.find('p', class_='instock availability').text.strip()

    print(f'Title: {title}')
    print(f'Category: {category}')
    print(f'Rating: {rating}')
    print(f'Price: {price}')
    print(f'Availability: {availability}')
    print('******')

# 5. Extract Details for All 50 Pages

In [None]:
# Collects one [title, category, rating, price, availability] row per book.
book_data = []

# Loop through catalogue pages 1 to 50.
# NOTE: this cell rebinds the notebook-level names `url`, `response`, `soup`
# and `books`; re-run the earlier cells before re-running the page-1 cell,
# which builds its links from `url`.
for page_num in range(1, 51):
    url = f'http://books.toscrape.com/catalogue/page-{page_num}.html'
    # A timeout keeps one unresponsive request from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each <h3> on a listing page holds the link to one book's detail page.
    books = soup.find_all("h3")

    for book in books:
        # Hrefs on the listing pages are appended to the catalogue directory URL.
        book_url = book.find('a')['href']
        book_response = requests.get(
            'http://books.toscrape.com/catalogue/' + book_url,
            timeout=10,
        )
        book_soup = BeautifulSoup(book_response.content, 'html.parser')

        title = book_soup.find('h1').text
        # The third breadcrumb link is taken as the category.
        category = book_soup.find('ul', class_='breadcrumb').find_all('a')[2].text.strip()
        # The second CSS class on the star-rating paragraph is used as the rating.
        rating = book_soup.find('p', class_='star-rating')['class'][1]
        price = book_soup.find('p', class_='price_color').text.strip()
        availability = book_soup.find('p', class_='instock availability').text.strip()

        # Add the extracted info to the list.
        book_data.append([title, category, rating, price, availability])
        # Show only the row just added: printing the whole list on every
        # iteration re-prints all earlier books and grows quadratically.
        print(book_data[-1])
        print('******')

# 6. Export the Data

In [None]:
# Turn the scraped rows into a DataFrame with one named column per field.
df = pd.DataFrame(
    data=book_data,
    columns=[
        "Title",
        "Category",
        "Rating",
        "Price",
        "Availability",
    ],
)

# Preview the first 10 rows.
print(df.head(n=10))


In [None]:
# Write the DataFrame to a CSV file, leaving out the row index.
df.to_csv(path_or_buf="books_scraped.csv", index=False)