## Request a web page and save it in an object

In [262]:
from bs4 import BeautifulSoup
import requests

In [263]:
url = 'https://www.amazon.com/gp/bestsellers/books' 
response = requests.get(url)
response.status_code

200

## Make a beautiful soup out of the web page Front End script

In [264]:
page = response.text
soup = BeautifulSoup(page, "lxml")
print(soup.prettify()[:1000])

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-us">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-na.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:feature:aui-assets -->
  <link href="https://images-na.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41SIz69qHYL.css,31qGOnSAToL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41-PwE7+H0L.css,21j0IlW7xKL.css,01XPHJk60-L.css,014OeDQisGL.css,21aPhFy+riL.css,11gneA3MtJL.css,21fecG8pUzL.css,01RddH8vm-L.css,01CFUgsA-Y

## Find all elements of a class 

In [265]:
books_list=soup.find_all("li",class_="zg-item-immersion")

## Loop through elements to get data as a text and save it in a dictionary

In [266]:
books_dict={}
counter=0
for element in books_list:
    
    try:
        book_title=element.find(class_="p13n-sc-truncate p13n-sc-line-clamp-1").text.strip()
    except:
        book_title=""
    
    try:
        book_author=element.find(class_="a-size-small a-link-child").text.strip()
    except:
        book_author=""
        
    try:
        book_price=element.find(class_="p13n-sc-price").text.strip()
    except:
        book_price=""
    
    try:
        book_rating=element.find(class_="a-icon a-icon-star a-star-4-5 aok-align-top").text.strip().split(" ")[0]
    except:
        book_rating=""
        
    try:
        reviews_number=element.find(class_="a-size-small a-link-normal").text.strip()
        reviews_link=element.find(class_="a-size-small a-link-normal").get("href")
    except:
        reviews_number=""
        reviews_link=""
        
    
    books_dict[counter]=[book_title,book_author,book_price,book_rating,reviews_number,reviews_link]
    counter+=1

In [267]:
len(books_dict)

50

## Convert dictionary to dataframe

In [268]:
import pandas as pd

books_df = pd.DataFrame(books_dict).T 
books_df.columns=['Book Name','Book Author',"Book Price","Book Rating","reviews_number","reviews_link"]
books_df.head(15)

Unnamed: 0,Book Name,Book Author,Book Price,Book Rating,reviews_number,reviews_link
0,It Ends with Us: A Novel,Colleen Hoover,$9.94,4.7,29781.0,/product-reviews/1501110365
1,Atomic Habits: An Easy & Proven Way to Build G...,James Clear,$11.98,,52133.0,/product-reviews/0735211299
2,Joshua Weissman: An Unapologetic Cookbook,Joshua Weissman,$19.20,,,
3,Apples Never Fall,Liane Moriarty,$20.29,,,
4,American Marxism,Mark R. Levin,$16.80,,12708.0,/product-reviews/150113597X
5,Countdown bin Laden: The Untold Story of the 2...,Chris Wallace,$18.84,,63.0,/product-reviews/1982176520
6,"The Body Keeps the Score: Brain, Mind, and Bod...",,$11.40,,35692.0,/product-reviews/0143127748
7,A Hunter-Gatherer's Guide to the 21st Century:...,,$15.99,,,
8,The Seven Husbands of Evelyn Hugo: A Novel,Taylor Jenkins Reid,$9.42,4.6,28094.0,/product-reviews/1501161938
9,"Beautiful World, Where Are You: A Novel",Sally Rooney,$16.88,4.4,87.0,/product-reviews/0374602603


## Clean Price Column

In [269]:
def clean_price(text):
    return text.split("$")[1]

In [270]:
books_df["Book Price"]=books_df["Book Price"].apply(clean_price)

In [271]:
books_df.head(3)

Unnamed: 0,Book Name,Book Author,Book Price,Book Rating,reviews_number,reviews_link
0,It Ends with Us: A Novel,Colleen Hoover,9.94,4.7,29781.0,/product-reviews/1501110365
1,Atomic Habits: An Easy & Proven Way to Build G...,James Clear,11.98,,52133.0,/product-reviews/0735211299
2,Joshua Weissman: An Unapologetic Cookbook,Joshua Weissman,19.2,,,


## Clean URL column

In [272]:
books_df["reviews_link"]="https://www.amazon.com"+(books_df["reviews_link"])

In [273]:
# Check if cleaned URL is valid
books_df.head(3)

Unnamed: 0,Book Name,Book Author,Book Price,Book Rating,reviews_number,reviews_link
0,It Ends with Us: A Novel,Colleen Hoover,9.94,4.7,29781.0,https://www.amazon.com/product-reviews/1501110365
1,Atomic Habits: An Easy & Proven Way to Build G...,James Clear,11.98,,52133.0,https://www.amazon.com/product-reviews/0735211299
2,Joshua Weissman: An Unapologetic Cookbook,Joshua Weissman,19.2,,,https://www.amazon.com
