# Amazon Best Sellers

# Import Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns 
# this statement allows the visuals to render within your Jupyter Notebook
%matplotlib inline 
# You can configure the format of the images: ‘png’, ‘retina’, ‘jpeg’, ‘svg’, ‘pdf’.
%config InlineBackend.figure_format = 'png'

In [2]:
# Project for best sales in amazon.sa

# Best Sellers in Electronics (Page 1)

In [3]:
header = {'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/?ie=UTF8&ref_=sv_sv_elec_all_1"

In [4]:
response = requests.get(url, {'headers':header})
response.status_code

200

In [5]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

# BeautifulSoup Basics

In [6]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [7]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [8]:
soup.find_all('li', {'class':'zg-item-immersion'})[0].find_all('a')[1].text

'\n3.0 out of 5 stars\n'

In [9]:
# create function to clean data 

In [10]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [11]:
#Test with the first electroni product to ensure functions are working
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))


SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB, 4GB RAM, LTE, Black (KSA Version)
3.0 out of 5 stars
3.0
2
2
SAR 539.00
539.00


In [12]:
soup.find_all("li", class_ = "zg-item-immersion")[3] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#4</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/SAMSUNG-Galaxy-M12-Dual-Smartphone/dp/B08XY5X3J7?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)" height="200" src="https://images-na.ssl-images-amazon.com/images/I/81DCMeVrbKS._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)
   

In [13]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

    

In [14]:
products_list

[{'pro_name': 'SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB, 4GB RAM, LTE, Black (KSA Version)',
  'pro_rate': '3.0 out of 5 stars',
  'pro_rate_clean': '3.0',
  'review_num': '2',
  'review_num_clean': '2',
  'pro_price': 'SAR 539.00',
  'pro_price_clean': '539.00'},
 {'pro_name': 'Samsung Galaxy A12 LTE Dual SIM Smartphone - 64GB Storage, 4GB RAM, Blue (KSA Version)',
  'pro_rate': '1.0 out of 5 stars',
  'pro_rate_clean': '1.0',
  'review_num': '2',
  'review_num_clean': '2',
  'pro_price': 'SAR 539.00',
  'pro_price_clean': '539.00'},
 {'pro_name': 'Apple 20W USB-C Power Adapter, White',
  'pro_rate': '4.5 out of 5 stars',
  'pro_rate_clean': '4.5',
  'review_num': '359',
  'review_num_clean': '359',
  'pro_price': 'SAR 79.00',
  'pro_price_clean': '79.00'},
 {'pro_name': 'SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)',
  'pro_rate': '4.1 out of 5 stars',
  'pro_rate_clean': '4.1',
  'review_num': '40',
  'review_num_clean': '40',
  'pro_price

In [15]:
products_df1 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df1

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2.0,2.0,SAR 539.00,539.0
1,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2.0,2.0,SAR 539.00,539.0
2,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359.0,359.0,SAR 79.00,79.0
3,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40.0,40.0,SAR 499.00,499.0
4,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777.0,2777.0,SAR 475.00,475.0
5,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.5 out of 5 stars,4.5,10.0,10.0,SAR 499.00,499.0
6,"Anker PowerCore Select 20000, 20000mAh Power B...",4.1 out of 5 stars,4.1,54.0,54.0,SAR 94.00,94.0
7,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.2 out of 5 stars,4.2,22.0,22.0,SAR 499.00,499.0
8,"Apple Watch Series 6 (GPS, 44mm) - Space Grey ...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,349.00",1349.0
9,New Apple Airpods Pro,4.3 out of 5 stars,4.3,2351.0,2351.0,SAR 749.00,749.0


# Best Sellers in   Electronics (Page 2)

In [16]:
url = 'https://www.amazon.sa/-/en/gp/bestsellers/electronics/ref=zg_bs_pg_2?ie=UTF8&pg=2'

In [17]:
response = requests.get(url)
response.status_code

200

In [18]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [19]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [20]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [21]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.4 out of 5 stars\n'

In [22]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [23]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN USB C Cable 2M, Braided 100W Power Delivery PD Fast charge Cable 20V 5A USB C to USB C 2.0 Cable Compatible for iPad mini 6, MacBook Pro 2021, iPad Pro 12.9", Samsung S21, Huawei P40, etc
4.2 out of 5 stars
4.2
3,503
3503
SAR 40.80
40.80


In [24]:
soup.find_all("li", class_ = "zg-item-immersion")[1] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#52</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/Apple-iPhone-Pro-FaceTime-128GB/dp/B08L5R1CCC?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="Apple iPhone 12 Pro Max With FaceTime (128GB) - Graphite" height="200" src="https://images-na.ssl-images-amazon.com/images/I/71XXJC7V8tL._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            Apple iPhone 12 Pro Max With FaceTime (128GB) - Graphite
        </div>
</a>
<div class="a-icon-row a-spacing-none">

In [25]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [26]:
products_df2 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df2

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"UGREEN USB C Cable 2M, Braided 100W Power Deli...",4.2 out of 5 stars,4.2,3503,3503,SAR 40.80,40.8
1,Apple iPhone 12 Pro Max With FaceTime (128GB) ...,4.4 out of 5 stars,4.4,548,548,"SAR 4,399.00",4399.0
2,Sony MDR-ZX110AP Extra Bass Smartphone Headset...,4.1 out of 5 stars,4.1,3890,3890,SAR 47.58,47.58
3,MSI MPG Z490 Gaming Plus Gaming Motherboard (A...,4.6 out of 5 stars,4.6,651,651,SAR 489.23,489.23
4,"Anker PowerCore III 10K, Wireless Portable Cha...",4.7 out of 5 stars,4.7,4,4,SAR 135.00,135.0
5,Glorious Large Gaming Mouse Pad 11''x13'' - Black,4.7 out of 5 stars,4.7,7992,7992,SAR 37.00,37.0
6,UGREEN 2 Pack Screen Protector for iPhone 13/1...,4.4 out of 5 stars,4.4,90,90,SAR 47.00,47.0
7,Apple AirPods Pro,4.3 out of 5 stars,4.3,2351,2351,SAR 727.00,727.0
8,Stylus Pen for iPad with Palm Rejection，Active...,2.7 out of 5 stars,2.7,101,101,SAR 76.97,76.97
9,Compatible with Apple Watch Case Series 4 Seri...,3.9 out of 5 stars,3.9,204,204,SAR 12.79,12.79


# Best Sellers in Camera & Photo Products (Page 1)

In [27]:
url = 'https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966385031/ref=zg_bs_nav_1_electronics'

In [28]:
response = requests.get(url)
response.status_code

200

In [29]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [30]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [31]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [32]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [33]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [34]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

EZVIZ C6N Pan/Tilt Camera FHD Indoor,1080p WiFi Smart Home Security Camera IR Night Vision Motion Detection Auto Tracking Baby/Elder/Pet Cloud Storage/SD Slot 2-Way Audio Wi-Fi 2.4G iOS Android App
4.2 out of 5 stars
4.2
1,409
1409
SAR 98.00
98.00


In [35]:
soup.find_all("li", class_ = "zg-item-immersion")[1] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#2</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/EZVIZ-Indoor-Security-Camera-Android/dp/B07ZC1Q6W6?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="EZVIZ C6N FHD Indoor Security Camera WiFi Smart 2.4G with iOS and Android App" height="200" src="https://images-na.ssl-images-amazon.com/images/I/51bA8A2cU9L._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            EZVIZ C6N FHD Indoor Security Camera WiFi Smart 2.4G with iOS and Android App
        </div

In [36]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [37]:
products_df3 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df3

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"EZVIZ C6N Pan/Tilt Camera FHD Indoor,1080p WiF...",4.2 out of 5 stars,4.2,1409.0,1409.0,SAR 98.00,98.0
1,EZVIZ C6N FHD Indoor Security Camera WiFi Smar...,4.3 out of 5 stars,4.3,391.0,391.0,SAR 98.00,98.0
2,SanDisk Ultra Micro SD Card Android - Class 10...,4.4 out of 5 stars,4.4,116967.0,116967.0,SAR 35.00,35.0
3,"6 PCS Camera Cover,Sliding Webcam Cover,Ultra ...",4.4 out of 5 stars,4.4,45.0,45.0,SAR 9.00,9.0
4,"eufy Security, eufyCam 2 Pro Wireless Home Sec...",4.5 out of 5 stars,4.5,257.0,257.0,SAR 998.00,998.0
5,SanDisk Ultra microSDXC 128GB 100MB/s Class 10...,4.4 out of 5 stars,4.4,116967.0,116967.0,SAR 45.00,45.0
6,Eufy Indoor Cam 2K Pan & Tilt Home Security Ca...,4.5 out of 5 stars,4.5,10.0,10.0,SAR 155.00,155.0
7,SanDisk Extreme microSDXC UHS-I Card- 128GB,4.4 out of 5 stars,4.4,116967.0,116967.0,SAR 77.00,77.0
8,Xiaomi Mi Home Security Camera 360° - White,4.0 out of 5 stars,4.0,391.0,391.0,SAR 145.00,145.0
9,AmazonBasics Braided 4K HDMI to HDMI Cable - 3...,4.7 out of 5 stars,4.7,9242.0,9242.0,SAR 9.00,9.0


# Best Sellers in Camera & Photo Products (Page 2)

In [38]:
url ='https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966385031/ref=zg_bs_pg_2?ie=UTF8&pg=2'

In [39]:
response = requests.get(url)
response.status_code

200

In [40]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [41]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [42]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [43]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [44]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [45]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

AccLoo Mini WiFi Camera, HD 1080P Wireless Portable Small Camera with Motion Detection and Night Version Home Security Cameras Nanny Cam Video Recorder for Room, Office
3.4 out of 5 stars
3.4
19
19
SAR 128.47
128.47


In [46]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [47]:
products_df4 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df4

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"AccLoo Mini WiFi Camera, HD 1080P Wireless Por...",3.4 out of 5 stars,3.4,19.0,19.0,SAR 128.47,128.47
1,Celestron Portable Telescope Travel Scope 70,4.3 out of 5 stars,4.3,9013.0,9013.0,SAR 615.00,615.0
2,"150 inch Projector Screen for Movies, 16:9 HD ...",3.8 out of 5 stars,3.8,28.0,28.0,SAR 149.29,149.29
3,Fujifilm Instax Mini Instant Film - 60 Sheets ...,4.6 out of 5 stars,4.6,22.0,22.0,SAR 180.54,180.54
4,2 * 2m/6.6 * 6.6ft Studio Backdrop Stand Brack...,3.3 out of 5 stars,3.3,4.0,4.0,SAR 111.99,111.99
5,Microphone Stand Set Heavy Duty Mic Suspension...,3.5 out of 5 stars,3.5,3.0,3.0,SAR 125.00,125.0
6,UGREEN Micro HDMI to HDMI Adapter Cable Male t...,4.6 out of 5 stars,4.6,7114.0,7114.0,SAR 42.40,42.4
7,UGREEN 8K DisplayPort Cable Ultra HD DisplayPo...,4.6 out of 5 stars,4.6,315.0,315.0,SAR 82.40,82.4
8,"SanDisk 256GB microSDXC Card, Licensed for Nin...",4.9 out of 5 stars,4.9,24665.0,24665.0,SAR 174.31,174.31
9,SanDisk Extreme microSDXC UHS-I Card- 256GB,4.4 out of 5 stars,4.4,116968.0,116968.0,SAR 168.00,168.0


# Best Sellers in Binoculars, Telescopes & Optics (Page 1) 

In [48]:
url ="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966400031/ref=zg_bs_nav_2_16966385031"


In [49]:
response = requests.get(url)
response.status_code

200

In [50]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [51]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [52]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [53]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [54]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [55]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple Pencil Tips - 4 pack
4.8 out of 5 stars
4.8
8
8
SAR 98.00
98.00


In [56]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [57]:
products_df5 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df5

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,Apple Pencil Tips - 4 pack,4.8 out of 5 stars,4.8,8.0,8.0,SAR 98.00,98.0
1,Celestron Portable Telescope Travel Scope 70,4.3 out of 5 stars,4.3,9013.0,9013.0,SAR 615.00,615.0
2,(Single Pack) - Carson MicroBrite Plus 60 -120...,4.2 out of 5 stars,4.2,30503.0,30503.0,SAR 84.50,84.5
3,"Celestron UpClose G2 10x25 Monocular, Black (7...",4.0 out of 5 stars,4.0,833.0,833.0,SAR 80.65,80.65
4,Celestron - 50mm Travel Scope - Portable Refra...,3.8 out of 5 stars,3.8,709.0,709.0,SAR 311.52,311.52
5,"Steiner 5845 7X50 Sagor II Binocular, Green",5.0 out of 5 stars,5.0,1.0,1.0,"SAR 1,069.00",1069.0
6,National Geographic 50Mm Telescope For Kids by...,3.7 out of 5 stars,3.7,22.0,22.0,SAR 186.12,186.12
7,"Telescope Phone Mount, Universal Smart Phone A...",3.5 out of 5 stars,3.5,22.0,22.0,SAR 53.90,53.9
8,Bushnell Trophy Xtreme Binocular,4.6 out of 5 stars,4.6,122.0,122.0,SAR 867.35,867.35
9,PROFESSIONAL COOLING SYSTEM,5.0 out of 5 stars,5.0,2.0,2.0,SAR 70.00,70.0


# Best Sellers in Binoculars, Telescopes & Optics (Page 2)

In [58]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966400031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [59]:
response = requests.get(url)
response.status_code

200

In [60]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [61]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [62]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [63]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'SAR 149.99'

In [64]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [65]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Camera Diaphragm, Compact Iris Aperture Adjustable for Industry for Home for Optical Instruments for Laboratory for Workshop




SAR 195.89
195.89


In [66]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [67]:
products_df6 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df6

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Camera Diaphragm, Compact Iris Aperture Adjust...",,,,,SAR 195.89,195.89
1,12X50 HD Monocular Telescope with Quick Smartp...,,,,,SAR 149.99,149.99
2,"Bysameyee USB Digital Microscope 40X to 1000X,...",3.0 out of 5 stars,3.0,1.0,1.0,SAR 139.00,139.0
3,Mega Sports 10X25 Compact Binocular for Bird W...,,,,,,
4,12x42 Binoculars for Adults with New Smartphon...,4.8 out of 5 stars,4.8,7949.0,7949.0,SAR 532.97,532.97
5,Newdoar 25x30 Zoomable Monocular Vintage Pirat...,4.2 out of 5 stars,4.2,132.0,132.0,SAR 126.00,126.0
6,"Nikon Aculon A211 7x50 Binoculars, Black",4.6 out of 5 stars,4.6,2921.0,2921.0,,
7,"2021 Newest Telescope, 60mm Aperture 500mm AZ ...",,,,,SAR 527.46,527.46
8,Vortex Optics Solo Monocular 10x25,4.4 out of 5 stars,4.4,1546.0,1546.0,SAR 344.97,344.97
9,SCOKC 10x-20x Zoom Binoculars Monocular HD Pow...,4.3 out of 5 stars,4.3,114.0,114.0,SAR 299.00,299.0


# Best Sellers in Computers, Components & Accessories (Page 1)

In [68]:
url= " https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966388031/ref=zg_bs_nav_1_electronics"

In [69]:
response = requests.get(url)
response.status_code

200

In [70]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [71]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [72]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [73]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.1 out of 5 stars\n'

In [74]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [75]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Stylus Pen for iPad with Palm Rejection, Active Pencil Compatible with (2018-2021) iPad Pro 11 & 12.9 inch, iPad 9th/8th/7th/6th Gen, iPad Air 4th/3rd Gen,iPad Mini 6th/5th Gen
3.4 out of 5 stars
3.4
5,548
5548
SAR 58.77
58.77


In [76]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [77]:
products_df7 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df7

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Stylus Pen for iPad with Palm Rejection, Activ...",3.4 out of 5 stars,3.4,5548,5548,SAR 58.77,58.77
1,530pcs/set Heat Shrink Tubing Insulation Shrin...,4.1 out of 5 stars,4.1,117,117,SAR 21.99,21.99
2,Kingston A400 SATA SSD Solid State Drive 2.5 I...,4.5 out of 5 stars,4.5,31730,31730,SAR 209.00,209.0
3,Anker PowerLine III USB-C to USB-C Cable USB-C...,5.0 out of 5 stars,5.0,7,7,SAR 26.00,26.0
4,"Anker USB C to Lightning Cable (6 ft), Powerli...",4.0 out of 5 stars,4.0,9,9,SAR 46.95,46.95
5,SAMSUNG 980 PRO 500GB PCIe NVMe Gen4 Internal ...,4.8 out of 5 stars,4.8,2581,2581,SAR 311.63,311.63
6,SanDisk Ultra Micro SD Card Android - Class 10...,4.4 out of 5 stars,4.4,116967,116967,SAR 35.00,35.0
7,"UGREEN USB C Cable 2M, Braided 100W Power Deli...",4.2 out of 5 stars,4.2,3503,3503,SAR 40.80,40.8
8,MSI MPG Z490 Gaming Plus Gaming Motherboard (A...,4.6 out of 5 stars,4.6,651,651,SAR 489.23,489.23
9,Glorious Large Gaming Mouse Pad 11''x13'' - Black,4.7 out of 5 stars,4.7,7992,7992,SAR 37.00,37.0


 # Best Sellers in Computers, Components & Accessories (Page 2)

In [78]:
url ="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966388031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [79]:
response = requests.get(url)
response.status_code

200

In [80]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [81]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [82]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [83]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.5 out of 5 stars\n'

In [84]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [85]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

SanDisk SSD PLUS 240GB - 2.5” SATA SSD, up to 530MB/s Read and 440MB/s Write speeds
4.7 out of 5 stars
4.7
60,635
60635
SAR 119.00
119.00


In [86]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [87]:
products_df8 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df8

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SanDisk SSD PLUS 240GB - 2.5” SATA SSD, up to ...",4.7 out of 5 stars,4.7,60635.0,60635.0,SAR 119.00,119.0
1,"Stylus Pen for iPad with Palm Rejection, Activ...",4.5 out of 5 stars,4.5,490.0,490.0,SAR 68.00,68.0
2,SteelSeries Arctis 7 - Lossless Wireless Gamin...,4.5 out of 5 stars,4.5,25696.0,25696.0,SAR 427.38,427.38
3,SAMSUNG (MZ-V7S1T0B/AM) 970 EVO Plus SSD 1TB -...,4.9 out of 5 stars,4.9,25309.0,25309.0,SAR 527.79,527.79
4,TP-Link AC1300 USB WiFi Adapter(Archer T3U)- 2...,4.5 out of 5 stars,4.5,13671.0,13671.0,SAR 79.00,79.0
5,Kingston 240GB A400 Sata 3 2.5 inch Internal S...,4.5 out of 5 stars,4.5,31730.0,31730.0,SAR 140.94,140.94
6,ZORD M27 GAMING MONITOR 27INCH,3.8 out of 5 stars,3.8,194.0,194.0,SAR 930.00,930.0
7,UGREEN USB 3.0 Hub 4 Port USB 3 Data Hub Porta...,4.5 out of 5 stars,4.5,13008.0,13008.0,SAR 48.80,48.8
8,Toshiba 1TB Canvio Basics Portable USB3.0 Hard...,4.6 out of 5 stars,4.6,85070.0,85070.0,SAR 168.90,168.9
9,90x40cm Old World Map mouse pad large pad to m...,4.3 out of 5 stars,4.3,98.0,98.0,SAR 16.96,16.96


# Best Sellers in Headphones, Earbuds & Accessories (Page 1 )

In [88]:
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966390031/ref=zg_bs_pg_1?ie=UTF8&pg=1"

In [89]:
response = requests.get(url)
response.status_code

200

In [90]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [91]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [92]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [93]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [94]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [95]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple AirPods with Charging Case
4.1 out of 5 stars
4.1
2,777
2777
SAR 475.00
475.00


In [96]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [97]:
products_df9 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df9

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777.0,2777.0,SAR 475.00,475.0
1,New Apple Airpods Pro,4.3 out of 5 stars,4.3,2351.0,2351.0,SAR 749.00,749.0
2,Sony WH-1000XM4 Wireless Noise Cancelling Blue...,4.4 out of 5 stars,4.4,63.0,63.0,SAR 980.00,980.0
3,SoundPEATS Air3 Wireless Earbuds Mini Bluetoot...,4.0 out of 5 stars,4.0,309.0,309.0,SAR 143.20,143.2
4,"JBL In-Ear Headphones, Black, T110",4.2 out of 5 stars,4.2,17962.0,17962.0,SAR 28.15,28.15
5,Sony WI-C200 Wireless In-ear Bluetooth Headpho...,3.7 out of 5 stars,3.7,261.0,261.0,SAR 93.00,93.0
6,Wireless Earbuds SoundPEATS TrueAir2 Bluetooth...,3.8 out of 5 stars,3.8,2095.0,2095.0,SAR 127.20,127.2
7,"Anker Soundcore Life Q30 Bluetooth Headphones,...",4.6 out of 5 stars,4.6,11981.0,11981.0,SAR 249.00,249.0
8,New Apple AirPods (3rd generation),4.3 out of 5 stars,4.3,25.0,25.0,SAR 829.00,829.0
9,Bluetooth Wireless Headphones 40 Hours Playtim...,4.2 out of 5 stars,4.2,59.0,59.0,SAR 143.00,143.0


# Best Sellers in Headphones, Earbuds & Accessories (Page 2 )

In [98]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966390031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [99]:
response = requests.get(url)
response.status_code

200

In [100]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [101]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [102]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [103]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.2 out of 5 stars\n'

In [104]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [105]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Wireless Earbuds, SoundPEATS T3 Active Noise Canceling with AI ENC, Bluetooth 5.2 in-Ear Headphones, ANC Earbuds for Clear Calls, Transparency Mode, Touch Control, Total 16.5 Hrs
4.4 out of 5 stars
4.4
68
68
SAR 127.20
127.20


In [106]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [107]:
products_df10 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df10

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Wireless Earbuds, SoundPEATS T3 Active Noise C...",4.4 out of 5 stars,4.4,68,68,SAR 127.20,127.2
1,"Bluetooth Earphones, SoundPEATS Dual Dynamic D...",4.2 out of 5 stars,4.2,101,101,SAR 143.20,143.2
2,SKULLCANDY Dime True Wireless In-Ear Earbuds W...,4.1 out of 5 stars,4.1,3427,3427,SAR 99.00,99.0
3,Sony WH-1000XM4 Wireless Noise Cancelling Blue...,4.5 out of 5 stars,4.5,71,71,SAR 994.90,994.9
4,"Datazone Ear phone Headphones,crystal clear so...",3.6 out of 5 stars,3.6,85,85,SAR 9.38,9.38
5,"Amoi F9 Bluetooth 5.0 Wireless Earbuds,IPX7 Wa...",2.8 out of 5 stars,2.8,77,77,SAR 40.66,40.66
6,"G9000 Stereo Gaming Headset for PS4, PC, Xbox ...",3.5 out of 5 stars,3.5,49,49,SAR 48.90,48.9
7,Huawei Honor xsport Bluetooth Headset am61 IPX...,2.7 out of 5 stars,2.7,3,3,SAR 115.00,115.0
8,TOSHIBA RZE-BT1200H Wireless Headphones Black,4.5 out of 5 stars,4.5,18,18,SAR 149.00,149.0
9,"Wireless Earbuds, Bluetooth 5.2 True Wireless ...",3.5 out of 5 stars,3.5,20,20,SAR 109.00,109.0


# Best Sellers in Mobile Phones & Communication Products (Page 1 )

In [108]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966387031/ref=zg_bs_nav_1_electronics"

In [109]:
response = requests.get(url)
response.status_code

200

In [110]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [111]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [112]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [113]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n1.0 out of 5 stars\n'

In [114]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [115]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB, 4GB RAM, LTE, Black (KSA Version)
3.0 out of 5 stars
3.0
2
2
SAR 539.00
539.00


In [116]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [117]:
products_df11 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df11

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2.0,2.0,SAR 539.00,539.0
1,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2.0,2.0,SAR 539.00,539.0
2,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359.0,359.0,SAR 79.00,79.0
3,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40.0,40.0,SAR 499.00,499.0
4,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777.0,2777.0,SAR 475.00,475.0
5,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.5 out of 5 stars,4.5,10.0,10.0,SAR 499.00,499.0
6,"Anker PowerCore Select 20000, 20000mAh Power B...",4.1 out of 5 stars,4.1,54.0,54.0,SAR 94.00,94.0
7,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.2 out of 5 stars,4.2,22.0,22.0,SAR 499.00,499.0
8,"Apple Watch Series 6 (GPS, 44mm) - Space Grey ...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,349.00",1349.0
9,New Apple Airpods Pro,4.3 out of 5 stars,4.3,2351.0,2351.0,SAR 749.00,749.0


# Best Sellers in Mobile Phones & Communication Products (Page 2 )

In [118]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966387031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [119]:
response = requests.get(url)
response.status_code

200

In [120]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [121]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [122]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [123]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [124]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [125]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN Fast Car Charger Adapter 42.5W Dual USB Quick Charge QC 3.0 and PD Fast Charging Car Plug for iPhone 13 Pro/13 Pro Max/13/13 mini/12 Pro Max/11Pro Max,New iPad 9,iPad mini 6,Galaxy S20 S10,etc
4.3 out of 5 stars
4.3
2,191
2191
SAR 63.20
63.20


In [126]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [127]:
products_df12 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df12

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UGREEN Fast Car Charger Adapter 42.5W Dual USB...,4.3 out of 5 stars,4.3,2191,2191,SAR 63.20,63.2
1,ELON Fast Charging Cable for iPhone USB to Lig...,4.3 out of 5 stars,4.3,18,18,SAR 25.00,25.0
2,UGREEN PD 20W Car Charger Fast Charging for iP...,4.3 out of 5 stars,4.3,2191,2191,SAR 44.00,44.0
3,"Samsung Galaxy M11 Dual SIM - 32GB, 3GB RAM, 4...",4.0 out of 5 stars,4.0,208,208,SAR 695.00,695.0
4,"Xiaomi POCO X3 Pro, Dual SIM, 256GB, 8GB RAM, ...",4.2 out of 5 stars,4.2,188,188,SAR 899.00,899.0
5,"Anker 30W PIQ 3.0 USB-C Fast Charger Adapter, ...",4.5 out of 5 stars,4.5,18,18,SAR 59.00,59.0
6,SoundPEATS Smart Watch New Upgraded 13 Sports ...,4.1 out of 5 stars,4.1,469,469,SAR 159.00,159.0
7,HUAWEI FreeBuds 4i Wireless In-Ear Bluetooth E...,4.1 out of 5 stars,4.1,276,276,SAR 285.00,285.0
8,UGREEN iPhone Charger PD20W USB C iPhone 13 Fa...,4.2 out of 5 stars,4.2,403,403,SAR 55.20,55.2
9,Apple iPhone 12 With FaceTime (128GB) - Purple,4.4 out of 5 stars,4.4,355,355,"SAR 3,149.00",3149.0


# Best Sellers in Wearable Technology (Page 1)

In [128]:
url= "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966397031/ref=zg_bs_pg_1?ie=UTF8&pg=1"

In [129]:
response = requests.get(url)
response.status_code

200

In [130]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [131]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [132]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [133]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.6 out of 5 stars\n'

In [134]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [135]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple Watch Series 6 (GPS, 44mm) - Space Grey Aluminium Case with Black Sport Band
4.6 out of 5 stars
4.6
306
306
SAR 1,349.00
1349.00


In [136]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [137]:
products_df13 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df13

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Apple Watch Series 6 (GPS, 44mm) - Space Grey ...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,349.00",1349.0
1,"Apple Watch Series 6 (GPS, 44mm) - Blue Alumin...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,349.00",1349.0
2,"Apple Watch Series 6 (GPS, 40mm) - Gold Alumin...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,199.00",1199.0
3,Compatible with Apple Watch Case Series 4 Seri...,3.9 out of 5 stars,3.9,204.0,204.0,SAR 12.79,12.79
4,"HUAWEI Band 6, All-day SpO2 Monitoring, 1.47"" ...",4.4 out of 5 stars,4.4,816.0,816.0,SAR 189.00,189.0
5,SoundPEATS Smart Watch New Upgraded 13 Sports ...,4.1 out of 5 stars,4.1,469.0,469.0,SAR 159.00,159.0
6,"ibsun Wireless Charger, 3 in 1 Wireless Chargi...",4.1 out of 5 stars,4.1,184.0,184.0,,
7,Milanese Loop Bracelet Stainless Steel band Fo...,4.3 out of 5 stars,4.3,29.0,29.0,SAR 38.48,38.48
8,"Apple Watch Series 6 (GPS, 40mm) - Space Grey ...",4.6 out of 5 stars,4.6,306.0,306.0,"SAR 1,199.00",1199.0
9,HUAWEI HW-TIA-B09-BLACK WATCH FIT Smartwatch w...,4.5 out of 5 stars,4.5,2638.0,2638.0,SAR 331.00,331.0


# Best Sellers in Wearable Technology (Page 2)

In [138]:
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966397031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [139]:
response = requests.get(url)
response.status_code

200

In [140]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [141]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [142]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [143]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.2 out of 5 stars\n'

In [144]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [145]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Samsung Galaxy Watch Active 2 , 40 mm Aluminium, Pink Gold - SM-R830NZDAKSA
5.0 out of 5 stars
5.0
5
5




In [146]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [147]:
products_df14 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df14

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Samsung Galaxy Watch Active 2 , 40 mm Aluminiu...",5.0 out of 5 stars,5.0,5.0,5.0,,
1,Screen Protector Compatible with Huawei Watch ...,3.2 out of 5 stars,3.2,29.0,29.0,SAR 19.00,19.0
2,"Compatible with Apple Watch Bands 38mm 40mm, A...",3.8 out of 5 stars,3.8,41.0,41.0,SAR 27.99,27.99
3,"Compatible with Apple Watch Band, Nylon Strap ...",5.0 out of 5 stars,5.0,2.0,2.0,SAR 14.85,14.85
4,Lomet 22mm Stainless Steel Mesh Loop Bracelet ...,4.3 out of 5 stars,4.3,66.0,66.0,SAR 34.50,34.5
5,Fitbit Versa 2 Health and Fitness Smartwatch -...,4.5 out of 5 stars,4.5,32617.0,32617.0,SAR 596.17,596.17
6,SKULLCANDY Dime True Wireless In-Ear Earbuds W...,4.1 out of 5 stars,4.1,3427.0,3427.0,SAR 99.00,99.0
7,Case Compatible for Huawei Watch FIT Screen Pr...,5.0 out of 5 stars,5.0,1.0,1.0,SAR 30.00,30.0
8,Smart Watch 42mm Midnight Blue Sport Band,2.8 out of 5 stars,2.8,5.0,5.0,SAR 17.94,17.94
9,"HUAWEI WATCH FIT Elegant Smartwatch, Stainless...",4.7 out of 5 stars,4.7,39.0,39.0,SAR 449.00,449.0


# Best Sellers in Car & Vehicle Electronics (Page 1)

In [148]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966386031/ref=zg_bs_nav_1_electronics"

In [149]:
response = requests.get(url)
response.status_code

200

In [150]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [151]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [152]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [153]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [154]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [155]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN Fast Car Charger Adapter 42.5W Dual USB Quick Charge QC 3.0 and PD Fast Charging Car Plug for iPhone 13 Pro/13 Pro Max/13/13 mini/12 Pro Max/11Pro Max,New iPad 9,iPad mini 6,Galaxy S20 S10,etc
4.3 out of 5 stars
4.3
2,191
2191
SAR 63.20
63.20


In [156]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [157]:
products_df15 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df15

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UGREEN Fast Car Charger Adapter 42.5W Dual USB...,4.3 out of 5 stars,4.3,2191.0,2191.0,SAR 63.20,63.2
1,UGREEN PD 20W Car Charger Fast Charging for iP...,4.3 out of 5 stars,4.3,2191.0,2191.0,SAR 44.00,44.0
2,UGREEN Car Phone Mount Dashboard Car Holder Wi...,4.4 out of 5 stars,4.4,1387.0,1387.0,SAR 76.00,76.0
3,UGREEN Magnetic Car Phone Holder Air Vent Moun...,4.0 out of 5 stars,4.0,1773.0,1773.0,SAR 69.60,69.6
4,"UGREEN Bluetooth Aux Adapter, Bluetooth 5.0 Au...",4.5 out of 5 stars,4.5,218.0,218.0,SAR 93.60,93.6
5,"Muson Car Phone Mount, Dashboard/Air Vent/Wind...",3.7 out of 5 stars,3.7,9.0,9.0,SAR 60.00,60.0
6,UGREEN Car Air Vent Mount Cell Phone Holder Gr...,4.4 out of 5 stars,4.4,19683.0,19683.0,SAR 58.40,58.4
7,"Magnetic Phone Car Mount, WORLDMOM Strong Magn...",,,,,SAR 25.50,25.5
8,UGREEN Car Phone Holder Magnetic Dashboard Mob...,4.2 out of 5 stars,4.2,3136.0,3136.0,SAR 52.00,52.0
9,UGREEN Car Phone Holder Air Vent Cell Phone Mo...,4.3 out of 5 stars,4.3,664.0,664.0,SAR 36.00,36.0


# Best Sellers in Car & Vehicle Electronics (Page 2)

In [158]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966386031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [159]:
response = requests.get(url)
response.status_code

200

In [160]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [161]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [162]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [163]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n5.0 out of 5 stars\n'

In [164]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [165]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Car Phone Mount, PHOCAR Universal Phone Holder for Car, Dashboard Cell Phone Stand Compatible with iPhone 12 pro max, LG, Samsung
2.8 out of 5 stars
2.8
6
6
SAR 29.99
29.99


In [166]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [167]:
products_df16 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df16

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Car Phone Mount, PHOCAR Universal Phone Holder...",2.8 out of 5 stars,2.8,6.0,6.0,SAR 29.99,29.99
1,4 Types 12V Add-a-Circuit Adapter & Fuse Kit -...,5.0 out of 5 stars,5.0,1.0,1.0,SAR 70.00,70.0
2,"LENCENT FM Transmitter, Bluetooth FM Transmitt...",4.4 out of 5 stars,4.4,8296.0,8296.0,SAR 73.73,73.73
3,USB Charger Car Bluetooth 5.0 Fm Transmitter M...,,,,,SAR 59.99,59.99
4,Duracell Car Charger Single USB White - DR5020W,5.0 out of 5 stars,5.0,1.0,1.0,SAR 29.00,29.0
5,"Urise Car Phone Holder, 360° adjustable cup mo...",3.3 out of 5 stars,3.3,3.0,3.0,SAR 49.99,49.99
6,iOttie Easy One Touch 5 Cup Holder Car Mount P...,4.4 out of 5 stars,4.4,11815.0,11815.0,SAR 205.09,205.09
7,"SUNJOYCO Car Cup Holder Phone Mount, Universal...",4.0 out of 5 stars,4.0,77.0,77.0,SAR 52.00,52.0
8,"UGREEN Car Charger, Dual USB 24W/4.8A All Meta...",4.4 out of 5 stars,4.4,242.0,242.0,SAR 40.80,40.8
9,Aukey HD-C5 Car Mount Air Vent Magnetic Cell P...,,,,,SAR 35.00,35.0


# Best Sellers in Electrical Power Accessories (Page 1)

In [168]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966395031/ref=zg_bs_nav_1_electronics"

In [169]:
response = requests.get(url)
response.status_code

200

In [170]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [171]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [172]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [173]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.9 out of 5 stars\n'

In [174]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [175]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple 20W USB-C Power Adapter, White
4.5 out of 5 stars
4.5
359
359
SAR 79.00
79.00


In [176]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [177]:
products_df17 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df17

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359.0,359.0,SAR 79.00,79.0
1,Extension Cord with 3 Power Socket and 3 USB S...,3.9 out of 5 stars,3.9,41.0,41.0,SAR 59.99,59.99
2,"FLIZIL Fast Charging Universal Travel Adapter,...",4.5 out of 5 stars,4.5,24.0,24.0,SAR 74.88,74.88
3,"Extension Cord with USB, Power Strip with 3 Ou...",3.7 out of 5 stars,3.7,7.0,7.0,SAR 46.99,46.99
4,"LDNIO SC3604 Socket extension, 6USB Ports 3.4A...",3.9 out of 5 stars,3.9,99.0,99.0,SAR 61.66,61.66
5,Enriches electricity with 6 USB Ports,4.4 out of 5 stars,4.4,10.0,10.0,SAR 65.47,65.47
6,Multi Extension Socket Plug Adapter with 3 USB...,4.5 out of 5 stars,4.5,33.0,33.0,SAR 72.79,72.79
7,Belkin BSV804ar2M 8-Outlet Surge Protection Ex...,4.5 out of 5 stars,4.5,98.0,98.0,SAR 128.99,128.99
8,Belkin BSV603ar2M 6 Way/6 Plug 2m Surge Protec...,4.5 out of 5 stars,4.5,98.0,98.0,SAR 99.00,99.0
9,FLIZIL Power Strip Extension with 4 way Socket...,4.5 out of 5 stars,4.5,24.0,24.0,SAR 39.99,39.99


# Best Sellers in Electrical Power Accessories (Page 2)

In [178]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966395031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [179]:
response = requests.get(url)
response.status_code

200

In [180]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [181]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [182]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [183]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.4 out of 5 stars\n'

In [184]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [185]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

TOMSHOO Universal Travel Adapter 4 USB 2.4A Charger AC Power International Wall Charger
2.9 out of 5 stars
2.9
3
3
SAR 90.09
90.09


In [186]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [187]:
products_df18 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df18

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,TOMSHOO Universal Travel Adapter 4 USB 2.4A Ch...,2.9 out of 5 stars,2.9,3.0,3.0,SAR 90.09,90.09
1,Anker Universal Travel Adapter with 4 USB Port...,4.4 out of 5 stars,4.4,46.0,46.0,SAR 199.00,199.0
2,"Belkin SurgePlus USB Surge Protector, 2 USB 2....",4.8 out of 5 stars,4.8,5.0,5.0,SAR 128.99,128.99
3,"Vertical Power Strip 4 layers, 11 outlets with...",4.0 out of 5 stars,4.0,3.0,3.0,SAR 77.92,77.92
4,Dual electrical outlet with triple and dual ou...,,,,,SAR 19.95,19.95
5,"wafi World Travel Power adapter, all in one In...",4.0 out of 5 stars,4.0,1.0,1.0,SAR 92.00,92.0
6,mini-head charger for Nokia phones compatible ...,,,,,SAR 20.00,20.0
7,KLARHEIT Upgraded Universal Travel Adapter One...,3.9 out of 5 stars,3.9,3.0,3.0,SAR 99.00,99.0
8,Apple 12W USB Power Adapter,4.5 out of 5 stars,4.5,3.0,3.0,SAR 79.00,79.0
9,"Vertical Power Strip 4 layers, 11 outlets with...",5.0 out of 5 stars,5.0,6.0,6.0,SAR 79.06,79.06


# Best Sellers in Computer Tablets (Page 1)

In [188]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966433031/ref=zg_bs_nav_1_electronics"

In [189]:
response = requests.get(url)
response.status_code

200

In [190]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [191]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [192]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [193]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.2 out of 5 stars\n'

In [194]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [195]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

2021 Apple iPad Pro (11-inch, Wi-Fi, 128GB) - Space Grey (3rd Generation)
4.6 out of 5 stars
4.6
54
54
SAR 3,499.00
3499.00


In [196]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [197]:
products_df19 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df19

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"2021 Apple iPad Pro (11-inch, Wi-Fi, 128GB) - ...",4.6 out of 5 stars,4.6,54.0,54.0,"SAR 3,499.00",3499.0
1,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 595.00,595.0
2,"2020 Apple iPad Air (10.9-inch, Wi-Fi, 64GB) -...",4.4 out of 5 stars,4.4,157.0,157.0,"SAR 2,449.00",2449.0
3,Samsung Electronics A7 Tablet 10.4 Wi-Fi 64GB ...,4.1 out of 5 stars,4.1,22.0,22.0,SAR 793.32,793.32
4,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 716.96,716.96
5,"2021 Apple 10.2-inch iPad (Wi-Fi, 64GB) - Spac...",4.2 out of 5 stars,4.2,37.0,37.0,,
6,"Lenovo Smart Tab M10 Plus, 10.3"" Android Table...",4.2 out of 5 stars,4.2,982.0,982.0,"SAR 2,102.00",2102.0
7,"Samsung Galaxy Tab S7 Wi-Fi, Mystic Navy - 256 GB",4.7 out of 5 stars,4.7,1307.0,1307.0,"SAR 2,293.47",2293.47
8,"Samsung Galaxy Tab S7+ Wi-Fi, Mystic Black - 2...",4.7 out of 5 stars,4.7,1307.0,1307.0,"SAR 3,152.63",3152.63
9,HUION HS64 Graphics Drawing Tablet with Batter...,4.4 out of 5 stars,4.4,50.0,50.0,SAR 111.20,111.2


# Best Sellers in Computer Tablets (Page 2)

In [198]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966433031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [199]:
response = requests.get(url)
response.status_code

200

In [200]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [201]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [202]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [203]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'SAR 575.60'

In [204]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [205]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Atouch Tablet, Kids Tab A10,10.1 inch Tablet,Dual Sim,4GB Ram,64GB Rom,4G Lite With Gifts (Black)




SAR 342.77
342.77


In [206]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [207]:
products_df20 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df20

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Atouch Tablet, Kids Tab A10,10.1 inch Tablet,D...",,,,,SAR 342.77,342.77
1,"Lenovo Tab M10 HD (2nd Gen), X306F, 10.1""HD Ta...",,,,,SAR 575.60,575.6
2,"2021 Apple 10.2-inch iPad (Wi-Fi, 64GB) - Silver",4.2 out of 5 stars,4.2,37.0,37.0,,
3,"Lenovo Tab M8 HD 2ND GEN (TB-8505F), 8 inch Ta...",3.8 out of 5 stars,3.8,21.0,21.0,SAR 484.95,484.95
4,"ATOUCH X10 7-Inch Tablet, Dual SIM, 32GB ROM,3...",,,,,SAR 213.27,213.27
5,5G WiFi 2 in 1 Tablet 10 inch Android 10.0 Tab...,3.0 out of 5 stars,3.0,25.0,25.0,SAR 732.90,732.9
6,AOYODKG 2 in 1 Tablet 10.1 inch Android 10.0 T...,3.9 out of 5 stars,3.9,49.0,49.0,SAR 786.50,786.5
7,"2020 Apple iPad Air (10.9-inch, Wi-Fi, 64GB) -...",4.4 out of 5 stars,4.4,157.0,157.0,"SAR 2,699.00",2699.0
8,"2021 Apple 10.2-inch iPad (Wi-Fi + Cellular, 2...",4.2 out of 5 stars,4.2,37.0,37.0,"SAR 2,849.00",2849.0
9,"C idea CM802 Android Tablet, 8Inch Tablet, 3gb...",,,,,SAR 280.28,280.28


In [216]:
df = pd.concat([products_df1,products_df2, products_df3, products_df4, products_df5, products_df6, products_df7, products_df8, 
             products_df9, products_df10, products_df11, products_df12, products_df13, products_df14, products_df15, 
             products_df16, products_df17, products_df18, products_df19, products_df20])

In [217]:
df

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2,2,SAR 539.00,539.00
1,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2,2,SAR 539.00,539.00
2,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359,359,SAR 79.00,79.00
3,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40,40,SAR 499.00,499.00
4,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777,2777,SAR 475.00,475.00
...,...,...,...,...,...,...,...
45,Lume Cube Broadcast Lighting Kit | Self Broadc...,4.4 out of 5 stars,4.4,1044,1044,SAR 674.69,674.69
46,Slim Folio Pro for iPad Pro 11-inch (1st and 2...,4.0 out of 5 stars,4.0,1,1,SAR 505.22,505.22
47,"8"" Windows Tablet with HDMI Port,1280x800 Touc...",2.5 out of 5 stars,2.5,3,3,SAR 659.83,659.83
48,"Lenovo Yoga Smart Tablet, 10.1"" FHD (1920x1200...",,,,,"SAR 1,026.26",1026.26


In [257]:
df['i'] = y
y=[]
for i in range (0,1000):
    y.append(i) 

In [262]:
df =df.set_index(df['i'])

In [263]:
df

Unnamed: 0_level_0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean,i
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2,2,SAR 539.00,539.00,0
1,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2,2,SAR 539.00,539.00,1
2,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359,359,SAR 79.00,79.00,2
3,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40,40,SAR 499.00,499.00,3
4,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777,2777,SAR 475.00,475.00,4
...,...,...,...,...,...,...,...,...
995,Lume Cube Broadcast Lighting Kit | Self Broadc...,4.4 out of 5 stars,4.4,1044,1044,SAR 674.69,674.69,995
996,Slim Folio Pro for iPad Pro 11-inch (1st and 2...,4.0 out of 5 stars,4.0,1,1,SAR 505.22,505.22,996
997,"8"" Windows Tablet with HDMI Port,1280x800 Touc...",2.5 out of 5 stars,2.5,3,3,SAR 659.83,659.83,997
998,"Lenovo Yoga Smart Tablet, 10.1"" FHD (1920x1200...",,,,,"SAR 1,026.26",1026.26,998
