# Amazon Best Sellers

# Import Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns 
# this statement allows the visuals to render within your Jupyter Notebook
%matplotlib inline 
# You can configure the format of the images: ‘png’, ‘retina’, ‘jpeg’, ‘svg’, ‘pdf’.
%config InlineBackend.figure_format = 'png'
import csv

In [2]:
# Project for best sales in amazon.sa

# Best Sellers in Electronics (Page 1)

In [3]:
header = {'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/?ie=UTF8&ref_=sv_sv_elec_all_1"

In [4]:
response = requests.get(url, {'headers':header})
response.status_code

200

In [5]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

# BeautifulSoup Basics

In [6]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [7]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [8]:
soup.find_all('li', {'class':'zg-item-immersion'})[0].find_all('a')[1].text

'\n3.0 out of 5 stars\n'

In [9]:
# create function to clean data 

In [10]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [11]:
#Test with the first electroni product to ensure functions are working
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))


SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB, 4GB RAM, LTE, Black (KSA Version)
3.0 out of 5 stars
3.0
2
2
SAR 539.00
539.00


In [12]:
soup.find_all("li", class_ = "zg-item-immersion")[3] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#4</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/SAMSUNG-Galaxy-M12-Dual-Smartphone/dp/B08XY5X3J7?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)" height="200" src="https://images-na.ssl-images-amazon.com/images/I/81DCMeVrbKS._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)
   

In [13]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

    

In [14]:
products_list

[{'pro_name': 'SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB, 4GB RAM, LTE, Black (KSA Version)',
  'pro_rate': '3.0 out of 5 stars',
  'pro_rate_clean': '3.0',
  'review_num': '2',
  'review_num_clean': '2',
  'pro_price': 'SAR 539.00',
  'pro_price_clean': '539.00'},
 {'pro_name': 'Samsung Galaxy A12 LTE Dual SIM Smartphone - 64GB Storage, 4GB RAM, Blue (KSA Version)',
  'pro_rate': '1.0 out of 5 stars',
  'pro_rate_clean': '1.0',
  'review_num': '2',
  'review_num_clean': '2',
  'pro_price': 'SAR 539.00',
  'pro_price_clean': '539.00'},
 {'pro_name': 'Apple 20W USB-C Power Adapter, White',
  'pro_rate': '4.5 out of 5 stars',
  'pro_rate_clean': '4.5',
  'review_num': '359',
  'review_num_clean': '359',
  'pro_price': 'SAR 79.00',
  'pro_price_clean': '79.00'},
 {'pro_name': 'SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)',
  'pro_rate': '4.1 out of 5 stars',
  'pro_rate_clean': '4.1',
  'review_num': '40',
  'review_num_clean': '40',
  'pro_price

In [30]:
products_df1 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df1

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"ViewSonic M1 Mini DLP Pico Projector, 50 Lumens",4.0 out of 5 stars,4.0,3030.0,3030.0,SAR 829.00,829.0
1,BenQ Portable Projector GV1,3.5 out of 5 stars,3.5,287.0,287.0,"SAR 1,328.99",1328.99
2,MO C9 Mini LED Projector Supports Full HD 1080...,3.2 out of 5 stars,3.2,6.0,6.0,SAR 599.95,599.95
3,"Mini Projector, ELIKLIV Native 1080P Projector...",1.7 out of 5 stars,1.7,3.0,3.0,SAR 479.20,479.2
4,MOTIM YG300 LED Projector 1080P Projection Mac...,,,,,SAR 155.00,155.0
5,"Epson EB-FH06 3LCD, Full HD, 3500 Lumens, 332 ...",4.4 out of 5 stars,4.4,42.0,42.0,"SAR 3,901.02",3901.02
6,Aproca Hard Travel Case Compatible with Anker ...,4.9 out of 5 stars,4.9,17.0,17.0,SAR 176.71,176.71
7,Annefish Projector Portable Wifi Wireless High...,,,,,,
8,DLP Link 3D Gl 144Hz Rechargeable 3D Active Sh...,4.3 out of 5 stars,4.3,637.0,637.0,SAR 214.85,214.85
9,Mini Smart Android Wireless Projector,2.7 out of 5 stars,2.7,49.0,49.0,SAR 898.00,898.0


# Best Sellers in   Electronics (Page 2)

In [31]:
url = 'https://www.amazon.sa/-/en/gp/bestsellers/electronics/ref=zg_bs_pg_2?ie=UTF8&pg=2'

In [32]:
response = requests.get(url)
response.status_code

200

In [33]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [34]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [35]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [36]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.2 out of 5 stars\n'

In [37]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [38]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

SAMSUNG 980 PRO 500GB PCIe NVMe Gen4 Internal Gaming SSD M.2 (MZ-V8P500B)
4.8 out of 5 stars
4.8
2,581
2581
SAR 311.63
311.63


In [39]:
soup.find_all("li", class_ = "zg-item-immersion")[1] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#52</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/SAMSUNG-Galaxy-Tab-Lite-Tablet/dp/B095WBLGXX?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB RAM, LTE, Gray (KSA Version)" height="200" src="https://images-na.ssl-images-amazon.com/images/I/41ZKC4i2pGS._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB RAM, LTE, Gray (KSA Version)
        </div>
</a>
<div

In [40]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [41]:
products_df2 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df2

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,SAMSUNG 980 PRO 500GB PCIe NVMe Gen4 Internal ...,4.8 out of 5 stars,4.8,2581.0,2581.0,SAR 311.63,311.63
1,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 595.00,595.0
2,Apple iPhone 12 Pro Max With FaceTime (128GB) ...,4.4 out of 5 stars,4.4,547.0,547.0,"SAR 4,399.00",4399.0
3,"2020 Apple iPad Air (10.9-inch, Wi-Fi, 64GB) -...",4.4 out of 5 stars,4.4,157.0,157.0,"SAR 2,449.00",2449.0
4,Xiaomi Mi Box S - 4K Ultra HDR Android TV Stre...,4.2 out of 5 stars,4.2,500.0,500.0,SAR 249.00,249.0
5,Samsung Galaxy S21 Ultra 5G Android Smartphone...,3.8 out of 5 stars,3.8,125.0,125.0,"SAR 3,499.00",3499.0
6,Apple iPhone 12 With FaceTime (256GB) - Purple,4.4 out of 5 stars,4.4,355.0,355.0,"SAR 3,299.00",3299.0
7,"Samsung Galaxy S20 FE 4G Android Smartphone, 1...",,,,,"SAR 1,599.00",1599.0
8,"6 PCS Camera Cover,Sliding Webcam Cover,Ultra ...",4.4 out of 5 stars,4.4,45.0,45.0,SAR 9.00,9.0
9,MSI MPG Z490 Gaming Plus Gaming Motherboard (A...,4.6 out of 5 stars,4.6,651.0,651.0,SAR 489.23,489.23


# Best Sellers in Camera & Photo Products (Page 1)

In [42]:
url = 'https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966385031/ref=zg_bs_nav_1_electronics'

In [43]:
response = requests.get(url)
response.status_code

200

In [44]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [45]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [46]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [47]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.3 out of 5 stars\n'

In [48]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [49]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

EZVIZ C6N Pan/Tilt Camera FHD Indoor,1080p WiFi Smart Home Security Camera IR Night Vision Motion Detection Auto Tracking Baby/Elder/Pet Cloud Storage/SD Slot 2-Way Audio Wi-Fi 2.4G iOS Android App
4.2 out of 5 stars
4.2
1,410
1410
SAR 98.00
98.00


In [35]:
soup.find_all("li", class_ = "zg-item-immersion")[1] 

<li class="zg-item-immersion" role="gridcell"><span class="a-list-item"><div class="a-section a-spacing-none aok-relative"><div class="a-row a-spacing-none aok-inline-block"><span class="a-size-small aok-float-left zg-badge-body zg-badge-color"><span class="zg-badge-text">#2</span></span><span class="aok-float-left zg-badge-triangle zg-badge-color"></span></div><span class="aok-inline-block zg-item"><a class="a-link-normal" href="/-/en/EZVIZ-Indoor-Security-Camera-Android/dp/B07ZC1Q6W6?_encoding=UTF8&amp;psc=1"><span class="zg-text-center-align"><div class="a-section a-spacing-small"><img alt="EZVIZ C6N FHD Indoor Security Camera WiFi Smart 2.4G with iOS and Android App" height="200" src="https://images-na.ssl-images-amazon.com/images/I/51bA8A2cU9L._AC_UL200_SR200,200_.jpg" width="200"/></div></span>
<div aria-hidden="true" class="p13n-sc-truncate p13n-sc-line-clamp-2" data-rows="2">
            EZVIZ C6N FHD Indoor Security Camera WiFi Smart 2.4G with iOS and Android App
        </div

In [51]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [52]:
products_df3 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df3

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"EZVIZ C6N Pan/Tilt Camera FHD Indoor,1080p WiF...",4.2 out of 5 stars,4.2,1410.0,1410.0,SAR 98.00,98.0
1,EZVIZ C6N FHD Indoor Security Camera WiFi Smar...,4.3 out of 5 stars,4.3,392.0,392.0,SAR 98.00,98.0
2,"6 PCS Camera Cover,Sliding Webcam Cover,Ultra ...",4.4 out of 5 stars,4.4,45.0,45.0,SAR 9.00,9.0
3,SanDisk Ultra Micro SD Card Android - Class 10...,4.4 out of 5 stars,4.4,116979.0,116979.0,SAR 35.00,35.0
4,SanDisk Ultra microSDXC 128GB 100MB/s Class 10...,4.4 out of 5 stars,4.4,116979.0,116979.0,SAR 45.00,45.0
5,Kioxia Exceria 64GB MicroSD Card - LMEX1L064GG2,4.6 out of 5 stars,4.6,961.0,961.0,SAR 22.03,22.03
6,AmazonBasics Braided 4K HDMI to HDMI Cable - 3...,4.7 out of 5 stars,4.7,9242.0,9242.0,SAR 9.00,9.0
7,Eufy Indoor Cam 2K Pan & Tilt Home Security Ca...,4.5 out of 5 stars,4.5,10.0,10.0,SAR 155.00,155.0
8,EZVIZ C2C 1080P Smart Home Security Wifi Camer...,,,,,SAR 69.00,69.0
9,"Imou 360 Degree WiFi Security Camera, Up to 25...",3.4 out of 5 stars,3.4,7.0,7.0,SAR 69.00,69.0


# Best Sellers in Camera & Photo Products (Page 2)

In [53]:
url ='https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966385031/ref=zg_bs_pg_2?ie=UTF8&pg=2'

In [54]:
response = requests.get(url)
response.status_code

200

In [55]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [56]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [57]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [58]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.2 out of 5 stars\n'

In [59]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [60]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN 8K DisplayPort Cable Ultra HD DisplayPort 1.4 Male to Male Nylon Braided Cable SPCC Shell, Support 7680x4320 Resolution 8K@60Hz, 4K@144Hz, 2K@165Hz HDP HDCP Compatible for Gaming Monitor-3Meter
4.6 out of 5 stars
4.6
315
315
SAR 92.00
92.00


In [61]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [62]:
products_df4 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df4

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UGREEN 8K DisplayPort Cable Ultra HD DisplayPo...,4.6 out of 5 stars,4.6,315.0,315.0,SAR 92.00,92.0
1,(Single Pack) - Carson MicroBrite Plus 60 -120...,4.2 out of 5 stars,4.2,30505.0,30505.0,SAR 84.50,84.5
2,"EZVIZ Wifi Smart Security Camera 360 Degree, D...",4.2 out of 5 stars,4.2,395.0,395.0,SAR 285.25,285.25
3,1.6 x 3M / 5 x 10FT Photography Studio Non-wov...,4.1 out of 5 stars,4.1,16.0,16.0,SAR 39.98,39.98
4,"EZVIZ C1C-B WiFi Security Camera, 12 Meters Ni...",4.1 out of 5 stars,4.1,17.0,17.0,SAR 89.00,89.0
5,"Phone Tripod,136cm Extendable Tripod Stand wit...",4.3 out of 5 stars,4.3,113.0,113.0,SAR 83.69,83.69
6,"Rankie Mini HDMI to HDMI Cable, High Speed Sup...",4.6 out of 5 stars,4.6,9389.0,9389.0,SAR 24.50,24.5
7,"eufy Security, eufyCam 2 Pro Wireless Home Sec...",4.5 out of 5 stars,4.5,107.0,107.0,SAR 438.00,438.0
8,Xiaomi Mi Home Security Camera 360 Degrees 2K ...,4.2 out of 5 stars,4.2,477.0,477.0,SAR 167.00,167.0
9,CanaKit Raspberry Pi 4 8GB Extreme Kit - 128GB...,4.8 out of 5 stars,4.8,1112.0,1112.0,SAR 723.03,723.03


# Best Sellers in Binoculars, Telescopes & Optics (Page 1) 

In [63]:
url ="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966400031/ref=zg_bs_nav_2_16966385031"


In [64]:
response = requests.get(url)
response.status_code

200

In [65]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [66]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [67]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [68]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.2 out of 5 stars\n'

In [69]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [70]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

GTHUNDER Digital Night Vision Goggles Binoculars for Total Darkness—Infrared Digital Night Vision Large Viewing Screen, 32GB Memory Card for Photo and Video Storage—Perfect for Surveillance
4.3 out of 5 stars
4.3
1,791
1791
SAR 760.00
760.00


In [71]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [72]:
products_df5 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df5

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,GTHUNDER Digital Night Vision Goggles Binocula...,4.3 out of 5 stars,4.3,1791.0,1791.0,SAR 760.00,760.0
1,(Single Pack) - Carson MicroBrite Plus 60 -120...,4.2 out of 5 stars,4.2,30505.0,30505.0,SAR 84.50,84.5
2,Apple Pencil Tips - 4 pack,4.8 out of 5 stars,4.8,8.0,8.0,SAR 98.00,98.0
3,Bushnell Falcon Binoculars 7x35 mm # 133410 - ...,4.5 out of 5 stars,4.5,4440.0,4440.0,SAR 147.90,147.9
4,Bushnell Falcon 10x50 Wide Angle Binoculars - ...,4.5 out of 5 stars,4.5,4443.0,4443.0,SAR 220.40,220.4
5,Celestron Portable Telescope Travel Scope 70,4.3 out of 5 stars,4.3,9014.0,9014.0,SAR 615.00,615.0
6,2.1X Eschenbach Max TV Glasses Distance Viewing,,,,,"SAR 1,152.26",1152.26
7,10X25 Small Compact Lightweight Binoculars for...,3.3 out of 5 stars,3.3,11.0,11.0,SAR 75.00,75.0
8,"16X52 Monocular Telescope, High Power Prism Co...",,,,,SAR 45.99,45.99
9,"Telescope, 70mm Aperture 400mm AZ Mount Astron...",,,,,SAR 325.42,325.42


# Best Sellers in Binoculars, Telescopes & Optics (Page 2)

In [73]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966400031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [74]:
response = requests.get(url)
response.status_code

200

In [75]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [76]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [77]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [78]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.4 out of 5 stars\n'

In [79]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [80]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Telescope Star Finder with Tripod F36050 HD Zoom Monocular Space Astronomical Spotting Scope for Kids and Beginner (Small)
3.3 out of 5 stars
3.3
841
841
SAR 216.54
216.54


In [81]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [82]:
products_df6 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df6

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,Telescope Star Finder with Tripod F36050 HD Zo...,3.3 out of 5 stars,3.3,841.0,841.0,SAR 216.54,216.54
1,"HUTACT 10x42 Binoculars for Adults, with Stora...",4.4 out of 5 stars,4.4,1180.0,1180.0,SAR 240.00,240.0
2,"SCOKC Binoculars 20x50 High Power, Compact HD ...",4.7 out of 5 stars,4.7,12.0,12.0,SAR 359.00,359.0
3,DMG-Digital Microscope 50x To 1000x Magnificat...,3.0 out of 5 stars,3.0,1.0,1.0,,
4,12X50 HD Monocular Telescope with Quick Smartp...,,,,,SAR 149.99,149.99
5,ARCTIC P12 PWM (Black/Black) - Pressure-optimi...,4.6 out of 5 stars,4.6,27894.0,27894.0,SAR 71.00,71.0
6,"Gskyer Telescope, 70mm Aperture 400mm AZ Mount...",4.4 out of 5 stars,4.4,15236.0,15236.0,SAR 768.60,768.6
7,"Bysameyee USB Digital Microscope 40X to 1000X,...",3.0 out of 5 stars,3.0,1.0,1.0,SAR 139.00,139.0
8,Newdoar 25x30 Zoomable Monocular Vintage Pirat...,4.2 out of 5 stars,4.2,132.0,132.0,SAR 126.00,126.0
9,"Camera Diaphragm, Compact Iris Aperture Adjust...",,,,,SAR 195.89,195.89


# Best Sellers in Computers, Components & Accessories (Page 1)

In [83]:
url= " https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966388031/ref=zg_bs_nav_1_electronics"

In [84]:
response = requests.get(url)
response.status_code

200

In [85]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [86]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [87]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [88]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.1 out of 5 stars\n'

In [89]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [90]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Stylus Pen for iPad with Palm Rejection, Active Pencil Compatible with (2018-2021) iPad Pro 11 & 12.9 inch, iPad 9th/8th/7th/6th Gen, iPad Air 4th/3rd Gen,iPad Mini 6th/5th Gen
3.4 out of 5 stars
3.4
5,548
5548
SAR 58.75
58.75


In [91]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [92]:
products_df7 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df7

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Stylus Pen for iPad with Palm Rejection, Activ...",3.4 out of 5 stars,3.4,5548,5548,SAR 58.75,58.75
1,530pcs/set Heat Shrink Tubing Insulation Shrin...,4.1 out of 5 stars,4.1,118,118,SAR 22.00,22.0
2,"UGREEN USB C Cable 2M, Braided 100W Power Deli...",4.2 out of 5 stars,4.2,3504,3504,SAR 40.80,40.8
3,"Anker USB C to Lightning Cable (6 ft), Powerli...",4.0 out of 5 stars,4.0,9,9,SAR 46.95,46.95
4,"TP-Link AC1750 Wi-Fi Range Extender - RE450, W...",4.0 out of 5 stars,4.0,5917,5917,SAR 190.00,190.0
5,Glorious Large Gaming Mouse Pad 11''x13'' - Black,4.7 out of 5 stars,4.7,7992,7992,SAR 37.00,37.0
6,Anker PowerLine III USB-C to USB-C Cable USB-C...,5.0 out of 5 stars,5.0,7,7,SAR 26.00,26.0
7,Razer Seiren Mini - USB Condenser Microphone f...,4.6 out of 5 stars,4.6,407,407,SAR 149.00,149.0
8,Kingston A400 SATA SSD Solid State Drive 2.5 I...,4.5 out of 5 stars,4.5,31735,31735,SAR 209.00,209.0
9,SAMSUNG 980 PRO 500GB PCIe NVMe Gen4 Internal ...,4.8 out of 5 stars,4.8,2581,2581,SAR 311.63,311.63


 # Best Sellers in Computers, Components & Accessories (Page 2)

In [93]:
url ="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966388031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [94]:
response = requests.get(url)
response.status_code

200

In [95]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [96]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [97]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [98]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.5 out of 5 stars\n'

In [99]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [100]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

3Pack Original [Apple MFi Certified] Charger Lightning to USB Cable Compatible iPhone 11 Pro/11/XS MAX/XR/8/7/6s/6/plus,iPad Pro/Air/Mini,iPod Touch(White 1M/3.3FT)
4.5 out of 5 stars
4.5
4,135
4135
SAR 60.00
60.00


In [101]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [102]:
products_df8 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df8

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,3Pack Original [Apple MFi Certified] Charger L...,4.5 out of 5 stars,4.5,4135.0,4135.0,SAR 60.00,60.0
1,SteelSeries Arctis 7 - Lossless Wireless Gamin...,4.5 out of 5 stars,4.5,25697.0,25697.0,SAR 427.38,427.38
2,"4 Pack Cable Management Sleeve, Cord Organizer...",4.2 out of 5 stars,4.2,42.0,42.0,SAR 27.21,27.21
3,SanDisk Ultra Dual Drive USB Type-C Flash Driv...,4.4 out of 5 stars,4.4,44681.0,44681.0,SAR 57.95,57.95
4,Samsung Electronics (MZ-V8V1T0B/AM) 980 SSD 1T...,4.7 out of 5 stars,4.7,1574.0,1574.0,SAR 559.00,559.0
5,UGREEN iPhone Lightning Cable [MFi Certified] ...,4.1 out of 5 stars,4.1,105.0,105.0,SAR 50.40,50.4
6,UGREEN USB C Cable USB A to Type C Data Cable ...,4.4 out of 5 stars,4.4,121.0,121.0,SAR 31.20,31.2
7,[2 Pack] ProCase Screen Protector for Galaxy T...,3.8 out of 5 stars,3.8,37.0,37.0,SAR 46.79,46.79
8,UGREEN iPhone 13 Charger Cable [MFi Certified]...,4.1 out of 5 stars,4.1,105.0,105.0,SAR 50.40,50.4
9,HyperX QuadCast - USB Condenser Gaming Microph...,4.8 out of 5 stars,4.8,12188.0,12188.0,SAR 499.00,499.0


# Best Sellers in Headphones, Earbuds & Accessories (Page 1 )

In [103]:
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966390031/ref=zg_bs_pg_1?ie=UTF8&pg=1"

In [104]:
response = requests.get(url)
response.status_code

200

In [105]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [106]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [107]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [108]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.1 out of 5 stars\n'

In [109]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [110]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

New Apple Airpods Pro
4.3 out of 5 stars
4.3
2,355
2355
SAR 749.00
749.00


In [111]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [112]:
products_df9 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df9

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,New Apple Airpods Pro,4.3 out of 5 stars,4.3,2355,2355,SAR 749.00,749.0
1,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777,2777,SAR 475.00,475.0
2,New Apple AirPods (3rd generation),4.4 out of 5 stars,4.4,26,26,SAR 829.00,829.0
3,Sony WI-C200 Wireless In-ear Bluetooth Headpho...,3.8 out of 5 stars,3.8,263,263,SAR 93.00,93.0
4,SoundPEATS Air3 Wireless Earbuds Mini Bluetoot...,4.0 out of 5 stars,4.0,314,314,SAR 143.20,143.2
5,Sony WH-1000XM4 Wireless Noise Cancelling Blue...,4.4 out of 5 stars,4.4,63,63,SAR 930.00,930.0
6,Wireless Earbuds SoundPEATS TrueAir2 Bluetooth...,3.8 out of 5 stars,3.8,2095,2095,SAR 127.20,127.2
7,"Anker Soundcore Life Q30 Bluetooth Headphones,...",4.6 out of 5 stars,4.6,11997,11997,SAR 249.00,249.0
8,"JBL In-Ear Headphones, Black, T110",4.2 out of 5 stars,4.2,17965,17965,SAR 28.15,28.15
9,Apple AirPods Pro,4.3 out of 5 stars,4.3,2355,2355,SAR 785.00,785.0


# Best Sellers in Headphones, Earbuds & Accessories (Page 2 )

In [113]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966390031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [114]:
response = requests.get(url)
response.status_code

200

In [115]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [116]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [117]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [118]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.7 out of 5 stars\n'

In [119]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [120]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Wireless Earbuds SoundPEATS TrueAir2 Bluetooth V5.2 Wireless Earphones with Qualcomm QCC3040, True-Wireless Mirroring, 4 Mics for Clear Calls and CVC 8.0 Noise Cancellation, aptX Codec, Total 25 Hours
3.8 out of 5 stars
3.8
2,095
2095
SAR 132.00
132.00


In [121]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [122]:
products_df10 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df10

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,Wireless Earbuds SoundPEATS TrueAir2 Bluetooth...,3.8 out of 5 stars,3.8,2095.0,2095.0,SAR 132.00,132.0
1,Sony WH-CH510 Wireless Bluetooth On-Ear with M...,3.7 out of 5 stars,3.7,113.0,113.0,SAR 142.00,142.0
2,ElloGear 2020 Earbuds Stereo Headphones for Sa...,4.0 out of 5 stars,4.0,2059.0,2059.0,SAR 29.00,29.0
3,For iPhone X 8 7 Plus Bluetooth Earphones Wire...,2.5 out of 5 stars,2.5,35.0,35.0,SAR 15.98,15.98
4,"Nokia Lite Earbuds BH-205 - Black, Normal",3.3 out of 5 stars,3.3,17.0,17.0,SAR 107.00,107.0
5,HUAWEI FreeBuds 4i Wireless In-Ear Bluetooth E...,4.4 out of 5 stars,4.4,15.0,15.0,SAR 289.00,289.0
6,JOYROOM JR-T03s white Wireless bluetooth headp...,2.7 out of 5 stars,2.7,95.0,95.0,SAR 57.74,57.74
7,TOSHIBA RZE-BT1200H Wireless Headphones Black,4.5 out of 5 stars,4.5,18.0,18.0,SAR 149.00,149.0
8,"SAMSUNG Galaxy Buds Pro, True Wireless Earbuds...",4.2 out of 5 stars,4.2,119.0,119.0,SAR 580.00,580.0
9,"TPU Silicone Case for AirPods Pro, Heavy Duty ...",3.5 out of 5 stars,3.5,31.0,31.0,SAR 28.55,28.55


# Best Sellers in Mobile Phones & Communication Products (Page 1 )

In [123]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966387031/ref=zg_bs_nav_1_electronics"

In [124]:
response = requests.get(url)
response.status_code

200

In [125]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [126]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [127]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [128]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.0 out of 5 stars\n'

In [129]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [130]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB, 4GB RAM, 4G LTE, Black (KSA Version)
4.1 out of 5 stars
4.1
40
40
SAR 499.00
499.00


In [131]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [132]:
products_df11 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df11

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40.0,40.0,SAR 499.00,499.0
1,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2.0,2.0,SAR 539.00,539.0
2,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2.0,2.0,SAR 539.00,539.0
3,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,363.0,363.0,SAR 79.00,79.0
4,Samsung Galaxy A72 | Ultra High-Res Quad Camer...,4.2 out of 5 stars,4.2,24.0,24.0,"SAR 1,499.00",1499.0
5,Apple iPhone 12 Pro With FaceTime (256GB) - Pa...,4.2 out of 5 stars,4.2,346.0,346.0,"SAR 4,199.00",4199.0
6,New Apple Airpods Pro,4.3 out of 5 stars,4.3,2355.0,2355.0,SAR 749.00,749.0
7,"Anker PowerCore Select 20000, 20000mAh Power B...",4.0 out of 5 stars,4.0,56.0,56.0,SAR 94.00,94.0
8,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.2 out of 5 stars,4.2,22.0,22.0,SAR 499.00,499.0
9,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.5 out of 5 stars,4.5,10.0,10.0,SAR 499.00,499.0


# Best Sellers in Mobile Phones & Communication Products (Page 2 )

In [133]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966387031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [134]:
response = requests.get(url)
response.status_code

200

In [135]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [136]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [137]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [138]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.1 out of 5 stars\n'

In [139]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [140]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Wireless Earbuds, SOUNDPEATS S5 Over-Ear Hooks Headphones Bluetooth 5.0 Stereo Sound Wireless Earphones with Touch Control IPX7 Waterproof for Sports, 12 mm Driver, USB-C Charge
4.3 out of 5 stars
4.3
192
192
SAR 103.20
103.20


In [141]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [142]:
products_df12 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df12

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Wireless Earbuds, SOUNDPEATS S5 Over-Ear Hooks...",4.3 out of 5 stars,4.3,192,192,SAR 103.20,103.2
1,"Samsung Galaxy S20 FE Dual SIM - 128GB, 8GB RA...",4.1 out of 5 stars,4.1,15,15,"SAR 1,779.00",1779.0
2,Apple iPhone 12 With FaceTime (128GB) - Blue,4.4 out of 5 stars,4.4,355,355,"SAR 3,599.00",3599.0
3,UGREEN 2 Pack Screen Protector for iPhone 13/1...,4.4 out of 5 stars,4.4,91,91,SAR 47.00,47.0
4,HUAWEI HW-TIA-B09-BLACK WATCH FIT Smartwatch w...,4.5 out of 5 stars,4.5,2640,2640,SAR 331.00,331.0
5,"Samsung Galaxy Note20 5G, Pro Grade Triple Cam...",4.1 out of 5 stars,4.1,32,32,"SAR 2,399.00",2399.0
6,SanDisk Ultra microSDXC 128GB 100MB/s Class 10...,4.4 out of 5 stars,4.4,116979,116979,SAR 45.00,45.0
7,UGREEN Clear iPhone 12/12Pro Case Compatible F...,4.1 out of 5 stars,4.1,450,450,SAR 29.60,29.6
8,Samsung Galaxy S21 Ultra 5G Android Smartphone...,3.8 out of 5 stars,3.8,125,125,"SAR 3,499.00",3499.0
9,"Anker USB C Charger, 20W PIQ 3.0 Fast Charger ...",4.2 out of 5 stars,4.2,135,135,SAR 49.00,49.0


# Best Sellers in Wearable Technology (Page 1)

In [143]:
url= "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966397031/ref=zg_bs_pg_1?ie=UTF8&pg=1"

In [144]:
response = requests.get(url)
response.status_code

200

In [145]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [146]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [147]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [148]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.6 out of 5 stars\n'

In [149]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [150]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple Watch Series 6 (GPS, 44mm) - Space Grey Aluminium Case with Black Sport Band
4.6 out of 5 stars
4.6
307
307
SAR 1,349.00
1349.00


In [151]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [152]:
products_df13 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df13

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Apple Watch Series 6 (GPS, 44mm) - Space Grey ...",4.6 out of 5 stars,4.6,307.0,307.0,"SAR 1,349.00",1349.0
1,"Apple Watch Series 6 (GPS, 44mm) - Blue Alumin...",4.6 out of 5 stars,4.6,307.0,307.0,"SAR 1,349.00",1349.0
2,"Apple Watch Series 6 (GPS, 40mm) - Gold Alumin...",4.6 out of 5 stars,4.6,307.0,307.0,"SAR 1,199.00",1199.0
3,"HUAWEI Band 6, All-day SpO2 Monitoring, 1.47"" ...",4.4 out of 5 stars,4.4,818.0,818.0,SAR 189.00,189.0
4,SoundPEATS Smart Watch New Upgraded 13 Sports ...,4.1 out of 5 stars,4.1,532.0,532.0,SAR 159.00,159.0
5,Compatible with Apple Watch Case Series 4 Seri...,3.9 out of 5 stars,3.9,205.0,205.0,SAR 12.79,12.79
6,HUAWEI HW-TIA-B09-BLACK WATCH FIT Smartwatch w...,4.5 out of 5 stars,4.5,2640.0,2640.0,SAR 331.00,331.0
7,"Apple Watch Series 6 (GPS, 40mm) - Space Grey ...",4.6 out of 5 stars,4.6,307.0,307.0,"SAR 1,199.00",1199.0
8,"Fitbit-Inspire 2, Black/Black",4.5 out of 5 stars,4.5,18392.0,18392.0,SAR 259.00,259.0
9,Milanese Loop Bracelet Stainless Steel band Fo...,4.3 out of 5 stars,4.3,29.0,29.0,SAR 20.53,20.53


# Best Sellers in Wearable Technology (Page 2)

In [153]:
url = "https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966397031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [154]:
response = requests.get(url)
response.status_code

200

In [155]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [156]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [157]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [158]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.7 out of 5 stars\n'

In [159]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [160]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple Watch Magnetic Charging Cable (1 m)
3.0 out of 5 stars
3.0
1
1
SAR 99.00
99.00


In [161]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [162]:
products_df14 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df14

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,Apple Watch Magnetic Charging Cable (1 m),3.0 out of 5 stars,3.0,1.0,1.0,SAR 99.00,99.0
1,"HUAWEI WATCH FIT Elegant Smartwatch, Stainless...",4.7 out of 5 stars,4.7,39.0,39.0,SAR 449.00,449.0
2,hw22 plus Smart Watch series 6 Bluetooth Call ...,2.4 out of 5 stars,2.4,9.0,9.0,SAR 99.00,99.0
3,"Apple Watch Series 7 (GPS, 45mm) - Starlight A...",5.0 out of 5 stars,5.0,1.0,1.0,"SAR 1,879.00",1879.0
4,Smart Watch 42mm Midnight Blue Sport Band,2.8 out of 5 stars,2.8,5.0,5.0,SAR 17.84,17.84
5,GEEAN Stainless Steel Watch Band Metal Strap f...,4.1 out of 5 stars,4.1,37.0,37.0,SAR 43.00,43.0
6,"OMIRA Smart Watch, Fitness Tracker with Heart ...",3.8 out of 5 stars,3.8,34.0,34.0,SAR 119.98,119.98
7,"EURCRBU Compatible with HUAWEI Watch GT, SAMSU...",,,,,SAR 12.80,12.8
8,Skullcandy Dime True Wireless Earbuds with 12 ...,4.1 out of 5 stars,4.1,3431.0,3431.0,SAR 99.00,99.0
9,"Jsbaby Smart Watch for Kids,Kids Smart Watch W...",,,,,SAR 220.71,220.71


# Best Sellers in Car & Vehicle Electronics (Page 1)

In [163]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966386031/ref=zg_bs_nav_1_electronics"

In [164]:
response = requests.get(url)
response.status_code

200

In [165]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [166]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [167]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [168]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.4 out of 5 stars\n'

In [169]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [170]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN Fast Car Charger Adapter 42.5W Dual USB Quick Charge QC 3.0 and PD Fast Charging Car Plug for iPhone 13 Pro/13 Pro Max/13/13 mini/12 Pro Max/11Pro Max,New iPad 9,iPad mini 6,Galaxy S20 S10,etc
4.3 out of 5 stars
4.3
2,192
2192
SAR 63.20
63.20


In [171]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [172]:
products_df15 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df15

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UGREEN Fast Car Charger Adapter 42.5W Dual USB...,4.3 out of 5 stars,4.3,2192.0,2192.0,SAR 63.20,63.2
1,UGREEN Car Phone Mount Dashboard Car Holder Wi...,4.4 out of 5 stars,4.4,1387.0,1387.0,SAR 76.00,76.0
2,UGREEN PD 20W Car Charger Fast Charging for iP...,4.3 out of 5 stars,4.3,2192.0,2192.0,SAR 44.00,44.0
3,UGREEN Magnetic Car Phone Holder Air Vent Moun...,4.0 out of 5 stars,4.0,1773.0,1773.0,SAR 69.60,69.6
4,UGREEN Car Phone Holder Air Vent Cell Phone Mo...,4.3 out of 5 stars,4.3,665.0,665.0,SAR 36.00,36.0
5,ANKER POWERDRIVE 2 QC BLACK,3.9 out of 5 stars,3.9,33.0,33.0,SAR 43.00,43.0
6,"UGREEN Bluetooth Aux Adapter, Bluetooth 5.0 Au...",4.5 out of 5 stars,4.5,218.0,218.0,SAR 93.60,93.6
7,UGREEN Car Phone Holder Magnetic Dashboard Mob...,4.2 out of 5 stars,4.2,3136.0,3136.0,SAR 52.00,52.0
8,UGREEN Car Air Vent Mount Cell Phone Holder Gr...,4.4 out of 5 stars,4.4,19686.0,19686.0,SAR 58.40,58.4
9,"Muson Car Phone Mount, Dashboard/Air Vent/Wind...",3.7 out of 5 stars,3.7,9.0,9.0,SAR 60.00,60.0


# Best Sellers in Car & Vehicle Electronics (Page 2)

In [173]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966386031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [174]:
response = requests.get(url)
response.status_code

200

In [175]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [176]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [177]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [178]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'SAR 56.94'

In [179]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [180]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UGREEN Car Phone Holder Stand for Dashboard Adjustable Windshield Car Mobile Mount Dock Compatible with iPhone 13/13 mini/13Pro/13Pro Max/12/11 pro max/Galaxy S21 ultra A52 Huawei P30 Pro Mate 40 Pro
3.9 out of 5 stars
3.9
23
23
SAR 83.00
83.00


In [181]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [182]:
products_df16 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df16

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UGREEN Car Phone Holder Stand for Dashboard Ad...,3.9 out of 5 stars,3.9,23.0,23.0,SAR 83.00,83.0
1,NNAA Vehicle Navigator Sunshade Visor GPS Navi...,,,,,SAR 56.94,56.94
2,Baseus T typed S-09A Bluetooth MP3 car charger...,4.3 out of 5 stars,4.3,46.0,46.0,SAR 66.02,66.02
3,"UGREEN Bike Phone Holder, Bicycle Motorcycle P...",4.3 out of 5 stars,4.3,140.0,140.0,SAR 52.00,52.0
4,Universal Bluetooth Aux Music Receiver 3.5mm S...,2.4 out of 5 stars,2.4,157.0,157.0,SAR 11.00,11.0
5,UGREEN Dashboard Phone Holder 360 Degree Car P...,3.6 out of 5 stars,3.6,817.0,817.0,SAR 69.00,69.0
6,Bluetooth V2.0 7 Inch 2 DIN Car Video Stereo P...,3.1 out of 5 stars,3.1,16.0,16.0,,
7,Universal Dashboard Car Mount Holder Rearview ...,3.0 out of 5 stars,3.0,36.0,36.0,SAR 19.58,19.58
8,UGREEN Bluetooth FM Transmitter USB Car Fast C...,3.7 out of 5 stars,3.7,42.0,42.0,SAR 108.00,108.0
9,"LENCENT FM Transmitter, Bluetooth FM Transmitt...",4.4 out of 5 stars,4.4,8301.0,8301.0,SAR 73.73,73.73


# Best Sellers in Electrical Power Accessories (Page 1)

In [183]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966395031/ref=zg_bs_nav_1_electronics"

In [184]:
response = requests.get(url)
response.status_code

200

In [185]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [186]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [187]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [188]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.0 out of 5 stars\n'

In [189]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [190]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

Apple 20W USB-C Power Adapter, White
4.5 out of 5 stars
4.5
363
363
SAR 79.00
79.00


In [191]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [192]:
products_df17 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df17

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,363.0,363.0,SAR 79.00,79.0
1,Extension Cord with 3 Power Socket and 3 USB S...,4.0 out of 5 stars,4.0,43.0,43.0,SAR 59.99,59.99
2,"FLIZIL Fast Charging Universal Travel Adapter,...",4.5 out of 5 stars,4.5,24.0,24.0,SAR 79.99,79.99
3,Multi Extension Socket Plug Adapter with 3 USB...,4.6 out of 5 stars,4.6,34.0,34.0,SAR 72.79,72.79
4,"FLIZIL Lightning Cable, Apple MFi Certified Li...",4.5 out of 5 stars,4.5,24.0,24.0,SAR 23.77,23.77
5,"LDNIO SC3604 Socket extension, 6USB Ports 3.4A...",3.9 out of 5 stars,3.9,99.0,99.0,SAR 57.00,57.0
6,FLIZIL Power Strip Extension with 4 way Socket...,4.5 out of 5 stars,4.5,24.0,24.0,SAR 44.99,44.99
7,Belkin BSV804ar2M 8-Outlet Surge Protection Ex...,4.5 out of 5 stars,4.5,98.0,98.0,SAR 128.99,128.99
8,5.6A Fast Charging Worldwide Travel Adapter Pl...,4.6 out of 5 stars,4.6,48.0,48.0,SAR 98.00,98.0
9,"Travel Adapter for KSA/UAE/UK, Plug for US/EU/...",4.4 out of 5 stars,4.4,53.0,53.0,SAR 80.00,80.0


# Best Sellers in Electrical Power Accessories (Page 2)

In [193]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966395031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [194]:
response = requests.get(url)
response.status_code

200

In [195]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [196]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [197]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [198]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.0 out of 5 stars\n'

In [199]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [200]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

UK to EU AC Power Adapter Plug Adapter - Travel Charger Power Plug Adapter - Dual Inputs - Safe Grounded - Universal Socket, Plug Kit for The Most of Europe,10A/16A 240V
3.4 out of 5 stars
3.4
4
4
SAR 26.07
26.07


In [201]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [202]:
products_df18 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df18

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,UK to EU AC Power Adapter Plug Adapter - Trave...,3.4 out of 5 stars,3.4,4.0,4.0,SAR 26.07,26.07
1,Powerology 4 AC 3 USB & USB-C PD 35W Multiport...,4.0 out of 5 stars,4.0,2.0,2.0,SAR 138.00,138.0
2,Belkin BSV604ar2M 6 Way/ 6 Plug 2m Surge Prote...,4.5 out of 5 stars,4.5,98.0,98.0,SAR 109.00,109.0
3,Safemore Extension Cord Lead Electrical Surge ...,4.4 out of 5 stars,4.4,6.0,6.0,SAR 249.99,249.99
4,Anker Extension Lead with 2 USB Ports and 4 Wa...,5.0 out of 5 stars,5.0,1.0,1.0,SAR 89.00,89.0
5,"Universal Travel Adapter, NTONPOWER Worldwide ...",,,,,SAR 69.99,69.99
6,LDNIO 4 Power Socket Extension 4 USB (18w) Def...,4.6 out of 5 stars,4.6,4.0,4.0,SAR 81.00,81.0
7,Lawazim 2-Socket Extension Strip with 5 Meters...,3.7 out of 5 stars,3.7,22.0,22.0,SAR 39.00,39.0
8,YUNSYE Power Strips Extension Lead w 6 USB Wal...,3.6 out of 5 stars,3.6,16.0,16.0,SAR 79.98,79.98
9,فيش كهرباء Universal AU US EU to UK AC POWER P...,2.1 out of 5 stars,2.1,5.0,5.0,SAR 14.98,14.98


# Best Sellers in Computer Tablets (Page 1)

In [203]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966433031/ref=zg_bs_nav_1_electronics"

In [204]:
response = requests.get(url)
response.status_code

200

In [205]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [206]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [207]:

print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [208]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n4.4 out of 5 stars\n'

In [209]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [210]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB RAM, LTE, Gray (KSA Version)
3.2 out of 5 stars
3.2
10
10
SAR 595.00
595.00


In [211]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [212]:
products_df19 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df19

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 595.00,595.0
1,"2020 Apple iPad Air (10.9-inch, Wi-Fi, 64GB) -...",4.4 out of 5 stars,4.4,157.0,157.0,"SAR 2,449.00",2449.0
2,"2021 Apple iPad Pro (11-inch, Wi-Fi, 128GB) - ...",4.6 out of 5 stars,4.6,54.0,54.0,"SAR 3,499.00",3499.0
3,Samsung Electronics A7 Tablet 10.4 Wi-Fi 64GB ...,4.1 out of 5 stars,4.1,22.0,22.0,SAR 793.32,793.32
4,"2021 Apple iPad Pro (12.9-inch, Wi-Fi, 128GB) ...",4.9 out of 5 stars,4.9,27.0,27.0,"SAR 4,399.00",4399.0
5,"Samsung Electronics Galaxy Tab S7 Wi-Fi, Mysti...",4.7 out of 5 stars,4.7,1307.0,1307.0,"SAR 2,293.47",2293.47
6,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 595.00,595.0
7,"SAMSUNG Galaxy Tab A7 Lite Tablet - 32GB, 3GB ...",3.2 out of 5 stars,3.2,10.0,10.0,SAR 666.00,666.0
8,HUION HS64 Graphics Drawing Tablet with Batter...,4.4 out of 5 stars,4.4,51.0,51.0,SAR 111.20,111.2
9,"Microsoft Surface GO 2 [STV-00005], Tablet-PC,...",4.5 out of 5 stars,4.5,11.0,11.0,"SAR 1,499.00",1499.0


# Best Sellers in Computer Tablets (Page 2)

In [213]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966433031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [214]:
response = requests.get(url)
response.status_code

200

In [215]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [216]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [217]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [218]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n2.8 out of 5 stars\n'

In [219]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [220]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

2021 Apple iPad Pro (12.9-inch, Wi-Fi + Cellular, 256GB) - Space Grey (5th Generation)
4.9 out of 5 stars
4.9
27
27
SAR 6,199.00
6199.00


In [221]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [222]:
products_df20 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df20

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"2021 Apple iPad Pro (12.9-inch, Wi-Fi + Cellul...",4.9 out of 5 stars,4.9,27.0,27.0,"SAR 6,199.00",6199.0
1,"Kids tablets Datazone, 7.0 inch Dual Camera Wi...",2.8 out of 5 stars,2.8,5.0,5.0,SAR 170.00,170.0
2,"Microsoft Surface Go 2 Tablet STQ-00005, Intel...",4.2 out of 5 stars,4.2,18.0,18.0,"SAR 1,999.00",1999.0
3,"Atouch Tablet, Kids Tab A10,10.1 inch Tablet,D...",,,,,SAR 342.47,342.47
4,Samsung Galaxy Tab S7 FE 2021 Android Tablet 1...,4.5 out of 5 stars,4.5,15.0,15.0,"SAR 1,858.29",1858.29
5,"Samsung Galaxy Tab S7+ Wi-Fi, Mystic Black - 5...",4.7 out of 5 stars,4.7,1307.0,1307.0,"SAR 3,454.50",3454.5
6,"2021 Apple iPad mini (Wi-Fi, 256GB) - Space Grey",4.1 out of 5 stars,4.1,12.0,12.0,"SAR 3,049.00",3049.0
7,"Hyjoy 8 inch Kids Tablet, 1920 1200 IPS FHD Di...",4.3 out of 5 stars,4.3,142.0,142.0,SAR 506.23,506.23
8,Lenovo Tab P11 with Keyboard Pack and Precisio...,4.0 out of 5 stars,4.0,1.0,1.0,,
9,"SAMSUNG Galaxy Tab S6, 10.5"", 6G RAM, 128GB, L...",4.7 out of 5 stars,4.7,19.0,19.0,"SAR 2,851.41",2851.41


# Best Sellers in Home Video Projectors (Page 1 )

In [223]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966457031/ref=zg_bs_nav_2_16966392031"

In [224]:
response = requests.get(url)
response.status_code

200

In [225]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [226]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [227]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [228]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'SAR 256.95'

In [229]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [230]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

YG-300 LCD Mini Portable Projector with USB/SD/AV/HDMI Slots - Yellow
2.6 out of 5 stars
2.6
160
160
SAR 153.00
153.00


In [231]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [232]:
products_df21 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df21

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,YG-300 LCD Mini Portable Projector with USB/SD...,2.6 out of 5 stars,2.6,160.0,160.0,SAR 153.00,153.0
1,Mini Projector - Portable Smart Home Mobile Ph...,,,,,SAR 256.95,256.95
2,"XGIMI MoGo Pro+ portable smart home projector,...",,,,,"SAR 2,599.00",2599.0
3,Video Projector 3D DLP 1080P HD 3800 Lumens Wi...,3.6 out of 5 stars,3.6,28.0,28.0,"SAR 1,485.59",1485.59
4,"FunLites Upgraded Full HD Projector, Native 10...",4.1 out of 5 stars,4.1,48.0,48.0,SAR 439.99,439.99
5,BlissLights Sky Lite - LED Laser Star Projecto...,4.6 out of 5 stars,4.6,36876.0,36876.0,SAR 512.31,512.31
6,"Benq Proiector DLP 3,600 lumens , White",3.9 out of 5 stars,3.9,15.0,15.0,"SAR 1,899.00",1899.0
7,Fegishilly 3 in 1 Star Galaxy Projector Moving...,,,,,SAR 168.96,168.96
8,"Anker Nebula Solar Portable 1080p Projector, F...",,,,,"SAR 2,980.00",2980.0
9,"Star Projector, Galaxy Projector with Remote C...",4.5 out of 5 stars,4.5,6422.0,6422.0,SAR 152.00,152.0


# Best Sellers in Home Video Projectors (Page 2 )

In [233]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966457031/ref=zg_bs_pg_2?ie=UTF8&pg=2"

In [234]:
response = requests.get(url)
response.status_code

200

In [235]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [236]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [237]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [238]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.5 out of 5 stars\n'

In [239]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""
    return

In [240]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

ViewSonic M1 Mini DLP Pico Projector, 50 Lumens
4.0 out of 5 stars
4.0
3,030
3030
SAR 829.00
829.00


In [242]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [243]:
products_df22 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df22

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"ViewSonic M1 Mini DLP Pico Projector, 50 Lumens",4.0 out of 5 stars,4.0,3030.0,3030.0,SAR 829.00,829.0
1,BenQ Portable Projector GV1,3.5 out of 5 stars,3.5,287.0,287.0,"SAR 1,328.99",1328.99
2,MO C9 Mini LED Projector Supports Full HD 1080...,3.2 out of 5 stars,3.2,6.0,6.0,SAR 599.95,599.95
3,"Mini Projector, ELIKLIV Native 1080P Projector...",1.7 out of 5 stars,1.7,3.0,3.0,SAR 479.20,479.2
4,MOTIM YG300 LED Projector 1080P Projection Mac...,,,,,SAR 155.00,155.0
5,"Epson EB-FH06 3LCD, Full HD, 3500 Lumens, 332 ...",4.4 out of 5 stars,4.4,42.0,42.0,"SAR 3,901.02",3901.02
6,Aproca Hard Travel Case Compatible with Anker ...,4.9 out of 5 stars,4.9,17.0,17.0,SAR 176.71,176.71
7,Annefish Projector Portable Wifi Wireless High...,,,,,,
8,DLP Link 3D Gl 144Hz Rechargeable 3D Active Sh...,4.3 out of 5 stars,4.3,637.0,637.0,SAR 214.85,214.85
9,Mini Smart Android Wireless Projector,2.7 out of 5 stars,2.7,49.0,49.0,SAR 898.00,898.0


# Best Sellers in Car Electronics (Page 1)

In [253]:
url="https://www.amazon.sa/-/en/gp/bestsellers/electronics/16966414031/ref=zg_bs_nav_2_16966386031"

In [254]:
response = requests.get(url)
response.status_code

200

In [255]:
response.text[:1000]

'<!doctype html><html lang="en-ae" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->\n<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>\n<!-- sp:end-feature:head-start -->\n\n<!-- sp:feature:cs-optimization -->\n<meta http-equiv=\'x-dns-prefetch-control\' content=\'on\'>\n<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">\n<link rel="dns-prefetch" href="https://m.media-amazon.com">\n<link rel="dns-prefetch" href="https://completion.amazon.com">\n<!-- sp:end-feature:cs-optimization -->\n\n<!-- sp:feature:aui-assets -->\n<link rel="stylesheet" href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL.cs

In [256]:
page = response.text
soup = BeautifulSoup(page, 'lxml')

In [257]:
print(soup.prettify())

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="en-ae">
 <!-- sp:feature:head-start -->
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <!-- sp:end-feature:head-start -->
  <!-- sp:feature:cs-optimization -->
  <meta content="on" http-equiv="x-dns-prefetch-control"/>
  <link href="https://images-eu.ssl-images-amazon.com" rel="dns-prefetch"/>
  <link href="https://m.media-amazon.com" rel="dns-prefetch"/>
  <link href="https://completion.amazon.com" rel="dns-prefetch"/>
  <!-- sp:end-feature:cs-optimization -->
  <!-- sp:feature:aui-assets -->
  <link href="https://images-eu.ssl-images-amazon.com/images/I/11EIQ5IGqaL._RC|01ZTHTZObnL.css,41JZEtDv4tL.css,31Y8m1dzTdL.css,013z33uKh2L.css,017DsKjNQJL.css,0131vqwP5UL.css,41EWOOlBJ9L.css,11TIuySqr6L.css,01ElnPiDxWL.css,11bGSgD5pDL.css,01Dm5eKVxwL.css,01IdKcBuAdL.css,01y-XAlI+2L.css,21N4kUH7pxL.css,01oDR3IULNL.css,41CYNGpGlrL.css,01XPHJk60-L.css,114y0SIP+yL.css,21aPhFy+riL

In [258]:
soup.find_all('li', {'class':'zg-item-immersion'})[1].find_all('a')[1].text

'\n3.1 out of 5 stars\n'

In [259]:
def clean_num(s):
    """cleans text scraped from website
    
    Parameters:
    s (String): raw text

    Returns:
    String: cleaned digit text

    """ 

    if s is None:
        return s
    return "".join(re.findall('\d*\.?\d+',s))

def get_data(block, tag, class_text):
    """fetch data from tag and its associated class
    
    Parameters:
    block (int): Book item 
    tag (String): tag for element we want to fetch
    class_text (String): class for element we want to fetch

    Returns:
    String: data text fetched

    """ 
    try:
        return block.find(tag, class_ = class_text).text
    except AttributeError:
        return ""

In [260]:
for i in (soup.find("li", "zg-item-immersion")):
    print(str.strip(get_data(i, "a", "a-link-normal")))
    print(get_data(i, "span", "a-icon-alt"))
    print(clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    print(get_data(i, "a","a-size-small a-link-normal"))
    print(clean_num(get_data(i, "a","a-size-small a-link-normal")))
    print(get_data(i, "span","p13n-sc-price"))
    print(clean_num(get_data(i, "span", "p13n-sc-price")))

70mai A500S Pro Plus+, 2K Front and Interior, Dual Camera 1944p, 1080p, Built-in GPS ADAS, WiFi Smart System, Voice Control




SAR 449.00
449.00


In [261]:
products_list = []
for i in (soup.find_all("li", class_ = "zg-item-immersion")):
    pro_name = (str.strip(get_data(i, "a", "a-link-normal")))
    pro_rate= (get_data(i, "span", "a-icon-alt"))
    pro_rate_clean = (clean_num(get_data(i, "span", "a-icon-alt").split(" ")[0]))
    review_num = (get_data(i, "a","a-size-small a-link-normal"))
    review_num_clean = (clean_num(get_data(i, "a","a-size-small a-link-normal")))
    pro_price = (get_data(i, "span","p13n-sc-price"))
    pro_price_clean = (clean_num(get_data(i, "span", "p13n-sc-price")))
    
    headers = ['pro_name', 'pro_rate', 'pro_rate_clean','review_num',
              'review_num_clean', 'pro_price','pro_price_clean']
    products_dict = dict(zip(headers, [ pro_name,
                                    pro_rate,
                                    pro_rate_clean,
                                    review_num,
                                    review_num_clean, 
                                    pro_price,
                                    pro_price_clean]))
    products_list.append(products_dict)

In [263]:
products_df23 = pd.DataFrame(products_list).replace("",np.nan)  #convert list of dict to df
products_df23

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"70mai A500S Pro Plus+, 2K Front and Interior, ...",,,,,SAR 449.00,449.0
1,Bluetooth V2.0 7 Inch 2 DIN Car Video Stereo P...,3.1 out of 5 stars,3.1,16.0,16.0,,
2,Pioneer 1300W Bass Reflex Car Subwoofer With T...,4.4 out of 5 stars,4.4,64.0,64.0,SAR 339.00,339.0
3,MMOBIEL Speaker Set Left and Right Replacement...,4.1 out of 5 stars,4.1,158.0,158.0,SAR 124.61,124.61
4,YORKING Keyless Entry Remote Key Fob Car Repla...,4.3 out of 5 stars,4.3,35.0,35.0,SAR 26.71,26.71
5,Horande Keyless Entry Replacement Key Fob Cove...,4.4 out of 5 stars,4.4,59.0,59.0,SAR 61.25,61.25
6,"Freewalk Disc Brake Lock, Anti-theft Motorcycl...",5.0 out of 5 stars,5.0,2.0,2.0,SAR 95.99,95.99
7,Vgate vLinker BM Plus Bluetooth BLE OBD2 Scann...,,,,,SAR 195.31,195.31
8,4 Types 12V Add-a-Circuit Adapter & Fuse Kit -...,5.0 out of 5 stars,5.0,1.0,1.0,SAR 70.00,70.0
9,Car Key Fob Keyless Entry Remote with Ignition...,4.3 out of 5 stars,4.3,606.0,606.0,SAR 84.28,84.28


In [264]:
df = pd.concat([products_df1,products_df2, products_df3, products_df4, products_df5, products_df6, products_df7, products_df8, 
             products_df9, products_df10, products_df11, products_df12, products_df13, products_df14, products_df15, 
             products_df16, products_df17, products_df18, products_df19, products_df20,products_df21,products_df22,products_df23])

In [265]:
df

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean
0,"ViewSonic M1 Mini DLP Pico Projector, 50 Lumens",4.0 out of 5 stars,4.0,3030,3030,SAR 829.00,829.00
1,BenQ Portable Projector GV1,3.5 out of 5 stars,3.5,287,287,"SAR 1,328.99",1328.99
2,MO C9 Mini LED Projector Supports Full HD 1080...,3.2 out of 5 stars,3.2,6,6,SAR 599.95,599.95
3,"Mini Projector, ELIKLIV Native 1080P Projector...",1.7 out of 5 stars,1.7,3,3,SAR 479.20,479.20
4,MOTIM YG300 LED Projector 1080P Projection Mac...,,,,,SAR 155.00,155.00
...,...,...,...,...,...,...,...
45,70mai Hardwire Cable Kit for 24H Parking Monit...,,,,,SAR 97.37,97.37
46,Speaker Parking Aid Reversing Fit for Nissan T...,,,,,SAR 125.32,125.32
47,RV Backup Camera Wireless with 7‘’ Touch Key D...,,,,,SAR 904.39,904.39
48,"SVS SoundPath Subwoofer Isolation System, 4-Pack",4.8 out of 5 stars,4.8,1579,1579,SAR 225.88,225.88


In [252]:
df['i'] = y
y=[]
for i in range (0,1000):
    y.append(i) 

NameError: name 'y' is not defined

In [250]:
df =df.set_index(df['i'])

KeyError: 'i'

In [266]:
df.isna().sum()

pro_name              0
pro_rate            128
pro_rate_clean      128
review_num          128
review_num_clean    128
pro_price            38
pro_price_clean      38
dtype: int64

In [301]:
#df.to_csv('Amazom_BestSeller_Products')
#df.to_csv(r'C:\Users\REEEM\Documents\SDAIA\project2\Linear-Regression\Amazom_BestSeller_Products', index=False)
#df.to_parquet("Amazom_BestSeller_Products.parquet")
df.to_csv(r'c:\Users\REEEM\Documents\SDAIA\project2\Linear-Regression\Amazom_BestSeller_Products.csv', sep='\t', mode='a')

In [307]:
pro_df = pd.read_csv('Amazom_BestSeller_Products')

In [308]:
pro_df.in

Unnamed: 0,pro_name,pro_rate,pro_rate_clean,review_num,review_num_clean,pro_price,pro_price_clean,i
0,"SAMSUNG Galaxy A12 Dual SIM Smartphone - 64GB,...",3.0 out of 5 stars,3.0,2,2.0,SAR 539.00,539.00,0
1,Samsung Galaxy A12 LTE Dual SIM Smartphone - 6...,1.0 out of 5 stars,1.0,2,2.0,SAR 539.00,539.00,1
2,"Apple 20W USB-C Power Adapter, White",4.5 out of 5 stars,4.5,359,359.0,SAR 79.00,79.00,2
3,"SAMSUNG Galaxy M12 Dual SIM Smartphone - 64GB,...",4.1 out of 5 stars,4.1,40,40.0,SAR 499.00,499.00,3
4,Apple AirPods with Charging Case,4.1 out of 5 stars,4.1,2777,2777.0,SAR 475.00,475.00,4
...,...,...,...,...,...,...,...,...
995,Lume Cube Broadcast Lighting Kit | Self Broadc...,4.4 out of 5 stars,4.4,1044,1044.0,SAR 674.69,674.69,995
996,Slim Folio Pro for iPad Pro 11-inch (1st and 2...,4.0 out of 5 stars,4.0,1,1.0,SAR 505.22,505.22,996
997,"8"" Windows Tablet with HDMI Port,1280x800 Touc...",2.5 out of 5 stars,2.5,3,3.0,SAR 659.83,659.83,997
998,"Lenovo Yoga Smart Tablet, 10.1"" FHD (1920x1200...",,,,,"SAR 1,026.26",1026.26,998


In [310]:
pro_df.isna().sum()

pro_name             0
pro_rate            77
pro_rate_clean      77
review_num          77
review_num_clean    77
pro_price           25
pro_price_clean     25
i                    0
dtype: int64

<pandas.core.indexing._iLocIndexer at 0x197e483aa90>