##### Selenium is a popular open-source library for browser automation and scraping. It uses the WebDriver protocol to control Chrome, Firefox, and Safari browsers. Unlike many traditional scraping tools, Selenium drives a real browser, so it can collect data from websites that rely on JavaScript.




In [35]:
#!pip install selenium

### <font color = 'bisque'> Import packages </font>

In [36]:
# import packages
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

*   **Webdriver** : A set of methods used to control web browsers and interact with the elements on a web page.
*   **ChromeOptions** : lets us set the options the Chrome browser is launched with (for example, headless mode).
*   **find_element** : a method used to locate a single element with a specific attribute.
*   **find_elements** : a method used to locate all elements that share the same attribute.
*   **By class** :  The By class is used to specify which attribute is used to locate elements on a page.

These are the attributes of the By class that we will use:
*    CLASS_NAME = "class name"
*    CSS_SELECTOR = "css selector"

In [37]:
# Build the Chrome configuration first, then launch the browser with it.
options = webdriver.ChromeOptions()
# "--headless=new" asks Chrome to run without opening a visible window,
# so the script can work in the background
options.add_argument("--headless=new")
# start the Chrome driver (browser) with the options defined above
driver = webdriver.Chrome(options=options)

### <font color  = 'bisque'> Collect data using only selenium </font>

In [38]:
# open the mobile-phones listing page in the browser
url = 'https://tonaton.com/c_mobile-phones'
driver.get(url)
# collect every element matched by the attribute selector on the class value
containers = driver.find_elements(
    by=By.CSS_SELECTOR,
    value="[class= 'product__container flex']",
)
# number of containers found on the page
len(containers)

ReadTimeoutError: HTTPConnectionPool(host='localhost', port=36997): Read timed out. (read timeout=120)

In [None]:
# prototype the extraction on the first container
container = containers[0]

# description text of the listing
details = container.find_element(by=By.CLASS_NAME, value='product__description').text

# price text with surrounding whitespace, the 'GH₵ ' prefix and commas removed
price = (
    container.find_element(by=By.CLASS_NAME, value='product__title')
    .text
    .strip()
    .replace('GH₵ ','')
    .replace(',','')
)

# location text of the listing
location = container.find_element(by=By.CLASS_NAME, value='product__location').text

# condition text, read from the element matched by the tags selector
condition = container.find_element(
    by=By.CSS_SELECTOR,
    value="[class = 'product__tags flex wrap']",
).text

# show the four fields on one line, separated by ' -- '
print(details, price, location, condition, sep=' -- ')

In [None]:
import pandas as pd

In [45]:
# generalize the scraping over all containers

# Load the listing page once, outside the loop, and locate every product
# container on it. Each loop iteration must then read its *own* container;
# re-loading the page or re-reading containers[0] inside the loop would make
# every row a copy of the first listing.
url = 'https://tonaton.com/c_mobile-phones'
driver.get(url)
containers = driver.find_elements(By.CSS_SELECTOR, "[class= 'product__container flex']")

# scrape data from all the containers
data = []
skipped = 0  # containers dropped because one of the expected fields was missing

for container in containers:
    try:
        # get the details
        details = container.find_element(By.CLASS_NAME, 'product__description').text

        # get the price, without the 'GH₵ ' prefix and the thousands separators
        price = container.find_element(By.CLASS_NAME, 'product__title').text.strip().replace('GH₵ ','').replace(',','')

        # get the location
        location = container.find_element(By.CLASS_NAME, 'product__location').text

        # get the condition
        condition = container.find_element(By.CSS_SELECTOR, "[class = 'product__tags flex wrap']").text
    except NoSuchElementException:
        # Best effort: a listing lacking one of the fields is skipped, but it is
        # counted so the loss is visible instead of being silently swallowed.
        skipped += 1
        continue

    data.append({
        'details': details,
        'price': price,
        'location': location,
        'condition': condition,
    })

if skipped:
    print(f'Skipped {skipped} of {len(containers)} containers with a missing field')

# Fix the column set explicitly so the frame has the same columns even when
# no container could be scraped.
df = pd.DataFrame(data, columns=['details', 'price', 'location', 'condition'])


In [46]:
# dimension of the scraped DataFrame, as (rows, columns)
df.shape

(18, 4)

### <font color  = 'bisque'> Collect data combining selenium and beautifulsoup </font>

In [41]:
# import Beautifulsoup
from bs4 import BeautifulSoup as bs
# take the HTML of the page currently held by the Selenium driver
res = driver.page_source
# hand that HTML to BeautifulSoup, parsed with 'html.parser'
soup = bs(markup=res, features='html.parser')
# collect the <div> elements matching class 'product__container flex'
containers = soup.find_all(name='div', class_='product__container flex')
# number of containers found in the parsed HTML
len(containers)

18

In [None]:
# scrape data from a chosen container: take the first parsed container
# to prototype the field extraction on
container = containers[0]

In [None]:
# generalize the scraping over all containers 


In [None]:
# dimension


### <font color  = 'bisque'> Assignment </font>

In [None]:
url = 'https://tonaton.com/c_mobile-phones'
# Scrape the data (details, price, location, condition, image_link) over 100 pages,
# once using Selenium only and once using Selenium combined with BeautifulSoup.
# Then clean the data.