# **Selenium:** Web Automation and Scrapping

### Importing Selenium and Setting up Webdriver for Chrome/Firefox

In [1]:
from selenium import webdriver
import time


browser = webdriver.Chrome() # requires chromedriver.exe
# browser = webdriver.Firefox() # requires geckodriver.exe

URL = "https://www.amazon.com/Best-Sellers-Electronics-Computer-Servers/zgbs/electronics/11036071/"
browser.get(URL)

### Defining a Function to use Webdriver repeatedly

In [5]:
def start_browser():
    from selenium import webdriver
    browser = webdriver.Chrome()
    return browser

### Printing Title of a Website

In [3]:
browser = start_browser()
URL = "https://www.amazon.com/Best-Sellers-Electronics-Computer-Servers/zgbs/electronics/11036071/"
browser.get(URL)
print(browser.title)

Amazon Best Sellers: Best Computer Servers


### Fetch URL name and Html of a website

In [None]:
browser = start_browser()
URL = "https://www.amazon.com/Best-Sellers-Electronics-Computer-Servers/zgbs/electronics/11036071/"
browser.get(URL)

print(browser.current_url)
print(browser.page_source)

## **Scrap Website:** Tech with Tim

#### 1. Grab a Searchbar and Send Commands

In [8]:
browser = start_browser()
URL = "https://www.techwithtim.net/"
browser.get(URL)

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys # for Keyboard keys

print(browser.title)
search = browser.find_element(By.NAME, "s") # grabs searchbar on website
search.send_keys("test") # sending text to searchbar
search.send_keys(Keys.RETURN) # hitting ENTER on searchbar

Tech With Tim - Python & Java Programming Tutorials - techwithtim.net
<selenium.webdriver.remote.webelement.WebElement (session="51a0195a7e123a86b577208a6c397e73", element="cf494191-02ce-4613-b44c-e7f48361e3db")>


#### 2. Fetch H1 titles of Search Page Results

In [27]:
browser = start_browser()
URL = "https://www.techwithtim.net/"
browser.get(URL)

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys # for Keyboard keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

print(browser.title)
search = browser.find_element(By.NAME, "s") # grabs searchbar on website
search.send_keys("test") # sending text to searchbar
search.clear() # to empty the searchbar; just in case it has some value typed in already
search.send_keys(Keys.RETURN) # hitting ENTER on searchbar


try:
    main = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.ID, "main")) # Waiting 10 seconds so the page can load elements with ID "main"
    )
except:
    browser.quit()

articles = main.find_elements(By.TAG_NAME, "article") # Searching for tags with name "article"

for article in articles: # looping through all the articles
    header = article.find_element(By.CLASS_NAME,"entry-title").accessible_name # accessing h1 of each article
    print(header)


Tech With Tim - Python & Java Programming Tutorials - techwithtim.net
HTTP Methods – GET & POST
Adding Bootstrap & Template Inheritance
HTML Templates
A Basic Website
Events by Day
Date From Speech P2
Getting Microphone Input
MessageBoxes & Popup Windows
ComboBoxes
Images/QPixmap


#### 3. Page Navigating and Clicking Elements

In [31]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys # for Keyboard keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
URL = "https://www.techwithtim.net/"
browser.get(URL)

try:
    link = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "Python Programming")) # Waiting pageload to Click a button
    )
    link.click()

    element = WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.LINK_TEXT, "Beginner Python Tutorials")) # Waiting pageload to Click a button
    )
    element.click()

    started = WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.ID, "sow-button-19310003")) # Waiting pageload to Click a button
    )
    started.click()

    browser.back() # Go back to previous page
    browser.forward() # Go to forward page

except:
    browser.quit()

### **Automate Actions:** Cookie Clicker Website

**Selenium Action Chains Documentation:** https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys # for Keyboard keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

browser = webdriver.Chrome()
URL = "https://orteil.dashnet.org/cookieclicker/"
browser.get(URL)

try:
    link = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.ID, "langSelect-EN")) # Waiting pageload to Click a button
    )
    link.click()

    browser.implicitly_wait(5)

    cookie = browser.find_element(By.ID, "bigCookie")
    cookie_count = browser.find_element(By.ID, "cookies")
    items = browser.find_element(By.ID, "productPrice0")

    actions = ActionChains(browser)

    for i in range(500):
        actions.click(cookie).perform()
        # print(cookie_count.text.split(" ")[0])
        cookie_count_int = int(cookie_count.text.split(" ")[0])
        item_value = int(items.text)
        if cookie_count_int == item_value:
            upgrade_action = ActionChains(browser)
            upgrade_action.move_to_element(items)
            upgrade_action.click(items).perform()

except:
    browser.quit()


### **Amazon:** Single Page Scrapping

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
# browser.maximize_window() # For maximizing window
# browser.implicitly_wait(20) # gives an implicit wait for 20 seconds

browser.get("https://www.amazon.com/s?i=computers-intl-ship&bbn=16225007011&rh=n%3A16225007011%2Cp_36%3A1253503011&dc&fs=true&qid=1645954406&ref=sr_ex_n_1")

elem_list = browser.find_element(By.CSS_SELECTOR, "div.s-main-slot.s-result-list.s-search-results.sg-row")

items = elem_list.find_elements(By.XPATH, "//div[@data-component-type='s-search-result']")

for item in items:
    title = item.find_element(By.TAG_NAME, "h2").text
    price = "No Price Found"
    image = "No Image Found"
    link = item.find_element(By.CLASS_NAME, "a-link-normal").get_attribute("href")

    try:
        price = item.find_element(By.CSS_SELECTOR, ".a-price").text.replace("\n", ".")   
    except:
        pass

    try:
        image = item.find_element(By.CSS_SELECTOR, ".s-image").get_attribute("src")
    except:
        pass

    print(f"IMAGE: {image}")
    print(f"TITLE: {title}")
    print(f"PRICE: {price}")
    print(f"LINK: {link}\n")
  

### **UnitTest Framework:**