# Selenium

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

In [None]:
# Resolve the chromedriver location once; the value returned by install()
# is what later cells hand to Service(...) when starting Chrome.
CHROMEDRIVER_PATH = ChromeDriverManager().install()

In [None]:
# Show the resolved driver path as this cell's output.
CHROMEDRIVER_PATH

In [None]:
# Launch Chrome, load the Target home page, pause briefly so the window is
# visible, then shut the browser down.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get("https://www.target.com/")
    time.sleep(2)
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() only closes the current window. Doing it in `finally` avoids
    # leaking a browser when the page load raises.
    driver.quit()

### Help! I get an error like "chromedriver unexpectedly exited. Status code was: -9"

Try updating Chrome 🤷‍♀️

### Using Selenium is handy for getting content behind "Ajax walls"
https://pythonscraping.com/pages/javascript/ajaxDemo.html

In [None]:
# You may want to pull out the chromedriver path
import os

CHROMEDRIVER_PATH = ChromeDriverManager().install()

# Optional machine-specific override. It is applied only when the file
# actually exists, so the notebook keeps working on machines that do not
# have this checkout.
# NOTE(review): the path ends in `chrome`, not `chromedriver` -- confirm it
# points at the driver binary rather than the browser.
LOCAL_CHROMEDRIVER_PATH = '/Users/ryanmitchell/Documents/GitHub/ddt_workshop_2024/chrome-mac-arm64/chrome'
if os.path.isfile(LOCAL_CHROMEDRIVER_PATH):
    CHROMEDRIVER_PATH = LOCAL_CHROMEDRIVER_PATH


In [None]:
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Load the Ajax demo page in a real browser and print the text of the
# element with id "content".
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get('http://pythonscraping.com/pages/javascript/ajaxDemo.html')
    # Fixed pause before reading the element, to give the page's
    # JavaScript time to run.
    time.sleep(3)
    print(driver.find_element(By.ID, 'content').text)
finally:
    # quit() (not close()) tears down the browser and the driver process,
    # even if the lookup above raised.
    driver.quit()

## Headless browsing

In [None]:
# Same Ajax demo as before, but Chrome is started with --headless so no
# browser window is shown.
chrome_options = Options()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(
    service=Service(CHROMEDRIVER_PATH),
    options=chrome_options
)
try:
    driver.get('http://pythonscraping.com/pages/javascript/ajaxDemo.html')
    time.sleep(3)
    print(driver.find_element(By.ID, 'content').text)
finally:
    # A headless browser has no visible window to notice and close by hand,
    # so always end the session explicitly.
    driver.quit()

## Expected Conditions

In [None]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Replace the fixed sleep with an explicit wait: block (up to 10 seconds)
# until an element with id "loadedButton" is present in the DOM.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(
    service=Service(CHROMEDRIVER_PATH),
    options=chrome_options)

try:
    driver.get('http://pythonscraping.com/pages/javascript/ajaxDemo.html')
    try:
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'loadedButton')))
    finally:
        # Print the content whether or not the wait succeeded, so the page
        # state is visible even when the wait raises.
        print(driver.find_element(By.ID, 'content').text)
finally:
    # Outer finally guarantees the session is ended even if get() or the
    # content lookup above raises.
    driver.quit()

## Other Supported Selenium Browsers

In [None]:
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager

# Resolve and print the driver path for each additional browser, in the
# order Firefox (Gecko) then Edge.
for manager_cls in (GeckoDriverManager, EdgeChromiumDriverManager):
    print(manager_cls().install())

## Selenium can be used to solve a lot of difficult problems

In [None]:
from urllib.request import urlopen
# BeautifulSoup is used below; import it here so this cell does not depend
# on a later cell having been run first.
from bs4 import BeautifulSoup

# Plain HTTP fetch of the page, for comparison with the Selenium fetch of
# the same URL in the next cell. The context manager closes the response
# once the body has been parsed.
with urlopen('https://www.cloudflare.com/plans/zero-trust-services/#overview') as html:
    soup = BeautifulSoup(html.read(), 'html.parser')

In [None]:
# Fetch the same page through a real browser and print the first 200
# characters of the rendered page source.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
try:
    driver.get("https://www.cloudflare.com/plans/zero-trust-services/#overview")
    time.sleep(2)
    print(driver.page_source[0:200])
finally:
    # quit() ends the browser and the driver process, even on failure.
    driver.quit()

## Performing Actions with Selenium

In [None]:
# We will use https://www.dunkindonuts.com/en/locations 
# to search for Dunkin Donuts Locations
from selenium.webdriver.common.by import By

# Type a ZIP code into the location search box and click the submit button.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get("https://www.dunkindonuts.com/en/locations")

    location_field = driver.find_element(By.CSS_SELECTOR, 'input#location')
    location_field.send_keys('02155')
    time.sleep(2)
    button = driver.find_element(By.CSS_SELECTOR, 'div.location-search__submit input')
    button.click()
finally:
    # End the session even if an element lookup fails; quit() also stops
    # the driver process, which close() leaves running.
    driver.quit()


In [None]:
from selenium.webdriver.common.keys import Keys

# Same search as the previous cell, but the ZIP code is typed one character
# at a time with a pause between keystrokes, followed by ENTER.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get("https://www.dunkindonuts.com/en/locations")

    location_field = driver.find_element(By.CSS_SELECTOR, 'input#location')
    text = '02155'
    for char in text:
        location_field.send_keys(char)
        time.sleep(0.5)
    location_field.send_keys(Keys.ENTER)
    button = driver.find_element(By.CSS_SELECTOR, 'div.location-search__submit input')
    print(button)
    button.click()
finally:
    # End the session even if a lookup or click raises; quit() also stops
    # the driver process, which close() leaves running.
    driver.quit()

## You Don't Always Have to Perform Actions

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Request the results page directly via its query string instead of driving
# the search form. The context manager closes the HTTP response once the
# body has been parsed.
with urlopen('https://www.dunkindonuts.com/en/locations?location=02155') as html:
    soup = BeautifulSoup(html.read(), 'html.parser')

print(soup.select('.store-item__address'))

In [None]:
# Load the results URL in a browser and print the text of every
# div.js-store-address element, separated by a dashed rule.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get('https://www.dunkindonuts.com/en/locations?location=02155')

    time.sleep(1)
    for address in driver.find_elements(By.CSS_SELECTOR, 'div.js-store-address'):
        print('-'*20)
        print(address.text)
finally:
    # quit() ends the browser and the driver process, even on failure.
    driver.quit()

### Project: Crawling Target with Selenium

In [None]:
# Hints: 
# Product page URLs always start with "/p/"
# Product listing pages always start with '/pl/'
# Project starting point: open a product listing page and print the href of
# every anchor on it.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get('https://www.target.com/pl/489309002')

    # Not used yet in this cell; left in place as the starting state for
    # the crawl.
    product_links = []
    visited_product_links = []
    time.sleep(2)
    for link in driver.find_elements(By.TAG_NAME, 'a'):
        print(link.get_attribute('href'))
finally:
    # Always end the session so an interrupted or failed crawl does not
    # leave a browser and driver process behind.
    driver.quit()
