# Selenium 2

In [33]:
import requests 
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options


In [34]:
#options
options=Options()
options.headless= False #hide gui
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_experimental_option(
#make chrome not load images and java scrpit
    "prefs",{"profile.managed_default_content_settings.images":2}

)

In [35]:
driver=webdriver.Chrome(ChromeDriverManager().install(),options=options)

# Scraping Twitch

In [36]:
#go to acces this website
driver.get("https://www.twitch.tv/directory/game/Art")

In [37]:
#driver.quit()

In [38]:
#from parsel import Selector
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
#object which blocks our program until a specific condition is met. In this case, our condition is a presence of an element that we select through a CSS selector.

#hecking whether the page has loaded
element = WebDriverWait(driver=driver, timeout=5).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'div[data-target=directory-first-item]'))
)

In [39]:
from parsel import Selector

sel = Selector(text=driver.page_source)
parsed = []
for item in sel.xpath("//div[contains(@class,'tw-tower')]/div[@data-target]"):
    parsed.append({
        'title': item.css('h3::text').get(),
        'url': item.css('.tw-link::attr(href)').get(),
        'username': item.css('.tw-link::text').get(),
        'tags': item.css('.tw-tag ::text').getall(),
        'viewers': ''.join(item.css('.tw-media-card-stat::text').re(r'(\d+)')),
    })

In [40]:
#for parsing we use Beautiful soup

In [41]:
import pandas as pd
pd.DataFrame(parsed)

Unnamed: 0,title,url,username,tags,viewers
0,Weekend Marathon!,/bobross/videos,,[English],17
1,1,/eo58/videos,,[Korean],1
2,싱글벙글 그림 공부,/tmrtir/videos,,"[Drawing, Korean]",90
3,[ENG/CZ] 💀Orc lady💀| !deviantart,/tofusenshi/videos,,"[Digital Art, Czech Republic]",189
4,리노참치 그림방송,/rinotuna/videos,,[Korean],616
5,🌱 Local dog creates an OC design with the Pestie!,/yuniiho/videos,,"[Vtuber, Digital Art, Anime]",873
6,(ﾉ◕ヮ◕)ﾉ*:･ﾟ✧ Cosy vibes with Lana | !Giveaway ...,/michi/videos,,"[No Backseating, English]",72
7,ART COLLAB W/ MICHI| !day1 !adv !social,/lanalane/videos,,"[Depression, Chilled, Anime]",70
8,3D Waifus 💼Nephtunie Day 3💼,/shonzo/videos,,"[English, Speedrun, Vtuber]",198
9,Drawing! ❖ !chatgame !socials,/orobou/videos,,"[Vtuber, Anime]",70


# Browser Automation


## is used for testing 

## for us we only setup our driver protocol and get our url as we act like clients then finally in the last we close the browser

# the general process followed when performing web scraping is:

- Use the webdriver for the browser being used to get a specific URL.
- Perform automation to obtain the information required.
- Download the content required from the webpage returned.
- Perform data parsing and manipulation on the content.
- Reformat, if needed, and store the data for further analysis.

# Selenium is used along with BeautifulSoup to scrape and then carry out data manipulation

- Selenium is needed in order to carry out web scraping and automate the chrome browser
- BeautifulSoup is needed as an HTML parser, to parse the HTML content we scrape
- Re is imported in order to use regex to match our keyword
- 

## kinds of web automation :
    Filling out forms or carrying out searches

Example of Google search automation using Selenium with Python.



In [None]:

from selenium import webdriver

from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.common.by import By



driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))



driver.get(“https://www.google.com/”)

search = driver.find_element(by=By.NAME,value=“q”)

search.send_keys(“Selenium”)

search.send_keys(Keys.ENTER)



- First, the driver loads google.com, which finds the search bar using the name locator. It types “Selenium” into the searchbar and then hits enter.

Maximizing the window


## driver.maximize_window()


Taking Screenshots


## driver.save_screenshot(‘article.png’)

#### Using locators to find elements

Let’s say we don’t want to get the entire page source and instead only want to web scrape a select few elements. This can be carried out by using Locators in Selenium

## These are some of the locators compatible for use with Selenium:

- Name
- ID
- Class Name
- Tag Name
- CSS Selector
- XPath

 header=driver.find_element(By.ID, “toc0”)

#### Scrolling

In [2]:
#driver.execute_script(“window.scrollTo(0, document.body.scrollHeight);”)

#### This scrolls to the bottom of the page, and is often helpful for websites that have infinite scrolling.

# What is XPath in Selenium?


XPath is a technique in Selenium to navigate through the HTML structure of a page. XPath enables testers to navigate through the XML structure of any document, and this can be used on both HTML and XML documents.



### While other locators in Selenium that search for elements using tags or CSS class names are simpler to use,

### XPath provides an option to dynamically search for an element within a web page

In [3]:
#find_element_by_xpath() method 

## Types of XPath in Selenium


### 1. Absolute Path


In [None]:
<html>
<head>...</head>
<body>
<form id="loginForm">
<input name="email" type="text" value="Business Email" />
<input name="name" type="text" value="First Name" />
<input name="name" type="text" value="Last Name" />
</form>
</body>
</html>

In [None]:
html/body/form/input[3]

## 2. Relative Path


A relative path, or a double slash search, begins with double slashes. The double slashes signify a break in the absolute path. Here is how to select the same business email field using a relative path.

In [None]:
//form/input[3]

## How to handle dynamic elements in Selenium using XPath?

In [None]:
1. Using attributes
in case more than one form 

In [None]:
//form[@id='loginForm']/input[3]  # shows the use of a single attribute

In [None]:
//input[@name='name'][@value='Last Name']
#use multiple attributes of the same tag to locate it on the page

In [None]:
2. Logical operators in selections
//input[@id='name' or @name='name']

In [None]:
3. Using text

One may search for an element using the text that it contains too. For instance, to select a link that says “Click Me”, one can use the following search:



In [None]:
//a[text()='Click Me']

This snippet searches for any hyperlink that contains the text “Click Me” within it. Replace the tag with a wildcard * to search for any element that contains the text “Click Me”.



In [None]:
//*[text()='Click Me']