In [1]:
import datetime
import os
import time
from io import StringIO

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait


class Driver:
    '''
    Small convenience wrapper around a Selenium Chrome browser.

    The instance owns the browser (``self.browser``) and exposes shortcuts for
    navigation, explicit waits, scrolling, screenshots and <select> handling.
    '''
    def __init__(self, path, chrome_options, webdriver, headless=None):
        '''
        Start a Chrome browser.

        path:
                    location of the chromedriver executable
        chrome_options:
                    Options instance to configure; a new one is created when
                    None is given. The instance is modified in place.
        webdriver:
                    the selenium ``webdriver`` module (or any object exposing
                    ``Chrome(path, options=...)``)
        headless:
                    truthy -> add the ``--headless`` argument
        '''
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'

        self.webdriver = webdriver
        # Configure the very same options object that is handed to Chrome.
        # Configuring one object and passing a different one would silently
        # drop the headless / user-agent / window-size settings.
        self.chrome_options = chrome_options if chrome_options is not None else Options()
        if headless:
            self.chrome_options.add_argument("--headless")
        self.chrome_options.add_argument(f'user-agent={user_agent}')
        # Chrome expects "width,height" for this switch.
        self.chrome_options.add_argument("--window-size=1225,900")
        self.browser = self.webdriver.Chrome(path, options=self.chrome_options)

    def get(self, url):
        '''
        Navigate the browser to ``url`` and return whatever ``browser.get`` returns.
        '''
        return self.browser.get(url)

    def quit(self):
        '''
        Close the browser and end the driver session.
        '''
        self.browser.quit()

    @property
    def current_url(self):
        '''
        browser.current_url
        '''
        return self.browser.current_url

    @property
    def page_source(self):
        '''
        browser.page_source
        '''
        return self.browser.page_source

    def scroll_page(self, SCROLL_PAUSE_TIME=1):
        '''
        Scroll down until the page stops growing, to fully load lazy content.

        After every scroll the method sleeps ``SCROLL_PAUSE_TIME`` seconds and
        re-reads ``document.body.scrollHeight``. It stops once two consecutive
        scrolls leave the height unchanged (the second one is a retry in case
        the page was merely slow).
        '''
        height_script = "return document.body.scrollHeight"
        last_height = self.browser.execute_script(height_script)
        unchanged_scrolls = 0
        while unchanged_scrolls < 2:
            # Scroll down to bottom
            self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Wait to load page
            time.sleep(SCROLL_PAUSE_TIME)
            new_height = self.browser.execute_script(height_script)
            if new_height == last_height:
                unchanged_scrolls += 1
            else:
                # The page grew: reset the retry counter and keep scrolling.
                unchanged_scrolls = 0
                last_height = new_height

    @staticmethod
    def _resolve_by(criteria):
        '''
        Map a criteria name (case-insensitive) to the matching ``By`` strategy.
        Any unrecognised name falls back to ``By.XPATH``.
        '''
        # Built inside the method so that ``By`` is only looked up at call time.
        strategies = {
            'CSS_SELECTOR': By.CSS_SELECTOR,
            'CLASS_NAME': By.CLASS_NAME,
            'ID': By.ID,
            'LINK_TEXT': By.LINK_TEXT,
            'PARTIAL_LINK_TEXT': By.PARTIAL_LINK_TEXT,
            'TAG_NAME': By.TAG_NAME,
            'XPATH': By.XPATH,
        }
        return strategies.get(criteria.upper(), By.XPATH)

    def _wait_until(self, condition, criteria, locator, timeout):
        '''
        Wait up to ``timeout`` seconds for ``condition((by, locator))`` to hold
        and return the value produced by ``WebDriverWait.until``.
        '''
        wait = WebDriverWait(self.browser, timeout)
        return wait.until(condition((self._resolve_by(criteria), locator)))

    def wait_elements_by(self, criteria, locator, timeout=10):
        '''
        Wait for elements to be present and return all matches.

        criteria (case-insensitive; anything else is treated as XPATH):
                    • CSS_SELECTOR
                    • CLASS_NAME
                    • ID
                    • LINK_TEXT
                    • PARTIAL_LINK_TEXT
                    • TAG_NAME
                    • XPATH

        locator:
                    e.g.:
                    CLASS_NAME: price
                    CSS_SELECTOR: .price.items
                    TAG_NAME: button

        timeout:
                    maximum number of seconds to wait
        '''
        return self._wait_until(
            EC.presence_of_all_elements_located, criteria, locator, timeout)

    def wait_element_by(self, criteria, locator, timeout=10):
        '''
        Wait for an element to be present and return it.

        criteria (case-insensitive; anything else is treated as XPATH):
                    • CSS_SELECTOR
                    • CLASS_NAME
                    • ID
                    • LINK_TEXT
                    • PARTIAL_LINK_TEXT
                    • TAG_NAME
                    • XPATH

        locator:
                    e.g.:
                    CLASS_NAME: price
                    CSS_SELECTOR: .price.items
                    TAG_NAME: button

        timeout:
                    maximum number of seconds to wait
        '''
        return self._wait_until(
            EC.presence_of_element_located, criteria, locator, timeout)

    def snapshot(self, save_as):
        '''
        browser.save_screenshot()
        save_as : 'foo.png'
        '''
        self.browser.save_screenshot(save_as)

    def search_select(self, criteria, locator, timeout=10):
        '''
        Wait for a <select> element and wrap it in a selenium ``Select``.

        search_select("CLASS_NAME","Items",timeout=10)
        '''
        # Select wraps exactly one element, so use the single-element wait
        # and call it as a method on this instance.
        return Select(self.wait_element_by(criteria, locator, timeout))
    
        
        
        

In [2]:
##############################
#            INIT            #
##############################

# Absolute path to the chromedriver binary; 'chromedriver.exe' is resolved
# against the current working directory (here: next to the notebook/script).
path = os.path.abspath('chromedriver.exe')

browser = Driver(path=path, chrome_options=Options(), webdriver=webdriver)

In [3]:
##############################
#           CASE 1           #
##############################
# Open the form page used in this case.
url = 'https://pythonscraping.com/pages/form.html'
browser.get(url)

In [4]:
# Display the URL the browser is currently on (Driver.current_url property).
browser.current_url

'https://pythonscraping.com/pages/form.html'

In [5]:
# Wait for the <h2> element to be present and display its text.
browser.wait_element_by('TAG_NAME','h2').text

'Tell me your name!'

In [6]:
# Collect every <input> on the page and keep a handle on the ones we need:
# the first-name field, the last-name field and the submit button.
inputs_tag = browser.wait_elements_by('TAG_NAME','input')
for input_tag in inputs_tag:
    # get_attribute() may return None when the attribute is missing, so fall
    # back to '' before lowering. The name is read once per element because
    # every get_attribute() call is a round trip to the browser.
    input_name = (input_tag.get_attribute('name') or '').lower()
    if input_name == 'firstname':
        input_first_name = input_tag
    elif input_name == 'lastname':
        input_last_name = input_tag
    elif (input_tag.get_attribute('type') or '').lower() == 'submit':
        input_submit = input_tag
    else:
        #Not matched
        print(input_tag.text)

In [7]:
# Execute the actions: type into both name fields, then click the submit
# button. The three elements come from the input-collection loop above.
input_first_name.send_keys('Lucas')
input_last_name.send_keys('Damian')
input_submit.click()

In [8]:
# Check the result: display the text of the <body> of the page shown after submitting.
browser.wait_element_by('TAG_NAME','body').text

'Hello there, Lucas Damian!'

In [9]:
##############################
#           CASE 2           #
##############################

# Open the page used in this case (an upload form, per the steps below).
url = 'https://pythonscraping.com/pages/form2.html'
browser.get(url)

In [10]:
# Wait for the <h2> element to be present and display its text.
browser.wait_element_by('TAG_NAME','h2').text

'Upload a file!'

In [11]:
# Collect every <input> on the page and keep a handle on the file field and
# the submit button.
inputs_tag = browser.wait_elements_by('TAG_NAME','input')
for input_tag in inputs_tag:
    # get_attribute() may return None when the attribute is missing, so fall
    # back to '' before lowering. The type is read once per element because
    # every get_attribute() call is a round trip to the browser.
    input_type = (input_tag.get_attribute('type') or '').lower()
    if input_type == 'file':
        input_file = input_tag
    elif input_type == 'submit':
        input_submit = input_tag
    else:
        #Not matched
        print(input_tag.text)

In [12]:
# Absolute path of the file to upload ('foo.jpg' is resolved against the
# current working directory). Sending the path as keystrokes to the file
# input selects the file without opening a file dialog.
path_file = os.path.abspath('foo.jpg')
input_file.send_keys(path_file)

In [13]:
# Submit the upload form.
input_submit.click()

In [14]:
# Display the <body> text of the response page to see the upload result.
browser.wait_element_by('TAG_NAME','body').text

'Sorry, there was an error uploading your file.'

In [15]:
##############################
#           CASE 3           #
##############################

# Open the page used in this case (its gift table is scraped below).
url = 'https://pythonscraping.com/pages/page3.html'
browser.get(url)

In [16]:
# Wait for the <h1> element to be present and display its text.
browser.wait_element_by('TAG_NAME','h1').text

'Totally Normal Gifts'

In [17]:
# Print the text of the element with id="content"; print() is used so the
# line breaks in the text are rendered instead of shown as '\n'.
print(browser.wait_element_by('ID','content').text)

Here is a collection of totally normal, totally reasonable gifts that your friends are sure to love! Our collection is hand-curated by well-paid, free-range Tibetan monks.
We haven't figured out how to make online shopping carts yet, but you can send us a check to:
123 Main St.
Abuja, Nigeria
We will then send your totally amazing gift, pronto! Please include an extra $5.00 for gift wrapping.


In [18]:
# Grab the element with id="giftList" (the gift table) and its full markup.
# `html` is kept as a WebElement because the selenium-based extraction
# further down reuses it.
html = browser.wait_element_by('ID','giftList')
table_html = html.get_attribute('outerHTML')

#The fast way with pandas
# read_html expects a path, URL or file-like object; passing the markup as a
# bare string is deprecated in recent pandas, so wrap it in StringIO.
df_list = pd.read_html(StringIO(table_html))
# read_html returns one DataFrame per <table> found; we passed a single table.
df = df_list[0]
df

Unnamed: 0,Item Title,Description,Cost,Image
0,Vegetable Basket,This vegetable basket is the perfect gift for ...,$15.00,
1,Russian Nesting Dolls,"Hand-painted by trained monkeys, these exquisi...","$10,000.52",
2,Fish Painting,"If something seems fishy about this painting, ...","$10,005.00",
3,Dead Parrot,This is an ex-parrot! Or maybe he's only resting?,$0.50,
4,Mystery Box,"If you love suprises, this mystery box is for ...",$1.50,


In [19]:
#The long and complete way with selenium
# Uses find_elements(By.<strategy>, value), which is available in both
# Selenium 3 and 4 (the find_element(s)_by_* helpers were removed in 4).

#get the table rows (elements with class "gift")
gifts = html.find_elements(By.CLASS_NAME, 'gift')
#get headers from table
headers = [th.text for th in html.find_elements(By.TAG_NAME, 'th')]

results = []
for gift in gifts:
    tr_list = []
    for td in gift.find_elements(By.TAG_NAME, 'td'):
        # Read the text once: every .text access is a round trip to the browser.
        cell_text = td.text
        if cell_text == '':
            # A cell without text is expected to hold the image: keep its src.
            # find_elements returns [] instead of raising when the cell has no
            # <img>, so the row keeps its length and gets None for that cell.
            imgs = td.find_elements(By.TAG_NAME, 'img')
            src = imgs[0].get_attribute('src') if imgs else None
            tr_list.append(src)
        else:
            tr_list.append(cell_text)
    results.append(tr_list)

In [20]:
#Results
# Build a DataFrame from the scraped rows, using the <th> texts as column
# names, and display it.
df = pd.DataFrame(data=results, columns=headers)
df

Unnamed: 0,Item Title,Description,Cost,Image
0,Vegetable Basket,This vegetable basket is the perfect gift for ...,$15.00,https://pythonscraping.com/img/gifts/img1.jpg
1,Russian Nesting Dolls,"Hand-painted by trained monkeys, these exquisi...","$10,000.52",https://pythonscraping.com/img/gifts/img2.jpg
2,Fish Painting,"If something seems fishy about this painting, ...","$10,005.00",https://pythonscraping.com/img/gifts/img3.jpg
3,Dead Parrot,This is an ex-parrot! Or maybe he's only resting?,$0.50,https://pythonscraping.com/img/gifts/img4.jpg
4,Mystery Box,"If you love suprises, this mystery box is for ...",$1.50,https://pythonscraping.com/img/gifts/img6.jpg


In [21]:
##############################
#           CASE 4           #
##############################

url = 'https://pythonscraping.com/pages/javascript/ajaxDemo.html'
browser.get(url)

# Try up to 5 times to find the element with id="loadedButton" (each attempt
# waits up to 30 seconds for it to be present) and click it.
for _ in range(5):
    try:
        loaded_button = browser.wait_element_by('ID','loadedButton',timeout=30)
        loaded_button.click()
        print('Clicked')
        break
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt / SystemExit still stop the loop.
    except Exception:
        print('Try again..')


Clicked


In [22]:
##############################
#           CASE 5           #
##############################

#LOGIN WITH RECAPTCHA
# Open the reCAPTCHA-protected form page used in this case.
url = 'https://pythonscraping.com/pages/recaptcha/humansonly.html'

browser.get(url)

In [23]:
# Fill in the two text fields (ids "name" and "color").
txt_name = browser.wait_element_by('ID','name')
txt_name.send_keys('Lucas')
txt_color = browser.wait_element_by('ID','color')
txt_color.send_keys('Black')
# Click the first <iframe> found on the page.
# NOTE(review): presumably this is the reCAPTCHA checkbox widget - confirm.
cb_captcha = browser.wait_element_by('TAG_NAME','iframe')
cb_captcha.click()
btn_submit = browser.wait_element_by('ID','submit')
# Fixed 2-second pause before submitting.
# NOTE(review): presumably gives the captcha time to validate - confirm;
# a fixed sleep may be too short on a slow connection.
time.sleep(2)
btn_submit.click()


In [24]:
# Wait for the <body> of the page shown after submitting and keep the element.
body = browser.wait_element_by('TAG_NAME','body')

In [25]:
# Print the page text; print() is used so line breaks are rendered.
print(body.text)

Here's the results!
Array ( [name] => Lucas [color] => Black [g-recaptcha-response] => 03AGdBq25EYNiL6O35jBP_U7JbVc64ZwGoozWadKlTgtrOIPK93r0x3zYURFgNO0hYtwRpY2ulGhfKfMfVyQmIsH4IYR_pi1gHkNKelLLybC1a45RBMn4vi6xLFmOXUXc6BpCV7rQtpCOU7DqzhiAZOq6ruoh-nd9PumrNW4bwrofHVqiToFNQadtpuVXTCpj1uSydsJMv1Tc65CmIr62AFqIsM-LF0BEbf81jlOE2WgZ2wPqZ0WUdgGViKhzzNMPSOykl04_99oLFDWDCraBz-cMaj6kixpZS2uvr_kVwx2V-tvjfaM_NA66_UhJC9m244XDOlHKVGhapcpFM-fJcC8_2NJu9dSfSeYCDSPJnpjgAkZW1TXNy2JbrLEGfDcoyutQzpwCU1Tfix2LtNkRmsFnAAka8E98sBSqYrpLgd9XZGDqLzOcjQ1CL7w42hGn35yoRrNOfu3XZhDOIIMAqvQOTT3xweTz3mQ )
URL is:
https://www.google.com/recaptcha/api/siteverify?secret=6Lfq-wETAAAAAGV2pROuCPCjn_9M1JWjjO_Pgdqp&response=03AGdBq25EYNiL6O35jBP_U7JbVc64ZwGoozWadKlTgtrOIPK93r0x3zYURFgNO0hYtwRpY2ulGhfKfMfVyQmIsH4IYR_pi1gHkNKelLLybC1a45RBMn4vi6xLFmOXUXc6BpCV7rQtpCOU7DqzhiAZOq6ruoh-nd9PumrNW4bwrofHVqiToFNQadtpuVXTCpj1uSydsJMv1Tc65CmIr62AFqIsM-LF0BEbf81jlOE2WgZ2wPqZ0WUdgGViKhzzNMPSOykl04_99oLFDWDCraBz-cMaj6kixpZS2uvr_kVwx2V-tvjfaM_NA66

In [26]:
# Close the browser and end the driver session (Driver.quit).
browser.quit()