## Scrape using requests and BeautifulSoup

In [1]:
# Imports:
import requests
from bs4 import BeautifulSoup
import time                      # for sleeping between multiple requests
import re                        # for regular expressions
# Custom User-Agent header; passed to requests.get() so the server sees this bot's name.
headers = {
    "user-agent": "scrapingCourseBot",
}

In [2]:
# Download the page once. The timeout keeps the cell from hanging forever on an
# unresponsive server, and raise_for_status() turns an HTTP error (4xx/5xx) into
# an exception instead of letting us silently parse an error page.
r = requests.get('http://testing-ground.scraping.pro/blocks', headers=headers, timeout=10)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'lxml')

# Retrieve first item name:
# find() returns only the first matching <div class="name"> element.
result = soup.find("div", class_="name").text
print(result)

Dell Latitude D610-1.73 Laptop Wireless Computer


In [3]:
# Retrieve all item names. find_all() matches every <div class="name"> on the
# page, so the items of both tables are printed.
results = soup.find_all("div", class_="name")
for name_tag in results:
    print(name_tag.text)

Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)
Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)


## Scrape using Selenium / webdriver (headless Firefox)

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options

In [5]:
# Configure headless Firefox entirely through Options. Passing a FirefoxProfile
# positionally, the `options.headless` setter and the find_element_by_* helpers
# are gone from current Selenium 4 releases; the calls below work on both
# Selenium 3 and Selenium 4.
options = Options()
options.add_argument("-headless")
# Override the browser's User-Agent so the bot is identifiable to the server.
options.set_preference("general.useragent.override", "scrapingCourseBot")

driver = webdriver.Firefox(options=options)
driver.get('http://testing-ground.scraping.pro/blocks')

# Retrieve first item name:
# find_element() returns the first element matching the CSS selector.
elem = driver.find_element(By.CSS_SELECTOR, 'div.name')
print(elem.text)

Dell Latitude D610-1.73 Laptop Wireless Computer


In [7]:
# Retrieve all item names: Note: we scrape both tables now:
# find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
# which was removed in Selenium 4.
try:
    elems = driver.find_elements(By.CSS_SELECTOR, 'div.name')
    for e in elems:
        print(e.text)
finally:
    # Always end the browser session, even if reading an element fails,
    # so no headless Firefox process is left behind.
    driver.quit()

Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)
Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)


## Scrape using Selenium / webdriver (headless Chrome)

In [17]:
# Configure headless Chrome. The `options.headless` setter was removed from
# Selenium 4, so headless mode is requested with the browser's own command-line
# switch, which works on Selenium 3 and 4 alike.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
# Override the browser's User-Agent so the bot is identifiable to the server.
options.add_argument('user-agent=scrapingCourseBot')

driver = webdriver.Chrome(options=options)
driver.get('http://testing-ground.scraping.pro/blocks')

# Retrieve first item name:
# find_element() returns the first element matching the CSS selector.
elem = driver.find_element(By.CSS_SELECTOR, 'div.name')
print(elem.text)

Dell Latitude D610-1.73 Laptop Wireless Computer


In [13]:
# Retrieve all item names: Note: we scrape both tables now:
# find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
# which was removed in Selenium 4.
elems = driver.find_elements(By.CSS_SELECTOR, 'div.name')
for e in elems:
    print(e.text)

Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)
Dell Latitude D610-1.73 Laptop Wireless Computer
Samsung Chromebook (Wi-Fi, 11.6-Inch)
Apple MacBook Pro MD101LL/A 13.3-Inch Laptop (NEWEST VERSION)
Acer Aspire AS5750Z-4835 15.6-Inch Laptop (Black)
HP Pavilion g7-2010nr 17.3-Inch Laptop (Black)
ASUS A53Z-AS61 15.6-Inch Laptop (Mocha)


In [16]:
# End the WebDriver session opened in the Chrome cell above.
driver.quit()