# The 'webbrowser' module
Will open the web page of a web address passed to it in the default browser.

In [1]:
import webbrowser, sys, pyperclip

In [2]:
# Example use:

webbrowser.open('https://automatetheboringstuff.com')

True

In [5]:
"""
Create a program 'mapIt.py', run from the command line which will open up a map from Google maps of the address passed to it.

syntax:

mapIt.py <address>


example:

mapIt.py 5363 Joseph Lane San Jose CA

The program (stored in 'mapIt.py'):

#! /Users/gary/anaconda/envs/py3k/bin/python3

import webbrowser, sys, pyperclip

sys.argv # ['mapit.py', '870', 'Valencia', 'St.']

# Check if command line arguments were passed
if len(sys.argv) > 1:
    # ['mapit.py', '870', 'Valencia', 'St.'] -> '870 Valencia St.'
    address = ' '.join(sys.argv[1:])
else:
    address = pyperclip.paste()

# https://www.google.com/maps/place/<ADDRESS>
webbrowser.open('https://www.google.com/maps/place/' + address)

"""

sys.argv # ['mapit.py', '870', 'Valencia', 'St.']

# Check if command line arguments were passed
if len(sys.argv) > 1:
    # ['mapit.py', '870', 'Valencia', 'St.'] -> '870 Valencia St.'
    address = ' '.join(sys.argv[1:])
else:
    address = pyperclip.paste()
    
# https://www.google.com/maps/place/<ADDRESS>    
webbrowser.open('https://www.google.com/maps/place/' + address)

True

### The requests Module
https://requests.readthedocs.org/en/latest/

In [25]:
import requests

res = requests.get('https://automatetheboringstuff.com/files/rj.txt')

In [26]:
# status code 200: Everything OK

res.status_code

200

In [27]:
# Get the length of the returned object

len(res.text)

174130

In [24]:
print(res.text[:500])

ï»¿The Project Gutenberg EBook of Romeo and Juliet, by William Shakespeare

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.org/license


Title: Romeo and Juliet

Author: William Shakespeare

Posting Date: May 25, 2012 [EBook #1112]
Release Date: November, 1997  [Etext #1112]

Language: English


*** S


In [17]:
# The '.raise_for_status()' method will return nothing if the download was successful and return an error if not successful.

res.raise_for_status()

In [18]:
# Error if file not found.

badRes = requests.get('http://automatetheboringstuff.com/nonexistantFile.txt')

In [19]:
badRes.raise_for_status()

HTTPError: 404 Client Error: Not Found for url: http://automatetheboringstuff.com/nonexistantFile.txt

In [28]:
# The 'open' function will open the file for reading but MUST pass it the 'wb' ('write binary') option in order to maintain Unicode encoding of the text (see http://bit.ly/unipain for more information). Here, 'playFile' is a stream object to a file called 'RomeoAndJuliet.txt'

playFile = open('RomeoAndJuliet.txt', 'wb')

In [29]:
# To write the stream to a file, use the 'iter_content()' method. Pass it the number of byte-sized chunks to write in each iteration of the 'for' loop. The 'bytes' data type is explained at http://bit.ly/unipain.

for chunk in res.iter_content(100000):
    playFile.write(chunk)

In [31]:
playFile.close()

### Web scraping: The BeautifulSoup Module
Import with 'import bs4'.

Allows the parsing of a web page (as a single character string).

Use browsers 'developer tools' to determine html strings.

In [58]:
import bs4, requests

In [59]:
res = requests.get('http://www.amazon.com/Automate-Boring-Stuff-Python-Programming/dp/1593275994/')

In [60]:
res.raise_for_status()

In [61]:
soup = bs4.BeautifulSoup(res.text, 'html.parser')

In [62]:
soup.select('a > h5 > div > div.a-column.a-span7.a-text-right.a-span-last > span.a-size-medium.a-color-price.header-price')

[<span class="a-size-medium a-color-price header-price">	
 	    
 	    	
 		        $24.65
         	
 	        
         
         </span>]

In [63]:
elems = soup.select('a > h5 > div > div.a-column.a-span7.a-text-right.a-span-last > span.a-size-medium.a-color-price.header-price')

In [64]:
elems[0].text.strip()

'$24.65'

### A program that does all the things above.

    import bs4, requests
    
    def getAmazonPrice(productUrl):
        res = requests.get(productUrl)
        res.raise_for_status()
        
        soup = bs4.BeautifulSoup(res.text, 'html.parser')
        elems = soup.select('a > h5 > div > div.a-column.a-span7.a-text-right.a-span-last > span.a-size-medium.a-color-price.header-price')
        return elems[0].text.strip()
    
    
    
example use:

price = getAmazonPrice('http://www.amazon.com/Automate-Boring-Stuff-Python-Programming/dp/1593275994')

print('The price is ' + price)

In [67]:
# Implementation of the above function.

import bs4, requests

def getAmazonPrice(productUrl):
    res = requests.get(productUrl)
    res.raise_for_status()
    
    soup = bs4.BeautifulSoup(res.text, 'html-parser')
    elems = soup.select('a > h5 > div > div.a-column.a-span7.a-text-right.a-span-last > span.a-size-medium.a-color-price.header-price')
    
    return elems[0].text.strip()

price = getAmazonPrice('http://www.amazon.com/Automate-Boring-Stuff-Python-Programming/dp/1593275994/')

print('The price is' + price)


FeatureNotFound: Couldn't find a tree builder with the features you requested: html-parser. Do you need to install a parser library?

### Selenium: Allows control of 'FireFox' webbrowser using Python
Must load it with 'from selenium import webdriver'

https://selenium-python-readthedocs.org

In [69]:
from selenium import webdriver

In [108]:
browser = webdriver.Firefox()

In [109]:
browser.get('https://automatetheboringstuff.com')

In [76]:
elem = browser.find_element_by_css_selector('.entry-content > ol:nth-child(20) > li:nth-child(1) > a:nth-child(1)')

In [77]:
elem.click()

In [78]:
elems = browser.find_elements_by_css_selector('p')
len(elems)

109

In [100]:
searchElem = browser.find_element_by_css_selector('.search-field')

In [101]:
searchElem.send_keys('zophie')

In [102]:
searchElem.submit()

In [103]:
browser.back()

In [104]:
browser.forward()

In [105]:
browser.refresh()

In [106]:
browser.quit()

In [110]:
elem = browser.find_element_by_css_selector('.entry-content > p:nth-child(6) > em:nth-child(1)')

In [111]:
elem.text

'Automate the Boring Stuff with Python'

In [112]:
elem = browser.find_element_by_css_selector('html')

In [113]:
elem.text

'Skip to content\nAutomate the Boring Stuff with Python\nLearn to Code\n\n(Skip to Table of Contents)\n"The best part of programming is the triumph of seeing the machine do something useful. Automate the Boring Stuff with Python frames all of programming as these small triumphs; it makes the boring fun."\n—Hilary Mason, Founder of Fast Forward Labs\nWil Wheaton on reading Automate the Boring Stuff with Python: "I’m having a lot of fun breaking things and then putting them back together, and just remembering the joy of turning a set of instructions into something useful and fun, like I did when I was a kid."\nAutomate the Boring Stuff with Python is recommended on the Open Source Summer 2015 Reading List!\nEveryone should learn to code, but not everyone needs to become a software engineer or computer scientist. Automate the Boring Stuff with Python is written for office workers, students, administrators, and anyone who uses a computer to learn how to code small, practical programs to au