#  Automate the boring stuff takeaways
 1. [Regular expressions](#Regular-expressions)
 2. [Working with Files](#Working-with-Files)
 3. [Web Scraping](#Web-Scraping)
 4. [Documents](#Documents)
 5. [Email](#Email)
 6. [GUI Automation](#GUI-Automation)


## Regular expressions

Regular expressions, called regex for short, are descriptions for a pattern of text.

're' module in Python:

- *re.compile()*: to create a regex object - *returns a regex object*

- *regex_object.search('string')*: to search for the pattern on string - *returns a match object*

- *match_object.group()*: to get the actual matched text - *returns a string*


| Shorthand | Represents |
| - | - | 
| \d | Any numeric digit from 0 to 9.|
| \D | Any character that is not a numeric digit from 0 to 9. |
| \w | Any letter, numeric digit, or the underscore character. |
| \W | Any character that is not a letter, numeric digit, or the underscore character. |
| \s | Any space, tab, or newline character. |
| \S | Any character that is not a space, tab, or newline. |
| \  | Escape characters |
| &#124; | Match one of many |
| ? | Optional match |
| * | Match zero or more |
| + | Match one or more |
| {n,m} | Match *n to m* repetitions (no space after the comma) |
| ^ | Match must occur at the beginning |
| $ | Match must occur at the end |
| . | Wildcard |


In [None]:
import re
import pyperclip


Examples


In [None]:
# Phone numbers such as 415-555-1234, (415) 555-1234 or 555-1234 x123.
# The outermost group wraps the whole number, so it is element 0 of every
# tuple returned by findall().
phoneRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    \d{3}                           # first 3 digits
    (\s|-|\.)                       # separator
    \d{4}                           # last 4 digits
    (\s*(ext\.?|x)\s*\d{2,5})?      # extension; the dot is escaped so it is literal
    )''', re.VERBOSE)

# E-mail addresses; again the outermost group is the full match.
emailRegex = re.compile(r'''(
    [a-zA-Z0-9._%+-]+   # username
    @                   # @ symbol
    [a-zA-Z0-9.-]+      # domain name
    (\.[a-zA-Z]{2,})    # dot-something; no upper bound, TLDs can exceed 4 letters
    )''', re.VERBOSE)
# Clipboard text: the current clipboard contents are the text the regexes scan
text = str(pyperclip.paste())

In [None]:
# Gather every phone number and e-mail address found in the clipboard text.
# findall() yields one tuple of groups per hit; element 0 is the full match.
matches = [hit[0] for hit in phoneRegex.findall(text)]
matches.extend(hit[0] for hit in emailRegex.findall(text))

if not matches:
    print('No phone numbers or email addresses found.')
else:
    # One result per line, both on the clipboard and on screen
    joined = '\n'.join(matches)
    pyperclip.copy(joined)
    print('Copied to clipboard:')
    print(joined)


## Working with Files

In [None]:
import os
import shelve
import pprint
import shutil
import zipfile

Working directory and paths

In [None]:
# Show the directory that relative paths are resolved against
print('Current working directory: ', os.getcwd(), sep='\n')

In [None]:
# change directory, then go back to where we started
original_dir = os.getcwd()
print('\nChange directory to \\test')
os.chdir('.\\test')
try:
    print('Current working directory: ')
    print(os.getcwd())
finally:
    # Return to the saved directory rather than '..', which is not guaranteed
    # to lead back (e.g. when 'test' is reached through a link), and do so
    # even if the prints above fail.
    os.chdir(original_dir)

In [None]:
# Absolute vs. relative paths
#   absolute: the complete path, starting at the root folder (here a drive)
#   relative: interpreted from the program's current working directory
absolute_path = 'D:\\Documents\\PycharmProjects\\Automate-the-Boring-Stuff-w-Python'
relative_path = '.\\Automate-the-Boring-Stuff-w-Python'

In [None]:
# Turn relative paths into absolute ones (resolved against the cwd)
for lead, rel in (('\n', 'test'), ('', '..\\test')):
    print('{}Absolute path to "{}"'.format(lead, rel))
    print(os.path.abspath(rel))

In [None]:
# Check whether each sample path is absolute
print('\nAbsolute path check: ')
for candidate in (absolute_path, relative_path):
    print(os.path.isabs(candidate))

In [None]:
# os.path.relpath(path, start): path expressed relative to start
# (start defaults to the current working directory when omitted)
drive_root = 'D:\\'
print(f'\nRelative path from {absolute_path} to {drive_root}')
print(os.path.relpath(absolute_path, drive_root))

In [None]:
# Build a path with os.path.join, then take it apart again
test_path = os.path.join(absolute_path, 'test', 'test_file.txt')
print('\nPath dir and base: ')
print(test_path)
# split() returns (dirname, basename) as a tuple
head_tail = os.path.split(test_path)
print(head_tail[0])
print(head_tail[1])
print(head_tail)

In [None]:
# make directory (including missing parents); exist_ok=True makes the call a
# no-op when the folder is already there, without a separate exists() check
# that could race with another process creating it
os.makedirs('.\\test\\makefolder', exist_ok=True)


Open and read files

In [None]:
# Opening files
# open() with no mode argument opens the file for reading in text mode.
# The handle is deliberately kept in test_file: the next cell reads from it.
print('\nOpening file in {}'.format(test_path))
test_file = open(test_path)

In [None]:
# Reading files
# read() returns the rest of the file as a single string
test_content = test_file.read()
test_file.close()  # nothing else reads from this handle, so release it
print('File content: {}'.format(test_content))

# readlines() returns a list with one string per line; the with-block closes
# the file even if reading raises
with open('.\\test\\zen.txt') as zen_file:
    print(zen_file.readlines())

In [None]:
# Writing to files r=read w=write a=append
# Each with-block closes the file on exit, even if a write or read raises.
with open('ender.txt', 'w') as ender_file:   # 'w' creates or overwrites
    ender_file.write('The enemy gate is down\n')
with open('ender.txt', 'a') as ender_file:   # 'a' appends to the end
    ender_file.write('The giant\'s drink')
with open('ender.txt') as ender_file:        # default mode is read
    content = ender_file.read()
print(content)

In [None]:
# Storing variables with shelve
heroes = ['Jim', 'Finn', 'Thrall']
# The shelf works like a dict persisted on disk; leaving the with-block
# closes it, which is what flushes the data, even if the assignment raises
with shelve.open('my_data') as shelf_file:
    shelf_file['heroes'] = heroes

In [None]:
# retrieving
# Re-open the shelf and look the value up by the key it was stored under.
# The shelf is left open because the next cell lists its keys and values.
shelf_file = shelve.open('my_data')
print('\nStoring and retrieving data with shelve: ', shelf_file['heroes'])

In [None]:
# Data is stored like a dictionary
# keys() and values() are wrapped in list() to print the actual contents
print(list(shelf_file.keys()))
print(list(shelf_file.values()))
# Last use of the shelf: close it so the underlying file is released
shelf_file.close()

In [None]:
# pretty print into .py file as a python variable and import
cats = [{'name': 'Zophie', 'desc': 'chubby'}, {'name': 'Pooka', 'desc': 'fluffy'}]
# pformat() returns the pretty-printed text as a string instead of printing
# it; that text is valid Python, so the generated file can be imported
formatted_cats = pprint.pformat(cats)
print(formatted_cats)
with open('my_cats.py', 'w') as cats_file:   # closed even if write() raises
    cats_file.write('cats = ' + formatted_cats + '\n')

Copy and move file and folders

In [None]:
# create copy folder; exist_ok=True makes this a no-op when it already
# exists, without a separate (race-prone) exists() check
os.makedirs('.\\testcopy', exist_ok=True)

In [None]:
# copy a single file under a new name, but only if the source is present
source_file = 'test\\test_file.txt'
if os.path.exists(source_file):
    shutil.copy(source_file, 'testcopy\\test_file_copy.txt')

In [None]:
# copy a whole directory tree; skipped when the backup already exists
backup_marker = '.\\testbackup'
if not os.path.exists(backup_marker):
    shutil.copytree('test', 'testbackup')

In [None]:
# move a file into another folder
# NOTE(review): assumes 'testcopy' already exists as a directory - confirm
zen_source = 'test\\zen.txt'
if os.path.exists(zen_source):
    shutil.move(zen_source, 'testcopy')

In [None]:
# Renaming is a move within the same folder to a different name.
# The cell toggles between the two names so it can be run repeatedly.
first_name = 'test\\test_file.txt'
second_name = 'test\\test_file1.txt'
if os.path.exists(first_name):
    shutil.move(first_name, second_name)
else:
    shutil.move(second_name, first_name)

Delete files and folders

In [None]:
# delete single file
# os.unlink('testcopy\\test_file_copy.txt')

# delete empty folder: os.rmdir removes the folder if it is there,
# otherwise the folder is (re)created so the cell can be run again
empty_folder = 'test\\makefolder'
if os.path.exists(empty_folder):
    os.rmdir(empty_folder)
else:
    os.makedirs(empty_folder)

# Delete folder and contents
# if os.path.exists('.\\testbackup'):
#     shutil.rmtree('.\\testbackup')

Iterate over all folders and subfolders

In [None]:
# os.walk yields a (folder, subfolder names, file names) triple for the
# starting folder and for every folder beneath it
for foldername, subfolders, filenames in os.walk('.\\'):
    print(f'Current folder: {foldername}')
    print(f'Current subfolders: {subfolders}')
    print(f'Current filenames: {filenames}')

Read zipped file

In [None]:
examplezip = zipfile.ZipFile('testcopy\\ender.zip')
print(examplezip.namelist())

Zipped file info

In [None]:
# Look up one archive member and compare its original and compressed sizes
zenInfo = examplezip.getinfo('zen.txt')
original_size = zenInfo.file_size
packed_size = zenInfo.compress_size
print(original_size)
print(packed_size)
ratio = round(original_size / packed_size, 2)
print(f'Compressed file is {ratio}x smaller!')

Extract zip file to the cwd or to a specified path

In [None]:
# Extract everything into the current working directory:
# examplezip.extractall()
# Extract a single member into a given folder:
# examplezip.extract('name', 'path')
# Close the archive opened in the earlier cell
examplezip.close()

Create zip files

In [None]:
# 'w' creates (or overwrites) the archive; the with-block closes it on exit,
# so a failing write() cannot leave the archive open.
with zipfile.ZipFile('new.zip', 'w') as newZip:
    # ZIP_DEFLATED compresses the member instead of just storing it
    newZip.write('ender.txt', compress_type=zipfile.ZIP_DEFLATED)


## Web Scraping


In [None]:
import webbrowser
import requests
import bs4

Get an address from the clipboard (with pyperclip) and open it in Google Maps

In [None]:
from urllib.parse import quote

address = pyperclip.paste()
# Percent-encode the address so spaces and characters such as '#', '?', '&'
# or '/' cannot change the meaning of the URL; strip() drops the stray
# whitespace/newline that copied text often carries.
webbrowser.open('https://www.google.com/maps/place/' + quote(address.strip(), safe=''))

Using the requests module to download files from the Web

In [None]:
# Get url and check for errors.
# Connection failures raised by get() and bad HTTP statuses raised by
# raise_for_status() are both requests.exceptions.RequestException.
try:
    res = requests.get('https://automatetheboringstuff.com/files/rj.txt')
    res.raise_for_status()
except requests.exceptions.RequestException as exc:
    print('There was a problem: %s' % exc)
else:
    # Save downloaded content to file - only after a successful download, so
    # an error page is never written out as if it were the play.
    # 'wb' because iter_content() yields bytes; written in 100000-byte chunks.
    with open('RomeoAndJuliet.txt', 'wb') as playFile:
        for chunk in res.iter_content(100000):
            playFile.write(chunk)

Parse HTML data using beautiful soup


In [None]:
# Get url
res = requests.get('http://nostarch.com')
res.raise_for_status()   # stop here on a bad HTTP status
# Name the parser explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed (and warns), so results can differ between
# machines. 'html.parser' ships with Python.
noStarchSoup = bs4.BeautifulSoup(res.text, 'html.parser')
type(noStarchSoup)

In [None]:
# Find and print the names of books from the parsed HTML.
# The CSS selector targets the links inside the page's body field.
BOOK_LINK_SELECTOR = (
    'div.field.field-name-body.field-type-text-with-summary.field-label-hidden'
    ' > div > div > p > a'
)
books = noStarchSoup.select(BOOK_LINK_SELECTOR)
for book in books:
    print(book.text)

Using Selenium to control browser


In [None]:
# Open google, click on images, fill form and submit
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get('http://google.com')
    time.sleep(1)   # give the page a moment to render
    # find_element(By.<strategy>, value) replaces the find_element_by_*
    # helpers, which were removed in Selenium 4.3
    element = driver.find_element(
        By.CSS_SELECTOR,
        '#gbw > div > div > div.gb_Fd.gb_f.gb_5f.gb_Vf > div:nth-child(2) > a')

    element.click()

    search_box = driver.find_element(
        By.XPATH, '//*[@id="tsf"]/div[2]/div/div[1]/div/div[1]/input')

    search_box.send_keys('Automate the boring stuff')
    search_box.submit()
    time.sleep(3)   # leave the results visible briefly
finally:
    # Always shut the browser down, even when an element lookup fails
    driver.quit()


## Documents



Working with excel files


In [None]:
import openpyxl

Open and read data from workbooks


In [None]:
wb = openpyxl.load_workbook('example.xlsx')
sheet_names = wb.sheetnames
sheet1 = wb['Sheet1']
print(sheet_names)
type(sheet1)

In [None]:
# Print rows 1-7 of the second column (the column of sheet1['B1']),
# addressing cells by row/column number instead of by name
SECOND_COLUMN = 2
for row_number in range(1, 8):
    cell = sheet1.cell(row=row_number, column=SECOND_COLUMN)
    print(row_number, cell.value)

Editing excel documents


In [None]:
# Create, add data and save new excel
new_wb = openpyxl.Workbook()
sheet = new_wb['Sheet']   # a new workbook starts with one sheet named 'Sheet'
sheet['A1'] = 42
sheet['A2'] = 'Hello'
# Save the workbook that was just built. Saving `wb` here would write the
# workbook loaded from example.xlsx and drop the two cells set above.
new_wb.save('new_wb.xlsx')

Reading PDFs


In [None]:
import PyPDF2

In [None]:
# PDFs are binary, so open in 'rb' mode.
# NOTE(review): the handle stays open on purpose - `reader` is built on it
# and is used again by the merge cell further down.
pdfFile = open('meetingminutes1.pdf', 'rb')
reader = PyPDF2.PdfFileReader(pdfFile)
print(reader.numPages)
# Pages are indexed from 0
page = reader.getPage(0)
print(page.extractText())

Combine PDFs

In [None]:
pdfFile2 = open('meetingminutes2.pdf', 'rb')
try:
    reader2 = PyPDF2.PdfFileReader(pdfFile2)
    merger = PyPDF2.PdfFileMerger()
    # Documents are appended in order: first PDF, then the second
    merger.append(reader)
    merger.append(reader2)

    merger.write("document-output.pdf")
    merger.close()
finally:
    # The inputs are no longer needed once the merged file is written (or
    # the merge has failed), so release both file handles.
    pdfFile2.close()
    pdfFile.close()
# It is also possible to rotate, copy and slice pages

Working with Word documents

In [None]:
import docx
# Load and read paragraphs
doc = docx.Document('demo.docx')
# Printed explicitly: a bare expression in the middle of a cell shows nothing
print(len(doc.paragraphs))
print(doc.paragraphs[1].text)
paragraph = doc.paragraphs[1]
# A paragraph is made of runs; each run carries its own text and formatting
print(paragraph.runs[0].text)
print(paragraph.runs[1].bold)
# Change style
# NOTE(review): this only changes the in-memory document; presumably a
# doc.save(...) is needed to persist it - confirm
paragraph.runs[1].underline = True
New word document

In [None]:
# Start from an empty document, add one paragraph per line of text, save it
new_doc = docx.Document()
for paragraph_text in ('Hello world', 'Goodbye'):
    new_doc.add_paragraph(paragraph_text)
new_doc.save('new_doc.docx')


## Email


In [None]:
# Simple mail transfer protocol
import smtplib
# connect
smtObj = smtplib.SMTP('smtp-mail.outlook.com')
smtObj.ehlo()
smtObj.starttls()

In [None]:
import os

In [None]:
smtObj.login(os.environ['EMAIL'], os.environ['PASSWORD'])

In [None]:
# sendmail(from_address, to_address, message): the message begins with the
# 'Subject:' header line; the blank line ('\n\n') separates it from the body
smtObj.sendmail(os.environ['EMAIL'], 'perezmarquezfelix@gmail.com', 'Subject: Hello \n\n It\'s me')

In [None]:
# quit() sends the SMTP QUIT command and then closes the connection;
# close() alone would drop the socket without ending the session
smtObj.quit()


## GUI Automation


In [None]:
import pyautogui

Controlling the mouse with Python

In [None]:
pyautogui.size()

In [None]:
# Move mouse to position
pyautogui.moveTo(960,540, duration=0.5)

In [None]:
# Move mouse relative
pyautogui.moveRel(500,100, duration=0.5)

In [None]:
# Retrieve mouse position and click
pyautogui.position()
pyautogui.click()
pyautogui.doubleClick()
# pyautogui.rightClick()
# pyautogui.middleClick()

Controlling the keyboard

In [None]:
pyautogui.moveTo(222, 52, duration=1)
pyautogui.doubleClick()
pyautogui.press('esc')
pyautogui.typewrite('google.com', interval=0.2)
#pyautogui.KEYBOARD_KEYS

Screenshots and image recognition

In [None]:
# Take a screenshot and save it to a file (left disabled here)

# pyautogui.screenshot('screenshot_example.png')

# Look for the picture stored in 'image.png' on the current screen

pyautogui.locateOnScreen('image.png') # alternative: locateCenterOnScreen
