## The Python Standard Library

##### Module = smallest unit of code reusability. File containing Python definitions and statements
##### Package = logical collection of modules. Often bundles large products and broad functionality
##### Standard Library = collection of packages and modules. Distributed with Python by default
##### Script = Any Python code invoked as an executable. Usually from the command line

In [2]:
# 1)  Import from a Module

import math

# Any python file (including your own) can be a module
# from my_script import my_function, my_variable

In [3]:
# Import from a Package

# Packages give structure to modules
# __init__.py distinguishes packages from normal directories

In [4]:
# eg)

# sound/
# ├── __init__.py
# ├── effects/
# │ ├── __init__.py
# │ ├── echo.py
# │ ├── reverse.py
# │ └── surround.py

# from sound.effects import echo

In [5]:
# Import Conventions

# Prefer import ... instead of from ... import ...  Why? Explicit namespaces avoid name conflicts

# Avoid from ... import *  Why? Unclear what is being imported, strange behavior

In [6]:
# Executing Modules as Scripts

# Refresher: Running Modules as Scripts
# We can run a module (demo.py) as a script
# $ python3 demo.py # Doing so sets __name__ = '__main__'
# <output>

In [7]:
# Aside: Finding Modules

# if builtin module exists: load builtin module
# else:
# look for builtin module in the current directory of script
# look through PYTHONPATH
# look in installation default
# load if found, else raise ImportError

# Collections: container datatypes

### 1) collections.namedtuple : create tuple subclasses with named fields

In [9]:
# eg) 
import collections

Point = collections.namedtuple('Point', ['x', 'y'])

In [10]:
p = Point(11, y=22) # positional or keyword arguments

In [11]:
# Fields are accessible by name! "Readability counts."
-p.x, 2 * p.y # => -11, 44

(-11, 44)

In [12]:
# readable __repr__ with a name=value style
print(p) # Point(x=11, y=22)

Point(x=11, y=22)


In [13]:
# Subscriptable, like regular tuples
p[0] * p[1] # => 242

242

In [14]:
# Unpack, like regular tuples
x, y = p # x == 11, y == 22

In [15]:
x,y

(11, 22)

In [17]:
# Usually don't need to unpack if attributes have names

other = Point(11,43)

math.hypot(p.x - other.x, p.y - other.y)

21.0

In [18]:
# bad
# Can you guess the context of this code?
p = (170, 0.1, 0.6)
if p[1] >= 0.5:
    print("Whew, that is bright!")
if p[2] >= 0.5:
    print("Wow, that is light!")

Wow, that is light!


In [19]:
# good
Color = collections.namedtuple("Color",["hue", "saturation", "luminosity"])
pixel = Color(170, 0.1, 0.6)
if pixel.saturation >= 0.5:
    print("Whew, that is bright!")
if pixel.luminosity >= 0.5:
    print("Wow, that is light!")

Wow, that is light!


### 2) collections.defaultdict :  dict subclass with factory function for missing values

In [21]:
# Have:
input_data = [('yellow', 1), ('blue', 2),('yellow', 3), ('blue', 4), ('red', 1)]

# Want:
# output = {'blue': [2, 4], 'red': [1], 'yellow': [1, 3]}

In [22]:
# One approach
output = {}
for k, v in input_data:
    if k not in output:
        output[k] = []
        output[k].append(v)

print(output) # => {'blue': [2, 4], 'red': [1], 'yellow': [1, 3]}

{'yellow': [1], 'blue': [2], 'red': [1]}


In [23]:
# A better approach
output = collections.defaultdict(lambda: list())
for k, v in input_data:
    output[k].append(v)

print(output) # => {'red': [1], 'yellow': [1, 3], 'blue': [2, 4]})

defaultdict(<function <lambda> at 0x7f1b32ff3378>, {'yellow': [1, 3], 'blue': [2, 4], 'red': [1]})


In [24]:
dict(output)

{'yellow': [1, 3], 'blue': [2, 4], 'red': [1]}

In [25]:
# Zero-Argument Callable

In [26]:
# defaultdict with default value []
collections.defaultdict(lambda: list())
# equivalent to
collections.defaultdict(list)

defaultdict(list, {})

In [27]:
# example

In [28]:
# defaultdict with default value 0
collections.defaultdict(lambda: 0)
# equivalent to
collections.defaultdict(int)

defaultdict(int, {})

In [31]:
# example

s = 'mississippi'
# Want: d = {'i': 4, 'p': 2, 'm': 1, 's': 4}

In [33]:
d = collections.defaultdict(int) # or... lambda: 0

for letter in s:
    d[letter] += 1
    

print(d)# => defaultdict(<class 'int'>, {'i': 4, 'p': 2, 'm': 1, 's': 4})

defaultdict(<class 'int'>, {'m': 1, 'i': 4, 's': 4, 'p': 2})


In [34]:
dict(d)

{'m': 1, 'i': 4, 's': 4, 'p': 2}

### 3) collections.Counter: dict subclass for counting hashable objects

In [35]:
# Have 
s = 'mississippi'

# Want: [('s', 4), ('m', 1), ('i', 4), ('p', 2)]
# count of occurences of each alphabet

In [36]:
count = collections.Counter(s)

print(count) # => Counter({'i': 4, 'm': 1, 'p': 2, 's': 4})

Counter({'i': 4, 's': 4, 'p': 2, 'm': 1})


In [37]:
print(list(count.items())) # => [('s', 4), ('m', 1), ('i', 4), ('p', 2)]

[('m', 1), ('i', 4), ('s', 4), ('p', 2)]


In [38]:
list(count)

['m', 'i', 's', 'p']

In [39]:
count.values()

dict_values([1, 4, 4, 2])

In [43]:
# eg)
# Tally occurrences of words in a list

colors = ['red', 'blue', 'red', 'green', 'blue']

In [44]:
# One approach
counter = collections.Counter()
for color in colors:
    counter[color] += 1
    
print(counter) # => Counter({'blue': 2, 'green': 1, 'red': 2})

Counter({'red': 2, 'blue': 2, 'green': 1})


In [45]:
# A better approach
counter = collections.Counter(colors)
print(counter) # => Counter({'blue': 2, 'green': 1, 'red': 2})

Counter({'red': 2, 'blue': 2, 'green': 1})


In [47]:
# Get most common elements!
collections.Counter('abracadabra').most_common(3) # => [('a', 5), ('b', 2), ('r', 2)]

[('a', 5), ('b', 2), ('r', 2)]

In [51]:
# Supports basic arithmetic

In [52]:
collections.Counter('which') + collections.Counter('witch') # => Counter({'c': 2, 'h': 3, 'i': 2, 't': 1, 'w': 2})

Counter({'w': 2, 'h': 3, 'i': 2, 'c': 2, 't': 1})

In [53]:
collections.Counter('abracadabra') - collections.Counter('alakazam') # => Counter({'a': 1, 'b': 2, 'c': 1, 'd': 1, 'r': 2})

Counter({'a': 1, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

## re : Regular expression operations

#### "regular expression" == "search pattern" for strings

##### 1) Search for pattern match anywhere in string; return None if not found

In [55]:
import re

m = re.search(r"(\w+) (\w+)", "Physicist Isaac Newton")
print(m.group(0)) # "Isaac Newton" - the entire match
print(m.group(1)) # "Isaac" - first parenthesized subgroup
print(m.group(2)) # "Newton" - second parenthesized subgroup

Physicist Isaac
Physicist
Isaac


##### 2) Match pattern against start of string; return None if not found

In [57]:
m = re.match(r"(?P<fname>\w+) (?P<lname>\w+)", "Malcolm Reynolds")
print(m.group('fname')) # => 'Malcolm'
print(m.group('lname')) # => 'Reynolds'

Malcolm
Reynolds


##### 3) Substitute occurrences of one pattern with another

In [59]:
re.sub(r'@\w+\.com', '@stanford.edu', 'sam@go.com poohbear@bears.com')# => sam@stanford.edu poohbear@stanford.edu

'sam@stanford.edu poohbear@stanford.edu'

##### 4) Compile patterns

In [62]:
pattern = re.compile(r'[a-z]+[0-9]{3}') # compile pattern for fast ops
match = re.search(pattern, '@@@abc123') # pattern is first argument
print(match.span()) # (3, 9)

(3, 9)


example

Write a regular expression to match a phone number like
650 867-5309
Hint: \d captures [0-9], i.e. any digit
Hint: \d{3} captures 3 consecutive digits

is_phone("650 867-5309") # => True
is_phone("650.867.5309") # => False

Hint: Use named groups to return the area code

In [64]:
# solution

def is_phone(num):
    return bool(re.match('\d{3} \d{3}-\d{4}', num))

def get_area_code(num):
    m = re.match('(?P<areacode>\d{3}) \d{3}-\d{4}', num)
    if not m:
        return None
    return m.group('areacode')

In [65]:
is_phone("650 867-5309")

True

In [66]:
get_area_code("650 867-5309")

'650'

### 4) collections.Counter and re

In [69]:
# Find the three most common words in Hamlet
import re

with open('hamlet.txt') as f:
    words = re.findall(r'\w+', f.read().lower())

print(collections.Counter(words).most_common(3))# => [('the', 1091), ('and', 969), ('to', 767)]

FileNotFoundError: [Errno 2] No such file or directory: 'hamlet.txt'

# itertools : iterators for efficient looping

## 1) Combinatorics

In [73]:
import itertools

def view(it):
    print(*[''.join(els) for els in it])

In [75]:
print(view(itertools.product('ABCD', 'EFGH'))) # => AE AF AG AH BE BF BG BH CE CF CG CH DE DF DG DH


AE AF AG AH BE BF BG BH CE CF CG CH DE DF DG DH
None


In [76]:
print(view(itertools.product('ABCD', repeat=2))) # => AA AB AC AD BA BB BC BD CA CB CC CD DA DB DC DD

AA AB AC AD BA BB BC BD CA CB CC CD DA DB DC DD
None


In [77]:
print(view(itertools.permutations('ABCD', 2))) # => AB AC AD BA BC BD CA CB CD DA DB DC

AB AC AD BA BC BD CA CB CD DA DB DC
None


In [78]:
print(view(itertools.combinations('ABCD', 2))) # => AB AC AD BC BD CD

AB AC AD BC BD CD
None


In [79]:
print(view(itertools.combinations_with_replacement('ABCD', 2))) # => AA AB AC AD BB BC BD CC CD D

AA AB AC AD BB BC BD CC CD DD
None


## 2) Infinite Iterators

In [84]:
# start, [step] -> start, start + step, ...
itertools.count(10) # -> 10, 11, 12, 13, 14, ...

count(10)

In [85]:
# Cycle through elements of an iterable
itertools.cycle('ABC') # -> 'A', 'B', 'C', 'A', ...

<itertools.cycle at 0x7f1b32703048>

In [86]:
# Repeat a single element over and over.
itertools.repeat(10) # -> 10, 10, 10, 10, ...

repeat(10)

## 3) json:  JSON encoder and decoder

In [92]:
import json

squares = {1:1, 2:4, 3:9, 4:16}

In [95]:
# Serialize to/from string

output = json.dumps(squares) # output == "{1:1, 2:4, 3:9, 4:16}"

In [96]:
print(type(output),output)

<class 'str'> {"1": 1, "2": 4, "3": 9, "4": 16}


In [97]:
json.loads(output) # => {1:1, 2:4, 3:9, 4:16}

{'1': 1, '2': 4, '3': 9, '4': 16}

In [99]:
# Serialize to/from file

with open('tmp.json', 'w') as outfile:
    json.dump(squares, outfile)

with open('tmp.json', 'r') as infile:
    input = json.load(infile)

In [100]:
# All variants support useful keyword arguments
json.dumps(squares, indent=4, sort_keys=True, separators=(',', ': '))

'{\n    "1": 1,\n    "2": 4,\n    "3": 9,\n    "4": 16\n}'

In [1]:
# functools 
# python one liners

## 4) random : Generate pseudo-random numbers

In [5]:
import random

# Random float x with 0.0 <= x < 1.0
print(random.random()) # => 0.37444887175646646

# Random float x, 1.0 <= x < 10.0
print(random.uniform(1, 10)) # => 1.1800146073117523

# Random integer from 1 to 6 (inclusive)
print(random.randint(1, 6)) # => 4 (https://xkcd.com/221/)

# Random integer from 0 to 9 (inclusive)
print(random.randrange(10)) # => 7

# Random even integer from 0 to 100 (inclusive)
print(random.randrange(0, 101, 2)) # => 26

# Choose a single element
print(random.choice('abcdefghij')) # => 'c'

items = [1, 2, 3, 4, 5, 6, 7]
random.shuffle(items)
print(items) # => [7, 3, 2, 5, 6, 4, 1]

# k samples without replacement
print(random.sample(range(5), k=3)) # => [3, 1, 4]

# Sample from statistical distributions (others exist)
print(random.normalvariate(mu=0, sigma=3)) # => 2.373780578271

0.04280974235063639
1.8158373774217313
6
8
72
h
[4, 7, 6, 1, 3, 5, 2]
[4, 2, 1]
1.535517085270559


# 5) sys : System-specific parameters and functions

In [8]:
import sys

# Open file objects for standard input, error, output
# sys.stdin ('r') / sys.stderr ('w') / sys.stdout ('w')
sys.stdin.readline()
sys.stderr.write('hello world\n')
sys.stdout.flush()
# Raise SystemExit
sys.exit(arg)

hello world


NameError: name 'arg' is not defined

In [10]:
# What if we want to do something like...
# $ python3 -i demo.py <arguments>
# sys.argv to the rescue

# File: demo.py
if __name__ == '__main__':
    import sys
    print(sys.argv)

python3 demo.py 1 2 3
['demo.py', '1', '2', '3']

python3 subdir/../demo.py foo
['subdir/../demo.py', 'foo']

# For more advanced command line tools,use argparse (if needed, cmd and getopt)

SyntaxError: invalid syntax (<ipython-input-10-04c6d50f1890>, line 10)

### 1) pathlib — Object-oriented filesystem paths

In [14]:
import pathlib

p = pathlib.Path('/etc')
q = p / 'ssh' # Overloaded __div__ method
q # => PosixPath('/etc/ssh')

PosixPath('/etc/ssh')

In [15]:
q.exists() # => True

True

In [16]:
q.is_dir() # => True

True

In [17]:
# Print all python files somewhere in the current dir
p = pathlib.Path.cwd() # Current working directory
for f in p.glob('**/*.py'):
    print(f)

/home/dipesh/python-deliberate-practice/cs40/one.py
/home/dipesh/python-deliberate-practice/cs40/two.py
/home/dipesh/python-deliberate-practice/cs40/tldr.py
/home/dipesh/python-deliberate-practice/cs40/eight.py
/home/dipesh/python-deliberate-practice/cs40/five.py
/home/dipesh/python-deliberate-practice/cs40/seven.py
/home/dipesh/python-deliberate-practice/cs40/ten.py
/home/dipesh/python-deliberate-practice/cs40/four.py
/home/dipesh/python-deliberate-practice/cs40/three.py
/home/dipesh/python-deliberate-practice/cs40/nine.py
/home/dipesh/python-deliberate-practice/cs40/ref.py
/home/dipesh/python-deliberate-practice/cs40/six.py


### 2) subprocess and shlex

In [21]:
import subprocess

subprocess.call(["ls", "-l"]) # => Automatically authenticate to Myth servers
command = "kinit name@myth.stanford.edu --keytab=/etc/some-keytab"
args = shlex.split(command) # args = ["kinit", ... ]
subprocess.call(args) # => 0

# For more complex needs, use Popen
# Emulate 'ps aux | grep Spotify'
sp_ps = subprocess.Popen(["ps", "aux"], stdout=subprocess.PIPE)
sp_grep = subprocess.Popen(["grep", "Spotify"], stdin=sp_ps.stdout)


NameError: name 'shlex' is not defined

### Debugging Tools

###### 1) pprint — data pretty printer

In [22]:
ugly = {
    'data': {
        'after': 't3_3q8aog',
        'before': None,
        'kind': 'pagination',
        'children': [{'a':1}, {'a':2}, {'b':1}, {}],
        'uuid': '40b6f818'
    }
}
ugly['recursive'] = ugly # Contains recursive reference

print(ugly)
# {'data': {'before': None, 'kind': 'pagination',
# 'uuid': '40b6f818', 'after': 't3_3q8aog', 'children':
# [{'a': 1}, {'a': 2}, {'b': 1}, {}]}, 'recursive': {...}}

{'data': {'after': 't3_3q8aog', 'before': None, 'kind': 'pagination', 'children': [{'a': 1}, {'a': 2}, {'b': 1}, {}], 'uuid': '40b6f818'}, 'recursive': {...}}


In [23]:
import pprint

pprint.pprint(ugly, width=56, depth=2)
# {'data': {'after': 't3_3q8aog',
# 'before': None,
# 'children': [...],
# 'kind': 'pagination',
# 'uuid': '40b6f818'},
# 'recursive': <Recursion on dict with id=4372885384>}

{'data': {'after': 't3_3q8aog',
          'before': None,
          'children': [...],
          'kind': 'pagination',
          'uuid': '40b6f818'},
 'recursive': <Recursion on dict with id=140405718608632>}


##### 2) timeit - time short snippets

In [25]:
# Command Line Interface
python3 -m timeit '"-".join(str(n) for n in range(100))'
# 10000 loops, best of 3: 30.2 usec per loop
python3 -m timeit '"-".join([str(n) for n in range(100)])'
# 10000 loops, best of 3: 27.5 usec per loop
python3 -m timeit '"-".join(map(str, range(100)))'
# 10000 loops, best of 3: 23.2 usec per loop

SyntaxError: invalid syntax (<ipython-input-25-651e1ec30e91>, line 2)

In [27]:
# Python Interface
import timeit

timeit.timeit('"-".join(str(n) for n in range(100))', number=10000) # => 0.3018611848820001

0.6407003019999138

In [28]:
timeit.timeit('"-".join([str(n) for n in range(100)])', number=10000) # => 0.2727368790656328

0.5675450219998766

In [29]:
timeit.timeit('"-".join(map(str, range(100)))', number=10000) # => 0.23702679807320237

0.3037543929999629

## "Cute" Modules

In [32]:
import unicodedata

# 1) turtle — Turtle graphics
# 2) unicodedata — Unicode Database
print(unicodedata.lookup('SLICE OF PIZZA'))
# 3) antigravity

🍕


In [40]:
# Builtin Functions
# 1) Common One-Liners
print(any([True, True, False])) # => True
print(all([True, True, False])) # => False

True
False


In [42]:
print(int('45')) # => 45
print(int('0x2a', 16)) # => 42
print(int('1011', 2)) # => 11
print(hex(42)) # => '0x2a'
print(bin(42)) # => '0b101010'
print(ord('a')) # => 97
print(chr(97)) # => 'a'
print(round(123.45, 1)) # => 123.4
print(round(123.45, -2)) # => 100
print(max(2, 3)) # => 3
print(max([0, 4, 1])) # => 4
print(min(['apple', 'banana', 'pear'], key=len)) # => 0
print(sum([3, 5, 7])) # => 15
print(pow(3, 5)) # => 243 (= 3 ** 5)
print(pow(3, 5, 10)) # => 3 (= (3 ** 5) % 10, efficiently)

quotient, remainder = divmod(10, 6) # quotient, remainder => (1, 4)
print(quotient,remainder )

45
42
11
0x2a
0b101010
97
a
123.5
100.0
3
4
pear
15
243
3
1 4


In [43]:
# Flatten a list of lists (slower than itertools.chain)
sum([[3, 5], [1, 7], [4]], []) # => [3, 5, 1, 7, 4]

[3, 5, 1, 7, 4]

In [44]:
# Other Modules
# 6.1. string — Common string operations
# 7.1. struct — Interpret bytes as packed binary data
# 8.1. datetime — Basic date and time types
# 9.5. fractions — Rational numbers
# 9.7. statistics — Mathematical statistics functions
# 10.3. operator — Standard operators as functions
# 12.1. pickle — Python object serialization
# 14.1. csv — CSV File Reading and Writing
# 16.1. os — Miscellaneous operating system interfaces
# 16.3. time — Time access and conversions
# 16.4. argparse — Parser for command-line options,arguments and sub-commands
# 16.6. logging — Logging facility for Python
# 17.1. threading — Thread-based parallelism
# 17.2. multiprocessing — Process-based parallelism
# 18.1. socket — Low-level networking interface
# 18.5. asyncio – Asynchronous I/O, event loop, coroutines and tasks
# 18.8. signal — Set handlers for asynchronous events
# 26.3. unittest — Unit testing framework
# 26.6. 2to3 - Automated Python 2 to 3 code translation
# 27.3. pdb — The Python Debugger
# 27.6. trace — Trace or track Python statement execution
# 29.12. inspect — Inspect live objects