In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


# Whitespace formatting

In [2]:
for i in [1, 2, 3, 4, 5]:
    print(i)                    # first line in "for i" block
    for j in [1, 2, 3, 4, 5]:
        print(j)                # first line in "for j" block
        print(i + j)            # last line in "for j" block
    print(i)                    # last line in "for i" block
print("done looping")

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [3]:
long_winded_computation = (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +
                           13 + 14 + 15 + 16 + 17 + 18 + 19 + 20)

In [4]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [5]:
easier_to_read_list_of_lists = [[1, 2, 3],
                                [4, 5, 6],
                                [7, 8, 9]]

In [6]:
two_plus_three = 2 + \
                 3

In [7]:
for i in [1, 2, 3, 4, 5]:

    # notice the blank line
    print(i)

1
2
3
4
5


# Modules

In [8]:
import re
my_regex = re.compile("[0-9]+", re.I)

In [9]:
import re as regex
my_regex = regex.compile("[0-9]+", regex.I)

In [10]:
import matplotlib.pyplot as plt

In [11]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

In [12]:
match = 10
from re import *    # re has a match function
print(match)

<function match at 0x0000014F084C2C18>


# Functions

In [13]:
def double(x):
    """Return x multiplied by 2.

    The docstring is optional; it is the place to explain what a
    function does.
    """
    doubled = x * 2
    return doubled

In [14]:
def apply_to_one(f):
    """Invoke the callable f on the single argument 1 and return its result."""
    result = f(1)
    return result

In [15]:
my_double = double             # refers to the previously defined function
x = apply_to_one(my_double)    # equals 2

In [16]:
y = apply_to_one(lambda x: x + 4)      # equals 5

In [17]:
another_double = lambda x: 2 * x       # Don't do this (binding a lambda to a name)

def another_double(x):                 # Do this instead (rebinds the same name with a def)
    """Return twice x."""
    return 2 * x

In [18]:
def my_print(message="my default message"):
    """Print message to stdout; when called with no argument, print the default text."""
    print(message)

In [19]:
my_print("hello")   # prints 'hello'
my_print()          # prints 'my default message'

hello
my default message


In [20]:
def subtract(a=0, b=0):
    """Return a minus b; both operands default to 0."""
    difference = a - b
    return difference


subtract(10, 5)  # return 5
subtract(0, 5)   # return -5
subtract(b=5)    # return -5

-5

# Strings

In [21]:
single_quoted_string = 'data science'
double_quoted_string = "data science"

In [22]:
tab_string = "\t"       # represents the tab character
len(tab_string)         # is 1

1

In [23]:
not_tab_string = r"\t"  # represents the characters '\' and 't'
len(not_tab_string)     # is 2

2

In [24]:
multi_line_string = """This is the first line.
and this is the second line
and this is the third line"""

# Exceptions

In [25]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


# Lists

In [26]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]

list_length = len(integer_list)     # equals 3
list_sum    = sum(integer_list)     # equals 6

In [27]:
x = list(range(10))

zero = x[0]          # equals 0, lists are 0-indexed
one = x[1]           # equals 1
nine = x[-1]         # equals 9, 'Pythonic' for last element
eight = x[-2]        # equals 8, 'Pythonic' for next-to-last element
x[0] = -1            # now x is [-1, 1, 2, 3, ..., 9]

In [28]:
first_three = x[:3]                 # [-1, 1, 2]
three_to_end = x[3:]                # [3, 4, ..., 9]
one_to_four = x[1:5]                # [1, 2, 3, 4]
last_three = x[-3:]                 # [7, 8, 9]
without_first_and_last = x[1:-1]    # [1, 2, ..., 8]
copy_of_x = x[:]                    # [-1, 1, 2, ..., 9]

In [29]:
1 in [1, 2, 3]    # True
0 in [1, 2, 3]    # False

False

In [30]:
x = [1, 2, 3]
x.extend([4, 5, 6])     # x is now [1, 2, 3, 4, 5, 6]

In [31]:
x = [1, 2, 3]
y = x + [4, 5, 6]       # y is [1, 2, 3, 4, 5, 6]; x is unchanged

In [32]:
x = [1, 2, 3]
x.append(0)      # x is now [1, 2, 3, 0]
y = x[-1]        # equals 0
z = len(x)       # equals 4

In [33]:
x, y = [1, 2]    # now x is 1, y is 2

In [34]:
_, y = [1, 2]    # now y == 2, didn't care about the first element

# Tuples

In [35]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4
my_list[1] = 3      # my_list is now [1, 3]

In [36]:
my_tuple[1] = 3    # TypeError: 'tuple' object does not support item assignment

TypeError: 'tuple' object does not support item assignment

In [37]:
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [38]:
def sum_and_product(x, y):
    """Return the tuple (x + y, x * y)."""
    total = x + y
    product = x * y
    return total, product


sp = sum_and_product(2, 3)     # sp is (5, 6)
s, p = sum_and_product(5, 10)  # s is 15, p is 50

In [39]:
x, y = 1, 2     # now x is 1, y is 2
x, y = y, x     # Pythonic way to swap variables; now x is 2, y is 1

# Dictionaries

In [40]:
empty_dict = {}                     # Pythonic dictionary
empty_dict2 = dict()                # less Pythonic
grades = {"Joel": 80, "Tim": 95}    # dictionary literal

In [41]:
joels_grade = grades["Joel"]        # equals 80

In [42]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("no grade for Kate!")

no grade for Kate!


In [43]:
joel_has_grade = "Joel" in grades     # True
kate_has_grade = "Kate" in grades     # False

In [44]:
joels_grade = grades.get("Joel", 0)   # equals 80
kates_grade = grades.get("Kate", 0)   # equals 0
no_ones_grade = grades.get("No One")  # default default is None

In [45]:
grades["Tim"] = 99                    # replaces the old value
grades["Kate"] = 100                  # adds a third entry
num_students = len(grades)            # equals 3

In [46]:
grades

{'Joel': 80, 'Tim': 99, 'Kate': 100}

In [47]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

In [48]:
tweet_keys   = tweet.keys()     # iterable for the keys
tweet_values = tweet.values()   # iterable for the values
tweet_items  = tweet.items()    # iterable for the (key, value) tuples

In [49]:
tweet_keys

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [50]:
tweet_values

dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience', '#awesome', '#yolo']])

In [51]:
tweet_items

dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome', '#yolo'])])

In [52]:
"user" in tweet_keys            # True, but not Pythonic
"user" in tweet                 # Pythonic way of checking for keys
"joelgrus" in tweet_values      # True (slow but the only way to check)

True

### defaultdict

In [53]:
# Count how often each word occurs, using a plain dict and an explicit
# membership test.
word_counts = {}
document = []      # placeholder: the list of words to count (empty here)
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

In [54]:
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

In [55]:
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [56]:
from collections import defaultdict   # dict subclass that calls a factory function to supply missing values

In [57]:
word_counts = defaultdict(int)          # int() produces 0
for word in document:
    word_counts[word] += 1

In [58]:
dd_list = defaultdict(list)             # list() produces an empty list
dd_list[2].append(1)                    # now dd_list contains {2: [1]}

dd_dict = defaultdict(dict)             # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle"     # {"Joel": {"City": "Seattle"}}

dd_pair = defaultdict(lambda: [0, 0])   # each missing key starts out as the pair [0, 0]
dd_pair[2][1] = 1                       # now dd_pair contains {2: [0, 1]}

### Counter

In [59]:
from collections import Counter   # dict subclass for counting hashable objects

In [60]:
c = Counter([0, 1, 2, 0])          # c is (basically) {0: 2, 1: 1, 2: 1}

In [61]:
c

Counter({0: 2, 1: 1, 2: 1})

In [62]:
# recall, document is a list of words
word_counts = Counter(document)

In [63]:
# print the 10 most common words and their counts
for word, count in word_counts.most_common(10):
    print(word, count)

# Sets

In [64]:
s = set()
s.add(1)       # s is now {1}
s.add(2)       # s is now {1, 2}
s.add(2)       # s is still {1, 2}
x = len(s)     # equals 2
y = 2 in s     # equals True
z = 3 in s     # equals False

In [65]:
hundreds_of_other_words = []
stopwords_list = ["a", "an", "at"] + hundreds_of_other_words + ["yet", "you"]

In [66]:
"zip" in stopwords_list     # False, but have to check every element

False

In [67]:
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)                # 6
item_set = set(item_list)                 # {1, 2, 3}
num_distinct_items = len(item_set)        # 3
distinct_item_list = list(item_set)       # [1, 2, 3]

# Control structures

In [68]:
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    message = "when all else fails use else (if you want to)"

In [69]:
parity = "even" if x % 2 == 0 else "odd"

In [70]:
x = 0
while x < 10:
    print(f"{x} is less than 10")
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [71]:
# range(10) is the numbers 0, 1, ..., 9
for x in range(10):
    print(f"{x} is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [72]:
for x in range(10):
    if x == 3:
        continue  # go immediately to the next iteration
    if x == 5:
        break     # quit the loop entirely
    print(x)

0
1
2
4


# Booleans

In [73]:
one_is_less_than_two = 1 < 2          # equals True
true_equals_false = True == False     # equals False

In [74]:
x = None
x == None   # this is not the Pythonic way to check for None
x is None   # this is the Pythonic way to check for None

True

In [75]:
def some_function_that_returns_a_string():
    """Return a string; here it is the empty string, which is falsy."""
    return ""


s = some_function_that_returns_a_string()
# An empty string is falsy, so fall back to "" instead of indexing into it.
first_char = s[0] if s else ""

In [76]:
first_char = s and s[0]

In [77]:
safe_x = x or 0

In [78]:
all([True, 1, {3}])   # True, all are truthy
all([True, 1, {}])    # False, {} is falsy
any([True, 1, {}])    # True, True is truthy
all([])               # True, no falsy elements in the list
any([])               # False, no truthy elements in the list

False

# Sorting

In [79]:
x = [4, 1, 2, 3]
y = sorted(x)     # y is [1, 2, 3, 4], x is unchanged
x.sort()          # now x is [1, 2, 3, 4]

In [80]:
# sort the list by absolute value from largest to smallest
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)  # is [-4, 3, -2, 1]

In [81]:
# sort the words and counts from highest count to lowest
wc = sorted(word_counts.items(),
            key=lambda word_and_count: word_and_count[1], reverse=True)

# List comprehensions

In [82]:
even_numbers = [x for x in range(5) if x % 2 == 0]  # [0, 2, 4]
squares      = [x * x for x in range(5)]            # [0, 1, 4, 9, 16]
even_squares = [x * x for x in even_numbers]        # [0, 4, 16]

In [83]:
square_dict = {x: x * x for x in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
square_set  = {x * x for x in [1, -1]}      # {1}

In [84]:
zeros = [0 for _ in even_numbers]      # has the same length as even_numbers

In [85]:
pairs = [(x, y)
         for x in range(10)
         for y in range(10)]   # 100 pairs (0,0) (0,1) ... (9,8), (9,9)

In [86]:
increasing_pairs = [(x, y)                       # only pairs with x < y,
                    for x in range(10)           # range(lo, hi) equals
                    for y in range(x + 1, 10)]   # [lo, lo + 1, ..., hi - 1]

# Generator functions and generator expressions

In [87]:
def lazy_range(n):
    """Lazy version of range: yield 0, 1, ..., n - 1 one value at a time.

    Nothing is yielded when n <= 0.
    """
    i = 0
    while i < n:
        yield i      # hand back the current counter, not a constant
        i += 1

In [88]:
def natural_numbers():
    """Generate 1, 2, 3, ... without end."""
    current = 0
    while True:
        current += 1
        yield current

In [89]:
# lazy list of even numbers less than 20
lazy_events_below_20 = (i for i in lazy_range(20) if i % 2 == 0)

# Random numbers

In [90]:
import random

In [91]:
# four uniform random variables 
four_uniform_randoms = [random.random() for _ in range(4)]

In [92]:
four_uniform_randoms     # random.random() produces numbers uniformly between 0 and 1;
                         # it's the random function we'll use most often

[0.7312609708089982,
 0.3699098828751294,
 0.8345050746286649,
 0.49561913861804774]

In [93]:
random.seed(10)         # set the seed to 10
print(random.random())  # 0.57140259469
random.seed(10)         # reset the seed to 10
print(random.random())  # 0.57140259469 again

0.5714025946899135
0.5714025946899135


In [94]:
random.randrange(10)    # choose randomly from range(10) = [0, 1, ..., 9]
random.randrange(3, 6)  # choose randomly from range(3, 6) = [3, 4, 5]

4

In [95]:
up_to_ten = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
random.shuffle(up_to_ten)
print(up_to_ten)        # results will probably be different

[5, 6, 9, 2, 3, 7, 8, 4, 1, 10]


In [96]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"])     # picks one element of the list at random
my_best_friend

'Bob'

In [97]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6)
winning_numbers

[4, 15, 47, 23, 2, 26]

In [98]:
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
four_with_replacement

[2, 9, 5, 6]

# Regular expressions

In [99]:
import re

print(all([                                # all of these are true, because
    not re.match("a", "cat"),              #  'cat' doesn't start with 'a'
    re.search("a", "cat"),                 #  'cat' has an 'a' in it
    not re.search("c", "dog"),             #  'dog' doesn't have a 'c' in it
    3 == len(re.split("[ab]", "carbs")),   #  split on a or b to ['c','r','s']
    "R-D-" == re.sub("[0-9]", "-", "R2D2") #  replace digits with dashes
    ]))

True


# Object oriented programming

In [102]:
class Set:
    """A minimal set of hashable values, backed by a dict.

    Membership is tracked through the keys of self.dict; the stored
    value (True) is only a placeholder.
    """

    def __init__(self, values=None):
        """Create a new Set, optionally seeded from an iterable of values.

        Usage:
            s1 = Set()              # empty set
            s2 = Set([1, 2, 2, 3])  # initialize with values
        """
        self.dict = {}

        if values is not None:
            for value in values:
                self.add(value)

    def __repr__(self):
        """Return the string shown when a Set is printed or displayed."""
        return "Set: " + str(self.dict.keys())

    def add(self, value):
        """Insert value; adding a value that is already present is a no-op."""
        self.dict[value] = True

    def contains(self, value):
        """Return True if value is in the set, False otherwise."""
        return value in self.dict

    def remove(self, value):
        """Delete value from the set; raises KeyError if it is absent."""
        del self.dict[value]

# Functional programming tools

In [105]:
def exp(base, power):
    """Return base raised to power."""
    return pow(base, power)

In [106]:
def two_to_the(power):
    """Return 2 raised to power by delegating to exp."""
    base = 2
    return exp(base, power)

In [108]:
from functools import partial


two_to_the = partial(exp, 2)
two_to_the(3)

8

In [109]:
square_of = partial(exp, power=2)
square_of(3)

9

In [110]:
def double(x):
    """Return twice x."""
    return 2 * x


xs = [1, 2, 3, 4]
twice_xs = [double(x) for x in xs]    # [2, 4, 6, 8]
# In Python 3 map returns a one-shot lazy iterator, so wrap it in list()
# to get the same list the comprehension above produces.
twice_xs = list(map(double, xs))      # same as above
list_doubler = partial(map, double)   # function that lazily doubles an iterable
twice_xs = list(list_doubler(xs))     # [2, 4, 6, 8] again

In [111]:
def multiply(x, y):
    """Return the product x * y."""
    return x * y


# map with several iterables calls multiply pairwise; list() materializes
# the lazy map object so products really is the list described below.
products = list(map(multiply, [1, 2], [4, 5]))   # [1 * 4, 2 * 5] = [4, 10]

In [117]:
def is_even(x):
    """Return True if x is even, False if x is odd."""
    remainder = x % 2
    return remainder == 0


x_evens = [x for x in xs if is_even(x)]   # evens list = [2, 4]
# In Python 3 filter returns a lazy iterator; list() turns it into the
# same list the comprehension above produces.
x_evens = list(filter(is_even, xs))       # same as above
list_evener = partial(filter, is_even)    # function that lazily filters an iterable
x_evens = list(list_evener(xs))           # [2, 4] again

In [118]:
from functools import reduce

x_product = reduce(multiply, xs)          # = 1 * 2 * 3 * 4 = 24
list_product = partial(reduce, multiply)  # function that reduces a list to the product of its elements
x_product = list_product(xs)              # 24 again

# enumerate function

In [120]:
documents = []                            # a list of some documents, here it is empty

# NOTE: do_something stands for whatever per-document work is needed; it is
# not defined in this notebook and is never called while documents is empty.

for i in range(len(documents)):           # not Pythonic
    document = documents[i]
    do_something(i, document)

i = 0                                     # also not Pythonic
for document in documents:
    do_something(i, document)
    i += 1

for i, document in enumerate(documents):  # Pythonic
    do_something(i, document)

# When only the index is needed:
for i in range(len(documents)):           # not Pythonic
    do_something(i)

for i, _ in enumerate(documents):         # Pythonic
    do_something(i)

# zip function and argument unpacking

In [122]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list(zip(list1, list2))    # is [('a', 1), ('b', 2), ('c', 3)]

[('a', 1), ('b', 2), ('c', 3)]

In [126]:
pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)

letters, numbers = zip(('a', 1), ('b', 2), ('c', 3))  # [('a', 'b', 'c'), (1, 2, 3)]

In [127]:
letters, numbers

(('a', 'b', 'c'), (1, 2, 3))

In [128]:
def add(a, b):
    """Return a + b."""
    return a + b


add(1, 2)      # returns 3
try:
    add([1, 2])    # TypeError: the list is one argument, so b is missing
except TypeError as err:
    # Show the error instead of letting it abort the cell, so the
    # unpacking call below still runs.
    print(f"TypeError: {err}")
add(*[1, 2])   # returns 3

TypeError: add() missing 1 required positional argument: 'b'

# args and kwargs variables

In [129]:
def doubler(f):
    """Return a one-argument function that computes 2 * f(x)."""
    def g(x):
        # f is captured from the enclosing call to doubler
        inner_result = f(x)
        return 2 * inner_result

    return g

In [130]:
def f1(x):
    """Return x plus 1."""
    incremented = x + 1
    return incremented

In [131]:
g = doubler(f1)
g(3)              # (3 + 1) * 2 should equal 8
g(-1)             # (-1 + 1) * 2 should equal 0

0

In [132]:
def f2(x, y):
    """Return x plus y."""
    total = x + y
    return total

In [133]:
g = doubler(f2)
try:
    g(1, 2)           # TypeError: g accepts exactly one positional argument
except TypeError as err:
    # Show the error instead of letting it abort the notebook run.
    print(f"TypeError: {err}")

TypeError: g() takes 1 positional argument but 2 were given

In [134]:
def magic(*args, **kwargs):
    """Print the positional arguments (a tuple) and the keyword arguments (a dict)."""
    for label, captured in (("unnamed args:", args), ("keyword args:", kwargs)):
        print(label, captured)

magic(1, 2, key="word", key2="word2")  # prints unnamed args: (1, 2)
                                       # keyword args: {'key': 'word', 'key2': 'word2'}

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


In [135]:
def other_way_magic(x, y, z):
    """Return x + y + z, added left to right."""
    partial_sum = x + y
    return partial_sum + z

x_y_list = [1, 2]
z_dict = {"z": 3}
other_way_magic(*x_y_list, **z_dict)    # 1 + 2 + 3 should be 6

6

In [136]:
def doubler_correct(f):
    """Return a function that computes 2 * f(...) for any arguments f accepts."""
    def g(*args, **kwargs):
        # Forward every positional and keyword argument to f unchanged.
        inner_result = f(*args, **kwargs)
        return 2 * inner_result

    return g

In [137]:
g = doubler_correct(f2)
g(1, 2)                                    # doubler should work now

6