# Part 1 - Whitespace Formatting

Python uses indentation to define control flow. This is different from many other languages, which use curly braces or keywords. This means that whitespace is significant in Python, and must be used properly.

In [8]:
for i in [1, 2, 3, 4, 5]:
    print("----------")
    print("Current i: ", i)
    print("")
    for j in [1, 2, 3, 4, 5]:
        print("j: ",  j)
        print("i + j: ",  (i + j))
        print("")
    print("Now i: ", i)

----------
Current i:  1

j:  1
i + j:  2

j:  2
i + j:  3

j:  3
i + j:  4

j:  4
i + j:  5

j:  5
i + j:  6

Now i:  1
----------
Current i:  2

j:  1
i + j:  3

j:  2
i + j:  4

j:  3
i + j:  5

j:  4
i + j:  6

j:  5
i + j:  7

Now i:  2
----------
Current i:  3

j:  1
i + j:  4

j:  2
i + j:  5

j:  3
i + j:  6

j:  4
i + j:  7

j:  5
i + j:  8

Now i:  3
----------
Current i:  4

j:  1
i + j:  5

j:  2
i + j:  6

j:  3
i + j:  7

j:  4
i + j:  8

j:  5
i + j:  9

Now i:  4
----------
Current i:  5

j:  1
i + j:  6

j:  2
i + j:  7

j:  3
i + j:  8

j:  4
i + j:  9

j:  5
i + j:  10

Now i:  5


# Part 2 - Modules

In [13]:
import re as regex

my_regex = regex.compile("[0-9]+", regex.I)

print(my_regex)

re.compile('[0-9]+', re.IGNORECASE)


# Part 3 - Functions

In [22]:
def triple(x):
    return x * 3

Python functions are first-class, which means that we can assign them to variables and pass them into functions just like any other arguments:

In [23]:
def apply_to_one(f):
    return f(1)

my_triple = triple
x = apply_to_one(my_triple)

print(x)

3


It is also easy to create short anonymous functions, or lambdas:

In [25]:
y = apply_to_one(lambda x: x + 4)

print(y)

5


# Part 4 - Strings 

In [26]:
# Single / Double quotes, but the quote must be the same
single_quoted_string = 'data science'
double_quoted_string = "data science"

# Backslash for special characters
tab_string = "\t"
print(len(tab_string))

1


In [27]:
# If you want to use backslash as a character, use raw string
not_tab_string = r"\t"
print(len(not_tab_string))

2


In [29]:
# Multiline strings
multi_line_string = """This is the first line.
This is the second line.
This is the third line."""

print(multi_line_string)

This is the first line.
This is the second line.
This is the third line.


In [30]:
# f-string
first_name = "Joel"
last_name = "Grus"

full_name1 = first_name + " " + last_name
full_name2 = "{0} {1}".format(first_name, last_name)
full_name3 = f"{first_name} {last_name}"

print(full_name1)
print(full_name2)
print(full_name3)

Joel Grus
Joel Grus
Joel Grus


# Part 5 - Exceptions

In [31]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("Cannot divide by zero")

Cannot divide by zero


# Part 6 - Lists

In [32]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]

print(len(integer_list))
print(sum(integer_list))

3
6


In [34]:
# List slicing
x = list(range(10))
print(x)

print(x[:5])
print(x[5:])
print(x[4:7])
print(x[1:-1])
print(x[::2])
print(x[1::2])
print(x[::-1])
print(x[::-2])
print(x[5:1:-1])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9]
[4, 5, 6]
[1, 2, 3, 4, 5, 6, 7, 8]
[0, 2, 4, 6, 8]
[1, 3, 5, 7, 9]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
[9, 7, 5, 3, 1]
[5, 4, 3, 2]


In [35]:
# Check if a list contains a value
print(1 in [1, 2, 3])
print(0 in [1, 2, 3])

True
False


In [39]:
# Concatenate lists
x = [1, 2, 3]
x.extend([4, 5, 6])
print(x)

[1, 2, 3, 4, 5, 6]


In [41]:
# If you want to keep the original list, use the + operator
x = [1, 2, 3]
y = x + [4, 5, 6]
print(x)
print(y)

[1, 2, 3]
[1, 2, 3, 4, 5, 6]


In [42]:
# It's often convenient to unpack lists if you know how many elements they contain
x, y = [1, 2]
print(x)
print(y)

1
2


In [43]:
# If you don't need the second element, you can use _, else it will raise an error
_, y = [1, 2]
print(y)

2


# Part 7 - Tuples

Tuples are like lists, but they are immutable, meaning that they cannot be changed after they are created. They are also hashable, which means that they can be used as keys in dictionaries.

In [44]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4
my_list[1] = 3

try:
    my_tuple[1] = 3
except TypeError:
    print("Cannot modify a tuple")

Cannot modify a tuple


In [48]:
# Tuples are a convenient way to return multiple values from functions
def sum_and_product(x, y):
    return x + y, x * y

sp = sum_and_product(3, 7)
print(sp)

s, p = sum_and_product(5, 10)
print(s,p)

(10, 21)
15 50


In [49]:
# Tuples (and lists) can also be used for multiple assignment
x, y = 1, 2
x, y = y, x

print(x, y)

2 1


# Part 8 - Dictionaries

In [52]:
empty_dict = {} # Pythonic
empty_dict2 = dict() # less Pythonic
grades = {"Joel": 80, "Tim": 95} # dictionary literal

# Get the value of a key
joels_grade = grades["Joel"]
print(joels_grade)

# But if the key doesn't exist, it will raise an error
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("No grade for Kate!")

# Check if a key exists, this membership check is fast even for large dictionaries
joel_has_grade = "Joel" in grades
kate_has_grade = "Kate" in grades

print(joel_has_grade)
print(kate_has_grade)

80
No grade for Kate!
True
False


In [53]:
# Get method, which returns a default value if the key is not present instead of raising an exception
joels_grade = grades.get("Joel", 0)
kates_grade = grades.get("Kate", 0)
no_ones_grade = grades.get("No One")

print(joels_grade)
print(kates_grade)
print(no_ones_grade)


80
0
None


In [54]:
# Assigning a value to a key will create a new key
grades["Tim"] = 99
grades["Kate"] = 100
num_students = len(grades)
print(num_students)

3


In [60]:
# We will frequently use dictionaries as a simple way to represent structured data
tweet = {
    "user": "joelgrus",
    "text": "Data Science is Awesome",
    "retweet_count": 100,
    "hashtags": ["#data", "#science", "#datascience"]
}

# Look for all
tweet_keys = tweet.keys()
tweet_values = tweet.values()
tweet_items = tweet.items()

print(tweet_keys)
print(tweet_values)
print(tweet_items)

# Dictionary keys must be immutable; in particular, you cannot use lists as keys
# If you need a multipart key, you should use a tuple or figure out a way to turn the key into a string

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])
dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience']])
dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience'])])


# Part 9 - defaultdict

Imagine that you are trying to count the frequency of words in a document. You could use a dictionary, with the words as keys and the counts as values. 

In [62]:
word_counts = {}
for word in tweet["text"].split():
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

print(word_counts)

{'Data': 1, 'Science': 1, 'is': 1, 'Awesome': 1}


You could also use the "forgiveness is better than permission" approach and just handle the exception from trying to look up a missing key:

In [63]:
word_counts = {}
for word in tweet["text"].split():
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

print(word_counts)

{'Data': 1, 'Science': 1, 'is': 1, 'Awesome': 1}


A third approach is to use get, which behaves gracefully with missing keys:

In [65]:
word_counts = {}
for word in tweet["text"].split():
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

print(word_counts)

{'Data': 1, 'Science': 1, 'is': 1, 'Awesome': 1}


Every one of these is slightly unwieldy, which is why defaultdict is useful. A defaultdict is like a regular dictionary, except that when you try to look up a key it doesn't contain, it first adds a value for it using a zero-argument function you provided when you created it. In order to use defaultdicts, you have to import them from collections:

In [66]:
from collections import defaultdict

word_counts = defaultdict(int) # int() produces 0
for word in tweet["text"].split():
    word_counts[word] += 1

print(word_counts)

defaultdict(<class 'int'>, {'Data': 1, 'Science': 1, 'is': 1, 'Awesome': 1})


In [69]:
# They can also be useful with list or dict or even your own functions
dd_list = defaultdict(list) # list() produces an empty list
dd_list[2].append(1) # now dd_list contains {2: [1]}
print(dd_list)

defaultdict(<class 'list'>, {2: [1]})


In [70]:
dd_dict = defaultdict(dict)
dd_dict["Joel"]["City"] = "Seattle"
print(dd_dict)

defaultdict(<class 'dict'>, {'Joel': {'City': 'Seattle'}})


In [71]:
dd_pair = defaultdict(lambda: [0, 0])
dd_pair[2][1] = 1
print(dd_pair)

defaultdict(<function <lambda> at 0x000001BC2157A660>, {2: [0, 1]})


These will be useful when we're using dictionaries to "collect" results by some key and don't want to have to check every time to see if the key exists yet.

# Part 10 - Counter

A Counter turns a sequence of values into a defaultdict(int)-like object mapping keys to counts:

In [72]:
from collections import Counter

c = Counter([0, 1, 2, 0])
print(c)

Counter({0: 2, 1: 1, 2: 1})


In [73]:
# This gives you a very simple way to solve the word_counts problem
word_counts = Counter(tweet["text"].split())
print(word_counts)

Counter({'Data': 1, 'Science': 1, 'is': 1, 'Awesome': 1})


In [75]:
# A Counter instance has a most_common method that is frequently useful
for word, count in word_counts.most_common(2): # print the top 2 most common words and their counts
    print(word, count)

Data 1
Science 1


# Part 11 - Sets

Another data structure is set, which represents a collection of **_distinct_** elements. Defined by curly braces:

In [76]:
primes_below_10 = {2, 3, 5, 7}

In [78]:
# However, that doesn't work with an empty set, as {} already means an empty dictionary. In that case you will need to use set():
s = set()
s.add(1)
s.add(2)
s.add(2)
x = len(s)
y = 2 in s
z = 3 in s

print(s)
print(x)
print(y)
print(z)

{1, 2}
2
True
False


We will use sets for two main reasons: the in operation, a fast way to check if something is part of a set, and the ability to find the distinct items in a collection.

In [90]:
import time
import random
import string

# Generate a large list of random words
hundreds_of_other_words = [''.join(random.choices(string.ascii_lowercase, k=16)) for _ in range(1000000)]
stopwords_list = ["a", "an", "at"] + hundreds_of_other_words + ["yet", "you"]
stopwords_set = set(stopwords_list)

list_start_time = time.time()
print("zip" in stopwords_list) # False, but have to check every element
list_time = time.time() - list_start_time
print("time for list: ", list_time)

set_start_time = time.time()
print("zip" in stopwords_set) # very fast to check
set_time = time.time() - set_start_time
print("time for set: ", set_time)

print("Set is faster than list by percentage: ", (list_time - set_time) / list_time * 100)

False
time for list:  0.009048700332641602
False
time for set:  0.0
Set is faster than list by percentage:  100.0


In [91]:
# Finding distinct items using set
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)

item_set = set(item_list)
num_distinct_items = len(item_set)
distinct_item_list = list(item_set)

print(num_items)
print(num_distinct_items)
print(distinct_item_list)

6
3
[1, 2, 3]


# Part 12 - Control Flow

In [92]:
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    message = "when all else fails use else (if you want to)"

In [93]:
# Can also write a ternary if-then-else on one line
parity = "even" if x % 2 == 0 else "odd"

In [94]:
# While loop
x = 0
while x < 10:
    print(f"{x} is less than 10")
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [95]:
# For loop
for x in range(10):
    print(f"{x} is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [96]:
# Continue and break
for x in range(10):
    if x == 3:
        continue
    if x == 5:
        break
    print(x)

0
1
2
4


# Part 13 - Truthiness

In [97]:
one_is_less_than_two = 1 < 2
true_equals_false = True == False

print(one_is_less_than_two)
print(true_equals_false)

True
False


In [100]:
# None is the Python null value type

x = None
assert x == 1, "Assert will raise an error if the condition is not met"
assert x == None, "This is not the Pythonic way to check for None"
assert x is None, "This is the Pythonic way to check for None"

AssertionError: Assert will raise an error if the condition is not met

List of "falsy" values:

In [102]:
# False
# None
# []
# {}
# ""
# set()
# 0
# 0.0

In [111]:
# All other values are True. This allows you to easily use if statements to check for empty lists, empty strings, None, etc. For example:

some_function_that_returns_a_string = lambda: None

s = some_function_that_returns_a_string()
if s:
    first_char = s[0]
else:
    first_char = ""

print(first_char)

# You can shorten it to
first_char = s and s[0]
print(first_char)


None


In [112]:
# Similarly, if x is either a number or possibly None
safe_x = x or 0
print(safe_x)

0


In [114]:
# Python has an "all" function, which takes a list and returns True precisely when every element is truthy, 
# and an "any" function, which returns True when at least one element is truthy

print(all([True, 1, {3}])) # True, all are truthy

print(all([True, 1, {}])) # False, {} is falsy
print(any([True, 1, {}])) # True, True is truthy

print(all([])) # True, no falsy elements in the list
print(any([])) # False, no truthy elements in the list

True
False
True
True
False


# Part 14 - Sorting

In [116]:
# If don't want to mess up the original list, use "sorted", which returns a new sorted list:
x = [4, 1, 2, 3]
y = sorted(x)
print(x)
print(y)

# sort the list in place
x.sort()
print(x)


[4, 1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3, 4]


In [117]:
# Sort the list by absolute value from largest to smallest
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)
print(x)

[-4, 3, -2, 1]


In [125]:
# Sort the words and counts from highest count to lowest
tweet["text"] = "This tweet is the tweet with lot of tweet words in it."

word_counts = Counter(tweet["text"].split())
wc = sorted(word_counts.items(), 
            key=lambda word_and_count: word_and_count[1], # Since word_counts is a dictionary, it will be a tuple of (word, count), so we need to sort by count
            reverse=True)
print(wc)

[('tweet', 3), ('This', 1), ('is', 1), ('the', 1), ('with', 1), ('lot', 1), ('of', 1), ('words', 1), ('in', 1), ('it.', 1)]


# Part 15 - List Comprehensions

In [126]:
even_numbers = [x for x in range(5) if x % 2 == 0]
print(even_numbers)

squares = [x * x for x in range(5)]
print(squares)

even_squares = [x * x for x in even_numbers] # [0, 2, 4] -> [0, 4, 16]
print(even_squares)

[0, 2, 4]
[0, 1, 4, 9, 16]
[0, 4, 16]


In [127]:
# You can also create a set or a dictionary using list comprehensions
square_dict = {x: x * x for x in range(5)}
print(square_dict)

square_set = {x * x for x in [1, -1]}
print(square_set)

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
{1}


In [128]:
# If you don't need the value from the list, it's common to use an underscore as the variable
zeroes = [0 for _ in even_numbers] # [0, 2, 4] -> [0, 0, 0]
print(zeroes)

[0, 0, 0]


In [131]:
# A list comprehension can include multiple for's
pairs = [(x, y)
         for x in range(5)
         for y in range(3)]

print(pairs)

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2), (3, 0), (3, 1), (3, 2), (4, 0), (4, 1), (4, 2)]


In [135]:
# And later for's can use the results of earlier ones
increasing_pairs = [(x, y)
                    for x in range(5)
                    for y in range(x + 1, 7)]

print(increasing_pairs)

[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (2, 3), (2, 4), (2, 5), (2, 6), (3, 4), (3, 5), (3, 6), (4, 5), (4, 6)]


# Part 16 - Automated Testing and assert

In [138]:
assert 1 + 1 == 2
assert 1 + 1 == 2, "1 + 1 should equal 2 but didn't"

In [140]:
def smallest_item(xs):
    return min(xs)

assert smallest_item([10, 20, 5, 40]) == 5
assert smallest_item([1, 0, -1, 2]) == -1
assert smallest_item([1, 0, -1, 2]) == 1, "This is wrong"

AssertionError: This is wrong

# Part 17 - Object-Oriented Programming

In [141]:
class CountingClicker:
    """A class can/should have a docstring, just like a function"""
    def __init__(self, count=0): # This is the constructor, or called "dunder" methods (Double-UNDERscore)
        self.count = count

clicker1 = CountingClicker()
clicker2 = CountingClicker(100)
clicker3 = CountingClicker(count=100)

assert clicker1.count == 0
assert clicker2.count == 100
assert clicker3.count == 100


In [142]:
# Another dunder method is __repr__, which should return a string representation of the object
class CountingClicker:
    def __init__(self, count=0):
        self.count = count
    def __repr__(self):
        return f"CountingClicker(count={self.count})"
    
    # And finally, implement public API of the class
    def click(self, num_times=1):
        """Click the clicker some number of times."""
        self.count += num_times
    def read(self):
        return self.count
    def reset(self):
        self.count = 0

clicker = CountingClicker()
assert clicker.read() == 0, "clicker should start with count 0"
clicker.click()
clicker.click()
assert clicker.read() == 2, "after two clicks, clicker should have count 2"
clicker.reset()
assert clicker.read() == 0, "after reset, clicker should be back to 0"

In [143]:
# Subclassing is a way to define a new class using an existing class as a base
class NoResetClicker(CountingClicker):
    # This class has all the same methods as CountingClicker
    # Except that it has a reset method that does nothing
    def reset(self):
        pass

clicker2 = NoResetClicker()
assert clicker2.read() == 0
clicker2.click()
assert clicker2.read() == 1
clicker2.reset()
assert clicker2.read() == 1, "reset shouldn't do anything"

# Part 18 - Iterables and Generators

- One nice thing about a list is that you can retrieve specific elements by their indices.
- But you don’t always need this! A list of a billion numbers takes up a lot of memory. 
- If you only want the elements one at a time, there’s no good reason to keep them all around. 
- If you only end up needing the first several elements, generating the entire billion is hugely wasteful.

- Often all we need is to iterate over the collection using for and in. 
- In this case we can create **_generators_**, which can be iterated over just like lists but generate their values lazily on demand.

In [144]:
def generate_range(n):
    i = 0
    while i < n:
        yield i # every call to yield produces a value of the generator
        i += 1

for i in generate_range(10):
    print(f"i: {i}")

i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9


In [151]:
# With a generator, you can even iterate over an infinite sequence
def natural_numbers():
    n = 1
    while True:
        yield n
        n += 1

# This will run forever
# for i in natural_numbers():
#     print(i)

In [152]:
# A second way to create generators is by using for comprehensions wrapped in parentheses
evens_below_20 = (i for i in generate_range(20) if i % 2 == 0)
print(evens_below_20)
print(list(evens_below_20))

<generator object <genexpr> at 0x000001BC2C1B1E50>
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]


In [153]:
# Such a "generator comprehension" doesn't do any work until you iterate over it (using for or next)
data = natural_numbers()
evens = (x for x in data if x % 2 == 0)
even_squares = (x ** 2 for x in evens)
even_squares_ending_in_six = (x for x in even_squares if x % 10 == 6)
# and so on

print(next(even_squares_ending_in_six))
print(next(even_squares_ending_in_six))
print(next(even_squares_ending_in_six))

16
36
196


In [154]:
# Enumerate is a built-in function that gives you an iterator of tuples, where the first tuple is the index and the second is the element
names = ["Alice", "Bob", "Charlie", "Debbie"]

for i, name in enumerate(names):
    print(f"name {i} is {name}")

name 0 is Alice
name 1 is Bob
name 2 is Charlie
name 3 is Debbie


# Part 19 - Randomness

In [155]:
import random
random.seed(10) # this ensures reproducibility

four_uniform_randoms = [random.random() for _ in range(4)]
print(four_uniform_randoms)

[0.5714025946899135, 0.4288890546751146, 0.5780913011344704, 0.20609823213950174]


randrange

In [160]:
random.randrange(10) # choose randomly from range(10) = [0, 1, ..., 9]

7

In [159]:
random.randrange(3, 6) # choose randomly from range(3, 6) = [3, 4, 5]

5

shuffle

In [161]:
# shuffle
up_to_ten = list(range(10))
random.shuffle(up_to_ten)
print(up_to_ten)

[6, 8, 4, 7, 0, 2, 9, 3, 1, 5]


choice

In [165]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"])
print(my_best_friend)

Charlie


sample

In [167]:
# Choose a sample of elements without replacement (i.e., with no duplicates)
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6)
print(winning_numbers)

[23, 8, 29, 49, 55, 15]


In [197]:
# Choose a sample of elements with replacement (i.e., allowing duplicates)
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
print(four_with_replacement)

[9, 7, 7, 2]


# Part 20 - Regular Expressions

Provides a way of searching text. They are incredibly useful but also fairly complicated, so much so that there are entire books written about them. Refers: https://docs.python.org/3/library/re.html

In [198]:
import re

re_examples = [
    not re.match("a", "cat"), # cat doesn't start with a
    re.search("a", "cat"), # cat has an a in it
    not re.search("c", "dog"), # dog doesn't have a c in it
    3 == len(re.split("[ab]", "carbs")), # split on a or b to ['c', 'r', 's']
    "R-D-" == re.sub("[0-9]", "-", "R2D2") # replace digits with dashes
]

assert all(re_examples), "all the regex examples should be True"

# Part 21 - Object-Oriented Programming

Avoid using Python functions "partial", "map", "reduce", and "filter". They are not Pythonic.

# Part 22 - zip and Argument Unpacking

In [199]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]

# zip is lazy, so you have to do something like the following
pairs = [pair for pair in zip(list1, list2)]
print(pairs)

[('a', 1), ('b', 2), ('c', 3)]


In [200]:
# If the lists are different lengths, zip stops as soon as the first list ends
list1 = ['a', 'b', 'c']
list2 = [1, 2]
pairs = [pair for pair in zip(list1, list2)]
print(pairs)

[('a', 1), ('b', 2)]


In [202]:
# You can also "unzip" a list using a strange trick
letters, numbers = zip(*pairs)

print(letters)
print(numbers)

('a', 'b')
(1, 2)


In [204]:
# The asterisk performs argument unpacking, which uses the elements of pairs as individual arguments to zip. It ends up the same as if you'd called
letters, numbers = zip(('a', 1), ('b', 2))

print(letters)
print(numbers)

('a', 'b')
(1, 2)


In [206]:
# Can use argument unpacking with any function
def add(a, b): 
    return a + b

print(add(1, 2))

try:
    add([1, 2])
except TypeError:
    print("add expects two inputs")

print(add(*[1, 2]))

3
add expects two inputs
3


# Part 23 - args and kwargs

In [207]:
def doubler(f):
    def g(x):
        return 2 * f(x)
    return g

In [209]:
# This works in some cases
def f1(x):
    return x + 1

g = doubler(f1)
assert g(3) == 8, "(3 + 1) * 2 should equal 8"
assert g(-1) == 0, "(-1 + 1) * 2 should equal 0"

In [210]:
# However, it doesn't work with functions that take more than a single argument
def f2(x, y):
    return x + y

g = doubler(f2)
try:
    g(1, 2)
except TypeError:
    print("as defined, g only takes one argument")

as defined, g only takes one argument


In [211]:
# What we need is a way to specify a function that takes arbitrary arguments. We can do this with argument unpacking and a little bit of magic:
def magic(*args, **kwargs):
    print("unnamed args: ", args)
    print("keyword args: ", kwargs)

magic(1, 2, key="word", key2="word2")

# That is, when we define a function like this, args is a tuple of its unnamed arguments and kwargs is a dict of its named arguments.

unnamed args:  (1, 2)
keyword args:  {'key': 'word', 'key2': 'word2'}


In [212]:
# It works the other way too, if you want to use a list (or tuple) and dict to supply arguments to a function:
def other_way_magic(x, y, z):
    return x + y + z

x_y_list = [1, 2]
z_dict = {"z": 3}
assert other_way_magic(*x_y_list, **z_dict) == 6, "1 + 2 + 3 should be 6"

In [215]:
# Correct way
def doubler_correct(f):
    """works no matter what kind of inputs f expects"""
    def g(*args, **kwargs):
        """whatever arguments g is supplied, pass them through to f"""
        return 2 * f(*args, **kwargs)
    return g

g = doubler_correct(f2) # def f2(x, y): return x + y
assert g(1, 2) == 6, "doubler should work now"

# Part 24 - Type Annotations

In [217]:
def add(a, b):
    return a + b

assert add(10, 5) == 15, "+ is valid for numbers"
assert add([1, 2], [3]) == [1, 2, 3], "+ is valid for lists"
assert add("hi ", "there") == "hi there", "+ is valid for strings"

try:
    add(10, "five")
except TypeError:
    print("cannot add an int to a string")

cannot add an int to a string


In [218]:
# In a statically typed language, you might define a function that takes a number and returns a number like this:
def add(a: int, b: int) -> int: # This is a type annotation
    return a + b

add(10, 5) # You'd like this to be OK
add("hi ", "there") # You'd like this to be not OK

# However, Python is a dynamically typed language, which means that type hints are not enforced. 
# You can still call the function with arguments of any type, and as long as the operation in the function makes sense for those types, it will work.

'hi there'

In [219]:
# Type annotations can make your code easier to understand and reason about.
def total(xs: list) -> float:
    return sum(xs)

# This isn’t wrong, but the type is not specific enough. It’s clear we really want xs to be [a list of floats], not (say) [a list of strings].
from typing import List

def total(xs: List[float]) -> float:
    return sum(xs)

In [220]:
# This is how to type-annotate variables when you define them.
# But this is unnecessary; it's "obvious" x is an int.
x: int = 5

# However, sometimes it's less obvious from the initialization
values = [] # what type is values?
best_so_far = None # what type is best_so_far?

# In these cases, you can use a type annotation without an initialization
from typing import Optional

values: List[int] = []
best_so_far: Optional[float] = None # allowed to be either a float or None

In [221]:
# The typing module contains many other types, only a few of which we’ll ever use:

# The type annotations in this snippet are all unnecessary
from typing import Dict, Iterable, Tuple

# keys are strings, values are ints
counts: Dict[str, int] = {'data': 1, 'science': 2}

# lists and generators are both iterable
lazy = True
if lazy:
    evens: Iterable[int] = (x for x in range(10) if x % 2 == 0)
else:
    evens = [0, 2, 4, 6, 8]

# Tuples specify a type for each element
triple: Tuple[int, float, int] = (10, 2.3, 5)

In [223]:
# Finally, since Python has first-class functions, you can use functions as arguments to other functions:
from typing import Callable

# The type hint says that "repeater" is a function that takes two arguments (a string and an int) and returns a string.
def twice(repeater: Callable[[str, int], str], s: str) -> str:
    print(f'Calling repeater function with arguments: {s} and 2')
    result = repeater(s, 2)
    print(f'Repeater function returned: {result}')
    return result

def comma_repeater(s: str, n: int) -> str:
    print(f'Creating {n} copies of the string: {s}')
    n_copies = [s for _ in range(n)]
    print(f'Joining copies with commas: {n_copies}')
    result = ', '.join(n_copies)
    print(f'Comma repeater returned: {result}')
    return result

print('Calling twice function with comma_repeater and "type hints"')
assert twice(comma_repeater, "type hints") == "type hints, type hints"
print('Assertion passed')

Calling twice function with comma_repeater and "type hints"
Calling repeater function with arguments: type hints and 2
Creating 2 copies of the string: type hints
Joining copies with commas: ['type hints', 'type hints']
Comma repeater returned: type hints, type hints
Repeater function returned: type hints, type hints
Assertion passed


In [224]:
# As type annotations are not enforced, you can use them to provide additional documentation to your functions.

Number = int
Numbers = List[Number]

def total(xs: Numbers) -> Number:
    return sum(xs)