# Chapter 2 - Crash course in Python

In [1]:
from __future__ import division

## Functions

In [25]:
def double(x):
    """This Function multiplies its input by 2"""
    return x*2
def apply_to_one(f):
    """Calls the function f with one as its argument"""
    return f(1)

my_double = double
x = apply_to_one(my_double)

print x

2


In [8]:
y = apply_to_one(lambda x: x + 4)
print y

5


In [12]:
def my_print(message="my default message"):
    print message
    
my_print("hello")
my_print()

hello
my default message


In [10]:
def subtract(a=0,b=0):
    return a-b

print subtract(10,5)
print subtract(0,5)
print subtract(b=5)

5
-5
-5


## Strings

In [20]:
single_quoted_string = 'data science'
double_quoted_string = "data science"
multi_line_string = """This is the First line.
This is the Second line.
This is the Third line"""
print(single_quoted_string)
print(double_quoted_string)
print(multi_line_string)

data science
data science
This is the First line.
This is the Second line.
This is the Third line


In [18]:
tab_string = "\t"
print len(tab_string)
not_tab_string = r"\t"
print len(not_tab_string)

1
2


## Exceptions

In [22]:
try:
    print 0/0
except ZeroDivisionError:
    print "cannot divide by zero"

cannot divide by zero


## Lists

In [26]:
integer_list = [1,2,3]
heterogeneous_list = ["String",0.1,True]
list_of_lists = [integer_list, heterogeneous_list, []]

print(list_of_lists)
print len(integer_list)
print sum(integer_list)

[[1, 2, 3], ['String', 0.1, True], []]
3
6


In [27]:
x = range(10)
print x[0]
print x[1]
print x[-1]
print x[-2]
x[0] = -1
print x

0
1
9
8
[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [28]:
print x[:3]
print x[3:]
print x[1:5]
print x[-3:]
print x[1:-1]
print x[:]

[-1, 1, 2]
[3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4]
[7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8]
[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [31]:
1 in x

True

In [32]:
0 in x

False

In [34]:
x = [1,2,3]
x.extend([4,5,6])
print x

[1, 2, 3, 4, 5, 6]


In [35]:
y = x + [7,8,9]
print x
print y

[1, 2, 3, 4, 5, 6]
[1, 2, 3, 4, 5, 6, 7, 8, 9]


In [36]:
x.append(0)
print x
print y

[1, 2, 3, 4, 5, 6, 0]
[1, 2, 3, 4, 5, 6, 7, 8, 9]


In [37]:
x,y = [1,2]
print x
print y

1
2


## Tuples

In [38]:
my_list = [1,2]
my_tuple = (1,2)
other_tuple = 3,4
my_list[1] = 3
print my_list

try:
    my_tuple[1] = 3
except TypeError:
    print "cannot modify a tuple"

[1, 3]
cannot modify a tuple


Tuples are a convenient way to return multiple values from functions:

In [39]:
def sum_and_product(x,y):
    return (x+y),(x*y)

sp = sum_and_product(2,3)
s,p = sum_and_product(5,10)

print sp
print s
print p

(5, 6)
15
50


In [1]:
x,y = 1,2
x,y = y,x # Pythonic way to swap variables
print x
print y

2
1


## Dictionaries

In [2]:
empty_dict = {}
grades = {"Joel" : 80,"Tim":95}
print grades["Joel"]
try:
    print grades["Kate"]
except KeyError:
    print "no grade for Kate"

80
no grade for Kate


In [3]:
print "Joel" in grades
print "Kate" in grades

True
False


In [4]:
print grades.get("Joel",0)
print grades.get("Kate",0)
print grades.get("no one")

80
0
None


In [5]:
grades["Tim"] = 99
grades["Kate"] = 100
print len(grades)
print "Kate" in grades

3
True


In [6]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data science is awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data","#science","#datascience","#awesome"]
}

In [7]:
print tweet.keys()

['text', 'retweet_count', 'hashtags', 'user']


In [8]:
print tweet.values()

['Data science is awesome', 100, ['#data', '#science', '#datascience', '#awesome'], 'joelgrus']


In [9]:
print tweet.items()

[('text', 'Data science is awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome']), ('user', 'joelgrus')]


In [10]:
"user" in tweet

True

In [12]:
"joelgrus" in tweet.values()

True

defaultdict

In [14]:
from collections import defaultdict

document = """Hello I am a Document, my name is Document McDocumentFace
I like to repeat myself
to repeat myself is my favorite thing
repeating myself makes me a happy Document"""

word_counts = defaultdict(int)
for word in document:
    word_counts[word] += 1

print word_counts

defaultdict(<type 'int'>, {'\n': 3, ' ': 25, ',': 1, 'D': 4, 'F': 1, 'I': 2, 'H': 1, 'M': 1, 'a': 11, 'c': 6, 'e': 20, 'g': 2, 'f': 4, 'i': 6, 'h': 2, 'k': 2, 'm': 13, 'l': 6, 'o': 8, 'n': 7, 'p': 5, 's': 6, 'r': 4, 'u': 4, 't': 11, 'v': 1, 'y': 6})


In [15]:
dd_list = defaultdict(list)
dd_list[2].append(1)

dd_dict = defaultdict(dict)
dd_dict["Joel"]["City"] = "Seattle"

dd_pair = defaultdict(lambda: [0,0])
dd_pair[2][1]=1

print dd_list
print dd_dict
print dd_pair

defaultdict(<type 'list'>, {2: [1]})
defaultdict(<type 'dict'>, {'Joel': {'City': 'Seattle'}})
defaultdict(<function <lambda> at 0x7f776c2a9398>, {2: [0, 1]})


Counter

In [17]:
from collections import Counter
c = Counter([0,1,2,0])
word_counts = Counter(document)
print c
print word_counts

Counter({0: 2, 1: 1, 2: 1})
Counter({' ': 25, 'e': 20, 'm': 13, 'a': 11, 't': 11, 'o': 8, 'n': 7, 'c': 6, 'i': 6, 'l': 6, 's': 6, 'y': 6, 'p': 5, 'D': 4, 'f': 4, 'r': 4, 'u': 4, '\n': 3, 'I': 2, 'g': 2, 'h': 2, 'k': 2, ',': 1, 'F': 1, 'H': 1, 'M': 1, 'v': 1})


In [18]:
# print the 10 most common words and their counts
for word, count in word_counts.most_common(10):
    print word, count

  25
e 20
m 13
a 11
t 11
o 8
n 7
c 6
i 6
l 6


## Sets 

In [20]:
s = set()
s.add(1)
s.add(2)
s.add(2)

print len(s)
print 2 in s
print 3 in s

2
True
False


In [22]:
stopwords_list = ["a","an","at","yet","you"]
stopwords_set = set(stopwords_list)
print "zip" in stopwords_list
print "zip" in stopwords_set   # Does the same but faster

False
False


In [24]:
item_list = [1,2,3,1,2,3]
print len(item_list)
print set(item_list)
print len(set(item_list)) # number of distinct items in the list
print list(set(item_list)) # List of distinct items

6
set([1, 2, 3])
3
[1, 2, 3]


## Control Flow

In [35]:
def if_statement(a):
    if a<2:
        return "if the first case is true..."
    elif a>3:
        return "elif stands for 'else if'"
    else:
        return "when all else fails use else"
    
print if_statement(1)
print if_statement(4)
print if_statement(2)
    
def parity(x):
    return "even" if x%2 == 0 else "odd"

print parity(1)
print parity(2)

if the first case is true...
elif stands for 'else if'
when all else fails use else
odd
even


In [39]:
x = 0

while x < 5:
    print x, "is less than 5"
    x += 1

0 is less than 5
1 is less than 5
2 is less than 5
3 is less than 5
4 is less than 5


In [40]:
for x in range(5):
    print x, "is less than 5"

0 is less than 5
1 is less than 5
2 is less than 5
3 is less than 5
4 is less than 5


In [41]:
for x in range(10): 
    if x == 3:
        continue
    if x == 5:
        break
    print x

0
1
2
4


## Truthiness

In [43]:
print 1<2
print True == False

True
False


In [45]:
x = None
print x is None

True


In [47]:
def Check_string(s):
    if s:
        return s[0]
    else:
          return "Empty"
    
print Check_string(None)
print Check_string("Test")


Empty
T


In [49]:
print all([True,1,{3}]) # all true values
print all([True,1,{}]) # empty dictionary is false
print any([True,1,{}])
print all([]) # no statements
print any([])

True
False
True
True
False


## Sorting

In [51]:
x = [4,1,2,3]
y = sorted(x)
print x
print y
x.sort()
print x

[4, 1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3, 4]


In [52]:
# sort the list by absolute value from largest to smallest
z = sorted([-4,1,-2,3],key=abs,reverse=True)
print z

[-4, 3, -2, 1]


In [54]:
# sort the words and counts from highest count to lowest
wc = sorted(word_counts.items(),key=lambda(word,count): count,reverse=True)
print wc

[(' ', 25), ('e', 20), ('m', 13), ('a', 11), ('t', 11), ('o', 8), ('n', 7), ('c', 6), ('i', 6), ('l', 6), ('s', 6), ('y', 6), ('p', 5), ('D', 4), ('f', 4), ('r', 4), ('u', 4), ('\n', 3), ('I', 2), ('g', 2), ('h', 2), ('k', 2), (',', 1), ('F', 1), ('H', 1), ('M', 1), ('v', 1)]


## List Comprehensions

In [57]:
even_numbers = [x for x in range(5) if x%2 == 0]
squares = [x*x for x in range(5)]
even_squares = [x*x for x in even_numbers]

print even_numbers
print squares
print even_squares

[0, 2, 4]
[0, 1, 4, 9, 16]
[0, 4, 16]


In [58]:
square_dict = {x : x*x for x in range(5)}
square_set = {x*x for x in [1,-1]}

print square_dict
print square_set

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
set([1])


In [59]:
zeros = [0 for _ in even_numbers]
print zeros

[0, 0, 0]


In [64]:
pairs = [(x,y) for x in range(5) for y in range(3)]
print pairs
print x

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2), (3, 0), (3, 1), (3, 2), (4, 0), (4, 1), (4, 2)]
4


In [65]:
increasing_pairs = [(x,y) for x in range(5) for y in range(x+1,3)]
print increasing_pairs

[(0, 1), (0, 2), (1, 2)]


## Generators and iterators

In [67]:
def lazy_range(n):
    """a lazy version of range"""
    i = 0
    while i<n:
        yield i
        i+=1

for i in lazy_range(10):
    print i

0
1
2
3
4
5
6
7
8
9


In [72]:
def natural_numbers(limit):
    """returns 1,2,3,..."""
    n = 1
    while n<limit:
        yield n
        n += 1      
        
natural_numbers(10)

<generator object natural_numbers at 0x7f776c253fa0>

In [73]:
lazy_evens_below_20 = (i for i in lazy_range(20) if i%2 == 0)
print lazy_evens_below_20

<generator object <genexpr> at 0x7f776c2534b0>


## Randomness

In [74]:
import random

four_uniform_randoms = [random.random() for _ in range(4)]
print four_uniform_randoms

[0.9397651294476852, 0.9519280681786741, 0.6560170105882628, 0.6507802418790327]


In [75]:
random.seed(10)
print random.random()
random.seed(10)
print random.random()

0.57140259469
0.57140259469


In [77]:
print random.randrange(10)
print random.randrange(3,6)

2
5


In [78]:
up_to_ten = range(10)
random.shuffle(up_to_ten)
print up_to_ten

[4, 0, 2, 6, 9, 7, 3, 1, 5, 8]


In [79]:
my_best_friend = random.choice(["Alice","Bob","Charlie"])
print my_best_friend

Charlie


In [80]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers,6)
print winning_numbers

[36, 22, 16, 38, 25, 37]


In [81]:
four_with_replacement = [random.choice(range(10))
                        for _ in range(4)]
print four_with_replacement

[6, 1, 7, 9]


## Regular Expressions

In [91]:
import re
print all([
    not re.match("a","cat"),   # Cat does not start with a
    re.search("a","cat"),      # cat does contain a
    not re.search("c","dog"),  # dog does not contain c
    3 == len(re.split("[ab]","carbs")), # split on a or b ['c','r','s']
    "R-D-" == re.sub("[0-9]","-","R2D2") # replace digits with dashes
])

split_string = re.split("[ab]","carbs")
print split_string
new_string = re.sub("[0-9]","-","R2D2") 
print new_string

True
['c', 'r', 's']
R-D-


## OOP